/* ammintrin.h revision 296417 */
/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __AMMINTRIN_H
#define __AMMINTRIN_H

#include <pmmintrin.h>

/* Define the default attributes for the functions in this file: each one is
 * always inlined, carries no debug info of its own, and is compiled for the
 * "sse4a" target regardless of the translation unit's default target. The
 * macro is #undef'd at the end of the header so it does not leak to includers.
 */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a")))

/// \brief Extracts the specified bits from the lower 64 bits of the 128-bit
///    integer vector operand at the index idx and of the length len.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx);
/// \endcode
///
/// This intrinsic corresponds to the \c EXTRQ instruction.
///
/// Implemented as a macro; \a len and \a idx are each converted to \c char
/// before being handed to the builtin.
///
/// \param x
///    The value from which bits are extracted.
/// \param len
///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
///    are zero, the length is interpreted as 64.
/// \param idx
///    Bits [5:0] specify the index of the least significant bit; the other
///    bits are ignored. If the sum of the index and length is greater than
///    64, the result is undefined. If the length and index are both zero,
///    bits [63:0] of parameter x are extracted. If the length is zero
///    but the index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector whose lower 64 bits contain the bits
///    extracted from the source operand.
#define _mm_extracti_si64(x, len, idx) \
  ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \
                                  (char)(len), (char)(idx)))

/// \brief Extracts the specified bits from the lower 64 bits of the 128-bit
///    integer vector operand at the index and of the length specified by __y.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c EXTRQ instruction.
///
/// \param __x
///    The value from which bits are extracted.
/// \param __y
///    Specifies the index of the least significant bit at [13:8]
///    and the length at [5:0]; all other bits are ignored.
///    If bits [5:0] are zero, the length is interpreted as 64.
///    If the sum of the index and length is greater than 64, the result is
///    undefined. If the length and index are both zero, bits [63:0] of
///    parameter __x are extracted. If the length is zero but the index is
///    non-zero, the result is undefined.
/// \returns A 128-bit vector whose lower 64 bits contain the bits extracted
///    from the source operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_extract_si64(__m128i __x, __m128i __y)
{
  return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
}

/// \brief Inserts bits of a specified length from the source integer vector
///    y into the lower 64 bits of the destination integer vector x at the
///    index idx and of the length len.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len,
/// const int idx);
/// \endcode
///
/// This intrinsic corresponds to the \c INSERTQ instruction.
///
/// Implemented as a macro; \a len and \a idx are each converted to \c char
/// before being handed to the builtin.
///
/// \param x
///    The destination operand where bits will be inserted. The inserted bits
///    are defined by the length len and by the index idx specifying the least
///    significant bit.
/// \param y
///    The source operand containing the bits to be extracted. The extracted
///    bits are the least significant bits of operand y of length len.
/// \param len
///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
///    are zero, the length is interpreted as 64.
/// \param idx
///    Bits [5:0] specify the index of the least significant bit; the other
///    bits are ignored. If the sum of the index and length is greater than
///    64, the result is undefined. If the length and index are both zero,
///    bits [63:0] of parameter y are inserted into parameter x. If the
///    length is zero but the index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector containing the original lower 64-bits
///    of destination operand x with the specified bitfields replaced by the
///    lower bits of source operand y. The upper 64 bits of the return value
///    are undefined.
#define _mm_inserti_si64(x, y, len, idx) \
  ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \
                                    (__v2di)(__m128i)(y), \
                                    (char)(len), (char)(idx)))

/// \brief Inserts bits of a specified length from the source integer vector
///    __y into the lower 64 bits of the destination integer vector __x at
///    the index and of the length specified by __y.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c INSERTQ instruction.
///
/// \param __x
///    The destination operand where bits will be inserted. The inserted bits
///    are defined by the length and by the index of the least significant bit
///    specified by operand __y.
/// \param __y
///    The source operand containing the bits to be extracted. The extracted
///    bits are the least significant bits of operand __y with length specified
///    by bits [69:64]. These are inserted into the destination at the index
///    specified by bits [77:72]; all other bits are ignored.
///    If bits [69:64] are zero, the length is interpreted as 64.
///    If the sum of the index and length is greater than 64, the result is
///    undefined. If the length and index are both zero, bits [63:0] of
///    parameter __y are inserted into parameter __x. If the length
///    is zero but the index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector containing the original lower 64-bits
///    of destination operand __x with the specified bitfields replaced by the
///    lower bits of source operand __y. The upper 64 bits of the return value
///    are undefined.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_insert_si64(__m128i __x, __m128i __y)
{
  return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);
}

/// \brief Stores a 64-bit double-precision value in a 64-bit memory location.
///    To minimize caching, the data is flagged as non-temporal (unlikely to be
///    used again soon).
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c MOVNTSD instruction.
///
/// \param __p
///    The 64-bit memory location used to store the register value.
/// \param __a
///    The 64-bit double-precision floating-point register value to
///    be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_sd(double *__p, __m128d __a)
{
  __builtin_ia32_movntsd(__p, (__v2df)__a);
}

/// \brief Stores a 32-bit single-precision floating-point value in a 32-bit
///    memory location. To minimize caching, the data is flagged as
///    non-temporal (unlikely to be used again soon).
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c MOVNTSS instruction.
///
/// \param __p
///    The 32-bit memory location used to store the register value.
/// \param __a
///    The 32-bit single-precision floating-point register value to
///    be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_ss(float *__p, __m128 __a)
{
  __builtin_ia32_movntss(__p, (__v4sf)__a);
}

#undef __DEFAULT_FN_ATTRS

#endif /* __AMMINTRIN_H */