/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

10239313Sdim#ifndef __AMMINTRIN_H
11239313Sdim#define __AMMINTRIN_H
12239313Sdim
13239313Sdim#include <pmmintrin.h>
14239313Sdim
15288943Sdim/* Define the default attributes for the functions in this file. */
16341825Sdim#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a"), __min_vector_width__(128)))
17288943Sdim
/// Extracts the specified bits from the lower 64 bits of the 128-bit
///    integer vector operand at the index \a idx and of the length \a len.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
///
/// This intrinsic is implemented as a macro so that \a len and \a idx are
///    passed to the builtin as constant expressions.
///
/// \param x
///    The value from which bits are extracted.
/// \param len
///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
///    are zero, the length is interpreted as 64.
/// \param idx
///    Bits [5:0] specify the index of the least significant bit; the other
///    bits are ignored. If the sum of the index and length is greater than 64,
///    the result is undefined. If the length and index are both zero, bits
///    [63:0] of parameter \a x are extracted. If the length is zero but the
///    index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector whose lower 64 bits contain the bits
///    extracted from the source operand. The upper 64 bits of the return
///    value are undefined.
#define _mm_extracti_si64(x, len, idx) \
  ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \
                                  (char)(len), (char)(idx)))

46341825Sdim/// Extracts the specified bits from the lower 64 bits of the 128-bit
47314564Sdim///    integer vector operand at the index and of the length specified by
48314564Sdim///    \a __y.
49288943Sdim///
50288943Sdim/// \headerfile <x86intrin.h>
51288943Sdim///
52314564Sdim/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
53288943Sdim///
54288943Sdim/// \param __x
55288943Sdim///    The value from which bits are extracted.
56288943Sdim/// \param __y
57309124Sdim///    Specifies the index of the least significant bit at [13:8] and the
58309124Sdim///    length at [5:0]; all other bits are ignored. If bits [5:0] are zero, the
59309124Sdim///    length is interpreted as 64. If the sum of the index and length is
60309124Sdim///    greater than 64, the result is undefined. If the length and index are
61314564Sdim///    both zero, bits [63:0] of parameter \a __x are extracted. If the length
62314564Sdim///    is zero but the index is non-zero, the result is undefined.
63296417Sdim/// \returns A 128-bit vector whose lower 64 bits contain the bits extracted
64288943Sdim///    from the source operand.
65288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
66239313Sdim_mm_extract_si64(__m128i __x, __m128i __y)
67239313Sdim{
68239313Sdim  return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
69239313Sdim}
70239313Sdim
/// Inserts bits of a specified length from the source integer vector
///    \a y into the lower 64 bits of the destination integer vector \a x at
///    the index \a idx and of the length \a len.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len,
///                          const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
///
/// This intrinsic is implemented as a macro so that \a len and \a idx are
///    passed to the builtin as constant expressions.
///
/// \param x
///    The destination operand where bits will be inserted. The inserted bits
///    are defined by the length \a len and by the index \a idx specifying the
///    least significant bit.
/// \param y
///    The source operand containing the bits to be extracted. The extracted
///    bits are the least significant bits of operand \a y of length \a len.
/// \param len
///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
///    are zero, the length is interpreted as 64.
/// \param idx
///    Bits [5:0] specify the index of the least significant bit; the other
///    bits are ignored. If the sum of the index and length is greater than 64,
///    the result is undefined. If the length and index are both zero, bits
///    [63:0] of parameter \a y are inserted into parameter \a x. If the length
///    is zero but the index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector containing the original lower 64-bits of
///    destination operand \a x with the specified bitfields replaced by the
///    lower bits of source operand \a y. The upper 64 bits of the return value
///    are undefined.
#define _mm_inserti_si64(x, y, len, idx) \
  ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \
                                    (__v2di)(__m128i)(y), \
                                    (char)(len), (char)(idx)))

109341825Sdim/// Inserts bits of a specified length from the source integer vector
110314564Sdim///    \a __y into the lower 64 bits of the destination integer vector \a __x
111314564Sdim///    at the index and of the length specified by \a __y.
112288943Sdim///
113288943Sdim/// \headerfile <x86intrin.h>
114288943Sdim///
115314564Sdim/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
116288943Sdim///
117288943Sdim/// \param __x
118296417Sdim///    The destination operand where bits will be inserted. The inserted bits
119296417Sdim///    are defined by the length and by the index of the least significant bit
120314564Sdim///    specified by operand \a __y.
121288943Sdim/// \param __y
122296417Sdim///    The source operand containing the bits to be extracted. The extracted
123314564Sdim///    bits are the least significant bits of operand \a __y with length
124314564Sdim///    specified by bits [69:64]. These are inserted into the destination at the
125314564Sdim///    index specified by bits [77:72]; all other bits are ignored. If bits
126314564Sdim///    [69:64] are zero, the length is interpreted as 64. If the sum of the
127314564Sdim///    index and length is greater than 64, the result is undefined. If the
128314564Sdim///    length and index are both zero, bits [63:0] of parameter \a __y are
129314564Sdim///    inserted into parameter \a __x. If the length is zero but the index is
130314564Sdim///    non-zero, the result is undefined.
131309124Sdim/// \returns A 128-bit integer vector containing the original lower 64-bits of
132314564Sdim///    destination operand \a __x with the specified bitfields replaced by the
133314564Sdim///    lower bits of source operand \a __y. The upper 64 bits of the return
134314564Sdim///    value are undefined.
135288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
136239313Sdim_mm_insert_si64(__m128i __x, __m128i __y)
137239313Sdim{
138239313Sdim  return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);
139239313Sdim}
140239313Sdim
141341825Sdim/// Stores a 64-bit double-precision value in a 64-bit memory location.
142288943Sdim///    To minimize caching, the data is flagged as non-temporal (unlikely to be
143288943Sdim///    used again soon).
144288943Sdim///
145288943Sdim/// \headerfile <x86intrin.h>
146288943Sdim///
147314564Sdim/// This intrinsic corresponds to the <c> MOVNTSD </c> instruction.
148288943Sdim///
149288943Sdim/// \param __p
150288943Sdim///    The 64-bit memory location used to store the register value.
151288943Sdim/// \param __a
152309124Sdim///    The 64-bit double-precision floating-point register value to be stored.
153288943Sdimstatic __inline__ void __DEFAULT_FN_ATTRS
154239313Sdim_mm_stream_sd(double *__p, __m128d __a)
155239313Sdim{
156239313Sdim  __builtin_ia32_movntsd(__p, (__v2df)__a);
157239313Sdim}
158239313Sdim
159341825Sdim/// Stores a 32-bit single-precision floating-point value in a 32-bit
160288943Sdim///    memory location. To minimize caching, the data is flagged as
161288943Sdim///    non-temporal (unlikely to be used again soon).
162288943Sdim///
163288943Sdim/// \headerfile <x86intrin.h>
164288943Sdim///
165314564Sdim/// This intrinsic corresponds to the <c> MOVNTSS </c> instruction.
166288943Sdim///
167288943Sdim/// \param __p
168288943Sdim///    The 32-bit memory location used to store the register value.
169288943Sdim/// \param __a
170309124Sdim///    The 32-bit single-precision floating-point register value to be stored.
171288943Sdimstatic __inline__ void __DEFAULT_FN_ATTRS
172239313Sdim_mm_stream_ss(float *__p, __m128 __a)
173239313Sdim{
174239313Sdim  __builtin_ia32_movntss(__p, (__v4sf)__a);
175239313Sdim}
176239313Sdim
177288943Sdim#undef __DEFAULT_FN_ATTRS
178288943Sdim
179239313Sdim#endif /* __AMMINTRIN_H */
180