/*===---- f16cintrin.h - F16C intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9243791Sdim
10341825Sdim#if !defined __IMMINTRIN_H
11341825Sdim#error "Never use <f16cintrin.h> directly; include <immintrin.h> instead."
12243791Sdim#endif
13243791Sdim
14243791Sdim#ifndef __F16CINTRIN_H
15243791Sdim#define __F16CINTRIN_H
16243791Sdim
/* Attribute sets applied to the inline functions in this file. Both carry
 * __always_inline__, __nodebug__ and __target__("f16c"); they differ only in
 * the __min_vector_width__ value (128 or 256).
 */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("f16c"),           \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("f16c"),           \
                 __min_vector_width__(256)))
22288943Sdim
/* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,
 * but that's because icc can emulate these without f16c using a library call.
 * Since we don't do that, let's leave these in f16cintrin.h.
 */
27341825Sdim
28341825Sdim/// Converts a 16-bit half-precision float value into a 32-bit float
29309124Sdim///    value.
30309124Sdim///
31309124Sdim/// \headerfile <x86intrin.h>
32309124Sdim///
33314564Sdim/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
34309124Sdim///
35309124Sdim/// \param __a
36309124Sdim///    A 16-bit half-precision float value.
37309124Sdim/// \returns The converted 32-bit float value.
38341825Sdimstatic __inline float __DEFAULT_FN_ATTRS128
39309124Sdim_cvtsh_ss(unsigned short __a)
40309124Sdim{
41353358Sdim  __v8hi __v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
42353358Sdim  __v4sf __r = __builtin_ia32_vcvtph2ps(__v);
43353358Sdim  return __r[0];
44309124Sdim}
45243791Sdim
/// Converts a 32-bit single-precision float value to a 16-bit
///    half-precision float value.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned short _cvtss_sh(float a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 32-bit single-precision float value to be converted to a 16-bit
///    half-precision float value.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns The converted 16-bit half-precision float value, taken from
///    element 0 of the conversion result.
///
/// The whole expansion is parenthesized so the macro stays a single operand
/// when it appears under sizeof or next to a postfix operator.
#define _cvtss_sh(a, imm) \
  ((unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){(a), 0, 0, 0}, \
                                                      (imm)))[0]))
71309124Sdim
/// Converts a 128-bit vector containing 32-bit float values into a
///    128-bit vector containing 16-bit half-precision float values.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_cvtps_ph(__m128 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 128-bit vector containing 32-bit float values.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector containing converted 16-bit half-precision float
///    values. The lower 64 bits are used to store the converted 16-bit
///    half-precision floating-point values.
///
/// The whole expansion is parenthesized so that a trailing postfix operator
/// (for example a vector subscript) applies to the __m128i result rather
/// than to the builtin call underneath the cast.
#define _mm_cvtps_ph(a, imm) \
  ((__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)))
97309124Sdim
98341825Sdim/// Converts a 128-bit vector containing 16-bit half-precision float
99309124Sdim///    values into a 128-bit vector containing 32-bit float values.
100309124Sdim///
101309124Sdim/// \headerfile <x86intrin.h>
102309124Sdim///
103314564Sdim/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
104309124Sdim///
105309124Sdim/// \param __a
106309124Sdim///    A 128-bit vector containing 16-bit half-precision float values. The lower
107309124Sdim///    64 bits are used in the conversion.
108309124Sdim/// \returns A 128-bit vector of [4 x float] containing converted float values.
109341825Sdimstatic __inline __m128 __DEFAULT_FN_ATTRS128
110249423Sdim_mm_cvtph_ps(__m128i __a)
111243791Sdim{
112249423Sdim  return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
113243791Sdim}
114243791Sdim
/// Converts a 256-bit vector of [8 x float] into a 128-bit vector
///    containing 16-bit half-precision float values.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm256_cvtps_ph(__m256 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 256-bit vector containing 32-bit single-precision float values to be
///    converted to 16-bit half-precision float values.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector containing the converted 16-bit half-precision
///    float values.
///
/// The whole expansion is parenthesized so that a trailing postfix operator
/// (for example a vector subscript) applies to the __m128i result rather
/// than to the builtin call underneath the cast.
#define _mm256_cvtps_ph(a, imm) \
  ((__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)))
140288943Sdim
141341825Sdim/// Converts a 128-bit vector containing 16-bit half-precision float
142341825Sdim///    values into a 256-bit vector of [8 x float].
143341825Sdim///
144341825Sdim/// \headerfile <x86intrin.h>
145341825Sdim///
146341825Sdim/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
147341825Sdim///
148341825Sdim/// \param __a
149341825Sdim///    A 128-bit vector containing 16-bit half-precision float values to be
150341825Sdim///    converted to 32-bit single-precision float values.
151341825Sdim/// \returns A vector of [8 x float] containing the converted 32-bit
152341825Sdim///    single-precision float values.
153341825Sdimstatic __inline __m256 __DEFAULT_FN_ATTRS256
154341825Sdim_mm256_cvtph_ps(__m128i __a)
155341825Sdim{
156341825Sdim  return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
157341825Sdim}
158341825Sdim
/* The attribute helper macros are private to this header; remove them so they
 * do not remain defined for code that follows.
 */
#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS128
161341825Sdim
162243791Sdim#endif /* __F16CINTRIN_H */
163