1261991Sdim/*===---- f16cintrin.h - F16C intrinsics -----------------------------------=== 2243791Sdim * 3353358Sdim * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim * See https://llvm.org/LICENSE.txt for license information. 5353358Sdim * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6243791Sdim * 7243791Sdim *===-----------------------------------------------------------------------=== 8243791Sdim */ 9243791Sdim 10341825Sdim#if !defined __IMMINTRIN_H 11341825Sdim#error "Never use <f16cintrin.h> directly; include <immintrin.h> instead." 12243791Sdim#endif 13243791Sdim 14243791Sdim#ifndef __F16CINTRIN_H 15243791Sdim#define __F16CINTRIN_H 16243791Sdim 17288943Sdim/* Define the default attributes for the functions in this file. */ 18341825Sdim#define __DEFAULT_FN_ATTRS128 \ 19341825Sdim __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(128))) 20341825Sdim#define __DEFAULT_FN_ATTRS256 \ 21341825Sdim __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(256))) 22288943Sdim 23341825Sdim/* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h, 24341825Sdim * but that's because icc can emulate these without f16c using a library call. 25341825Sdim * Since we don't do that let's leave these in f16cintrin.h. 26341825Sdim */ 27341825Sdim 28341825Sdim/// Converts a 16-bit half-precision float value into a 32-bit float 29309124Sdim/// value. 30309124Sdim/// 31309124Sdim/// \headerfile <x86intrin.h> 32309124Sdim/// 33314564Sdim/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction. 34309124Sdim/// 35309124Sdim/// \param __a 36309124Sdim/// A 16-bit half-precision float value. 37309124Sdim/// \returns The converted 32-bit float value. 38341825Sdimstatic __inline float __DEFAULT_FN_ATTRS128 39309124Sdim_cvtsh_ss(unsigned short __a) 40309124Sdim{ 41353358Sdim __v8hi __v = {(short)__a, 0, 0, 0, 0, 0, 0, 0}; 42353358Sdim __v4sf __r = __builtin_ia32_vcvtph2ps(__v); 43353358Sdim return __r[0]; 44309124Sdim} 45243791Sdim 46341825Sdim/// Converts a 32-bit single-precision float value to a 16-bit 47309124Sdim/// half-precision float value. 48309124Sdim/// 49309124Sdim/// \headerfile <x86intrin.h> 50309124Sdim/// 51309124Sdim/// \code 52309124Sdim/// unsigned short _cvtss_sh(float a, const int imm); 53309124Sdim/// \endcode 54309124Sdim/// 55314564Sdim/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction. 56309124Sdim/// 57309124Sdim/// \param a 58309124Sdim/// A 32-bit single-precision float value to be converted to a 16-bit 59309124Sdim/// half-precision float value. 60309124Sdim/// \param imm 61314564Sdim/// An immediate value controlling rounding using bits [2:0]: \n 62314564Sdim/// 000: Nearest \n 63314564Sdim/// 001: Down \n 64314564Sdim/// 010: Up \n 65314564Sdim/// 011: Truncate \n 66309124Sdim/// 1XX: Use MXCSR.RC for rounding 67309124Sdim/// \returns The converted 16-bit half-precision float value. 68341825Sdim#define _cvtss_sh(a, imm) \ 69321369Sdim (unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){a, 0, 0, 0}, \ 70341825Sdim (imm)))[0]) 71309124Sdim 72341825Sdim/// Converts a 128-bit vector containing 32-bit float values into a 73309124Sdim/// 128-bit vector containing 16-bit half-precision float values. 74309124Sdim/// 75309124Sdim/// \headerfile <x86intrin.h> 76309124Sdim/// 77309124Sdim/// \code 78309124Sdim/// __m128i _mm_cvtps_ph(__m128 a, const int imm); 79309124Sdim/// \endcode 80309124Sdim/// 81314564Sdim/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction. 82309124Sdim/// 83309124Sdim/// \param a 84309124Sdim/// A 128-bit vector containing 32-bit float values. 85309124Sdim/// \param imm 86314564Sdim/// An immediate value controlling rounding using bits [2:0]: \n 87314564Sdim/// 000: Nearest \n 88314564Sdim/// 001: Down \n 89314564Sdim/// 010: Up \n 90314564Sdim/// 011: Truncate \n 91309124Sdim/// 1XX: Use MXCSR.RC for rounding 92309124Sdim/// \returns A 128-bit vector containing converted 16-bit half-precision float 93309124Sdim/// values. The lower 64 bits are used to store the converted 16-bit 94309124Sdim/// half-precision floating-point values. 95341825Sdim#define _mm_cvtps_ph(a, imm) \ 96341825Sdim (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)) 97309124Sdim 98341825Sdim/// Converts a 128-bit vector containing 16-bit half-precision float 99309124Sdim/// values into a 128-bit vector containing 32-bit float values. 100309124Sdim/// 101309124Sdim/// \headerfile <x86intrin.h> 102309124Sdim/// 103314564Sdim/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction. 104309124Sdim/// 105309124Sdim/// \param __a 106309124Sdim/// A 128-bit vector containing 16-bit half-precision float values. The lower 107309124Sdim/// 64 bits are used in the conversion. 108309124Sdim/// \returns A 128-bit vector of [4 x float] containing converted float values. 109341825Sdimstatic __inline __m128 __DEFAULT_FN_ATTRS128 110249423Sdim_mm_cvtph_ps(__m128i __a) 111243791Sdim{ 112249423Sdim return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a); 113243791Sdim} 114243791Sdim 115341825Sdim/// Converts a 256-bit vector of [8 x float] into a 128-bit vector 116341825Sdim/// containing 16-bit half-precision float values. 117341825Sdim/// 118341825Sdim/// \headerfile <x86intrin.h> 119341825Sdim/// 120341825Sdim/// \code 121341825Sdim/// __m128i _mm256_cvtps_ph(__m256 a, const int imm); 122341825Sdim/// \endcode 123341825Sdim/// 124341825Sdim/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction. 125341825Sdim/// 126341825Sdim/// \param a 127341825Sdim/// A 256-bit vector containing 32-bit single-precision float values to be 128341825Sdim/// converted to 16-bit half-precision float values. 129341825Sdim/// \param imm 130341825Sdim/// An immediate value controlling rounding using bits [2:0]: \n 131341825Sdim/// 000: Nearest \n 132341825Sdim/// 001: Down \n 133341825Sdim/// 010: Up \n 134341825Sdim/// 011: Truncate \n 135341825Sdim/// 1XX: Use MXCSR.RC for rounding 136341825Sdim/// \returns A 128-bit vector containing the converted 16-bit half-precision 137341825Sdim/// float values. 138341825Sdim#define _mm256_cvtps_ph(a, imm) \ 139341825Sdim (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)) 140288943Sdim 141341825Sdim/// Converts a 128-bit vector containing 16-bit half-precision float 142341825Sdim/// values into a 256-bit vector of [8 x float]. 143341825Sdim/// 144341825Sdim/// \headerfile <x86intrin.h> 145341825Sdim/// 146341825Sdim/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction. 147341825Sdim/// 148341825Sdim/// \param __a 149341825Sdim/// A 128-bit vector containing 16-bit half-precision float values to be 150341825Sdim/// converted to 32-bit single-precision float values. 151341825Sdim/// \returns A vector of [8 x float] containing the converted 32-bit 152341825Sdim/// single-precision float values. 153341825Sdimstatic __inline __m256 __DEFAULT_FN_ATTRS256 154341825Sdim_mm256_cvtph_ps(__m128i __a) 155341825Sdim{ 156341825Sdim return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a); 157341825Sdim} 158341825Sdim 159341825Sdim#undef __DEFAULT_FN_ATTRS128 160341825Sdim#undef __DEFAULT_FN_ATTRS256 161341825Sdim 162243791Sdim#endif /* __F16CINTRIN_H */ 163