1/*===--------------- sha512intrin.h - SHA512 intrinsics -----------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10#ifndef __IMMINTRIN_H 11#error "Never use <sha512intrin.h> directly; include <immintrin.h> instead." 12#endif // __IMMINTRIN_H 13 14#ifndef __SHA512INTRIN_H 15#define __SHA512INTRIN_H 16 17#define __DEFAULT_FN_ATTRS256 \ 18 __attribute__((__always_inline__, __nodebug__, __target__("sha512"), \ 19 __min_vector_width__(256))) 20 21/// This intrinisc is one of the two SHA512 message scheduling instructions. 22/// The intrinsic performs an intermediate calculation for the next four 23/// SHA512 message qwords. The calculated results are stored in \a dst. 24/// 25/// \headerfile <immintrin.h> 26/// 27/// \code 28/// __m256i _mm256_sha512msg1_epi64(__m256i __A, __m128i __B) 29/// \endcode 30/// 31/// This intrinsic corresponds to the \c VSHA512MSG1 instruction. 32/// 33/// \param __A 34/// A 256-bit vector of [4 x long long]. 35/// \param __B 36/// A 128-bit vector of [2 x long long]. 37/// \returns 38/// A 256-bit vector of [4 x long long]. 39/// 40/// \code{.operation} 41/// DEFINE ROR64(qword, n) { 42/// count := n % 64 43/// dest := (qword >> count) | (qword << (64 - count)) 44/// RETURN dest 45/// } 46/// DEFINE SHR64(qword, n) { 47/// RETURN qword >> n 48/// } 49/// DEFINE s0(qword): 50/// RETURN ROR64(qword,1) ^ ROR64(qword, 8) ^ SHR64(qword, 7) 51/// } 52/// W[4] := __B.qword[0] 53/// W[3] := __A.qword[3] 54/// W[2] := __A.qword[2] 55/// W[1] := __A.qword[1] 56/// W[0] := __A.qword[0] 57/// dst.qword[3] := W[3] + s0(W[4]) 58/// dst.qword[2] := W[2] + s0(W[3]) 59/// dst.qword[1] := W[1] + s0(W[2]) 60/// dst.qword[0] := W[0] + s0(W[1]) 61/// dst[MAX:256] := 0 62/// \endcode 63static __inline__ __m256i __DEFAULT_FN_ATTRS256 64_mm256_sha512msg1_epi64(__m256i __A, __m128i __B) { 65 return (__m256i)__builtin_ia32_vsha512msg1((__v4du)__A, (__v2du)__B); 66} 67 68/// This intrinisc is one of the two SHA512 message scheduling instructions. 69/// The intrinsic performs the final calculation for the next four SHA512 70/// message qwords. The calculated results are stored in \a dst. 71/// 72/// \headerfile <immintrin.h> 73/// 74/// \code 75/// __m256i _mm256_sha512msg2_epi64(__m256i __A, __m256i __B) 76/// \endcode 77/// 78/// This intrinsic corresponds to the \c VSHA512MSG2 instruction. 79/// 80/// \param __A 81/// A 256-bit vector of [4 x long long]. 82/// \param __B 83/// A 256-bit vector of [4 x long long]. 84/// \returns 85/// A 256-bit vector of [4 x long long]. 86/// 87/// \code{.operation} 88/// DEFINE ROR64(qword, n) { 89/// count := n % 64 90/// dest := (qword >> count) | (qword << (64 - count)) 91/// RETURN dest 92/// } 93/// DEFINE SHR64(qword, n) { 94/// RETURN qword >> n 95/// } 96/// DEFINE s1(qword) { 97/// RETURN ROR64(qword,19) ^ ROR64(qword, 61) ^ SHR64(qword, 6) 98/// } 99/// W[14] := __B.qword[2] 100/// W[15] := __B.qword[3] 101/// W[16] := __A.qword[0] + s1(W[14]) 102/// W[17] := __A.qword[1] + s1(W[15]) 103/// W[18] := __A.qword[2] + s1(W[16]) 104/// W[19] := __A.qword[3] + s1(W[17]) 105/// dst.qword[3] := W[19] 106/// dst.qword[2] := W[18] 107/// dst.qword[1] := W[17] 108/// dst.qword[0] := W[16] 109/// dst[MAX:256] := 0 110/// \endcode 111static __inline__ __m256i __DEFAULT_FN_ATTRS256 112_mm256_sha512msg2_epi64(__m256i __A, __m256i __B) { 113 return (__m256i)__builtin_ia32_vsha512msg2((__v4du)__A, (__v4du)__B); 114} 115 116/// This intrinisc performs two rounds of SHA512 operation using initial SHA512 117/// state (C,D,G,H) from \a __A, an initial SHA512 state (A,B,E,F) from 118/// \a __A, and a pre-computed sum of the next two round message qwords and 119/// the corresponding round constants from \a __C (only the two lower qwords 120/// of the third operand). The updated SHA512 state (A,B,E,F) is written to 121/// \a __A, and \a __A can be used as the updated state (C,D,G,H) in later 122/// rounds. 123/// 124/// \headerfile <immintrin.h> 125/// 126/// \code 127/// __m256i _mm256_sha512rnds2_epi64(__m256i __A, __m256i __B, __m128i __C) 128/// \endcode 129/// 130/// This intrinsic corresponds to the \c VSHA512RNDS2 instruction. 131/// 132/// \param __A 133/// A 256-bit vector of [4 x long long]. 134/// \param __B 135/// A 256-bit vector of [4 x long long]. 136/// \param __C 137/// A 128-bit vector of [2 x long long]. 138/// \returns 139/// A 256-bit vector of [4 x long long]. 140/// 141/// \code{.operation} 142/// DEFINE ROR64(qword, n) { 143/// count := n % 64 144/// dest := (qword >> count) | (qword << (64 - count)) 145/// RETURN dest 146/// } 147/// DEFINE SHR64(qword, n) { 148/// RETURN qword >> n 149/// } 150/// DEFINE cap_sigma0(qword) { 151/// RETURN ROR64(qword,28) ^ ROR64(qword, 34) ^ ROR64(qword, 39) 152/// } 153/// DEFINE cap_sigma1(qword) { 154/// RETURN ROR64(qword,14) ^ ROR64(qword, 18) ^ ROR64(qword, 41) 155/// } 156/// DEFINE MAJ(a,b,c) { 157/// RETURN (a & b) ^ (a & c) ^ (b & c) 158/// } 159/// DEFINE CH(e,f,g) { 160/// RETURN (e & f) ^ (g & ~e) 161/// } 162/// A[0] := __B.qword[3] 163/// B[0] := __B.qword[2] 164/// C[0] := __C.qword[3] 165/// D[0] := __C.qword[2] 166/// E[0] := __B.qword[1] 167/// F[0] := __B.qword[0] 168/// G[0] := __C.qword[1] 169/// H[0] := __C.qword[0] 170/// WK[0]:= __A.qword[0] 171/// WK[1]:= __A.qword[1] 172/// FOR i := 0 to 1: 173/// A[i+1] := CH(E[i], F[i], G[i]) + 174/// cap_sigma1(E[i]) + WK[i] + H[i] + 175/// MAJ(A[i], B[i], C[i]) + 176/// cap_sigma0(A[i]) 177/// B[i+1] := A[i] 178/// C[i+1] := B[i] 179/// D[i+1] := C[i] 180/// E[i+1] := CH(E[i], F[i], G[i]) + 181/// cap_sigma1(E[i]) + WK[i] + H[i] + D[i] 182/// F[i+1] := E[i] 183/// G[i+1] := F[i] 184/// H[i+1] := G[i] 185/// ENDFOR 186/// dst.qword[3] := A[2] 187/// dst.qword[2] := B[2] 188/// dst.qword[1] := E[2] 189/// dst.qword[0] := F[2] 190/// dst[MAX:256] := 0 191/// \endcode 192static __inline__ __m256i __DEFAULT_FN_ATTRS256 193_mm256_sha512rnds2_epi64(__m256i __A, __m256i __B, __m128i __C) { 194 return (__m256i)__builtin_ia32_vsha512rnds2((__v4du)__A, (__v4du)__B, 195 (__v2du)__C); 196} 197 198#undef __DEFAULT_FN_ATTRS256 199 200#endif // __SHA512INTRIN_H 201