1// tiger.cpp - written and placed in the public domain by Wei Dai 2 3#include "pch.h" 4#include "tiger.h" 5#include "misc.h" 6#include "cpu.h" 7 8NAMESPACE_BEGIN(CryptoPP) 9 10void Tiger::InitState(HashWordType *state) 11{ 12 state[0] = W64LIT(0x0123456789ABCDEF); 13 state[1] = W64LIT(0xFEDCBA9876543210); 14 state[2] = W64LIT(0xF096A5B4C3B2E187); 15} 16 17void Tiger::TruncatedFinal(byte *hash, size_t size) 18{ 19 ThrowIfInvalidTruncatedSize(size); 20 21 PadLastBlock(56, 0x01); 22 CorrectEndianess(m_data, m_data, 56); 23 24 m_data[7] = GetBitCountLo(); 25 26 Transform(m_state, m_data); 27 CorrectEndianess(m_state, m_state, DigestSize()); 28 memcpy(hash, m_state, size); 29 30 Restart(); // reinit for next use 31} 32 33void Tiger::Transform (word64 *digest, const word64 *X) 34{ 35#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86 36 if (HasSSE2()) 37 { 38#ifdef __GNUC__ 39 __asm__ __volatile__ 40 ( 41 ".intel_syntax noprefix;" 42 AS1( push ebx) 43#else 44 #if _MSC_VER < 1300 45 const word64 *t = table; 46 AS2( mov edx, t) 47 #else 48 AS2( lea edx, [table]) 49 #endif 50 AS2( mov eax, digest) 51 AS2( mov esi, X) 52#endif 53 AS2( movq mm0, [eax]) 54 AS2( movq mm1, [eax+1*8]) 55 AS2( movq mm5, mm1) 56 AS2( movq mm2, [eax+2*8]) 57 AS2( movq mm7, [edx+4*2048+0*8]) 58 AS2( movq mm6, [edx+4*2048+1*8]) 59 AS2( mov ecx, esp) 60 AS2( and esp, 0xfffffff0) 61 AS2( sub esp, 8*8) 62 AS1( push ecx) 63 64#define SSE2_round(a,b,c,x,mul) \ 65 AS2( pxor c, [x])\ 66 AS2( movd ecx, c)\ 67 AS2( movzx edi, cl)\ 68 AS2( movq mm3, [edx+0*2048+edi*8])\ 69 AS2( movzx edi, ch)\ 70 AS2( movq mm4, [edx+3*2048+edi*8])\ 71 AS2( shr ecx, 16)\ 72 AS2( movzx edi, cl)\ 73 AS2( pxor mm3, [edx+1*2048+edi*8])\ 74 AS2( movzx edi, ch)\ 75 AS2( pxor mm4, [edx+2*2048+edi*8])\ 76 AS3( pextrw ecx, c, 2)\ 77 AS2( movzx edi, cl)\ 78 AS2( pxor mm3, [edx+2*2048+edi*8])\ 79 AS2( movzx edi, ch)\ 80 AS2( pxor mm4, [edx+1*2048+edi*8])\ 81 AS3( pextrw ecx, c, 3)\ 82 AS2( movzx edi, cl)\ 83 AS2( pxor mm3, [edx+3*2048+edi*8])\ 84 AS2( psubq a, mm3)\ 85 AS2( movzx edi, ch)\ 86 AS2( pxor mm4, [edx+0*2048+edi*8])\ 87 AS2( paddq b, mm4)\ 88 SSE2_mul_##mul(b) 89 90#define SSE2_mul_5(b) \ 91 AS2( movq mm3, b)\ 92 AS2( psllq b, 2)\ 93 AS2( paddq b, mm3) 94 95#define SSE2_mul_7(b) \ 96 AS2( movq mm3, b)\ 97 AS2( psllq b, 3)\ 98 AS2( psubq b, mm3) 99 100#define SSE2_mul_9(b) \ 101 AS2( movq mm3, b)\ 102 AS2( psllq b, 3)\ 103 AS2( paddq b, mm3) 104 105#define label2_5 1 106#define label2_7 2 107#define label2_9 3 108 109#define SSE2_pass(A,B,C,mul,X) \ 110 AS2( xor ebx, ebx)\ 111 ASL(mul)\ 112 SSE2_round(A,B,C,X+0*8+ebx,mul)\ 113 SSE2_round(B,C,A,X+1*8+ebx,mul)\ 114 AS2( cmp ebx, 6*8)\ 115 ASJ( je, label2_##mul, f)\ 116 SSE2_round(C,A,B,X+2*8+ebx,mul)\ 117 AS2( add ebx, 3*8)\ 118 ASJ( jmp, mul, b)\ 119 ASL(label2_##mul) 120 121#define SSE2_key_schedule(Y,X) \ 122 AS2( movq mm3, [X+7*8])\ 123 AS2( pxor mm3, mm6)\ 124 AS2( movq mm4, [X+0*8])\ 125 AS2( psubq mm4, mm3)\ 126 AS2( movq [Y+0*8], mm4)\ 127 AS2( pxor mm4, [X+1*8])\ 128 AS2( movq mm3, mm4)\ 129 AS2( movq [Y+1*8], mm4)\ 130 AS2( paddq mm4, [X+2*8])\ 131 AS2( pxor mm3, mm7)\ 132 AS2( psllq mm3, 19)\ 133 AS2( movq [Y+2*8], mm4)\ 134 AS2( pxor mm3, mm4)\ 135 AS2( movq mm4, [X+3*8])\ 136 AS2( psubq mm4, mm3)\ 137 AS2( movq [Y+3*8], mm4)\ 138 AS2( pxor mm4, [X+4*8])\ 139 AS2( movq mm3, mm4)\ 140 AS2( movq [Y+4*8], mm4)\ 141 AS2( paddq mm4, [X+5*8])\ 142 AS2( pxor mm3, mm7)\ 143 AS2( psrlq mm3, 23)\ 144 AS2( movq [Y+5*8], mm4)\ 145 AS2( pxor mm3, mm4)\ 146 AS2( movq mm4, [X+6*8])\ 147 AS2( psubq mm4, mm3)\ 148 AS2( movq [Y+6*8], mm4)\ 149 AS2( pxor mm4, [X+7*8])\ 150 AS2( movq mm3, mm4)\ 151 AS2( movq [Y+7*8], mm4)\ 152 AS2( paddq mm4, [Y+0*8])\ 153 AS2( pxor mm3, mm7)\ 154 AS2( psllq mm3, 19)\ 155 AS2( movq [Y+0*8], mm4)\ 156 AS2( pxor mm3, mm4)\ 157 AS2( movq mm4, [Y+1*8])\ 158 AS2( psubq mm4, mm3)\ 159 AS2( movq [Y+1*8], mm4)\ 160 AS2( pxor mm4, [Y+2*8])\ 161 AS2( movq mm3, mm4)\ 162 AS2( movq [Y+2*8], mm4)\ 163 AS2( paddq mm4, [Y+3*8])\ 164 AS2( pxor mm3, mm7)\ 165 AS2( psrlq mm3, 23)\ 166 AS2( movq [Y+3*8], mm4)\ 167 AS2( pxor mm3, mm4)\ 168 AS2( movq mm4, [Y+4*8])\ 169 AS2( psubq mm4, mm3)\ 170 AS2( movq [Y+4*8], mm4)\ 171 AS2( pxor mm4, [Y+5*8])\ 172 AS2( movq [Y+5*8], mm4)\ 173 AS2( paddq mm4, [Y+6*8])\ 174 AS2( movq [Y+6*8], mm4)\ 175 AS2( pxor mm4, [edx+4*2048+2*8])\ 176 AS2( movq mm3, [Y+7*8])\ 177 AS2( psubq mm3, mm4)\ 178 AS2( movq [Y+7*8], mm3) 179 180 SSE2_pass(mm0, mm1, mm2, 5, esi) 181 SSE2_key_schedule(esp+4, esi) 182 SSE2_pass(mm2, mm0, mm1, 7, esp+4) 183 SSE2_key_schedule(esp+4, esp+4) 184 SSE2_pass(mm1, mm2, mm0, 9, esp+4) 185 186 AS2( pxor mm0, [eax+0*8]) 187 AS2( movq [eax+0*8], mm0) 188 AS2( psubq mm1, mm5) 189 AS2( movq [eax+1*8], mm1) 190 AS2( paddq mm2, [eax+2*8]) 191 AS2( movq [eax+2*8], mm2) 192 193 AS1( pop esp) 194 AS1( emms) 195#ifdef __GNUC__ 196 AS1( pop ebx) 197 ".att_syntax prefix;" 198 : 199 : "a" (digest), "S" (X), "d" (table) 200 : "%ecx", "%edi", "memory", "cc" 201 ); 202#endif 203 } 204 else 205#endif 206 { 207 word64 a = digest[0]; 208 word64 b = digest[1]; 209 word64 c = digest[2]; 210 word64 Y[8]; 211 212#define t1 (table) 213#define t2 (table+256) 214#define t3 (table+256*2) 215#define t4 (table+256*3) 216 217#define round(a,b,c,x,mul) \ 218 c ^= x; \ 219 a -= t1[GETBYTE(c,0)] ^ t2[GETBYTE(c,2)] ^ t3[GETBYTE(c,4)] ^ t4[GETBYTE(c,6)]; \ 220 b += t4[GETBYTE(c,1)] ^ t3[GETBYTE(c,3)] ^ t2[GETBYTE(c,5)] ^ t1[GETBYTE(c,7)]; \ 221 b *= mul 222 223#define pass(a,b,c,mul,X) {\ 224 int i=0;\ 225 while (true)\ 226 {\ 227 round(a,b,c,X[i+0],mul); \ 228 round(b,c,a,X[i+1],mul); \ 229 if (i==6)\ 230 break;\ 231 round(c,a,b,X[i+2],mul); \ 232 i+=3;\ 233 }} 234 235#define key_schedule(Y,X) \ 236 Y[0] = X[0] - (X[7]^W64LIT(0xA5A5A5A5A5A5A5A5)); \ 237 Y[1] = X[1] ^ Y[0]; \ 238 Y[2] = X[2] + Y[1]; \ 239 Y[3] = X[3] - (Y[2] ^ ((~Y[1])<<19)); \ 240 Y[4] = X[4] ^ Y[3]; \ 241 Y[5] = X[5] + Y[4]; \ 242 Y[6] = X[6] - (Y[5] ^ ((~Y[4])>>23)); \ 243 Y[7] = X[7] ^ Y[6]; \ 244 Y[0] += Y[7]; \ 245 Y[1] -= Y[0] ^ ((~Y[7])<<19); \ 246 Y[2] ^= Y[1]; \ 247 Y[3] += Y[2]; \ 248 Y[4] -= Y[3] ^ ((~Y[2])>>23); \ 249 Y[5] ^= Y[4]; \ 250 Y[6] += Y[5]; \ 251 Y[7] -= Y[6] ^ W64LIT(0x0123456789ABCDEF) 252 253 pass(a,b,c,5,X); 254 key_schedule(Y,X); 255 pass(c,a,b,7,Y); 256 key_schedule(Y,Y); 257 pass(b,c,a,9,Y); 258 259 digest[0] = a ^ digest[0]; 260 digest[1] = b - digest[1]; 261 digest[2] = c + digest[2]; 262 } 263} 264 265NAMESPACE_END 266