1// tiger.cpp - written and placed in the public domain by Wei Dai
2
3#include "pch.h"
4#include "tiger.h"
5#include "misc.h"
6#include "cpu.h"
7
8NAMESPACE_BEGIN(CryptoPP)
9
10void Tiger::InitState(HashWordType *state)
11{
12	state[0] = W64LIT(0x0123456789ABCDEF);
13	state[1] = W64LIT(0xFEDCBA9876543210);
14	state[2] = W64LIT(0xF096A5B4C3B2E187);
15}
16
17void Tiger::TruncatedFinal(byte *hash, size_t size)
18{
19	ThrowIfInvalidTruncatedSize(size);
20
21	PadLastBlock(56, 0x01);
22	CorrectEndianess(m_data, m_data, 56);
23
24	m_data[7] = GetBitCountLo();
25
26	Transform(m_state, m_data);
27	CorrectEndianess(m_state, m_state, DigestSize());
28	memcpy(hash, m_state, size);
29
30	Restart();		// reinit for next use
31}
32
// Tiger compression function: folds one message block X (eight 64-bit words,
// X[0..7]) into the three-word chaining value digest[0..2], updating digest
// in place.
//
// When CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86 is set and
// HasSSE2() succeeds at run time, the work is done by inline assembly on the
// mm0-mm7 registers; otherwise the portable C++ path at the bottom is used.
// Both paths run three passes of eight rounds (multipliers 5, 7 and 9) with a
// key schedule between passes, and both read their lookup data from `table`.
33void Tiger::Transform (word64 *digest, const word64 *X)
34{
35#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
36	if (HasSSE2())
37	{
38#ifdef __GNUC__
		// GCC: a single extended-asm statement written in Intel syntax. ebx is
		// saved by hand because SSE2_pass uses it as the loop offset and it is
		// not listed among the clobbers.
39		__asm__ __volatile__
40		(
41		".intel_syntax noprefix;"
42		AS1(	push	ebx)
43#else
		// Other compilers: load by hand the operands that GCC receives through
		// its input constraints - edx = table, eax = digest, esi = X.
44	#if _MSC_VER < 1300
45		const word64 *t = table;
46		AS2(	mov		edx, t)
47	#else
48		AS2(	lea		edx, [table])
49	#endif
50		AS2(	mov		eax, digest)
51		AS2(	mov		esi, X)
52#endif
		// mm0..mm2 = digest[0..2]; mm5 keeps the incoming digest[1] for the
		// final subtraction. mm7 and mm6 are constants read from byte offset
		// 4*2048 of table, i.e. just past the four 2048-byte lookup tables.
		// NOTE(review): table contents are not visible here; judging by the
		// key_schedule macro of the C++ path they are presumably all-ones and
		// 0xA5A5A5A5A5A5A5A5 - confirm against the table definition.
53		AS2(	movq	mm0, [eax])
54		AS2(	movq	mm1, [eax+1*8])
55		AS2(	movq	mm5, mm1)
56		AS2(	movq	mm2, [eax+2*8])
57		AS2(	movq	mm7, [edx+4*2048+0*8])
58		AS2(	movq	mm6, [edx+4*2048+1*8])
		// Carve a 16-byte-aligned, 64-byte (8*8) scratch area out of the stack
		// for the scheduled block. The original esp is pushed last, so the
		// scratch area starts at esp+4 and the `pop esp` near the end restores
		// the caller's stack pointer.
59		AS2(	mov		ecx, esp)
60		AS2(	and		esp, 0xfffffff0)
61		AS2(	sub		esp, 8*8)
62		AS1(	push	ecx)
63
		// One round on registers a, b, c with the message word at address x:
		//   c ^= [x]
		//   a -= T1[c0] ^ T2[c2] ^ T3[c4] ^ T4[c6]
		//   b += T4[c1] ^ T3[c3] ^ T2[c5] ^ T1[c7]
		//   b *= mul
		// where cN is byte N of c and Tk is the table at edx+(k-1)*2048.
		// ecx/edi carry the extracted bytes; mm3 and mm4 accumulate the two
		// XOR chains.
64#define SSE2_round(a,b,c,x,mul) \
65		AS2(	pxor	c, [x])\
66		AS2(	movd	ecx, c)\
67		AS2(	movzx	edi, cl)\
68		AS2(	movq	mm3, [edx+0*2048+edi*8])\
69		AS2(	movzx	edi, ch)\
70		AS2(	movq	mm4, [edx+3*2048+edi*8])\
71		AS2(	shr		ecx, 16)\
72		AS2(	movzx	edi, cl)\
73		AS2(	pxor	mm3, [edx+1*2048+edi*8])\
74		AS2(	movzx	edi, ch)\
75		AS2(	pxor	mm4, [edx+2*2048+edi*8])\
76		AS3(	pextrw	ecx, c, 2)\
77		AS2(	movzx	edi, cl)\
78		AS2(	pxor	mm3, [edx+2*2048+edi*8])\
79		AS2(	movzx	edi, ch)\
80		AS2(	pxor	mm4, [edx+1*2048+edi*8])\
81		AS3(	pextrw	ecx, c, 3)\
82		AS2(	movzx	edi, cl)\
83		AS2(	pxor	mm3, [edx+3*2048+edi*8])\
84		AS2(	psubq	a, mm3)\
85		AS2(	movzx	edi, ch)\
86		AS2(	pxor	mm4, [edx+0*2048+edi*8])\
87		AS2(	paddq	b, mm4)\
88		SSE2_mul_##mul(b)
89
		// b *= 5: (b << 2) + b. mm3 is scratch.
90#define SSE2_mul_5(b)	\
91		AS2(	movq	mm3, b)\
92		AS2(	psllq	b, 2)\
93		AS2(	paddq	b, mm3)
94
		// b *= 7: (b << 3) - b. mm3 is scratch.
95#define SSE2_mul_7(b)	\
96		AS2(	movq	mm3, b)\
97		AS2(	psllq	b, 3)\
98		AS2(	psubq	b, mm3)
99
		// b *= 9: (b << 3) + b. mm3 is scratch.
100#define SSE2_mul_9(b)	\
101		AS2(	movq	mm3, b)\
102		AS2(	psllq	b, 3)\
103		AS2(	paddq	b, mm3)
104
		// Label numbers for the exit point of each pass loop; the loop head of
		// a pass uses the multiplier itself (5, 7 or 9) as its label.
		// NOTE(review): ASL/ASJ are defined elsewhere; presumably they emit a
		// local label and a jump to one - confirm.
105#define label2_5 1
106#define label2_7 2
107#define label2_9 3
108
		// One pass of eight rounds over the block at address X. ebx is the byte
		// offset of the current group of three words and advances by 3*8 per
		// iteration; the loop exits after the second round of the third group
		// (ebx == 6*8), so words 0..7 are each consumed once. The roles of
		// A, B, C rotate from round to round.
109#define SSE2_pass(A,B,C,mul,X)	\
110		AS2(	xor		ebx, ebx)\
111		ASL(mul)\
112		SSE2_round(A,B,C,X+0*8+ebx,mul)\
113		SSE2_round(B,C,A,X+1*8+ebx,mul)\
114		AS2(	cmp		ebx, 6*8)\
115		ASJ(	je,		label2_##mul, f)\
116		SSE2_round(C,A,B,X+2*8+ebx,mul)\
117		AS2(	add		ebx, 3*8)\
118		ASJ(	jmp,	mul, b)\
119		ASL(label2_##mul)
120
		// Key schedule: derives the eight-word block at Y from the block at X,
		// using mm3/mm4 as scratch, mm6/mm7 as constants and a third constant
		// at table+4*2048+2*8. The operation sequence mirrors the key_schedule
		// macro of the C++ path (assuming the three table constants equal its
		// literals - confirm). Y may be the same buffer as X, as in the second
		// invocation below.
121#define SSE2_key_schedule(Y,X) \
122		AS2(	movq	mm3, [X+7*8])\
123		AS2(	pxor	mm3, mm6)\
124		AS2(	movq	mm4, [X+0*8])\
125		AS2(	psubq	mm4, mm3)\
126		AS2(	movq	[Y+0*8], mm4)\
127		AS2(	pxor	mm4, [X+1*8])\
128		AS2(	movq	mm3, mm4)\
129		AS2(	movq	[Y+1*8], mm4)\
130		AS2(	paddq	mm4, [X+2*8])\
131		AS2(	pxor	mm3, mm7)\
132		AS2(	psllq	mm3, 19)\
133		AS2(	movq	[Y+2*8], mm4)\
134		AS2(	pxor	mm3, mm4)\
135		AS2(	movq	mm4, [X+3*8])\
136		AS2(	psubq	mm4, mm3)\
137		AS2(	movq	[Y+3*8], mm4)\
138		AS2(	pxor	mm4, [X+4*8])\
139		AS2(	movq	mm3, mm4)\
140		AS2(	movq	[Y+4*8], mm4)\
141		AS2(	paddq	mm4, [X+5*8])\
142		AS2(	pxor	mm3, mm7)\
143		AS2(	psrlq	mm3, 23)\
144		AS2(	movq	[Y+5*8], mm4)\
145		AS2(	pxor	mm3, mm4)\
146		AS2(	movq	mm4, [X+6*8])\
147		AS2(	psubq	mm4, mm3)\
148		AS2(	movq	[Y+6*8], mm4)\
149		AS2(	pxor	mm4, [X+7*8])\
150		AS2(	movq	mm3, mm4)\
151		AS2(	movq	[Y+7*8], mm4)\
152		AS2(	paddq	mm4, [Y+0*8])\
153		AS2(	pxor	mm3, mm7)\
154		AS2(	psllq	mm3, 19)\
155		AS2(	movq	[Y+0*8], mm4)\
156		AS2(	pxor	mm3, mm4)\
157		AS2(	movq	mm4, [Y+1*8])\
158		AS2(	psubq	mm4, mm3)\
159		AS2(	movq	[Y+1*8], mm4)\
160		AS2(	pxor	mm4, [Y+2*8])\
161		AS2(	movq	mm3, mm4)\
162		AS2(	movq	[Y+2*8], mm4)\
163		AS2(	paddq	mm4, [Y+3*8])\
164		AS2(	pxor	mm3, mm7)\
165		AS2(	psrlq	mm3, 23)\
166		AS2(	movq	[Y+3*8], mm4)\
167		AS2(	pxor	mm3, mm4)\
168		AS2(	movq	mm4, [Y+4*8])\
169		AS2(	psubq	mm4, mm3)\
170		AS2(	movq	[Y+4*8], mm4)\
171		AS2(	pxor	mm4, [Y+5*8])\
172		AS2(	movq	[Y+5*8], mm4)\
173		AS2(	paddq	mm4, [Y+6*8])\
174		AS2(	movq	[Y+6*8], mm4)\
175		AS2(	pxor	mm4, [edx+4*2048+2*8])\
176		AS2(	movq	mm3, [Y+7*8])\
177		AS2(	psubq	mm3, mm4)\
178		AS2(	movq	[Y+7*8], mm3)
179
		// Pass 1 reads the caller's block at esi; the key schedule then writes
		// the derived block to the stack scratch area (esp+4), which passes 2
		// and 3 read. The register roles rotate between passes.
180		SSE2_pass(mm0, mm1, mm2, 5, esi)
181		SSE2_key_schedule(esp+4, esi)
182		SSE2_pass(mm2, mm0, mm1, 7, esp+4)
183		SSE2_key_schedule(esp+4, esp+4)
184		SSE2_pass(mm1, mm2, mm0, 9, esp+4)
185
		// Feed-forward with the incoming state: digest[0] = mm0 ^ digest[0],
		// digest[1] = mm1 - (incoming digest[1], saved in mm5),
		// digest[2] = mm2 + digest[2].
186		AS2(	pxor	mm0, [eax+0*8])
187		AS2(	movq	[eax+0*8], mm0)
188		AS2(	psubq	mm1, mm5)
189		AS2(	movq	[eax+1*8], mm1)
190		AS2(	paddq	mm2, [eax+2*8])
191		AS2(	movq	[eax+2*8], mm2)
192
		// Restore the stack pointer saved by the earlier push, then execute
		// emms to leave MMX state.
193		AS1(	pop		esp)
194		AS1(	emms)
195#ifdef __GNUC__
		// GCC: restore ebx, switch the assembler back to AT&T syntax, then the
		// operand lists - no outputs; inputs eax = digest, esi = X,
		// edx = table; clobbers ecx, edi, memory and the condition codes.
196		AS1(	pop		ebx)
197		".att_syntax prefix;"
198			:
199			: "a" (digest), "S" (X), "d" (table)
200			: "%ecx", "%edi", "memory", "cc"
201		);
202#endif
203	}
204	else
205#endif
206	{
		// Portable path: a, b, c are working copies of the state and Y holds
		// the block produced by the key schedule.
207		word64 a = digest[0];
208		word64 b = digest[1];
209		word64 c = digest[2];
210		word64 Y[8];
211
		// Four lookup tables of 256 entries each, stored back to back in table.
212#define t1 (table)
213#define t2 (table+256)
214#define t3 (table+256*2)
215#define t4 (table+256*3)
216
		// One round: mix message word x into c, update a from the even bytes of
		// c and b from the odd bytes, then multiply b by mul.
217#define round(a,b,c,x,mul) \
218	c ^= x; \
219	a -= t1[GETBYTE(c,0)] ^ t2[GETBYTE(c,2)] ^ t3[GETBYTE(c,4)] ^ t4[GETBYTE(c,6)]; \
220	b += t4[GETBYTE(c,1)] ^ t3[GETBYTE(c,3)] ^ t2[GETBYTE(c,5)] ^ t1[GETBYTE(c,7)]; \
221	b *= mul
222
		// One pass: eight rounds over X[0..7], three per iteration with the
		// roles of a, b, c rotating; the loop breaks after the second round of
		// the iteration with i == 6.
223#define pass(a,b,c,mul,X) {\
224	int i=0;\
225	while (true)\
226	{\
227		round(a,b,c,X[i+0],mul); \
228		round(b,c,a,X[i+1],mul); \
229		if (i==6)\
230			break;\
231		round(c,a,b,X[i+2],mul); \
232		i+=3;\
233	}}
234
		// Key schedule: derives block Y from block X. Each X[i] is read no
		// later than the statement that writes Y[i], so it also works in place
		// (it is invoked below with Y as both arguments).
235#define key_schedule(Y,X) \
236	Y[0] = X[0] - (X[7]^W64LIT(0xA5A5A5A5A5A5A5A5)); \
237	Y[1] = X[1] ^ Y[0]; \
238	Y[2] = X[2] + Y[1]; \
239	Y[3] = X[3] - (Y[2] ^ ((~Y[1])<<19)); \
240	Y[4] = X[4] ^ Y[3]; \
241	Y[5] = X[5] + Y[4]; \
242	Y[6] = X[6] - (Y[5] ^ ((~Y[4])>>23)); \
243	Y[7] = X[7] ^ Y[6]; \
244	Y[0] += Y[7]; \
245	Y[1] -= Y[0] ^ ((~Y[7])<<19); \
246	Y[2] ^= Y[1]; \
247	Y[3] += Y[2]; \
248	Y[4] -= Y[3] ^ ((~Y[2])>>23); \
249	Y[5] ^= Y[4]; \
250	Y[6] += Y[5]; \
251	Y[7] -= Y[6] ^ W64LIT(0x0123456789ABCDEF)
252
		// Three passes with multipliers 5, 7, 9; the first uses the caller's
		// block, the later ones the scheduled block Y.
253		pass(a,b,c,5,X);
254		key_schedule(Y,X);
255		pass(c,a,b,7,Y);
256		key_schedule(Y,Y);
257		pass(b,c,a,9,Y);
258
		// Feed-forward: combine the working values with the incoming state.
259		digest[0] = a ^ digest[0];
260		digest[1] = b - digest[1];
261		digest[2] = c + digest[2];
262	}
263}
264
265NAMESPACE_END
266