1238384Sjkim/* Copyright (c) 2005 Hewlett-Packard Development Company, L.P.
2238384Sjkim
3238384SjkimPermission is hereby granted, free of charge, to any person obtaining
4238384Sjkima copy of this software and associated documentation files (the
5238384Sjkim"Software"), to deal in the Software without restriction, including
6238384Sjkimwithout limitation the rights to use, copy, modify, merge, publish,
7238384Sjkimdistribute, sublicense, and/or sell copies of the Software, and to
8238384Sjkimpermit persons to whom the Software is furnished to do so, subject to
9238384Sjkimthe following conditions:
10238384Sjkim
11238384SjkimThe above copyright notice and this permission notice shall be
12238384Sjkimincluded in all copies or substantial portions of the Software.
13238384Sjkim
14238384SjkimTHE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15238384SjkimEXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16238384SjkimMERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17238384SjkimNONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18238384SjkimLIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19238384SjkimOF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20238384SjkimWITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
21238384Sjkim
22238384Sjkim//	Common registers are assigned as follows:
23238384Sjkim//
24238384Sjkim//	COMMON
25238384Sjkim//
26238384Sjkim//	t0		Const Tbl Ptr	TPtr
27238384Sjkim//	t1		Round Constant	TRound
28238384Sjkim//	t4		Block residual	LenResid
29238384Sjkim//	t5		Residual Data	DTmp
30238384Sjkim//
31238384Sjkim//	{in,out}0	Block 0 Cycle	RotateM0
32238384Sjkim//	{in,out}1	Block Value 12	M12
33238384Sjkim//	{in,out}2	Block Value 8	M8
34238384Sjkim//	{in,out}3	Block Value 4	M4
35238384Sjkim//	{in,out}4	Block Value 0	M0
36238384Sjkim//	{in,out}5	Block 1 Cycle	RotateM1
37238384Sjkim//	{in,out}6	Block Value 13	M13
38238384Sjkim//	{in,out}7	Block Value 9	M9
39238384Sjkim//	{in,out}8	Block Value 5	M5
40238384Sjkim//	{in,out}9	Block Value 1	M1
41238384Sjkim//	{in,out}10	Block 2 Cycle	RotateM2
42238384Sjkim//	{in,out}11	Block Value 14	M14
43238384Sjkim//	{in,out}12	Block Value 10	M10
44238384Sjkim//	{in,out}13	Block Value 6	M6
45238384Sjkim//	{in,out}14	Block Value 2	M2
46238384Sjkim//	{in,out}15	Block 3 Cycle	RotateM3
47238384Sjkim//	{in,out}16	Block Value 15	M15
48238384Sjkim//	{in,out}17	Block Value 11	M11
49238384Sjkim//	{in,out}18	Block Value 7	M7
50238384Sjkim//	{in,out}19	Block Value 3	M3
51238384Sjkim//	{in,out}20	Scratch			Z
52238384Sjkim//	{in,out}21	Scratch			Y
53238384Sjkim//	{in,out}22	Scratch			X
54238384Sjkim//	{in,out}23	Scratch			W
55238384Sjkim//	{in,out}24	Digest A		A
56238384Sjkim//	{in,out}25	Digest B		B
57238384Sjkim//	{in,out}26	Digest C		C
58238384Sjkim//	{in,out}27	Digest D		D
59238384Sjkim//	{in,out}28	Active Data Ptr	DPtr
60238384Sjkim//	in28		Dummy Value		-
61238384Sjkim//	out28		Dummy Value		-
62238384Sjkim//	bt0			Coroutine Link	QUICK_RTN
63238384Sjkim//
64238384Sjkim///	These predicates are used for computing the padding block(s) and
65238384Sjkim///	are shared between the driver and digest co-routines
66238384Sjkim//
67238384Sjkim//	pt0			Extra Pad Block	pExtra
68238384Sjkim//	pt1			Load next word	pLoad
69238384Sjkim//	pt2			Skip next word	pSkip
70238384Sjkim//	pt3			Search for Pad	pNoPad
71238384Sjkim//	pt4			Pad Word 0		pPad0
72238384Sjkim//	pt5			Pad Word 1		pPad1
73238384Sjkim//	pt6			Pad Word 2		pPad2
74238384Sjkim//	pt7			Pad Word 3		pPad3
75238384Sjkim
//	Physical registers and predicates behind the shared names in the
//	table above (used by both the driver and the digest co-routines).
76238384Sjkim#define	DTmp		r19
77238384Sjkim#define	LenResid	r18
78238384Sjkim#define	QUICK_RTN	b6
79238384Sjkim#define	TPtr		r14
80238384Sjkim#define	TRound		r15
81238384Sjkim#define	pExtra		p6
82238384Sjkim#define	pLoad		p7
83238384Sjkim#define	pNoPad		p9
84238384Sjkim#define	pPad0		p10
85238384Sjkim#define	pPad1		p11
86238384Sjkim#define	pPad2		p12
87238384Sjkim#define	pPad3		p13
88238384Sjkim#define	pSkip		p8
89238384Sjkim
//	Driver-side names (trailing underscore): the out registers of
//	md5_block_asm_data_order.  The digest routines allocate MD5_NOUT
//	inputs, so these are the same slots they see as in registers.
90238384Sjkim#define	A_		out24
91238384Sjkim#define	B_		out25
92238384Sjkim#define	C_		out26
93238384Sjkim#define	D_		out27
94238384Sjkim#define	DPtr_		out28
95238384Sjkim#define	M0_		out4
96238384Sjkim#define	M1_		out9
97238384Sjkim#define	M10_		out12
98238384Sjkim#define	M11_		out17
99238384Sjkim#define	M12_		out1
100238384Sjkim#define	M13_		out6
101238384Sjkim#define	M14_		out11
102238384Sjkim#define	M15_		out16
103238384Sjkim#define	M2_		out14
104238384Sjkim#define	M3_		out19
105238384Sjkim#define	M4_		out3
106238384Sjkim#define	M5_		out8
107238384Sjkim#define	M6_		out13
108238384Sjkim#define	M7_		out18
109238384Sjkim#define	M8_		out2
110238384Sjkim#define	M9_		out7
111238384Sjkim#define	RotateM0_	out0
112238384Sjkim#define	RotateM1_	out5
113238384Sjkim#define	RotateM2_	out10
114238384Sjkim#define	RotateM3_	out15
115238384Sjkim#define	W_		out23
116238384Sjkim#define	X_		out22
117238384Sjkim#define	Y_		out21
118238384Sjkim#define	Z_		out20
119238384Sjkim
//	Digest-side names: the in registers of the md5_digest_* routines,
//	numbered identically to the driver-side out registers above.
120238384Sjkim#define	A		in24
121238384Sjkim#define	B		in25
122238384Sjkim#define	C		in26
123238384Sjkim#define	D		in27
124238384Sjkim#define	DPtr		in28
125238384Sjkim#define	M0		in4
126238384Sjkim#define	M1		in9
127238384Sjkim#define	M10		in12
128238384Sjkim#define	M11		in17
129238384Sjkim#define	M12		in1
130238384Sjkim#define	M13		in6
131238384Sjkim#define	M14		in11
132238384Sjkim#define	M15		in16
133238384Sjkim#define	M2		in14
134238384Sjkim#define	M3		in19
135238384Sjkim#define	M4		in3
136238384Sjkim#define	M5		in8
137238384Sjkim#define	M6		in13
138238384Sjkim#define	M7		in18
139238384Sjkim#define	M8		in2
140238384Sjkim#define	M9		in7
141238384Sjkim#define	RotateM0	in0
142238384Sjkim#define	RotateM1	in5
143238384Sjkim#define	RotateM2	in10
144238384Sjkim#define	RotateM3	in15
145238384Sjkim#define	W		in23
146238384Sjkim#define	X		in22
147238384Sjkim#define	Y		in21
148238384Sjkim#define	Z		in20
149238384Sjkim
150238384Sjkim/* register stack configuration for md5_block_asm_data_order(): */
/* 3 inputs (c, data, num), no locals, 29 outs (out0..out28), no rotation */
151238384Sjkim#define	MD5_NINP	3
152238384Sjkim#define	MD5_NLOC	0
153238384Sjkim#define MD5_NOUT	29
154238384Sjkim#define MD5_NROT	0
155238384Sjkim
156238384Sjkim/* register stack configuration for helpers: */
/* the driver's outs become the helpers' inputs; the first 24 of them rotate */
157238384Sjkim#define	_NINPUTS	MD5_NOUT
158238384Sjkim#define	_NLOCALS	0
159238384Sjkim#define _NOUTPUT	0
160238384Sjkim#define	_NROTATE	24	/* this must be <= _NINPUTS */
161238384Sjkim
/* ADDP is applied to incoming pointers: addp4 when building for HP-UX
   without _LP64, a plain add everywhere else. */
162238384Sjkim#if defined(_HPUX_SOURCE) && !defined(_LP64)
163238384Sjkim#define	ADDP	addp4
164238384Sjkim#else
165238384Sjkim#define	ADDP	add
166238384Sjkim#endif
167238384Sjkim
/* On big-endian hosts the driver clears psr.be around the computation
   (rum on entry, sum on exit). */
168238384Sjkim#if defined(_HPUX_SOURCE) || defined(B_ENDIAN)
169238384Sjkim#define HOST_IS_BIG_ENDIAN
170238384Sjkim#endif
171238384Sjkim
172238384Sjkim//	Macros for getting the left and right portions of little-endian words
//
//	GETLW: dst = low (8 * align) bits of src, placed at bit
//	       (32 - 8 * align) with all other bits zero (dep.z).
//	GETRW: dst = the (32 - 8 * align) bits of src starting at bit
//	       (8 * align), zero-extended (extr.u).
//	align is the byte misalignment of the input (1, 2 or 3 in this file).
173238384Sjkim
174238384Sjkim#define	GETLW(dst, src, align)	dep.z dst = src, 32 - 8 * align, 8 * align
175238384Sjkim#define	GETRW(dst, src, align)	extr.u dst = src, 8 * align, 32 - 8 * align
176238384Sjkim
177238384Sjkim//	MD5 driver
178238384Sjkim//
179238384Sjkim//		Reads an input block, then calls the digest block
180238384Sjkim//		subroutine and adds the results to the accumulated
181238384Sjkim//		digest.  It allocates MD5_NOUT (29) outs which the subroutine
182238384Sjkim//		uses as its inputs and rotating
183238384Sjkim//		registers. Initializes the round constant pointer and
184238384Sjkim//		takes care of saving/restoring ar.lc
185238384Sjkim//
186238384Sjkim///	INPUT
187238384Sjkim//
188238384Sjkim//	in0		Context Ptr		CtxPtr0
189238384Sjkim//	in1		Input Data Ptr		DPtrIn
190238384Sjkim//	in2		Integral Blocks		BlockCount
191238384Sjkim//	rp		Return Address		-
192238384Sjkim//
193238384Sjkim///	CODE
194238384Sjkim//
195238384Sjkim//	v2		Input Align		InAlign
196238384Sjkim//	t0		Shared w/digest		-
197238384Sjkim//	t1		Shared w/digest		-
198238384Sjkim//	t2		Shared w/digest		-
199238384Sjkim//	t3		Shared w/digest		-
200238384Sjkim//	t4		Shared w/digest		-
201238384Sjkim//	t5		Shared w/digest		-
202238384Sjkim//	t6		PFS Save		PFSSave
203238384Sjkim//	t7		ar.lc Save		LCSave
204238384Sjkim//	t8		Saved PR		PRSave
205238384Sjkim//	t9		2nd CtxPtr		CtxPtr1
206238384Sjkim//	t10		Table Base		CTable
207238384Sjkim//	t11		Table[0]		CTable0
208238384Sjkim//	t13		Accumulator A		AccumA
209238384Sjkim//	t14		Accumulator B		AccumB
210238384Sjkim//	t15		Accumulator C		AccumC
211238384Sjkim//	t16		Accumulator D		AccumD
212238384Sjkim//	pt0		Shared w/digest		-
213238384Sjkim//	pt1		Shared w/digest		-
214238384Sjkim//	pt2		Shared w/digest		-
215238384Sjkim//	pt3		Shared w/digest		-
216238384Sjkim//	pt4		Shared w/digest		-
217238384Sjkim//	pt5		Shared w/digest		-
218238384Sjkim//	pt6		Shared w/digest		-
219238384Sjkim//	pt7		Shared w/digest		-
220238384Sjkim//	pt8		Not Aligned		pOff
221238384Sjkim//	pt8		Blocks Left		pAgain
222238384Sjkim
//	Physical registers for the driver names in the table above.
//	pAgain and pOff both map to p63; in this file each is written by a
//	compare and consumed by the branch that immediately follows it.
223238384Sjkim#define	AccumA		r27
224238384Sjkim#define	AccumB		r28
225238384Sjkim#define	AccumC		r29
226238384Sjkim#define	AccumD		r30
227238384Sjkim#define	CTable		r24
228238384Sjkim#define	CTable0		r25
229238384Sjkim#define	CtxPtr0		in0
230238384Sjkim#define	CtxPtr1		r23
231238384Sjkim#define	DPtrIn		in1
232238384Sjkim#define	BlockCount	in2
233238384Sjkim#define	InAlign		r10
234238384Sjkim#define	LCSave		r21
235238384Sjkim#define	PFSSave		r20
236238384Sjkim#define	PRSave		r22
237238384Sjkim#define	pAgain		p63
238238384Sjkim#define	pOff		p63
239238384Sjkim
240238384Sjkim	.text
241238384Sjkim
242238384Sjkim/* md5_block_asm_data_order(MD5_CTX *c, const void *data, size_t num)
243238384Sjkim
244238384Sjkim     where:
245238384Sjkim      c: a pointer to a structure of this type:
246238384Sjkim
247238384Sjkim	   typedef struct MD5state_st
248238384Sjkim	     {
249238384Sjkim	       MD5_LONG A,B,C,D;
250238384Sjkim	       MD5_LONG Nl,Nh;
251238384Sjkim	       MD5_LONG data[MD5_LBLOCK];
252238384Sjkim	       unsigned int num;
253238384Sjkim	     }
254238384Sjkim	   MD5_CTX;
255238384Sjkim
256238384Sjkim      data: a pointer to the input data (may be misaligned)
257238384Sjkim      num:  the number of 64-byte blocks to hash (i.e., the length
258238384Sjkim            of DATA is 64*NUM).
259238384Sjkim
260238384Sjkim   */
261238384Sjkim
//	md5_block_asm_data_order: driver entry point (aligned path and exit).
//
//	Loads A..D from the context into AccumA..AccumD.  For every block it
//	copies the accumulators into the out registers, loads the first four
//	message words, calls the digest co-routine with the return link in
//	QUICK_RTN, and adds the returned A_..D_ back into the accumulators.
//	At .md5_exit the accumulators are stored to the context and pr,
//	ar.lc and ar.pfs are restored.
//
//	NOTE(review): BlockCount is decremented before it is compared with
//	zero, so num == 0 is not handled as "no work"; callers presumably
//	always pass num >= 1 -- confirm.
262238384Sjkim	.type	md5_block_asm_data_order, @function
263238384Sjkim	.global	md5_block_asm_data_order
264238384Sjkim	.align	32
265238384Sjkim	.proc	md5_block_asm_data_order
266238384Sjkimmd5_block_asm_data_order:
267238384Sjkim.md5_block:
268238384Sjkim	.prologue
//	Allocate the frame, point CtxPtr1 at c + 8 (field C) and capture ip
//	so the constant table can be located relative to .md5_block.
269238384Sjkim{	.mmi
270238384Sjkim	.save	ar.pfs, PFSSave
271238384Sjkim	alloc	PFSSave = ar.pfs, MD5_NINP, MD5_NLOC, MD5_NOUT, MD5_NROT
272238384Sjkim	ADDP	CtxPtr1 = 8, CtxPtr0
273238384Sjkim	mov	CTable = ip
274238384Sjkim}
//	Pass both incoming pointers through ADDP and save the loop counter.
275238384Sjkim{	.mmi
276238384Sjkim	ADDP	DPtrIn = 0, DPtrIn
277238384Sjkim	ADDP	CtxPtr0 = 0, CtxPtr0
278238384Sjkim	.save	ar.lc, LCSave
279238384Sjkim	mov	LCSave = ar.lc
280238384Sjkim}
281238384Sjkim;;
//	CTable = address of .md5_tbl_data_order; InAlign = data pointer & 3.
282238384Sjkim{	.mmi
283238384Sjkim	add	CTable = .md5_tbl_data_order#-.md5_block#, CTable
284238384Sjkim	and	InAlign = 0x3, DPtrIn
285238384Sjkim}
286238384Sjkim
//	Load A and C (post-increment leaves the pointers at B and D) and
//	save the predicate registers.
287238384Sjkim{	.mmi
288238384Sjkim	ld4	AccumA = [CtxPtr0], 4
289238384Sjkim	ld4	AccumC = [CtxPtr1], 4
290238384Sjkim	.save pr, PRSave
291238384Sjkim	mov	PRSave = pr
292238384Sjkim	.body
293238384Sjkim}
294238384Sjkim;;
//	Load B and D; DPtr_ = data pointer with its low two bits cleared.
295238384Sjkim{	.mmi
296238384Sjkim	ld4	AccumB = [CtxPtr0]
297238384Sjkim	ld4	AccumD = [CtxPtr1]
298238384Sjkim	dep	DPtr_ = 0, DPtrIn, 0, 2
299238384Sjkim} ;;
300238384Sjkim#ifdef HOST_IS_BIG_ENDIAN
301238384Sjkim	rum	psr.be;;	// switch to little-endian
302238384Sjkim#endif
//	CTable0 = first table entry (CTable now points at the second one);
//	take the unaligned path when InAlign is non-zero.
303238384Sjkim{	.mmb
304238384Sjkim	ld4	CTable0 = [CTable], 4
305238384Sjkim	cmp.ne	pOff, p0 = 0, InAlign
306238384Sjkim(pOff)	br.cond.spnt.many .md5_unaligned
307238384Sjkim} ;;
308238384Sjkim
309238384Sjkim//	The FF load/compute loop rotates values three times, so that
310238384Sjkim//	loading into M12 here produces the M0 value, M13 -> M1, etc.
311238384Sjkim
//	Per-block setup: reset the constant pointer and first constant, copy
//	the accumulators to A_..D_, load four words, count the block, call.
312238384Sjkim.md5_block_loop0:
313238384Sjkim{	.mmi
314238384Sjkim	ld4	M12_ = [DPtr_], 4
315238384Sjkim	mov	TPtr = CTable
316238384Sjkim	mov	TRound = CTable0
317238384Sjkim} ;;
318238384Sjkim{	.mmi
319238384Sjkim	ld4	M13_ = [DPtr_], 4
320238384Sjkim	mov	A_ = AccumA
321238384Sjkim	mov	B_ = AccumB
322238384Sjkim} ;;
323238384Sjkim{	.mmi
324238384Sjkim	ld4	M14_ = [DPtr_], 4
325238384Sjkim	mov	C_ = AccumC
326238384Sjkim	mov	D_ = AccumD
327238384Sjkim} ;;
328238384Sjkim{	.mmb
329238384Sjkim	ld4	M15_ = [DPtr_], 4
330238384Sjkim	add	BlockCount = -1, BlockCount
331238384Sjkim	br.call.sptk.many QUICK_RTN = md5_digest_block0
332238384Sjkim} ;;
333238384Sjkim
334238384Sjkim//	Now, we add the new digest values and do some clean-up
335238384Sjkim//	before checking if there's another full block to process
336238384Sjkim
337238384Sjkim{	.mmi
338238384Sjkim	add	AccumA = AccumA, A_
339238384Sjkim	add	AccumB = AccumB, B_
340238384Sjkim	cmp.ne	pAgain, p0 = 0, BlockCount
341238384Sjkim}
342238384Sjkim{	.mib
343238384Sjkim	add	AccumC = AccumC, C_
344238384Sjkim	add	AccumD = AccumD, D_
345238384Sjkim(pAgain) br.cond.dptk.many .md5_block_loop0
346238384Sjkim} ;;
347238384Sjkim
//	Common exit, also reached from the unaligned loops.  CtxPtr0/CtxPtr1
//	still point at B/D, so B and D are stored first with a post-decrement
//	that steps back to A and C.
348238384Sjkim.md5_exit:
349238384Sjkim#ifdef HOST_IS_BIG_ENDIAN
350238384Sjkim	sum	psr.be;;	// switch back to big-endian mode
351238384Sjkim#endif
352238384Sjkim{	.mmi
353238384Sjkim	st4	[CtxPtr0] = AccumB, -4
354238384Sjkim	st4	[CtxPtr1] = AccumD, -4
355238384Sjkim	mov	pr = PRSave, 0x1ffff ;;
356238384Sjkim}
357238384Sjkim{	.mmi
358238384Sjkim	st4	[CtxPtr0] = AccumA
359238384Sjkim	st4	[CtxPtr1] = AccumC
360238384Sjkim	mov	ar.lc = LCSave
361238384Sjkim} ;;
362238384Sjkim{	.mib
363238384Sjkim	mov	ar.pfs = PFSSave
364238384Sjkim	br.ret.sptk.few	rp
365238384Sjkim} ;;
366238384Sjkim
//	MD5UNALIGNED(offset): driver loop for input misaligned by offset bytes.
//
//	.md5_process<offset>: entered with DTmp holding the first aligned
//	word; GETRW keeps only the part that belongs to the message.
//	.md5_block_loop<offset>: loads the next aligned word into Y_ and
//	forms the first message word as M12_ = GETLW(Y_) | DTmp, leaving
//	GETRW(Y_) in DTmp as the carry for the following word.  M13_..M15_
//	are loaded as raw aligned words; md5_digest_block<offset> is called
//	and its results are added to the accumulators.  When BlockCount
//	reaches zero control transfers to .md5_exit.
367238384Sjkim#define	MD5UNALIGNED(offset)						\
368238384Sjkim.md5_process##offset:							\
369238384Sjkim{	.mib ;								\
370238384Sjkim	nop	0x0	;						\
371238384Sjkim	GETRW(DTmp, DTmp, offset) ;					\
372238384Sjkim} ;;									\
373238384Sjkim.md5_block_loop##offset:						\
374238384Sjkim{	.mmi ;								\
375238384Sjkim	ld4	Y_ = [DPtr_], 4 ;					\
376238384Sjkim	mov	TPtr = CTable ;						\
377238384Sjkim	mov	TRound = CTable0 ;					\
378238384Sjkim} ;;									\
379238384Sjkim{	.mmi ;								\
380238384Sjkim	ld4	M13_ = [DPtr_], 4 ;					\
381238384Sjkim	mov	A_ = AccumA ;						\
382238384Sjkim	mov	B_ = AccumB ;						\
383238384Sjkim} ;;									\
384238384Sjkim{	.mii ;								\
385238384Sjkim	ld4	M14_ = [DPtr_], 4 ;					\
386238384Sjkim	GETLW(W_, Y_, offset) ;						\
387238384Sjkim	mov	C_ = AccumC ;						\
388238384Sjkim}									\
389238384Sjkim{	.mmi ;								\
390238384Sjkim	mov	D_ = AccumD ;;						\
391238384Sjkim	or	M12_ = W_, DTmp ;					\
392238384Sjkim	GETRW(DTmp, Y_, offset) ;					\
393238384Sjkim}									\
394238384Sjkim{	.mib ;								\
395238384Sjkim	ld4	M15_ = [DPtr_], 4 ;					\
396238384Sjkim	add	BlockCount = -1, BlockCount ;				\
397238384Sjkim	br.call.sptk.many QUICK_RTN = md5_digest_block##offset;		\
398238384Sjkim} ;;									\
399238384Sjkim{	.mmi ;								\
400238384Sjkim	add	AccumA = AccumA, A_ ;					\
401238384Sjkim	add	AccumB = AccumB, B_ ;					\
402238384Sjkim	cmp.ne	pAgain, p0 = 0, BlockCount ;				\
403238384Sjkim}									\
404238384Sjkim{	.mib ;								\
405238384Sjkim	add	AccumC = AccumC, C_ ;					\
406238384Sjkim	add	AccumD = AccumD, D_ ;					\
407238384Sjkim(pAgain) br.cond.dptk.many .md5_block_loop##offset ;			\
408238384Sjkim} ;;									\
409238384Sjkim{	.mib ;								\
410238384Sjkim	nop	0x0 ;							\
411238384Sjkim	nop	0x0 ;							\
412238384Sjkim	br.cond.sptk.many .md5_exit ;					\
413238384Sjkim} ;;
414238384Sjkim
415238384Sjkim	.align	32
416238384Sjkim.md5_unaligned:
417238384Sjkim//
418238384Sjkim//	Because variable shifts are expensive, we special case each of
419238384Sjkim//	the four alignments. In practice, this won't hurt too much
420238384Sjkim//	since only one working set of code will be loaded.
421238384Sjkim//
//	DTmp receives the aligned word that contains the first data bytes.
//	InAlign == 1 or 2 branches to the matching .md5_process<n>;
//	InAlign == 3 falls through into the MD5UNALIGNED(3) expansion.
422238384Sjkim{	.mib
423238384Sjkim	ld4	DTmp = [DPtr_], 4
424238384Sjkim	cmp.eq	pOff, p0 = 1, InAlign
425238384Sjkim(pOff)	br.cond.dpnt.many .md5_process1
426238384Sjkim} ;;
427238384Sjkim{	.mib
428238384Sjkim	cmp.eq	pOff, p0 = 2, InAlign
429238384Sjkim	nop	0x0
430238384Sjkim(pOff)	br.cond.dpnt.many .md5_process2
431238384Sjkim} ;;
432238384Sjkim	MD5UNALIGNED(3)
433238384Sjkim	MD5UNALIGNED(1)
434238384Sjkim	MD5UNALIGNED(2)
435238384Sjkim
436238384Sjkim	.endp md5_block_asm_data_order
437238384Sjkim
438238384Sjkim
439238384Sjkim// MD5 Perform the F function and load
440238384Sjkim//
441238384Sjkim// Passed the first 4 words (M0 - M3) and initial (A, B, C, D) values,
442238384Sjkim// computes the FF() round of functions, then branches to the common
443238384Sjkim// digest code to finish up with GG(), HH, and II().
444238384Sjkim//
445238384Sjkim// INPUT
446238384Sjkim//
447238384Sjkim// rp Return Address -
448238384Sjkim//
449238384Sjkim// CODE
450238384Sjkim//
451238384Sjkim// v0 PFS bit bucket PFS
452238384Sjkim// v1 Loop Trip Count LTrip
453238384Sjkim// pt0 Load next word pMore
454238384Sjkim
455238384Sjkim/* For F round: */
/* LTrip: countdown that switches pMore off; PFS: target of alloc;
   pMore: predicate on the message-word loads. */
456238384Sjkim#define LTrip	r9
457238384Sjkim#define PFS	r8
458238384Sjkim#define pMore	p6
459238384Sjkim
460238384Sjkim/* For GHI rounds: */
/* Extra temporaries for the message-register shuffles.  T reuses r9
   (LTrip) and U reuses r10 (the driver's InAlign). */
461238384Sjkim#define T	r9
462238384Sjkim#define U	r10
463238384Sjkim#define V	r11
464238384Sjkim
// COMPUTE(a, b, s, M, R): second half of a G/H/I step.
//
// Expects the step sum in Z.  Loads the next round constant into TRound,
// rotates the low 32 bits of Z left by s (dep.z copies the low word of Z
// into the upper half of Y, shrp then shifts the Z:Y pair right by
// 64 - s), sets a = Z + b and copies M into R.  R is one of the RotateMn
// registers; presumably this carries the word along when br.ctop rotates
// the frame.  Clobbers Y.
465238384Sjkim#define COMPUTE(a, b, s, M, R)			\
466238384Sjkim{						\
467238384Sjkim	.mii ;					\
468238384Sjkim	ld4 TRound = [TPtr], 4 ;		\
469238384Sjkim	dep.z Y = Z, 32, 32 ;;			\
470238384Sjkim	shrp Z = Z, Y, 64 - s ;			\
471238384Sjkim} ;;						\
472238384Sjkim{						\
473238384Sjkim	.mmi ;					\
474238384Sjkim	add a = Z, b ;				\
475238384Sjkim	mov R = M ;				\
476238384Sjkim	nop 0x0 ;				\
477238384Sjkim} ;;
478238384Sjkim
// LOOP(a, b, s, M, R, label): same work as COMPUTE, but the final bundle
// ends with br.ctop to label, closing one trip of the counted loop.
479238384Sjkim#define LOOP(a, b, s, M, R, label)		\
480238384Sjkim{	.mii ;					\
481238384Sjkim	ld4 TRound = [TPtr], 4 ;		\
482238384Sjkim	dep.z Y = Z, 32, 32 ;;			\
483238384Sjkim	shrp Z = Z, Y, 64 - s ;			\
484238384Sjkim} ;;						\
485238384Sjkim{	.mib ;					\
486238384Sjkim	add a = Z, b ;				\
487238384Sjkim	mov R = M ;				\
488238384Sjkim	br.ctop.sptk.many label ;		\
489238384Sjkim} ;;
490238384Sjkim
491238384Sjkim// G(B, C, D) = (B & D) | (C & ~D)
//
// Leaves Z = M + TRound + a + G(b, c, d).  Clobbers X and Y.
492238384Sjkim
493238384Sjkim#define G(a, b, c, d, M)			\
494238384Sjkim{	.mmi ;					\
495238384Sjkim	add Z = M, TRound ;			\
496238384Sjkim	and Y = b, d ;				\
497238384Sjkim	andcm X = c, d ;			\
498238384Sjkim} ;;						\
499238384Sjkim{	.mii ;					\
500238384Sjkim	add Z = Z, a ;				\
501238384Sjkim	or Y = Y, X ;;				\
502238384Sjkim	add Z = Z, Y ;				\
503238384Sjkim} ;;
504238384Sjkim
505238384Sjkim// H(B, C, D) = B ^ C ^ D
//
// Leaves Z = M + TRound + a + H(b, c, d).  Clobbers Y.
506238384Sjkim
507238384Sjkim#define H(a, b, c, d, M)			\
508238384Sjkim{	.mmi ;					\
509238384Sjkim	add Z = M, TRound ;			\
510238384Sjkim	xor Y = b, c ;				\
511238384Sjkim	nop 0x0 ;				\
512238384Sjkim} ;;						\
513238384Sjkim{	.mii ;					\
514238384Sjkim	add Z = Z, a ;				\
515238384Sjkim	xor Y = Y, d ;;				\
516238384Sjkim	add Z = Z, Y ;				\
517238384Sjkim} ;;
518238384Sjkim
519238384Sjkim// I(B, C, D) = C ^ (B | ~D)
520238384Sjkim//
521238384Sjkim// However, since we have an andcm operator, we use the fact that
522238384Sjkim//
523238384Sjkim// Y ^ Z == ~Y ^ ~Z
524238384Sjkim//
525238384Sjkim// to rewrite the expression as
526238384Sjkim//
527238384Sjkim// I(B, C, D) = ~C ^ (~B & D)
//
// Below, Y = d & ~b and X = ~c (andcm with -1 as the first operand).
// Leaves Z = M + TRound + a + I(b, c, d).  Clobbers X and Y.
528238384Sjkim
529238384Sjkim#define I(a, b, c, d, M)			\
530238384Sjkim{	.mmi ;					\
531238384Sjkim	add Z = M, TRound ;			\
532238384Sjkim	andcm Y = d, b ;			\
533238384Sjkim	andcm X = -1, c ;			\
534238384Sjkim} ;;						\
535238384Sjkim{	.mii ;					\
536238384Sjkim	add Z = Z, a ;				\
537238384Sjkim	xor Y = Y, X ;;				\
538238384Sjkim	add Z = Z, Y ;				\
539238384Sjkim} ;;
540238384Sjkim
// GG4(label): four G steps (shifts 5, 9, 14, 20) on M0..M3 with the
// A/B/C/D roles cycling; the last step ends in br.ctop to label.
541238384Sjkim#define GG4(label)				\
542238384Sjkim	G(A, B, C, D, M0)			\
543238384Sjkim	COMPUTE(A, B, 5, M0, RotateM0)		\
544238384Sjkim	G(D, A, B, C, M1)			\
545238384Sjkim	COMPUTE(D, A, 9, M1, RotateM1)		\
546238384Sjkim	G(C, D, A, B, M2)			\
547238384Sjkim	COMPUTE(C, D, 14, M2, RotateM2)		\
548238384Sjkim	G(B, C, D, A, M3)			\
549238384Sjkim	LOOP(B, C, 20, M3, RotateM3, label)
550238384Sjkim
// HH4(label): four H steps (shifts 4, 11, 16, 23) on M0..M3 with the
// A/B/C/D roles cycling; the last step ends in br.ctop to label.
551238384Sjkim#define HH4(label)				\
552238384Sjkim	H(A, B, C, D, M0)			\
553238384Sjkim	COMPUTE(A, B, 4, M0, RotateM0)		\
554238384Sjkim	H(D, A, B, C, M1)			\
555238384Sjkim	COMPUTE(D, A, 11, M1, RotateM1)		\
556238384Sjkim	H(C, D, A, B, M2)			\
557238384Sjkim	COMPUTE(C, D, 16, M2, RotateM2)		\
558238384Sjkim	H(B, C, D, A, M3)			\
559238384Sjkim	LOOP(B, C, 23, M3, RotateM3, label)
560238384Sjkim
// II4(label): four I steps (shifts 6, 10, 15, 21) on M0..M3 with the
// A/B/C/D roles cycling; the last step ends in br.ctop to label.
561238384Sjkim#define II4(label)				\
562238384Sjkim	I(A, B, C, D, M0)			\
563238384Sjkim	COMPUTE(A, B, 6, M0, RotateM0)		\
564238384Sjkim	I(D, A, B, C, M1)			\
565238384Sjkim	COMPUTE(D, A, 10, M1, RotateM1)		\
566238384Sjkim	I(C, D, A, B, M2)			\
567238384Sjkim	COMPUTE(C, D, 15, M2, RotateM2)		\
568238384Sjkim	I(B, C, D, A, M3)			\
569238384Sjkim	LOOP(B, C, 21, M3, RotateM3, label)
570238384Sjkim
// FFLOAD(a, b, c, d, M, N, s): one F-round step for aligned input.
//
// While pMore is set, loads the next message word from DPtr into N.
// Computes Z = M + TRound + a + F(b, c, d) with
// F(b, c, d) = (b & c) | (~b & d), fetches the next round constant, then
// sets a = b + (low 32 bits of Z rotated left by s).  Clobbers X and Y.
571238384Sjkim#define FFLOAD(a, b, c, d, M, N, s)		\
572238384Sjkim{	.mii ;					\
573238384Sjkim(pMore) ld4 N = [DPtr], 4 ;			\
574238384Sjkim	add Z = M, TRound ;			\
575238384Sjkim	and Y = c, b ;				\
576238384Sjkim}						\
577238384Sjkim{	.mmi ;					\
578238384Sjkim	andcm X = d, b ;;			\
579238384Sjkim	add Z = Z, a ;				\
580238384Sjkim	or Y = Y, X ;				\
581238384Sjkim} ;;						\
582238384Sjkim{	.mii ;					\
583238384Sjkim	ld4 TRound = [TPtr], 4 ;		\
584238384Sjkim	add Z = Z, Y ;;				\
585238384Sjkim	dep.z Y = Z, 32, 32 ;			\
586238384Sjkim} ;;						\
587238384Sjkim{	.mii ;					\
588238384Sjkim	nop 0x0 ;				\
589238384Sjkim	shrp Z = Z, Y, 64 - s ;;		\
590238384Sjkim	add a = Z, b ;				\
591238384Sjkim} ;;
592238384Sjkim
// FFLOOP(a, b, c, d, M, N, s, dest): the fourth F step of a trip.
//
// Same computation as FFLOAD, followed by the loop bookkeeping: pMore is
// recomputed as (LTrip != 0), LTrip is decremented, and br.ctop branches
// back to dest while the loop count lasts.
593238384Sjkim#define FFLOOP(a, b, c, d, M, N, s, dest)	\
594238384Sjkim{	.mii ;					\
595238384Sjkim(pMore)	ld4 N = [DPtr], 4 ;			\
596238384Sjkim	add Z = M, TRound ;			\
597238384Sjkim	and Y = c, b ;				\
598238384Sjkim}						\
599238384Sjkim{	.mmi ;					\
600238384Sjkim	andcm X = d, b ;;			\
601238384Sjkim	add Z = Z, a ;				\
602238384Sjkim	or Y = Y, X ;				\
603238384Sjkim} ;;						\
604238384Sjkim{	.mii ;					\
605238384Sjkim	ld4 TRound = [TPtr], 4 ;		\
606238384Sjkim	add Z = Z, Y ;;				\
607238384Sjkim	dep.z Y = Z, 32, 32 ;			\
608238384Sjkim} ;;						\
609238384Sjkim{	.mii ;					\
610238384Sjkim	nop 0x0 ;				\
611238384Sjkim	shrp Z = Z, Y, 64 - s ;;		\
612238384Sjkim	add a = Z, b ;				\
613238384Sjkim}						\
614238384Sjkim{	.mib ;					\
615238384Sjkim	cmp.ne pMore, p0 = 0, LTrip ;		\
616238384Sjkim	add LTrip = -1, LTrip ;			\
617238384Sjkim	br.ctop.dptk.many dest ;		\
618238384Sjkim} ;;
619238384Sjkim
//	md5_digest_block0: F round for aligned input.
//
//	Called from the driver with the return link in QUICK_RTN.  Runs the
//	four-step F body as a counted loop (ar.lc = 3, ar.ec = 0).  pMore
//	starts true and, with LTrip starting at 2, stays true through the
//	first three trips, so the remaining twelve message words are loaded
//	while the first twelve steps execute.  Does not return by itself:
//	execution continues into md5_digest_GHI.
620238384Sjkim	.type md5_digest_block0, @function
621238384Sjkim	.align 32
622238384Sjkim
623238384Sjkim	.proc md5_digest_block0
624238384Sjkim	.prologue
625238384Sjkimmd5_digest_block0:
626238384Sjkim	.altrp QUICK_RTN
627238384Sjkim	.body
//	Re-declare the frame: the driver outs become inputs, 24 rotating.
628238384Sjkim{	.mmi
629238384Sjkim	alloc PFS = ar.pfs, _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE
630238384Sjkim	mov LTrip = 2
631238384Sjkim	mov ar.lc = 3
632238384Sjkim} ;;
//	pMore = true (r0 == r0).
633238384Sjkim{	.mii
634238384Sjkim	cmp.eq pMore, p0 = r0, r0
635238384Sjkim	mov ar.ec = 0
636238384Sjkim	nop 0x0
637238384Sjkim} ;;
638238384Sjkim
639238384Sjkim.md5_FF_round0:
640238384Sjkim	FFLOAD(A, B, C, D, M12, RotateM0, 7)
641238384Sjkim	FFLOAD(D, A, B, C, M13, RotateM1, 12)
642238384Sjkim	FFLOAD(C, D, A, B, M14, RotateM2, 17)
643238384Sjkim	FFLOOP(B, C, D, A, M15, RotateM3, 22, .md5_FF_round0)
644238384Sjkim	//
645238384Sjkim	// !!! Fall through to md5_digest_GHI
646238384Sjkim	//
647238384Sjkim	.endp md5_digest_block0
648238384Sjkim
//	md5_digest_GHI: the G, H and I rounds of the block transform.
//
//	Reached by fall-through from md5_digest_block0 and by branch from
//	the unaligned md5_digest_block<n> variants; returns to the driver
//	through QUICK_RTN.  Before each round the sixteen message registers
//	are permuted (using Z, Y, X, W, V, U and T as temporaries), then the
//	GG4 / HH4 / II4 body runs as a four-trip counted loop
//	(ar.lc = 3, ar.ec = 1).
649238384Sjkim	.type md5_digest_GHI, @function
650238384Sjkim	.align 32
651238384Sjkim
652238384Sjkim	.proc md5_digest_GHI
653238384Sjkim	.prologue
654238384Sjkim	.regstk _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE
655238384Sjkimmd5_digest_GHI:
656238384Sjkim	.altrp QUICK_RTN
657238384Sjkim	.body
658238384Sjkim//
659238384Sjkim// The following sequence shuffles the block constants round for the
660238384Sjkim// next round:
661238384Sjkim//
662238384Sjkim// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
663238384Sjkim// 1 6 11 0 5 10 15 4 9 14 3 8 13 2 7 12
664238384Sjkim//
665238384Sjkim{	.mmi
666238384Sjkim	mov Z = M0
667238384Sjkim	mov Y = M15
668238384Sjkim	mov ar.lc = 3
669238384Sjkim}
670238384Sjkim{	.mmi
671238384Sjkim	mov X = M2
672238384Sjkim	mov W = M9
673238384Sjkim	mov V = M4
674238384Sjkim} ;;
675238384Sjkim
676238384Sjkim{	.mmi
677238384Sjkim	mov M0 = M1
678238384Sjkim	mov M15 = M12
679238384Sjkim	mov ar.ec = 1
680238384Sjkim}
681238384Sjkim{	.mmi
682238384Sjkim	mov M2 = M11
683238384Sjkim	mov M9 = M14
684238384Sjkim	mov M4 = M5
685238384Sjkim} ;;
686238384Sjkim
687238384Sjkim{	.mmi
688238384Sjkim	mov M1 = M6
689238384Sjkim	mov M12 = M13
690238384Sjkim	mov U = M3
691238384Sjkim}
692238384Sjkim{	.mmi
693238384Sjkim	mov M11 = M8
694238384Sjkim	mov M14 = M7
695238384Sjkim	mov M5 = M10
696238384Sjkim} ;;
697238384Sjkim
698238384Sjkim{	.mmi
699238384Sjkim	mov M6 = Y
700238384Sjkim	mov M13 = X
701238384Sjkim	mov M3 = Z
702238384Sjkim}
703238384Sjkim{	.mmi
704238384Sjkim	mov M8 = W
705238384Sjkim	mov M7 = V
706238384Sjkim	mov M10 = U
707238384Sjkim} ;;
708238384Sjkim
709238384Sjkim.md5_GG_round:
710238384Sjkim	GG4(.md5_GG_round)
711238384Sjkim
712238384Sjkim// The following sequence shuffles the block constants round for the
713238384Sjkim// next round:
714238384Sjkim//
715238384Sjkim// 1 6 11 0 5 10 15 4 9 14 3 8 13 2 7 12
716238384Sjkim// 5 8 11 14 1 4 7 10 13 0 3 6 9 12 15 2
717238384Sjkim
718238384Sjkim{	.mmi
719238384Sjkim	mov Z = M0
720238384Sjkim	mov Y = M1
721238384Sjkim	mov ar.lc = 3
722238384Sjkim}
723238384Sjkim{	.mmi
724238384Sjkim	mov X = M3
725238384Sjkim	mov W = M5
726238384Sjkim	mov V = M6
727238384Sjkim} ;;
728238384Sjkim
729238384Sjkim{	.mmi
730238384Sjkim	mov M0 = M4
731238384Sjkim	mov M1 = M11
732238384Sjkim	mov ar.ec = 1
733238384Sjkim}
734238384Sjkim{	.mmi
735238384Sjkim	mov M3 = M9
736238384Sjkim	mov U = M8
737238384Sjkim	mov T = M13
738238384Sjkim} ;;
739238384Sjkim
740238384Sjkim{	.mmi
741238384Sjkim	mov M4 = Z
742238384Sjkim	mov M11 = Y
743238384Sjkim	mov M5 = M7
744238384Sjkim}
745238384Sjkim{	.mmi
746238384Sjkim	mov M6 = M14
747238384Sjkim	mov M8 = M12
748238384Sjkim	mov M13 = M15
749238384Sjkim} ;;
750238384Sjkim
751238384Sjkim{	.mmi
752238384Sjkim	mov M7 = W
753238384Sjkim	mov M14 = V
754238384Sjkim	nop 0x0
755238384Sjkim}
756238384Sjkim{	.mmi
757238384Sjkim	mov M9 = X
758238384Sjkim	mov M12 = U
759238384Sjkim	mov M15 = T
760238384Sjkim} ;;
761238384Sjkim
762238384Sjkim.md5_HH_round:
763238384Sjkim	HH4(.md5_HH_round)
764238384Sjkim
765238384Sjkim// The following sequence shuffles the block constants round for the
766238384Sjkim// next round:
767238384Sjkim//
768238384Sjkim// 5 8 11 14 1 4 7 10 13 0 3 6 9 12 15 2
769238384Sjkim// 0 7 14 5 12 3 10 1 8 15 6 13 4 11 2 9
770238384Sjkim
771238384Sjkim{	.mmi
772238384Sjkim	mov Z = M0
773238384Sjkim	mov Y = M15
774238384Sjkim	mov ar.lc = 3
775238384Sjkim}
776238384Sjkim{	.mmi
777238384Sjkim	mov X = M10
778238384Sjkim	mov W = M1
779238384Sjkim	mov V = M4
780238384Sjkim} ;;
781238384Sjkim
782238384Sjkim{	.mmi
783238384Sjkim	mov M0 = M9
784238384Sjkim	mov M15 = M12
785238384Sjkim	mov ar.ec = 1
786238384Sjkim}
787238384Sjkim{	.mmi
788238384Sjkim	mov M10 = M11
789238384Sjkim	mov M1 = M6
790238384Sjkim	mov M4 = M13
791238384Sjkim} ;;
792238384Sjkim
793238384Sjkim{	.mmi
794238384Sjkim	mov M9 = M14
795238384Sjkim	mov M12 = M5
796238384Sjkim	mov U = M3
797238384Sjkim}
798238384Sjkim{	.mmi
799238384Sjkim	mov M11 = M8
800238384Sjkim	mov M6 = M7
801238384Sjkim	mov M13 = M2
802238384Sjkim} ;;
803238384Sjkim
804238384Sjkim{	.mmi
805238384Sjkim	mov M14 = Y
806238384Sjkim	mov M5 = X
807238384Sjkim	mov M3 = Z
808238384Sjkim}
809238384Sjkim{	.mmi
810238384Sjkim	mov M8 = W
811238384Sjkim	mov M7 = V
812238384Sjkim	mov M2 = U
813238384Sjkim} ;;
814238384Sjkim
815238384Sjkim.md5_II_round:
816238384Sjkim	II4(.md5_II_round)
817238384Sjkim
//	Return to the driver; A..D now hold the values it adds to the
//	accumulators.
818238384Sjkim{	.mib
819238384Sjkim	nop 0x0
820238384Sjkim	nop 0x0
821238384Sjkim	br.ret.sptk.many QUICK_RTN
822238384Sjkim} ;;
823238384Sjkim
824238384Sjkim	.endp md5_digest_GHI
825238384Sjkim
// FFLOADU(a, b, c, d, M, P, N, s, offset): F-round step for input that
// is misaligned by offset bytes.
//
// Performs the same F step as FFLOAD on M (with the predicated load of
// the next raw word into N) and, interleaved with it, realigns the raw
// word held in P: W = GETLW(P) | DTmp becomes the new P, and GETRW(P) is
// left in DTmp as the carry for the next word.  Clobbers W, X and Y.
826238384Sjkim#define FFLOADU(a, b, c, d, M, P, N, s, offset)	\
827238384Sjkim{	.mii ;					\
828238384Sjkim(pMore) ld4 N = [DPtr], 4 ;			\
829238384Sjkim	add Z = M, TRound ;			\
830238384Sjkim	and Y = c, b ;				\
831238384Sjkim}						\
832238384Sjkim{	.mmi ;					\
833238384Sjkim	andcm X = d, b ;;			\
834238384Sjkim	add Z = Z, a ;				\
835238384Sjkim	or Y = Y, X ;				\
836238384Sjkim} ;;						\
837238384Sjkim{	.mii ;					\
838238384Sjkim	ld4 TRound = [TPtr], 4 ;		\
839238384Sjkim	GETLW(W, P, offset) ;			\
840238384Sjkim	add Z = Z, Y ;				\
841238384Sjkim} ;;						\
842238384Sjkim{	.mii ;					\
843238384Sjkim	or W = W, DTmp ;			\
844238384Sjkim	dep.z Y = Z, 32, 32 ;;			\
845238384Sjkim	shrp Z = Z, Y, 64 - s ;			\
846238384Sjkim} ;;						\
847238384Sjkim{	.mii ;					\
848238384Sjkim	add a = Z, b ;				\
849238384Sjkim	GETRW(DTmp, P, offset) ;		\
850238384Sjkim	mov P = W ;				\
851238384Sjkim} ;;
852238384Sjkim
// FFLOOPU(a, b, c, d, M, P, N, s, offset): the fourth F step of a trip
// for misaligned input.
//
// Same as FFLOADU, except that the realignment of P and the DTmp update
// are predicated on pMore, so DTmp is left untouched once loading has
// stopped.  Ends with the loop bookkeeping: pMore = (LTrip != 0), LTrip is
// decremented, and br.ctop branches to .md5_FF_round<offset>.
853238384Sjkim#define FFLOOPU(a, b, c, d, M, P, N, s, offset)		\
854238384Sjkim{	.mii ;						\
855238384Sjkim(pMore) ld4 N = [DPtr], 4 ;				\
856238384Sjkim	add Z = M, TRound ;				\
857238384Sjkim	and Y = c, b ;					\
858238384Sjkim}							\
859238384Sjkim{	.mmi ;						\
860238384Sjkim	andcm X = d, b ;;				\
861238384Sjkim	add Z = Z, a ;					\
862238384Sjkim	or Y = Y, X ;					\
863238384Sjkim} ;;							\
864238384Sjkim{	.mii ;						\
865238384Sjkim	ld4 TRound = [TPtr], 4 ;			\
866238384Sjkim(pMore) GETLW(W, P, offset) 	;			\
867238384Sjkim	add Z = Z, Y ;					\
868238384Sjkim} ;;							\
869238384Sjkim{	.mii ;						\
870238384Sjkim(pMore) or W = W, DTmp ;				\
871238384Sjkim	dep.z Y = Z, 32, 32 ;;				\
872238384Sjkim	shrp Z = Z, Y, 64 - s ;				\
873238384Sjkim} ;;							\
874238384Sjkim{	.mii ;						\
875238384Sjkim	add a = Z, b ;					\
876238384Sjkim(pMore) GETRW(DTmp, P, offset) 	;			\
877238384Sjkim(pMore) mov P = W ;					\
878238384Sjkim}							\
879238384Sjkim{	.mib ;						\
880238384Sjkim	cmp.ne pMore, p0 = 0, LTrip ;			\
881238384Sjkim	add LTrip = -1, LTrip ;				\
882238384Sjkim	br.ctop.sptk.many .md5_FF_round##offset ;	\
883238384Sjkim} ;;
884238384Sjkim
// MD5FBLOCK(offset): defines md5_digest_block<offset>, the F round for
// input misaligned by offset bytes.
//
// Setup matches md5_digest_block0 (same alloc, LTrip = 2, ar.lc = 3,
// ar.ec = 0, pMore initially true).  The loop body uses FFLOADU/FFLOOPU so
// each raw word is realigned one step before it is consumed, and the
// routine finishes by branching to md5_digest_GHI.  Instantiated below
// for offsets 1, 2 and 3.
885238384Sjkim#define MD5FBLOCK(offset)						\
886238384Sjkim	.type md5_digest_block##offset, @function ;			\
887238384Sjkim									\
888238384Sjkim	.align 32 ;							\
889238384Sjkim	.proc md5_digest_block##offset ;				\
890238384Sjkim	.prologue ;							\
891238384Sjkim	.altrp QUICK_RTN ;						\
892238384Sjkim	.body ;								\
893238384Sjkimmd5_digest_block##offset:						\
894238384Sjkim{	.mmi ;								\
895238384Sjkim	alloc PFS = ar.pfs, _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE ;	\
896238384Sjkim	mov LTrip = 2 ;							\
897238384Sjkim	mov ar.lc = 3 ;							\
898238384Sjkim} ;;									\
899238384Sjkim{	.mii ;								\
900238384Sjkim	cmp.eq pMore, p0 = r0, r0 ;					\
901238384Sjkim	mov ar.ec = 0 ;							\
902238384Sjkim	nop 0x0 ;							\
903238384Sjkim} ;;									\
904238384Sjkim									\
905238384Sjkim	.pred.rel "mutex", pLoad, pSkip ;				\
906238384Sjkim.md5_FF_round##offset:							\
907238384Sjkim	FFLOADU(A, B, C, D, M12, M13, RotateM0, 7, offset)		\
908238384Sjkim	FFLOADU(D, A, B, C, M13, M14, RotateM1, 12, offset)		\
909238384Sjkim	FFLOADU(C, D, A, B, M14, M15, RotateM2, 17, offset)		\
910238384Sjkim	FFLOOPU(B, C, D, A, M15, RotateM0, RotateM3, 22, offset)	\
911238384Sjkim									\
912238384Sjkim{	.mib ;								\
913238384Sjkim	nop 0x0 ;							\
914238384Sjkim	nop 0x0 ;							\
915238384Sjkim	br.cond.sptk.many md5_digest_GHI ;				\
916238384Sjkim} ;;									\
917238384Sjkim	.endp md5_digest_block##offset
918238384Sjkim
919238384SjkimMD5FBLOCK(1)
920238384SjkimMD5FBLOCK(2)
921238384SjkimMD5FBLOCK(3)
922238384Sjkim
//	md5_constants: the 64 per-step additive constants, four bytes each
//	(see .size below), in the order the steps consume them.  These are
//	the T[1..64] values of RFC 1321.  The driver preloads entry 0 into
//	CTable0 and leaves CTable at entry 1; every step then fetches the
//	following entry through TPtr with post-increment.
923238384Sjkim	.align 64
924238384Sjkim	.type md5_constants, @object
925238384Sjkimmd5_constants:
926238384Sjkim.md5_tbl_data_order:			// To ensure little-endian data
927238384Sjkim					// order, code as bytes.
928238384Sjkim	data1 0x78, 0xa4, 0x6a, 0xd7	//     0
929238384Sjkim	data1 0x56, 0xb7, 0xc7, 0xe8	//     1
930238384Sjkim	data1 0xdb, 0x70, 0x20, 0x24	//     2
931238384Sjkim	data1 0xee, 0xce, 0xbd, 0xc1	//     3
932238384Sjkim	data1 0xaf, 0x0f, 0x7c, 0xf5	//     4
933238384Sjkim	data1 0x2a, 0xc6, 0x87, 0x47	//     5
934238384Sjkim	data1 0x13, 0x46, 0x30, 0xa8	//     6
935238384Sjkim	data1 0x01, 0x95, 0x46, 0xfd	//     7
936238384Sjkim	data1 0xd8, 0x98, 0x80, 0x69	//     8
937238384Sjkim	data1 0xaf, 0xf7, 0x44, 0x8b	//     9
938238384Sjkim	data1 0xb1, 0x5b, 0xff, 0xff	//    10
939238384Sjkim	data1 0xbe, 0xd7, 0x5c, 0x89	//    11
940238384Sjkim	data1 0x22, 0x11, 0x90, 0x6b	//    12
941238384Sjkim	data1 0x93, 0x71, 0x98, 0xfd	//    13
942238384Sjkim	data1 0x8e, 0x43, 0x79, 0xa6	//    14
943238384Sjkim	data1 0x21, 0x08, 0xb4, 0x49	//    15
944238384Sjkim	data1 0x62, 0x25, 0x1e, 0xf6	//    16
945238384Sjkim	data1 0x40, 0xb3, 0x40, 0xc0	//    17
946238384Sjkim	data1 0x51, 0x5a, 0x5e, 0x26	//    18
947238384Sjkim	data1 0xaa, 0xc7, 0xb6, 0xe9	//    19
948238384Sjkim	data1 0x5d, 0x10, 0x2f, 0xd6	//    20
949238384Sjkim	data1 0x53, 0x14, 0x44, 0x02	//    21
950238384Sjkim	data1 0x81, 0xe6, 0xa1, 0xd8	//    22
951238384Sjkim	data1 0xc8, 0xfb, 0xd3, 0xe7	//    23
952238384Sjkim	data1 0xe6, 0xcd, 0xe1, 0x21	//    24
953238384Sjkim	data1 0xd6, 0x07, 0x37, 0xc3	//    25
954238384Sjkim	data1 0x87, 0x0d, 0xd5, 0xf4	//    26
955238384Sjkim	data1 0xed, 0x14, 0x5a, 0x45	//    27
956238384Sjkim	data1 0x05, 0xe9, 0xe3, 0xa9	//    28
957238384Sjkim	data1 0xf8, 0xa3, 0xef, 0xfc	//    29
958238384Sjkim	data1 0xd9, 0x02, 0x6f, 0x67	//    30
959238384Sjkim	data1 0x8a, 0x4c, 0x2a, 0x8d	//    31
960238384Sjkim	data1 0x42, 0x39, 0xfa, 0xff	//    32
961238384Sjkim	data1 0x81, 0xf6, 0x71, 0x87	//    33
962238384Sjkim	data1 0x22, 0x61, 0x9d, 0x6d	//    34
963238384Sjkim	data1 0x0c, 0x38, 0xe5, 0xfd	//    35
964238384Sjkim	data1 0x44, 0xea, 0xbe, 0xa4	//    36
965238384Sjkim	data1 0xa9, 0xcf, 0xde, 0x4b	//    37
966238384Sjkim	data1 0x60, 0x4b, 0xbb, 0xf6	//    38
967238384Sjkim	data1 0x70, 0xbc, 0xbf, 0xbe	//    39
968238384Sjkim	data1 0xc6, 0x7e, 0x9b, 0x28	//    40
969238384Sjkim	data1 0xfa, 0x27, 0xa1, 0xea	//    41
970238384Sjkim	data1 0x85, 0x30, 0xef, 0xd4	//    42
971238384Sjkim	data1 0x05, 0x1d, 0x88, 0x04	//    43
972238384Sjkim	data1 0x39, 0xd0, 0xd4, 0xd9	//    44
973238384Sjkim	data1 0xe5, 0x99, 0xdb, 0xe6	//    45
974238384Sjkim	data1 0xf8, 0x7c, 0xa2, 0x1f	//    46
975238384Sjkim	data1 0x65, 0x56, 0xac, 0xc4	//    47
976238384Sjkim	data1 0x44, 0x22, 0x29, 0xf4	//    48
977238384Sjkim	data1 0x97, 0xff, 0x2a, 0x43	//    49
978238384Sjkim	data1 0xa7, 0x23, 0x94, 0xab	//    50
979238384Sjkim	data1 0x39, 0xa0, 0x93, 0xfc	//    51
980238384Sjkim	data1 0xc3, 0x59, 0x5b, 0x65	//    52
981238384Sjkim	data1 0x92, 0xcc, 0x0c, 0x8f	//    53
982238384Sjkim	data1 0x7d, 0xf4, 0xef, 0xff	//    54
983238384Sjkim	data1 0xd1, 0x5d, 0x84, 0x85	//    55
984238384Sjkim	data1 0x4f, 0x7e, 0xa8, 0x6f	//    56
985238384Sjkim	data1 0xe0, 0xe6, 0x2c, 0xfe	//    57
986238384Sjkim	data1 0x14, 0x43, 0x01, 0xa3	//    58
987238384Sjkim	data1 0xa1, 0x11, 0x08, 0x4e	//    59
988238384Sjkim	data1 0x82, 0x7e, 0x53, 0xf7	//    60
989238384Sjkim	data1 0x35, 0xf2, 0x3a, 0xbd	//    61
990238384Sjkim	data1 0xbb, 0xd2, 0xd7, 0x2a	//    62
991238384Sjkim	data1 0x91, 0xd3, 0x86, 0xeb	//    63
992238384Sjkim.size	md5_constants#,64*4
993