/* Copyright (c) 2005 Hewlett-Packard Development Company, L.P.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */

//	Common registers are assigned as follows:
//
//	COMMON
//
//	t0		Const Tbl Ptr	TPtr
//	t1		Round Constant	TRound
//	t4		Block residual	LenResid
//	t5		Residual Data	DTmp
//
//	{in,out}0	Block 0 Cycle	RotateM0
//	{in,out}1	Block Value 12	M12
//	{in,out}2	Block Value 8	M8
//	{in,out}3	Block Value 4	M4
//	{in,out}4	Block Value 0	M0
//	{in,out}5	Block 1 Cycle	RotateM1
//	{in,out}6	Block Value 13	M13
//	{in,out}7	Block Value 9	M9
//	{in,out}8	Block Value 5	M5
//	{in,out}9	Block Value 1	M1
//	{in,out}10	Block 2 Cycle	RotateM2
//	{in,out}11	Block Value 14	M14
//	{in,out}12	Block Value 10	M10
//	{in,out}13	Block Value 6	M6
//	{in,out}14	Block Value 2	M2
//	{in,out}15	Block 3 Cycle	RotateM3
//	{in,out}16	Block Value 15	M15
//	{in,out}17	Block Value 11	M11
//	{in,out}18	Block Value 7	M7
//	{in,out}19	Block Value 3	M3
//	{in,out}20	Scratch		Z
//	{in,out}21	Scratch		Y
//	{in,out}22	Scratch		X
//	{in,out}23	Scratch		W
//	{in,out}24	Digest A	A
//	{in,out}25	Digest B	B
//	{in,out}26	Digest C	C
//	{in,out}27	Digest D	D
//	{in,out}28	Active Data Ptr	DPtr
//	in28		Dummy Value	-
//	out28		Dummy Value	-
//	bt0		Coroutine Link	QUICK_RTN
//
///	These predicates are used for computing the padding block(s) and
///	are shared between the driver and digest co-routines
//
//	pt0		Extra Pad Block	pExtra
//	pt1		Load next word	pLoad
//	pt2		Skip next word	pSkip
//	pt3		Search for Pad	pNoPad
//	pt4		Pad Word 0	pPad0
//	pt5		Pad Word 1	pPad1
//	pt6		Pad Word 2	pPad2
//	pt7		Pad Word 3	pPad3

// Register and predicate aliases shared by the driver and the digest
// helpers.

#define	DTmp		r19
#define	LenResid	r18
#define	QUICK_RTN	b6
#define	TPtr		r14
#define	TRound		r15
#define	pExtra		p6
#define	pLoad		p7
#define	pNoPad		p9
#define	pPad0		p10
#define	pPad1		p11
#define	pPad2		p12
#define	pPad3		p13
#define	pSkip		p8

// Names with a trailing underscore are the driver's view of the shared
// registers (its output registers).

#define	A_		out24
#define	B_		out25
#define	C_		out26
#define	D_		out27
#define	DPtr_		out28
#define	M0_		out4
#define	M1_		out9
#define	M10_		out12
#define	M11_		out17
#define	M12_		out1
#define	M13_		out6
#define	M14_		out11
#define	M15_		out16
#define	M2_		out14
#define	M3_		out19
#define	M4_		out3
#define	M5_		out8
#define	M6_		out13
#define	M7_		out18
#define	M8_		out2
#define	M9_		out7
#define	RotateM0_	out0
#define	RotateM1_	out5
#define	RotateM2_	out10
#define	RotateM3_	out15
#define	W_		out23
#define	X_		out22
#define	Y_		out21
#define	Z_		out20

// Names without the underscore are the helpers' view of the same
// registers (their input registers); each inN below pairs with the outN
// of the same number above.

#define	A		in24
#define	B		in25
#define	C		in26
#define	D		in27
#define	DPtr		in28
#define	M0		in4
#define	M1		in9
#define	M10		in12
#define	M11		in17
#define	M12		in1
#define	M13		in6
#define	M14		in11
#define	M15		in16
#define	M2		in14
#define	M3		in19
#define	M4		in3
#define	M5		in8
#define	M6		in13
#define	M7		in18
#define	M8		in2
#define	M9		in7
#define	RotateM0	in0
#define	RotateM1	in5
#define	RotateM2	in10
#define	RotateM3	in15
#define	W		in23
#define	X		in22
#define	Y		in21
#define	Z		in20

/* register stack configuration for md5_block_asm_data_order(): */
#define	MD5_NINP	3
#define	MD5_NLOC	0
#define	MD5_NOUT	29
#define	MD5_NROT	0

/* register stack configuration for helpers: */
#define	_NINPUTS	MD5_NOUT
#define	_NLOCALS	0
#define	_NOUTPUT	0
#define	_NROTATE	24	/* this must be <= _NINPUTS */

// ADDP is the instruction used to form pointers from the incoming
// arguments: addp4 when building for HP-UX without _LP64, plain add
// otherwise.

#if defined(_HPUX_SOURCE) && !defined(_LP64)
#define	ADDP	addp4
#else
#define	ADDP	add
#endif

// HOST_IS_BIG_ENDIAN makes the driver clear psr.be around the digest
// code and set it again on exit.

#if defined(_HPUX_SOURCE) || defined(B_ENDIAN)
#define HOST_IS_BIG_ENDIAN
#endif

//	Macros for getting the left and right portions of little-endian words

#define	GETLW(dst, src, align)	dep.z dst = src, 32 - 8 * align, 8 * align
#define	GETRW(dst, src, align)	extr.u dst = src, 8 * align, 32 - 8 * align

//	MD5 driver
//
//		Reads an input block, then calls the digest block
//		subroutine and adds the results to the accumulated
//		digest.  It allocates MD5_NOUT (29) outs, for a frame of
//		32 registers together with its MD5_NINP (3) inputs, which
//		the subroutine uses as its inputs and rotating
//		registers.  Initializes the round constant pointer and
//		takes care of saving/restoring ar.lc
//
///	INPUT
//
//	in0		Context Ptr	CtxPtr0
//	in1		Input Data Ptr	DPtrIn
//	in2		Integral Blocks	BlockCount
//	rp		Return Address	-
//
///	CODE
//
//	v2		Input Align	InAlign
//	t0		Shared w/digest	-
//	t1		Shared w/digest	-
//	t2		Shared w/digest	-
//	t3		Shared w/digest	-
//	t4		Shared w/digest	-
//	t5		Shared w/digest	-
//	t6		PFS Save	PFSSave
//	t7		ar.lc Save	LCSave
//	t8		Saved PR	PRSave
//	t9		2nd CtxPtr	CtxPtr1
//	t10		Table Base	CTable
//	t11		Table[0]	CTable0
//	t13		Accumulator A	AccumA
//	t14		Accumulator B	AccumB
//	t15		Accumulator C	AccumC
//	t16		Accumulator D	AccumD
//	pt0		Shared w/digest	-
//	pt1		Shared w/digest	-
//	pt2		Shared w/digest	-
//	pt3		Shared w/digest	-
//	pt4		Shared w/digest	-
//	pt5		Shared w/digest	-
//	pt6		Shared w/digest	-
//	pt7		Shared w/digest	-
//	pt8		Not Aligned	pOff
//	pt8		Blocks Left	pAgain

// Driver-private aliases.  pAgain and pOff both name p63.

#define	AccumA		r27
#define	AccumB		r28
#define	AccumC		r29
#define	AccumD		r30
#define	CTable		r24
#define	CTable0		r25
#define	CtxPtr0		in0
#define	CtxPtr1		r23
#define	DPtrIn		in1
#define	BlockCount	in2
#define	InAlign		r10
#define	LCSave		r21
#define	PFSSave		r20
#define	PRSave		r22
#define	pAgain		p63
#define	pOff		p63
// Code: the block driver, the FF-round entry points (one per input
// alignment) and the shared GG/HH/II rounds.
	.text

/* md5_block_asm_data_order(MD5_CTX *c, const void *data, size_t num)

     where:
      c: a pointer to a structure of this type:

	   typedef struct MD5state_st
	     {
	       MD5_LONG A,B,C,D;
	       MD5_LONG Nl,Nh;
	       MD5_LONG data[MD5_LBLOCK];
	       unsigned int num;
	     }
	   MD5_CTX;

      data: a pointer to the input data (may be misaligned)
      num:  the number of 64-byte blocks to hash (i.e., the length
            of DATA is 64*NUM).  Each block is consumed as sixteen
            4-byte loads: four in the driver loop below and twelve
            in the FF round.

   */

	.type	md5_block_asm_data_order, @function
	.global	md5_block_asm_data_order
	.align	32
	.proc	md5_block_asm_data_order
md5_block_asm_data_order:
.md5_block:
	.prologue
{	.mmi
	.save	ar.pfs, PFSSave
	alloc	PFSSave = ar.pfs, MD5_NINP, MD5_NLOC, MD5_NOUT, MD5_NROT
	ADDP	CtxPtr1 = 8, CtxPtr0
	mov	CTable = ip
}
{	.mmi
	ADDP	DPtrIn = 0, DPtrIn
	ADDP	CtxPtr0 = 0, CtxPtr0
	.save	ar.lc, LCSave
	mov	LCSave = ar.lc
}
;;
// CTable was loaded from ip above; adding the label difference turns it
// into the address of the constant table.  InAlign is the low two bits
// of the data pointer.
{	.mmi
	add	CTable = .md5_tbl_data_order#-.md5_block#, CTable
	and	InAlign = 0x3, DPtrIn
}

// Load the digest: CtxPtr0 walks A then B, CtxPtr1 (context + 8) walks
// C then D.
{	.mmi
	ld4	AccumA = [CtxPtr0], 4
	ld4	AccumC = [CtxPtr1], 4
	.save pr, PRSave
	mov	PRSave = pr
	.body
}
;;
// DPtr_ is the data pointer with its low two bits cleared.
{	.mmi
	ld4	AccumB = [CtxPtr0]
	ld4	AccumD = [CtxPtr1]
	dep	DPtr_ = 0, DPtrIn, 0, 2
} ;;
#ifdef HOST_IS_BIG_ENDIAN
	rum	psr.be;;	// switch to little-endian
#endif
{	.mmb
	ld4	CTable0 = [CTable], 4
	cmp.ne	pOff, p0 = 0, InAlign
(pOff)	br.cond.spnt.many .md5_unaligned
} ;;

// The FF load/compute loop rotates values three times, so that
// loading into M12 here produces the M0 value, M13 -> M1, etc.

.md5_block_loop0:
{	.mmi
	ld4	M12_ = [DPtr_], 4
	mov	TPtr = CTable
	mov	TRound = CTable0
} ;;
{	.mmi
	ld4	M13_ = [DPtr_], 4
	mov	A_ = AccumA
	mov	B_ = AccumB
} ;;
{	.mmi
	ld4	M14_ = [DPtr_], 4
	mov	C_ = AccumC
	mov	D_ = AccumD
} ;;
{	.mmb
	ld4	M15_ = [DPtr_], 4
	add	BlockCount = -1, BlockCount
	br.call.sptk.many QUICK_RTN = md5_digest_block0
} ;;

// Now, we add the new digest values and do some clean-up
// before checking if there's another full block to process

{	.mmi
	add	AccumA = AccumA, A_
	add	AccumB = AccumB, B_
	cmp.ne	pAgain, p0 = 0, BlockCount
}
{	.mib
	add	AccumC = AccumC, C_
	add	AccumD = AccumD, D_
(pAgain) br.cond.dptk.many .md5_block_loop0
} ;;

// Common exit: store the digest back through the context pointers
// (B and D first, with a -4 post-increment, then A and C), and restore
// pr, ar.lc and ar.pfs.
.md5_exit:
#ifdef HOST_IS_BIG_ENDIAN
	sum	psr.be;;	// switch back to big-endian mode
#endif
{	.mmi
	st4	[CtxPtr0] = AccumB, -4
	st4	[CtxPtr1] = AccumD, -4
	mov	pr = PRSave, 0x1ffff ;;
}
{	.mmi
	st4	[CtxPtr0] = AccumA
	st4	[CtxPtr1] = AccumC
	mov	ar.lc = LCSave
} ;;
{	.mib
	mov	ar.pfs = PFSSave
	br.ret.sptk.few	rp
} ;;

// MD5UNALIGNED(offset) expands to the driver loop for input whose
// address has low bits == offset.  Each word handed to the digest is
// assembled from the residual bytes held in DTmp and the left part of
// the next aligned word; the digest entry point used is
// md5_digest_block<offset>.  The loop ends by branching to .md5_exit.

#define MD5UNALIGNED(offset) \
.md5_process##offset: \
{	.mib ; \
	nop	0x0 ; \
	GETRW(DTmp, DTmp, offset) ; \
} ;; \
.md5_block_loop##offset: \
{	.mmi ; \
	ld4	Y_ = [DPtr_], 4 ; \
	mov	TPtr = CTable ; \
	mov	TRound = CTable0 ; \
} ;; \
{	.mmi ; \
	ld4	M13_ = [DPtr_], 4 ; \
	mov	A_ = AccumA ; \
	mov	B_ = AccumB ; \
} ;; \
{	.mii ; \
	ld4	M14_ = [DPtr_], 4 ; \
	GETLW(W_, Y_, offset) ; \
	mov	C_ = AccumC ; \
} \
{	.mmi ; \
	mov	D_ = AccumD ;; \
	or	M12_ = W_, DTmp ; \
	GETRW(DTmp, Y_, offset) ; \
} \
{	.mib ; \
	ld4	M15_ = [DPtr_], 4 ; \
	add	BlockCount = -1, BlockCount ; \
	br.call.sptk.many QUICK_RTN = md5_digest_block##offset; \
} ;; \
{	.mmi ; \
	add	AccumA = AccumA, A_ ; \
	add	AccumB = AccumB, B_ ; \
	cmp.ne	pAgain, p0 = 0, BlockCount ; \
} \
{	.mib ; \
	add	AccumC = AccumC, C_ ; \
	add	AccumD = AccumD, D_ ; \
(pAgain) br.cond.dptk.many .md5_block_loop##offset ; \
} ;; \
{	.mib ; \
	nop	0x0 ; \
	nop	0x0 ; \
	br.cond.sptk.many .md5_exit ; \
} ;;

	.align	32
.md5_unaligned:
//
// Because variable shifts are expensive, we special case each of
// the four alignments. In practice, this won't hurt too much
// since only one working set of code will be loaded.
//
// Alignments 1 and 2 branch to their own copies; alignment 3 falls
// through into MD5UNALIGNED(3), which is expanded first below.
//
{	.mib
	ld4	DTmp = [DPtr_], 4
	cmp.eq	pOff, p0 = 1, InAlign
(pOff)	br.cond.dpnt.many .md5_process1
} ;;
{	.mib
	cmp.eq	pOff, p0 = 2, InAlign
	nop	0x0
(pOff)	br.cond.dpnt.many .md5_process2
} ;;
	MD5UNALIGNED(3)
	MD5UNALIGNED(1)
	MD5UNALIGNED(2)

	.endp md5_block_asm_data_order


// MD5 Perform the F function and load
//
// Passed the first 4 words (M0 - M3) and initial (A, B, C, D) values,
// computes the FF() round of functions, then branches to the common
// digest code to finish up with GG(), HH(), and II().
//
// INPUT
//
// rp Return Address -
//
// CODE
//
// v0 PFS bit bucket PFS
// v1 Loop Trip Count LTrip
// pt0 Load next word pMore

/* For F round: */
#define LTrip	r9
#define PFS	r8
#define pMore	p6

/* For GHI rounds: */
#define T	r9
#define U	r10
#define V	r11

// COMPUTE finishes one GG/HH/II step.  On entry Z holds the sum built by
// G/H/I.  dep.z + shrp rotate its low 32 bits left by s, b is added to
// give the new a, the next round constant is loaded into TRound, and the
// message word M is copied to R.

#define COMPUTE(a, b, s, M, R) \
{ \
	.mii ; \
	ld4 TRound = [TPtr], 4 ; \
	dep.z Y = Z, 32, 32 ;; \
	shrp Z = Z, Y, 64 - s ; \
} ;; \
{ \
	.mmi ; \
	add a = Z, b ; \
	mov R = M ; \
	nop 0x0 ; \
} ;;

// LOOP is COMPUTE with the nop replaced by the br.ctop that closes the
// four-step loop body.

#define LOOP(a, b, s, M, R, label) \
{	.mii ; \
	ld4 TRound = [TPtr], 4 ; \
	dep.z Y = Z, 32, 32 ;; \
	shrp Z = Z, Y, 64 - s ; \
} ;; \
{	.mib ; \
	add a = Z, b ; \
	mov R = M ; \
	br.ctop.sptk.many label ; \
} ;;

// G(B, C, D) = (B & D) | (C & ~D)

#define G(a, b, c, d, M) \
{	.mmi ; \
	add Z = M, TRound ; \
	and Y = b, d ; \
	andcm X = c, d ; \
} ;; \
{	.mii ; \
	add Z = Z, a ; \
	or Y = Y, X ;; \
	add Z = Z, Y ; \
} ;;

// H(B, C, D) = B ^ C ^ D

#define H(a, b, c, d, M) \
{	.mmi ; \
	add Z = M, TRound ; \
	xor Y = b, c ; \
	nop 0x0 ; \
} ;; \
{	.mii ; \
	add Z = Z, a ; \
	xor Y = Y, d ;; \
	add Z = Z, Y ; \
} ;;

// I(B, C, D) = C ^ (B | ~D)
//
// However, since we have an andcm operator, we use the fact that
//
// Y ^ Z == ~Y ^ ~Z
//
// to rewrite the expression as
//
// I(B, C, D) = ~C ^ (~B & D)

#define I(a, b, c, d, M) \
{	.mmi ; \
	add Z = M, TRound ; \
	andcm Y = d, b ; \
	andcm X = -1, c ; \
} ;; \
{	.mii ; \
	add Z = Z, a ; \
	xor Y = Y, X ;; \
	add Z = Z, Y ; \
} ;;

// GG4/HH4/II4: four steps of the respective round.  The fourth step ends
// in LOOP, which branches back to label.

#define GG4(label) \
	G(A, B, C, D, M0) \
	COMPUTE(A, B, 5, M0, RotateM0) \
	G(D, A, B, C, M1) \
	COMPUTE(D, A, 9, M1, RotateM1) \
	G(C, D, A, B, M2) \
	COMPUTE(C, D, 14, M2, RotateM2) \
	G(B, C, D, A, M3) \
	LOOP(B, C, 20, M3, RotateM3, label)

#define HH4(label) \
	H(A, B, C, D, M0) \
	COMPUTE(A, B, 4, M0, RotateM0) \
	H(D, A, B, C, M1) \
	COMPUTE(D, A, 11, M1, RotateM1) \
	H(C, D, A, B, M2) \
	COMPUTE(C, D, 16, M2, RotateM2) \
	H(B, C, D, A, M3) \
	LOOP(B, C, 23, M3, RotateM3, label)

#define II4(label) \
	I(A, B, C, D, M0) \
	COMPUTE(A, B, 6, M0, RotateM0) \
	I(D, A, B, C, M1) \
	COMPUTE(D, A, 10, M1, RotateM1) \
	I(C, D, A, B, M2) \
	COMPUTE(C, D, 15, M2, RotateM2) \
	I(B, C, D, A, M3) \
	LOOP(B, C, 21, M3, RotateM3, label)

// FFLOAD: one FF step, F(b, c, d) = (c & b) | (d & ~b), for aligned
// input.  While pMore is set it also loads the next input word into N.

#define FFLOAD(a, b, c, d, M, N, s) \
{	.mii ; \
(pMore) ld4 N = [DPtr], 4 ; \
	add Z = M, TRound ; \
	and Y = c, b ; \
} \
{	.mmi ; \
	andcm X = d, b ;; \
	add Z = Z, a ; \
	or Y = Y, X ; \
} ;; \
{	.mii ; \
	ld4 TRound = [TPtr], 4 ; \
	add Z = Z, Y ;; \
	dep.z Y = Z, 32, 32 ; \
} ;; \
{	.mii ; \
	nop 0x0 ; \
	shrp Z = Z, Y, 64 - s ;; \
	add a = Z, b ; \
} ;;

// FFLOOP: FFLOAD followed by the loop close.  pMore is recomputed from
// LTrip before LTrip is decremented, then br.ctop branches to dest.

#define FFLOOP(a, b, c, d, M, N, s, dest) \
{	.mii ; \
(pMore) ld4 N = [DPtr], 4 ; \
	add Z = M, TRound ; \
	and Y = c, b ; \
} \
{	.mmi ; \
	andcm X = d, b ;; \
	add Z = Z, a ; \
	or Y = Y, X ; \
} ;; \
{	.mii ; \
	ld4 TRound = [TPtr], 4 ; \
	add Z = Z, Y ;; \
	dep.z Y = Z, 32, 32 ; \
} ;; \
{	.mii ; \
	nop 0x0 ; \
	shrp Z = Z, Y, 64 - s ;; \
	add a = Z, b ; \
} \
{	.mib ; \
	cmp.ne pMore, p0 = 0, LTrip ; \
	add LTrip = -1, LTrip ; \
	br.ctop.dptk.many dest ; \
} ;;

// FF round for aligned input.  ar.lc = 3 gives four trips of the loop;
// with LTrip starting at 2, pMore stays set for the first three trips,
// so twelve further words are loaded after the four supplied by the
// driver.

	.type md5_digest_block0, @function
	.align 32

	.proc md5_digest_block0
	.prologue
md5_digest_block0:
	.altrp QUICK_RTN
	.body
{	.mmi
	alloc PFS = ar.pfs, _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE
	mov LTrip = 2
	mov ar.lc = 3
} ;;
{	.mii
	cmp.eq pMore, p0 = r0, r0
	mov ar.ec = 0
	nop 0x0
} ;;

.md5_FF_round0:
	FFLOAD(A, B, C, D, M12, RotateM0, 7)
	FFLOAD(D, A, B, C, M13, RotateM1, 12)
	FFLOAD(C, D, A, B, M14, RotateM2, 17)
	FFLOOP(B, C, D, A, M15, RotateM3, 22, .md5_FF_round0)
	//
	// !!! Fall through to md5_digest_GHI
	//
	.endp md5_digest_block0

	.type md5_digest_GHI, @function
	.align 32

	.proc md5_digest_GHI
	.prologue
	.regstk _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE
md5_digest_GHI:
	.altrp QUICK_RTN
	.body
//
// The following sequence shuffles the block constants round for the
// next round (top row: slot, bottom row: word placed in that slot):
//
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
// 1 6 11 0 5 10 15 4 9 14 3 8 13 2 7 12
//
{	.mmi
	mov Z = M0
	mov Y = M15
	mov ar.lc = 3
}
{	.mmi
	mov X = M2
	mov W = M9
	mov V = M4
} ;;

{	.mmi
	mov M0 = M1
	mov M15 = M12
	mov ar.ec = 1
}
{	.mmi
	mov M2 = M11
	mov M9 = M14
	mov M4 = M5
} ;;

{	.mmi
	mov M1 = M6
	mov M12 = M13
	mov U = M3
}
{	.mmi
	mov M11 = M8
	mov M14 = M7
	mov M5 = M10
} ;;

{	.mmi
	mov M6 = Y
	mov M13 = X
	mov M3 = Z
}
{	.mmi
	mov M8 = W
	mov M7 = V
	mov M10 = U
} ;;

.md5_GG_round:
	GG4(.md5_GG_round)

// The following sequence shuffles the block constants round for the
// next round:
//
// 1 6 11 0 5 10 15 4 9 14 3 8 13 2 7 12
// 5 8 11 14 1 4 7 10 13 0 3 6 9 12 15 2

{	.mmi
	mov Z = M0
	mov Y = M1
	mov ar.lc = 3
}
{	.mmi
	mov X = M3
	mov W = M5
	mov V = M6
} ;;

{	.mmi
	mov M0 = M4
	mov M1 = M11
	mov ar.ec = 1
}
{	.mmi
	mov M3 = M9
	mov U = M8
	mov T = M13
} ;;

{	.mmi
	mov M4 = Z
	mov M11 = Y
	mov M5 = M7
}
{	.mmi
	mov M6 = M14
	mov M8 = M12
	mov M13 = M15
} ;;

{	.mmi
	mov M7 = W
	mov M14 = V
	nop 0x0
}
{	.mmi
	mov M9 = X
	mov M12 = U
	mov M15 = T
} ;;

.md5_HH_round:
	HH4(.md5_HH_round)

// The following sequence shuffles the block constants round for the
// next round:
//
// 5 8 11 14 1 4 7 10 13 0 3 6 9 12 15 2
// 0 7 14 5 12 3 10 1 8 15 6 13 4 11 2 9

{	.mmi
	mov Z = M0
	mov Y = M15
	mov ar.lc = 3
}
{	.mmi
	mov X = M10
	mov W = M1
	mov V = M4
} ;;

{	.mmi
	mov M0 = M9
	mov M15 = M12
	mov ar.ec = 1
}
{	.mmi
	mov M10 = M11
	mov M1 = M6
	mov M4 = M13
} ;;

{	.mmi
	mov M9 = M14
	mov M12 = M5
	mov U = M3
}
{	.mmi
	mov M11 = M8
	mov M6 = M7
	mov M13 = M2
} ;;

{	.mmi
	mov M14 = Y
	mov M5 = X
	mov M3 = Z
}
{	.mmi
	mov M8 = W
	mov M7 = V
	mov M2 = U
} ;;

.md5_II_round:
	II4(.md5_II_round)

{	.mib
	nop 0x0
	nop 0x0
	br.ret.sptk.many QUICK_RTN
} ;;

	.endp md5_digest_GHI

// FFLOADU: FFLOAD for misaligned input.  Besides the FF step it rebuilds
// word P from the residual bytes in DTmp and the left part of P, and
// leaves the right part of the old P in DTmp for the next word.

#define FFLOADU(a, b, c, d, M, P, N, s, offset) \
{	.mii ; \
(pMore) ld4 N = [DPtr], 4 ; \
	add Z = M, TRound ; \
	and Y = c, b ; \
} \
{	.mmi ; \
	andcm X = d, b ;; \
	add Z = Z, a ; \
	or Y = Y, X ; \
} ;; \
{	.mii ; \
	ld4 TRound = [TPtr], 4 ; \
	GETLW(W, P, offset) ; \
	add Z = Z, Y ; \
} ;; \
{	.mii ; \
	or W = W, DTmp ; \
	dep.z Y = Z, 32, 32 ;; \
	shrp Z = Z, Y, 64 - s ; \
} ;; \
{	.mii ; \
	add a = Z, b ; \
	GETRW(DTmp, P, offset) ; \
	mov P = W ; \
} ;;

// FFLOOPU: FFLOADU with the word fix-up predicated on pMore, followed by
// the loop close back to .md5_FF_round<offset>.

#define FFLOOPU(a, b, c, d, M, P, N, s, offset) \
{	.mii ; \
(pMore) ld4 N = [DPtr], 4 ; \
	add Z = M, TRound ; \
	and Y = c, b ; \
} \
{	.mmi ; \
	andcm X = d, b ;; \
	add Z = Z, a ; \
	or Y = Y, X ; \
} ;; \
{	.mii ; \
	ld4 TRound = [TPtr], 4 ; \
(pMore) GETLW(W, P, offset) ; \
	add Z = Z, Y ; \
} ;; \
{	.mii ; \
(pMore) or W = W, DTmp ; \
	dep.z Y = Z, 32, 32 ;; \
	shrp Z = Z, Y, 64 - s ; \
} ;; \
{	.mii ; \
	add a = Z, b ; \
(pMore) GETRW(DTmp, P, offset) ; \
(pMore) mov P = W ; \
} \
{	.mib ; \
	cmp.ne pMore, p0 = 0, LTrip ; \
	add LTrip = -1, LTrip ; \
	br.ctop.sptk.many .md5_FF_round##offset ; \
} ;;

// MD5FBLOCK(offset) defines md5_digest_block<offset>: the FF round for
// input misaligned by offset bytes, with the same loop set-up as
// md5_digest_block0.  It ends with an explicit branch to md5_digest_GHI.

#define MD5FBLOCK(offset) \
	.type md5_digest_block##offset, @function ; \
 \
	.align 32 ; \
	.proc md5_digest_block##offset ; \
	.prologue ; \
	.altrp QUICK_RTN ; \
	.body ; \
md5_digest_block##offset: \
{	.mmi ; \
	alloc PFS = ar.pfs, _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE ; \
	mov LTrip = 2 ; \
	mov ar.lc = 3 ; \
} ;; \
{	.mii ; \
	cmp.eq pMore, p0 = r0, r0 ; \
	mov ar.ec = 0 ; \
	nop 0x0 ; \
} ;; \
 \
	.pred.rel "mutex", pLoad, pSkip ; \
.md5_FF_round##offset: \
	FFLOADU(A, B, C, D, M12, M13, RotateM0, 7, offset) \
	FFLOADU(D, A, B, C, M13, M14, RotateM1, 12, offset) \
	FFLOADU(C, D, A, B, M14, M15, RotateM2, 17, offset) \
	FFLOOPU(B, C, D, A, M15, RotateM0, RotateM3, 22, offset) \
 \
{	.mib ; \
	nop 0x0 ; \
	nop 0x0 ; \
	br.cond.sptk.many md5_digest_GHI ; \
} ;; \
	.endp md5_digest_block##offset

MD5FBLOCK(1)
MD5FBLOCK(2)
MD5FBLOCK(3)

	.align 64
.type md5_constants, @object 925238384Sjkimmd5_constants: 926238384Sjkim.md5_tbl_data_order: // To ensure little-endian data 927238384Sjkim // order, code as bytes. 928238384Sjkim data1 0x78, 0xa4, 0x6a, 0xd7 // 0 929238384Sjkim data1 0x56, 0xb7, 0xc7, 0xe8 // 1 930238384Sjkim data1 0xdb, 0x70, 0x20, 0x24 // 2 931238384Sjkim data1 0xee, 0xce, 0xbd, 0xc1 // 3 932238384Sjkim data1 0xaf, 0x0f, 0x7c, 0xf5 // 4 933238384Sjkim data1 0x2a, 0xc6, 0x87, 0x47 // 5 934238384Sjkim data1 0x13, 0x46, 0x30, 0xa8 // 6 935238384Sjkim data1 0x01, 0x95, 0x46, 0xfd // 7 936238384Sjkim data1 0xd8, 0x98, 0x80, 0x69 // 8 937238384Sjkim data1 0xaf, 0xf7, 0x44, 0x8b // 9 938238384Sjkim data1 0xb1, 0x5b, 0xff, 0xff // 10 939238384Sjkim data1 0xbe, 0xd7, 0x5c, 0x89 // 11 940238384Sjkim data1 0x22, 0x11, 0x90, 0x6b // 12 941238384Sjkim data1 0x93, 0x71, 0x98, 0xfd // 13 942238384Sjkim data1 0x8e, 0x43, 0x79, 0xa6 // 14 943238384Sjkim data1 0x21, 0x08, 0xb4, 0x49 // 15 944238384Sjkim data1 0x62, 0x25, 0x1e, 0xf6 // 16 945238384Sjkim data1 0x40, 0xb3, 0x40, 0xc0 // 17 946238384Sjkim data1 0x51, 0x5a, 0x5e, 0x26 // 18 947238384Sjkim data1 0xaa, 0xc7, 0xb6, 0xe9 // 19 948238384Sjkim data1 0x5d, 0x10, 0x2f, 0xd6 // 20 949238384Sjkim data1 0x53, 0x14, 0x44, 0x02 // 21 950238384Sjkim data1 0x81, 0xe6, 0xa1, 0xd8 // 22 951238384Sjkim data1 0xc8, 0xfb, 0xd3, 0xe7 // 23 952238384Sjkim data1 0xe6, 0xcd, 0xe1, 0x21 // 24 953238384Sjkim data1 0xd6, 0x07, 0x37, 0xc3 // 25 954238384Sjkim data1 0x87, 0x0d, 0xd5, 0xf4 // 26 955238384Sjkim data1 0xed, 0x14, 0x5a, 0x45 // 27 956238384Sjkim data1 0x05, 0xe9, 0xe3, 0xa9 // 28 957238384Sjkim data1 0xf8, 0xa3, 0xef, 0xfc // 29 958238384Sjkim data1 0xd9, 0x02, 0x6f, 0x67 // 30 959238384Sjkim data1 0x8a, 0x4c, 0x2a, 0x8d // 31 960238384Sjkim data1 0x42, 0x39, 0xfa, 0xff // 32 961238384Sjkim data1 0x81, 0xf6, 0x71, 0x87 // 33 962238384Sjkim data1 0x22, 0x61, 0x9d, 0x6d // 34 963238384Sjkim data1 0x0c, 0x38, 0xe5, 0xfd // 35 964238384Sjkim data1 0x44, 0xea, 0xbe, 0xa4 
// 36 965238384Sjkim data1 0xa9, 0xcf, 0xde, 0x4b // 37 966238384Sjkim data1 0x60, 0x4b, 0xbb, 0xf6 // 38 967238384Sjkim data1 0x70, 0xbc, 0xbf, 0xbe // 39 968238384Sjkim data1 0xc6, 0x7e, 0x9b, 0x28 // 40 969238384Sjkim data1 0xfa, 0x27, 0xa1, 0xea // 41 970238384Sjkim data1 0x85, 0x30, 0xef, 0xd4 // 42 971238384Sjkim data1 0x05, 0x1d, 0x88, 0x04 // 43 972238384Sjkim data1 0x39, 0xd0, 0xd4, 0xd9 // 44 973238384Sjkim data1 0xe5, 0x99, 0xdb, 0xe6 // 45 974238384Sjkim data1 0xf8, 0x7c, 0xa2, 0x1f // 46 975238384Sjkim data1 0x65, 0x56, 0xac, 0xc4 // 47 976238384Sjkim data1 0x44, 0x22, 0x29, 0xf4 // 48 977238384Sjkim data1 0x97, 0xff, 0x2a, 0x43 // 49 978238384Sjkim data1 0xa7, 0x23, 0x94, 0xab // 50 979238384Sjkim data1 0x39, 0xa0, 0x93, 0xfc // 51 980238384Sjkim data1 0xc3, 0x59, 0x5b, 0x65 // 52 981238384Sjkim data1 0x92, 0xcc, 0x0c, 0x8f // 53 982238384Sjkim data1 0x7d, 0xf4, 0xef, 0xff // 54 983238384Sjkim data1 0xd1, 0x5d, 0x84, 0x85 // 55 984238384Sjkim data1 0x4f, 0x7e, 0xa8, 0x6f // 56 985238384Sjkim data1 0xe0, 0xe6, 0x2c, 0xfe // 57 986238384Sjkim data1 0x14, 0x43, 0x01, 0xa3 // 58 987238384Sjkim data1 0xa1, 0x11, 0x08, 0x4e // 59 988238384Sjkim data1 0x82, 0x7e, 0x53, 0xf7 // 60 989238384Sjkim data1 0x35, 0xf2, 0x3a, 0xbd // 61 990238384Sjkim data1 0xbb, 0xd2, 0xd7, 0x2a // 62 991238384Sjkim data1 0x91, 0xd3, 0x86, 0xeb // 63 992238384Sjkim.size md5_constants#,64*4 993