1/**
2 * Computes MD5 hashes of arbitrary data. MD5 hashes are 16 byte quantities that are like a
3 * checksum or CRC, but are more robust.
4 *
5$(SCRIPT inhibitQuickIndex = 1;)
6
7$(DIVC quickindex,
8$(BOOKTABLE ,
9$(TR $(TH Category) $(TH Functions)
10)
11$(TR $(TDNW Template API) $(TD $(MYREF MD5)
12)
13)
14$(TR $(TDNW OOP API) $(TD $(MYREF MD5Digest))
15)
16$(TR $(TDNW Helpers) $(TD $(MYREF md5Of))
17)
18)
19)
20
21 * This module conforms to the APIs defined in `std.digest`. To understand the
22 * differences between the template and the OOP API, see $(MREF std, digest).
23 *
24 * This module publicly imports $(MREF std, digest) and can be used as a stand-alone
25 * module.
26 *
27 * License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
28 *
29 * CTFE:
30 * Digests do not work in CTFE
31 *
32 * Authors:
33 * Piotr Szturmaj, Kai Nacke, Johannes Pfau $(BR)
34 * The routines and algorithms are derived from the $(I RSA Data Security, Inc. MD5 Message-Digest Algorithm).
35 *
36 * References:
37 *      $(LINK2 http://en.wikipedia.org/wiki/Md5, Wikipedia on MD5)
38 *
39 * Source: $(PHOBOSSRC std/digest/md.d)
40 *
41 */
42
43/* md5.d - RSA Data Security, Inc., MD5 message-digest algorithm
44 * Derived from the RSA Data Security, Inc. MD5 Message-Digest Algorithm.
45 */
46module std.digest.md;
47
48public import std.digest;
49
50///
51@safe unittest
52{
53    //Template API
54    import std.digest.md;
55
56    //Feeding data
57    ubyte[1024] data;
58    MD5 md5;
59    md5.start();
60    md5.put(data[]);
61    md5.start(); //Start again
62    md5.put(data[]);
63    auto hash = md5.finish();
64}
65
66///
67@safe unittest
68{
69    //OOP API
70    import std.digest.md;
71
72    auto md5 = new MD5Digest();
73    ubyte[] hash = md5.digest("abc");
74    assert(toHexString(hash) == "900150983CD24FB0D6963F7D28E17F72");
75
76    //Feeding data
77    ubyte[1024] data;
78    md5.put(data[]);
79    md5.reset(); //Start again
80    md5.put(data[]);
81    hash = md5.finish();
82}
83
84/**
85 * Template API MD5 implementation.
86 * See `std.digest` for differences between template and OOP API.
87 */
88struct MD5
89{
90    import core.bitop : rol;
91    private:
92        // magic initialization constants
93        uint[4] _state = [0x67452301,0xefcdab89,0x98badcfe,0x10325476]; // state (ABCD)
94        ulong _count; //number of bits, modulo 2^64
95        ubyte[64] _buffer; // input buffer
96
97        static immutable ubyte[64] _padding =
98        [
99          0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
100          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
101          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
102        ];
103
104        // F, G, H and I are basic MD5 functions
105        static @safe pure nothrow @nogc
106        {
107            uint F(uint x, uint y, uint z) { return (x & y) | (~x & z); }
108            uint G(uint x, uint y, uint z) { return (x & z) | (y & ~z); }
109            uint H(uint x, uint y, uint z) { return x ^ y ^ z; }
110            uint I(uint x, uint y, uint z) { return y ^ (x | ~z); }
111        }
112
113
114        /*
115         * FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
116         * Rotation is separate from addition to prevent recomputation.
117         */
118        static void FF(ref uint a, uint b, uint c, uint d, uint x, uint s, uint ac)
119            @safe pure nothrow @nogc
120        {
121            a += F (b, c, d) + x + ac;
122            a = rol(a, s);
123            a += b;
124        }
125
126        static void GG(ref uint a, uint b, uint c, uint d, uint x, uint s, uint ac)
127            @safe pure nothrow @nogc
128        {
129            a += G (b, c, d) + x + ac;
130            a = rol(a, s);
131            a += b;
132        }
133
134        static void HH(ref uint a, uint b, uint c, uint d, uint x, uint s, uint ac)
135            @safe pure nothrow @nogc
136        {
137            a += H (b, c, d) + x + ac;
138            a = rol(a, s);
139            a += b;
140        }
141
142        static void II(ref uint a, uint b, uint c, uint d, uint x, uint s, uint ac)
143            @safe pure nothrow @nogc
144        {
145            a += I (b, c, d) + x + ac;
146            a = rol(a, s);
147            a += b;
148        }
149
150        /*
151         * MD5 basic transformation. Transforms state based on block.
152         */
153
154        //Constants for MD5Transform routine.
155        enum
156        {
157            S11 = 7,
158            S12 = 12,
159            S13 = 17,
160            S14 = 22,
161            S21 = 5,
162            S22 = 9,
163            S23 = 14,
164            S24 = 20,
165            S31 = 4,
166            S32 = 11,
167            S33 = 16,
168            S34 = 23,
169            S41 = 6,
170            S42 = 10,
171            S43 = 15,
172            S44 = 21,
173        }
174
175        private void transform(const(ubyte[64])* block) pure nothrow @nogc
176        {
177            uint a = _state[0],
178                 b = _state[1],
179                 c = _state[2],
180                 d = _state[3];
181
182            uint[16] x = void;
183
184            version (BigEndian)
185            {
186                import std.bitmanip : littleEndianToNative;
187
188                for (size_t i = 0; i < 16; i++)
189                {
190                    x[i] = littleEndianToNative!uint(*cast(ubyte[4]*)&(*block)[i*4]);
191                }
192            }
193            else
194            {
195                (cast(ubyte*) x.ptr)[0 .. 64] = (cast(ubyte*) block)[0 .. 64];
196            }
197
198            //Round 1
199            FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
200            FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
201            FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
202            FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
203            FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
204            FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
205            FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
206            FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
207            FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
208            FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
209            FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
210            FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
211            FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
212            FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
213            FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
214            FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
215
216            //Round 2
217            GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
218            GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
219            GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
220            GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
221            GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
222            GG (d, a, b, c, x[10], S22,  0x2441453); /* 22 */
223            GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
224            GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
225            GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
226            GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
227            GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
228            GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
229            GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
230            GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
231            GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
232            GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
233
234            //Round 3
235            HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
236            HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
237            HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
238            HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
239            HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
240            HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
241            HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
242            HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
243            HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
244            HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
245            HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
246            HH (b, c, d, a, x[ 6], S34,  0x4881d05); /* 44 */
247            HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
248            HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
249            HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
250            HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
251
252            //Round 4
253            II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
254            II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
255            II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
256            II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
257            II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
258            II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
259            II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
260            II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
261            II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
262            II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
263            II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
264            II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
265            II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
266            II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
267            II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
268            II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
269
270            _state[0] += a;
271            _state[1] += b;
272            _state[2] += c;
273            _state[3] += d;
274
275            //Zeroize sensitive information.
276            x[] = 0;
277        }
278
279    public:
280        enum blockSize = 512;
281
282        /**
283         * Use this to feed the digest with data.
284         * Also implements the $(REF isOutputRange, std,range,primitives)
285         * interface for `ubyte` and `const(ubyte)[]`.
286         *
287         * Example:
288         * ----
289         * MD5 dig;
290         * dig.put(cast(ubyte) 0); //single ubyte
291         * dig.put(cast(ubyte) 0, cast(ubyte) 0); //variadic
292         * ubyte[10] buf;
293         * dig.put(buf); //buffer
294         * ----
295         */
296        void put(scope const(ubyte)[] data...) @trusted pure nothrow @nogc
297        {
298            uint i, index, partLen;
299            auto inputLen = data.length;
300
301            //Compute number of bytes mod 64
302            index = (cast(uint)_count >> 3) & (64 - 1);
303
304            //Update number of bits
305            _count += inputLen * 8;
306
307            partLen = 64 - index;
308
309            //Transform as many times as possible
310            if (inputLen >= partLen)
311            {
312                (&_buffer[index])[0 .. partLen] = data.ptr[0 .. partLen];
313                transform(&_buffer);
314
315                for (i = partLen; i + 63 < inputLen; i += 64)
316                {
317                    transform(cast(const(ubyte[64])*)(data[i .. i + 64].ptr));
318                }
319
320                index = 0;
321            }
322            else
323            {
324                i = 0;
325            }
326
327            /* Buffer remaining input */
328            if (inputLen - i)
329                (&_buffer[index])[0 .. inputLen-i] = (&data[i])[0 .. inputLen-i];
330        }
331
332        /**
333         * Used to (re)initialize the MD5 digest.
334         *
335         * Note:
336         * For this MD5 Digest implementation calling start after default construction
337         * is not necessary. Calling start is only necessary to reset the Digest.
338         *
339         * Generic code which deals with different Digest types should always call start though.
340         *
341         * Example:
342         * --------
343         * MD5 digest;
344         * //digest.start(); //Not necessary
345         * digest.put(0);
346         * --------
347         */
348        void start() @safe pure nothrow @nogc
349        {
350            this = MD5.init;
351        }
352
353        /**
354         * Returns the finished MD5 hash. This also calls $(LREF start) to
355         * reset the internal state.
356          */
357        ubyte[16] finish() @trusted pure nothrow @nogc
358        {
359            import std.bitmanip : nativeToLittleEndian;
360
361            ubyte[16] data = void;
362            ubyte[8] bits = void;
363            uint index, padLen;
364
365            //Save number of bits
366            bits[0 .. 8] = nativeToLittleEndian(_count)[];
367
368            //Pad out to 56 mod 64
369            index = (cast(uint)_count >> 3) & (64 - 1);
370            padLen = (index < 56) ? (56 - index) : (120 - index);
371            put(_padding[0 .. padLen]);
372
373            //Append length (before padding)
374            put(bits);
375
376            //Store state in digest
377            data[0 .. 4]   = nativeToLittleEndian(_state[0])[];
378            data[4 .. 8]   = nativeToLittleEndian(_state[1])[];
379            data[8 .. 12]  = nativeToLittleEndian(_state[2])[];
380            data[12 .. 16] = nativeToLittleEndian(_state[3])[];
381
382            /* Zeroize sensitive information. */
383            start();
384            return data;
385        }
386        ///
387        @safe unittest
388        {
389            //Simple example
390            MD5 hash;
391            hash.start();
392            hash.put(cast(ubyte) 0);
393            ubyte[16] result = hash.finish();
394        }
395}
396
397///
398@safe unittest
399{
400    //Simple example, hashing a string using md5Of helper function
401    ubyte[16] hash = md5Of("abc");
402    //Let's get a hash string
403    assert(toHexString(hash) == "900150983CD24FB0D6963F7D28E17F72");
404}
405
406///
407@safe unittest
408{
409    //Using the basic API
410    MD5 hash;
411    hash.start();
412    ubyte[1024] data;
413    //Initialize data here...
414    hash.put(data);
415    ubyte[16] result = hash.finish();
416}
417
418///
419@safe unittest
420{
421    //Let's use the template features:
422    void doSomething(T)(ref T hash)
423    if (isDigest!T)
424    {
425        hash.put(cast(ubyte) 0);
426    }
427    MD5 md5;
428    md5.start();
429    doSomething(md5);
430    assert(toHexString(md5.finish()) == "93B885ADFE0DA089CDF634904FD59F71");
431}
432
433@safe unittest
434{
435    assert(isDigest!MD5);
436}
437
438@system unittest
439{
440    import std.range;
441    import std.conv : hexString;
442
443    ubyte[16] digest;
444
445    MD5 md5;
446    md5.put(cast(ubyte[])"abcdef");
447    md5.start();
448    md5.put(cast(ubyte[])"");
449    assert(md5.finish() == cast(ubyte[]) hexString!"d41d8cd98f00b204e9800998ecf8427e");
450
451    digest = md5Of("");
452    assert(digest == cast(ubyte[]) hexString!"d41d8cd98f00b204e9800998ecf8427e");
453
454    digest = md5Of("a");
455    assert(digest == cast(ubyte[]) hexString!"0cc175b9c0f1b6a831c399e269772661");
456
457    digest = md5Of("abc");
458    assert(digest == cast(ubyte[]) hexString!"900150983cd24fb0d6963f7d28e17f72");
459
460    digest = md5Of("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq");
461    assert(digest == cast(ubyte[]) hexString!"8215ef0796a20bcaaae116d3876c664a");
462
463    digest = md5Of("message digest");
464    assert(digest == cast(ubyte[]) hexString!"f96b697d7cb7938d525a2f31aaf161d0");
465
466    digest = md5Of("abcdefghijklmnopqrstuvwxyz");
467    assert(digest == cast(ubyte[]) hexString!"c3fcd3d76192e4007dfb496cca67e13b");
468
469    digest = md5Of("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789");
470    assert(digest == cast(ubyte[]) hexString!"d174ab98d277d9f5a5611c2c9f419d9f");
471
472    digest = md5Of("1234567890123456789012345678901234567890"~
473                    "1234567890123456789012345678901234567890");
474    assert(digest == cast(ubyte[]) hexString!"57edf4a22be3c955ac49da2e2107b67a");
475
476    enum ubyte[16] input = cast(ubyte[16]) hexString!"c3fcd3d76192e4007dfb496cca67e13b";
477    assert(toHexString(input)
478        == "C3FCD3D76192E4007DFB496CCA67E13B");
479
480    ubyte[] onemilliona = new ubyte[1000000];
481    onemilliona[] = 'a';
482    digest = md5Of(onemilliona);
483    assert(digest == cast(ubyte[]) hexString!"7707D6AE4E027C70EEA2A935C2296F21");
484
485    auto oneMillionRange = repeat!ubyte(cast(ubyte)'a', 1000000);
486    digest = md5Of(oneMillionRange);
487    assert(digest == cast(ubyte[]) hexString!"7707D6AE4E027C70EEA2A935C2296F21");
488}
489
490/**
491 * This is a convenience alias for $(REF digest, std,digest) using the
492 * MD5 implementation.
493 */
494//simple alias doesn't work here, hope this gets inlined...
495auto md5Of(T...)(T data)
496{
497    return digest!(MD5, T)(data);
498}
499
500///
501@safe unittest
502{
503    ubyte[16] hash = md5Of("abc");
504    assert(hash == digest!MD5("abc"));
505}
506
507/**
508 * OOP API MD5 implementation.
509 * See `std.digest` for differences between template and OOP API.
510 *
511 * This is an alias for $(D $(REF WrapperDigest, std,digest)!MD5), see
512 * there for more information.
513 */
514alias MD5Digest = WrapperDigest!MD5;
515
516///
517@safe unittest
518{
519    //Simple example, hashing a string using Digest.digest helper function
520    auto md5 = new MD5Digest();
521    ubyte[] hash = md5.digest("abc");
522    //Let's get a hash string
523    assert(toHexString(hash) == "900150983CD24FB0D6963F7D28E17F72");
524}
525
526///
527@system unittest
528{
529     //Let's use the OOP features:
530    void test(Digest dig)
531    {
532      dig.put(cast(ubyte) 0);
533    }
534    auto md5 = new MD5Digest();
535    test(md5);
536
537    //Let's use a custom buffer:
538    ubyte[16] buf;
539    ubyte[] result = md5.finish(buf[]);
540    assert(toHexString(result) == "93B885ADFE0DA089CDF634904FD59F71");
541}
542
543@system unittest
544{
545    import std.conv : hexString;
546    auto md5 = new MD5Digest();
547
548    md5.put(cast(ubyte[])"abcdef");
549    md5.reset();
550    md5.put(cast(ubyte[])"");
551    assert(md5.finish() == cast(ubyte[]) hexString!"d41d8cd98f00b204e9800998ecf8427e");
552
553    md5.put(cast(ubyte[])"abcdefghijklmnopqrstuvwxyz");
554    ubyte[20] result;
555    auto result2 = md5.finish(result[]);
556    assert(result[0 .. 16] == result2 && result2 == cast(ubyte[]) hexString!"c3fcd3d76192e4007dfb496cca67e13b");
557
558    debug
559    {
560        import std.exception;
561        assertThrown!Error(md5.finish(result[0 .. 15]));
562    }
563
564    assert(md5.length == 16);
565
566    assert(md5.digest("") == cast(ubyte[]) hexString!"d41d8cd98f00b204e9800998ecf8427e");
567
568    assert(md5.digest("a") == cast(ubyte[]) hexString!"0cc175b9c0f1b6a831c399e269772661");
569
570    assert(md5.digest("abc") == cast(ubyte[]) hexString!"900150983cd24fb0d6963f7d28e17f72");
571
572    assert(md5.digest("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq")
573           == cast(ubyte[]) hexString!"8215ef0796a20bcaaae116d3876c664a");
574
575    assert(md5.digest("message digest") == cast(ubyte[]) hexString!"f96b697d7cb7938d525a2f31aaf161d0");
576
577    assert(md5.digest("abcdefghijklmnopqrstuvwxyz")
578           == cast(ubyte[]) hexString!"c3fcd3d76192e4007dfb496cca67e13b");
579
580    assert(md5.digest("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789")
581           == cast(ubyte[]) hexString!"d174ab98d277d9f5a5611c2c9f419d9f");
582
583    assert(md5.digest("1234567890123456789012345678901234567890",
584                                   "1234567890123456789012345678901234567890")
585           == cast(ubyte[]) hexString!"57edf4a22be3c955ac49da2e2107b67a");
586}
587