1/** Fundamental operations for arbitrary-precision arithmetic
2 *
3 * These functions are for internal use only.
4 */
5/*          Copyright Don Clugston 2008 - 2010.
6 * Distributed under the Boost Software License, Version 1.0.
7 *    (See accompanying file LICENSE_1_0.txt or copy at
8 *          http://www.boost.org/LICENSE_1_0.txt)
9 */
10/* References:
11   "Modern Computer Arithmetic" (MCA) is the primary reference for all
12    algorithms used in this library.
13  - R.P. Brent and P. Zimmermann, "Modern Computer Arithmetic",
14    Version 0.5.9, (Oct 2010).
15  - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022,
16    Max-Planck Institute fuer Informatik, (Oct 1998).
17  - G. Hanrot, M. Quercia, and P. Zimmermann, "The Middle Product Algorithm, I.",
18    INRIA 4664, (Dec 2002).
19  - M. Bodrato and A. Zanoni, "What about Toom-Cook Matrices Optimality?",
20    http://bodrato.it/papers (2006).
21  - A. Fog, "Optimizing subroutines in assembly language",
22    www.agner.org/optimize (2008).
23  - A. Fog, "The microarchitecture of Intel and AMD CPU's",
24    www.agner.org/optimize (2008).
25  - A. Fog, "Instruction tables: Lists of instruction latencies, throughputs
26    and micro-operation breakdowns for Intel and AMD CPU's.", www.agner.org/optimize (2008).
27
28Idioms:
29  Many functions in this module use
30  'func(Tulong)(Tulong x) if (is(Tulong == ulong))' rather than 'func(ulong x)'
31  in order to disable implicit conversion.
32
33*/
34module std.internal.math.biguintcore;
35
36version (D_InlineAsm_X86)
37{
38    static import std.internal.math.biguintx86;
39}
40static import std.internal.math.biguintnoasm;
41
42import std.internal.math.biguintnoasm : BigDigit, KARATSUBALIMIT,
43    KARATSUBASQUARELIMIT;
44
45alias multibyteAdd = multibyteAddSub!('+');
46alias multibyteSub = multibyteAddSub!('-');
47
48private import std.traits;
49private import std.range.primitives;
50public import std.ascii : LetterCase;
51import std.range.primitives;
52import std.traits;
53
54private:
55
56// dipatchers to the right low-level primitives. Added to allow BigInt CTFE for
57// 32 bit systems (https://issues.dlang.org/show_bug.cgi?id=14767) although it's
58// used by the other architectures too.
59// See comments below in case it has to be refactored.
60version (X86)
61uint multibyteAddSub(char op)(uint[] dest, const(uint)[] src1, const (uint)[] src2, uint carry)
62{
63    // must be checked before, otherwise D_InlineAsm_X86 is true.
64    if (__ctfe)
65        return std.internal.math.biguintnoasm.multibyteAddSub!op(dest, src1, src2, carry);
66    // Runtime.
67    else version (D_InlineAsm_X86)
68        return std.internal.math.biguintx86.multibyteAddSub!op(dest, src1, src2, carry);
69    // Runtime if no asm available.
70    else
71        return std.internal.math.biguintnoasm.multibyteAddSub!op(dest, src1, src2, carry);
72}
73// Any other architecture
74else alias multibyteAddSub = std.internal.math.biguintnoasm.multibyteAddSub;
75
76version (X86)
77uint multibyteIncrementAssign(char op)(uint[] dest, uint carry)
78{
79    if (__ctfe)
80        return std.internal.math.biguintnoasm.multibyteIncrementAssign!op(dest, carry);
81    else version (D_InlineAsm_X86)
82        return std.internal.math.biguintx86.multibyteIncrementAssign!op(dest, carry);
83    else
84        return std.internal.math.biguintnoasm.multibyteIncrementAssign!op(dest, carry);
85}
86else alias multibyteIncrementAssign = std.internal.math.biguintnoasm.multibyteIncrementAssign;
87
88version (X86)
89uint multibyteShl()(uint[] dest, const(uint)[] src, uint numbits)
90{
91    if (__ctfe)
92        return std.internal.math.biguintnoasm.multibyteShl(dest, src, numbits);
93    else version (D_InlineAsm_X86)
94        return std.internal.math.biguintx86.multibyteShl(dest, src, numbits);
95    else
96        return std.internal.math.biguintnoasm.multibyteShl(dest, src, numbits);
97}
98else alias multibyteShl = std.internal.math.biguintnoasm.multibyteShl;
99
100version (X86)
101void multibyteShr()(uint[] dest, const(uint)[] src, uint numbits)
102{
103    if (__ctfe)
104        std.internal.math.biguintnoasm.multibyteShr(dest, src, numbits);
105    else version (D_InlineAsm_X86)
106        std.internal.math.biguintx86.multibyteShr(dest, src, numbits);
107    else
108        std.internal.math.biguintnoasm.multibyteShr(dest, src, numbits);
109}
110else alias multibyteShr = std.internal.math.biguintnoasm.multibyteShr;
111
112version (X86)
113uint multibyteMul()(uint[] dest, const(uint)[] src, uint multiplier, uint carry)
114{
115    if (__ctfe)
116        return std.internal.math.biguintnoasm.multibyteMul(dest, src, multiplier, carry);
117    else version (D_InlineAsm_X86)
118        return std.internal.math.biguintx86.multibyteMul(dest, src, multiplier, carry);
119    else
120        return std.internal.math.biguintnoasm.multibyteMul(dest, src, multiplier, carry);
121}
122else alias multibyteMul = std.internal.math.biguintnoasm.multibyteMul;
123
124version (X86)
125uint multibyteMulAdd(char op)(uint[] dest, const(uint)[] src, uint multiplier, uint carry)
126{
127    if (__ctfe)
128        return std.internal.math.biguintnoasm.multibyteMulAdd!op(dest, src, multiplier, carry);
129    else version (D_InlineAsm_X86)
130        return std.internal.math.biguintx86.multibyteMulAdd!op(dest, src, multiplier, carry);
131    else
132        return std.internal.math.biguintnoasm.multibyteMulAdd!op(dest, src, multiplier, carry);
133}
134else alias multibyteMulAdd = std.internal.math.biguintnoasm.multibyteMulAdd;
135
136version (X86)
137void multibyteMultiplyAccumulate()(uint[] dest, const(uint)[] left, const(uint)[] right)
138{
139    if (__ctfe)
140        std.internal.math.biguintnoasm.multibyteMultiplyAccumulate(dest, left, right);
141    else version (D_InlineAsm_X86)
142        std.internal.math.biguintx86.multibyteMultiplyAccumulate(dest, left, right);
143    else
144        std.internal.math.biguintnoasm.multibyteMultiplyAccumulate(dest, left, right);
145}
146else alias multibyteMultiplyAccumulate = std.internal.math.biguintnoasm.multibyteMultiplyAccumulate;
147
148version (X86)
149uint multibyteDivAssign()(uint[] dest, uint divisor, uint overflow)
150{
151    if (__ctfe)
152        return std.internal.math.biguintnoasm.multibyteDivAssign(dest, divisor, overflow);
153    else version (D_InlineAsm_X86)
154        return std.internal.math.biguintx86.multibyteDivAssign(dest, divisor, overflow);
155    else
156        return std.internal.math.biguintnoasm.multibyteDivAssign(dest, divisor, overflow);
157}
158else alias multibyteDivAssign = std.internal.math.biguintnoasm.multibyteDivAssign;
159
160version (X86)
161void multibyteAddDiagonalSquares()(uint[] dest, const(uint)[] src)
162{
163    if (__ctfe)
164        std.internal.math.biguintnoasm.multibyteAddDiagonalSquares(dest, src);
165    else version (D_InlineAsm_X86)
166        std.internal.math.biguintx86.multibyteAddDiagonalSquares(dest, src);
167    else
168        std.internal.math.biguintnoasm.multibyteAddDiagonalSquares(dest, src);
169}
170else alias multibyteAddDiagonalSquares = std.internal.math.biguintnoasm.multibyteAddDiagonalSquares;
171
172version (X86)
173void multibyteTriangleAccumulate()(uint[] dest, const(uint)[] x)
174{
175    if (__ctfe)
176        std.internal.math.biguintnoasm.multibyteTriangleAccumulate(dest, x);
177    else version (D_InlineAsm_X86)
178        std.internal.math.biguintx86.multibyteTriangleAccumulate(dest, x);
179    else
180        std.internal.math.biguintnoasm.multibyteTriangleAccumulate(dest, x);
181}
182else alias multibyteTriangleAccumulate = std.internal.math.biguintnoasm.multibyteTriangleAccumulate;
183
184version (X86)
185void multibyteSquare()(BigDigit[] result, const(BigDigit)[] x)
186{
187    if (__ctfe)
188        std.internal.math.biguintnoasm.multibyteSquare(result, x);
189    else version (D_InlineAsm_X86)
190        std.internal.math.biguintx86.multibyteSquare(result, x);
191    else
192        std.internal.math.biguintnoasm.multibyteSquare(result, x);
193}
194else alias multibyteSquare = std.internal.math.biguintnoasm.multibyteSquare;
195
196// Limits for when to switch between algorithms.
197// Half the size of the data cache.
198@nogc nothrow pure @safe size_t getCacheLimit()
199{
200    import core.cpuid : dataCaches;
201    return dataCaches[0].size * 1024 / 2;
202}
203enum size_t FASTDIVLIMIT = 100; // crossover to recursive division
204
205
206// These constants are used by shift operations
207static if (BigDigit.sizeof == int.sizeof)
208{
209    enum { LG2BIGDIGITBITS = 5, BIGDIGITSHIFTMASK = 31 }
210    alias BIGHALFDIGIT = ushort;
211}
212else static if (BigDigit.sizeof == long.sizeof)
213{
214    alias BIGHALFDIGIT = uint;
215    enum { LG2BIGDIGITBITS = 6, BIGDIGITSHIFTMASK = 63 }
216}
217else static assert(0, "Unsupported BigDigit size");
218
219import std.exception : assumeUnique;
220import std.traits : isIntegral;
221enum BigDigitBits = BigDigit.sizeof*8;
222template maxBigDigits(T)
223if (isIntegral!T)
224{
225    enum maxBigDigits = (T.sizeof+BigDigit.sizeof-1)/BigDigit.sizeof;
226}
227
228static immutable BigDigit[] ZERO = [0];
229static immutable BigDigit[] ONE = [1];
230static immutable BigDigit[] TWO = [2];
231static immutable BigDigit[] TEN = [10];
232
233
234public:
235
236/// BigUint performs memory management and wraps the low-level calls.
237struct BigUint
238{
239private:
240    pure invariant()
241    {
242        assert( data.length >= 1 && (data.length == 1 || data[$-1] != 0 ),
243                "Invariant requires data to not empty or zero");
244    }
245
246    immutable(BigDigit) [] data = ZERO;
247
248    this(return scope immutable(BigDigit) [] x) pure nothrow @nogc @safe
249    {
250       data = x;
251    }
252  package(std)  // used from: std.bigint
253    this(T)(T x) pure nothrow @safe scope if (isIntegral!T)
254    {
255        opAssign(x);
256    }
257
258    enum trustedAssumeUnique = function(BigDigit[] input) pure @trusted @nogc {
259        return assumeUnique(input);
260    };
261public:
262    // Length in uints
263    @property size_t uintLength() pure nothrow const @safe @nogc scope
264    {
265        static if (BigDigit.sizeof == uint.sizeof)
266        {
267            return data.length;
268        }
269        else static if (BigDigit.sizeof == ulong.sizeof)
270        {
271            return data.length * 2 -
272            ((data[$-1] & 0xFFFF_FFFF_0000_0000L) ? 1 : 0);
273        }
274    }
275    @property size_t ulongLength() pure nothrow const @safe @nogc scope
276    {
277        static if (BigDigit.sizeof == uint.sizeof)
278        {
279            return (data.length + 1) >> 1;
280        }
281        else static if (BigDigit.sizeof == ulong.sizeof)
282        {
283            return data.length;
284        }
285    }
286
287    // The value at (cast(ulong[]) data)[n]
288    ulong peekUlong(size_t n) pure nothrow const @safe @nogc scope
289    {
290        static if (BigDigit.sizeof == int.sizeof)
291        {
292            if (data.length == n*2 + 1) return data[n*2];
293            return data[n*2] + ((cast(ulong) data[n*2 + 1]) << 32 );
294        }
295        else static if (BigDigit.sizeof == long.sizeof)
296        {
297            return data[n];
298        }
299    }
300
301    uint peekUint(size_t n) pure nothrow const @safe @nogc scope
302    {
303        static if (BigDigit.sizeof == int.sizeof)
304        {
305            return data[n];
306        }
307        else
308        {
309            immutable x = data[n >> 1];
310            return (n & 1) ? cast(uint)(x >> 32) : cast(uint) x;
311        }
312    }
313
314    ///
315    void opAssign(Tulong)(Tulong u) pure nothrow @safe scope if (is (Tulong == ulong))
316    {
317        if (u == 0) data = ZERO;
318        else if (u == 1) data = ONE;
319        else if (u == 2) data = TWO;
320        else if (u == 10) data = TEN;
321        else
322        {
323            static if (BigDigit.sizeof == int.sizeof)
324            {
325                uint ulo = cast(uint)(u & 0xFFFF_FFFF);
326                uint uhi = cast(uint)(u >> 32);
327                if (uhi == 0)
328                {
329                    data = [ulo];
330                }
331                else
332                {
333                    data = [ulo, uhi];
334                }
335            }
336            else static if (BigDigit.sizeof == long.sizeof)
337            {
338                data = [u];
339            }
340        }
341    }
342    void opAssign(Tdummy = void)(BigUint y) pure nothrow @nogc @safe scope
343    {
344        this.data = y.data;
345    }
346
347    ///
348    int opCmp(Tdummy = void)(const BigUint y) pure nothrow @nogc const @safe scope
349    {
350        if (data.length != y.data.length)
351            return (data.length > y.data.length) ?  1 : -1;
352        size_t k = highestDifferentDigit(data, y.data);
353        if (data[k] == y.data[k])
354            return 0;
355        return data[k] > y.data[k] ? 1 : -1;
356    }
357
358    ///
359    int opCmp(Tulong)(Tulong y) pure nothrow @nogc const @safe scope if (is (Tulong == ulong))
360    {
361        if (data.length > maxBigDigits!Tulong)
362            return 1;
363
364        foreach_reverse (i; 0 .. maxBigDigits!Tulong)
365        {
366            BigDigit tmp = cast(BigDigit)(y>>(i*BigDigitBits));
367            if (tmp == 0)
368                if (data.length >= i+1)
369                {
370                    // Since ZERO is [0], so we cannot simply return 1 here, as
371                    // data[i] would be 0 for i == 0 in that case.
372                    return (data[i] > 0) ? 1 : 0;
373                }
374                else
375                    continue;
376            else
377                if (i+1 > data.length)
378                    return -1;
379                else if (tmp != data[i])
380                    return data[i] > tmp ? 1 : -1;
381        }
382        return 0;
383    }
384
385    bool opEquals(Tdummy = void)(ref const BigUint y) pure nothrow @nogc const @safe scope
386    {
387           return y.data[] == data[];
388    }
389
390    bool opEquals(Tdummy = void)(ulong y) pure nothrow @nogc const @safe scope
391    {
392        if (data.length > 2)
393            return false;
394        uint ylo = cast(uint)(y & 0xFFFF_FFFF);
395        uint yhi = cast(uint)(y >> 32);
396        if (data.length == 2 && data[1]!=yhi)
397            return false;
398        if (data.length == 1 && yhi != 0)
399            return false;
400        return (data[0] == ylo);
401    }
402
403    bool isZero() pure const nothrow @safe @nogc scope
404    {
405        return data.length == 1 && data[0] == 0;
406    }
407
408    size_t numBytes() pure nothrow const @safe @nogc scope
409    {
410        return data.length * BigDigit.sizeof;
411    }
412
413    // the extra bytes are added to the start of the string
414    char [] toDecimalString(int frontExtraBytes) const pure nothrow @safe scope
415    {
416        immutable predictlength = 20+20*(data.length/2); // just over 19
417        char [] buff = new char[frontExtraBytes + predictlength];
418        ptrdiff_t sofar = biguintToDecimal(buff, data.dup);
419        return buff[sofar-frontExtraBytes..$];
420    }
421
422    /** Convert to a hex string, printing a minimum number of digits 'minPadding',
423     *  allocating an additional 'frontExtraBytes' at the start of the string.
424     *  Padding is done with padChar, which may be '0' or ' '.
425     *  'separator' is a digit separation character. If non-zero, it is inserted
426     *  between every 8 digits.
427     *  Separator characters do not contribute to the minPadding.
428     */
429    char [] toHexString(int frontExtraBytes, char separator = 0,
430            int minPadding=0, char padChar = '0',
431            LetterCase letterCase = LetterCase.upper) const pure nothrow @safe scope
432    {
433        // Calculate number of extra padding bytes
434        size_t extraPad = (minPadding > data.length * 2 * BigDigit.sizeof)
435            ? minPadding - data.length * 2 * BigDigit.sizeof : 0;
436
437        // Length not including separator bytes
438        size_t lenBytes = data.length * 2 * BigDigit.sizeof;
439
440        // Calculate number of separator bytes
441        size_t mainSeparatorBytes = separator ? (lenBytes  / 8) - 1 : 0;
442        immutable totalSeparatorBytes = separator ? ((extraPad + lenBytes + 7) / 8) - 1: 0;
443
444        char [] buff = new char[lenBytes + extraPad + totalSeparatorBytes + frontExtraBytes];
445        biguintToHex(buff[$ - lenBytes - mainSeparatorBytes .. $], data, separator, letterCase);
446        if (extraPad > 0)
447        {
448            if (separator)
449            {
450                size_t start = frontExtraBytes; // first index to pad
451                if (extraPad &7)
452                {
453                    // Do 1 to 7 extra zeros.
454                    buff[frontExtraBytes .. frontExtraBytes + (extraPad & 7)] = padChar;
455                    buff[frontExtraBytes + (extraPad & 7)] = (padChar == ' ' ? ' ' : separator);
456                    start += (extraPad & 7) + 1;
457                }
458                for (int i=0; i< (extraPad >> 3); ++i)
459                {
460                    buff[start .. start + 8] = padChar;
461                    buff[start + 8] = (padChar == ' ' ? ' ' : separator);
462                    start += 9;
463                }
464            }
465            else
466            {
467                buff[frontExtraBytes .. frontExtraBytes + extraPad]=padChar;
468            }
469        }
470        int z = frontExtraBytes;
471        if (lenBytes > minPadding)
472        {
473            // Strip leading zeros.
474            ptrdiff_t maxStrip = lenBytes - minPadding;
475            while (z< buff.length-1 && (buff[z]=='0' || buff[z]==padChar) && maxStrip>0)
476            {
477                ++z;
478                --maxStrip;
479            }
480        }
481        if (padChar!='0')
482        {
483            // Convert leading zeros into padChars.
484            for (size_t k= z; k< buff.length-1 && (buff[k]=='0' || buff[k]==padChar); ++k)
485            {
486                if (buff[k]=='0') buff[k]=padChar;
487            }
488        }
489        return buff[z-frontExtraBytes..$];
490    }
491
492    /**
493     * Convert to an octal string.
494     */
495    char[] toOctalString() pure nothrow @safe const scope
496    {
497        auto predictLength = 1 + data.length*BigDigitBits / 3;
498        char[] buff = new char[predictLength];
499        size_t firstNonZero = biguintToOctal(buff, data);
500        return buff[firstNonZero .. $];
501    }
502
503    // return false if invalid character found
504    bool fromHexString(Range)(Range s) scope if (
505        isBidirectionalRange!Range && isSomeChar!(ElementType!Range))
506    {
507        import std.range : walkLength;
508
509        //Strip leading zeros
510        while (!s.empty && s.front == '0')
511            s.popFront;
512
513        if (s.empty)
514        {
515            data = ZERO;
516            return true;
517        }
518
519        immutable len = (s.save.walkLength + 15) / 4;
520        auto tmp = new BigDigit[len + 1];
521        uint part, sofar, partcount;
522
523        foreach_reverse (character; s)
524        {
525            if (character == '_')
526                continue;
527
528            uint x;
529            if (character >= '0' && character <= '9')
530            {
531                x = character - '0';
532            }
533            else if (character >= 'A' && character <= 'F')
534            {
535                x = character - 'A' + 10;
536            }
537            else if (character >= 'a' && character <= 'f')
538            {
539                x = character - 'a' + 10;
540            }
541            else
542            {
543                return false;
544            }
545
546            part >>= 4;
547            part |= (x << (32 - 4));
548            ++partcount;
549
550            if (partcount == 8)
551            {
552                tmp[sofar] = part;
553                ++sofar;
554                partcount = 0;
555                part = 0;
556            }
557        }
558        if (part)
559        {
560            for ( ; partcount != 8; ++partcount) part >>= 4;
561            tmp[sofar] = part;
562            ++sofar;
563        }
564        if (sofar == 0)
565            data = ZERO;
566        else
567            data = trustedAssumeUnique(tmp[0 .. sofar]);
568
569        return true;
570    }
571
572    // return true if OK; false if erroneous characters found
573    bool fromDecimalString(Range)(Range s) scope if (
574        isForwardRange!Range && isSomeChar!(ElementType!Range))
575    {
576        import std.range : walkLength;
577
578        while (!s.empty && s.front == '0')
579        {
580            s.popFront;
581        }
582
583        if (s.empty)
584        {
585            data = ZERO;
586            return true;
587        }
588
589        auto predict_length = (18 * 2 + 2 * s.save.walkLength) / 19;
590        auto tmp = new BigDigit[predict_length];
591
592        tmp.length = biguintFromDecimal(tmp, s);
593
594        data = trustedAssumeUnique(tmp);
595        return true;
596    }
597
598    void fromMagnitude(Range)(Range magnitude) scope
599        if (isInputRange!Range
600            && (isForwardRange!Range || hasLength!Range)
601            && isUnsigned!(ElementType!Range))
602    {
603        while (!magnitude.empty && magnitude.front == 0)
604            magnitude.popFront;
605        static if (hasLength!Range)
606            immutable inputLen = magnitude.length;
607        else
608            immutable inputLen = magnitude.save.walkLength;
609        if (!inputLen)
610        {
611            this.data = ZERO;
612            return;
613        }
614        // `magnitude` has its most significant element first but BigUint.data
615        // stores the most significant last.
616        BigDigit[] newDigits;
617        alias E = ElementType!Range;
618        static if (E.sizeof == BigDigit.sizeof)
619        {
620            newDigits = new BigDigit[inputLen];
621            foreach_reverse (ref digit; newDigits)
622            {
623                digit = magnitude.front;
624                magnitude.popFront();
625            }
626        }
627        else static if (E.sizeof < BigDigit.sizeof)
628        {
629            enum elementsPerDigit = BigDigit.sizeof / E.sizeof;
630            newDigits = new BigDigit[(inputLen + elementsPerDigit - 1) / elementsPerDigit];
631            immutable remainder = inputLen % elementsPerDigit;
632            // If there is a remainder specially assemble the most significant digit.
633            if (remainder)
634            {
635                BigDigit tmp = magnitude.front;
636                magnitude.popFront();
637                foreach (_; 1 .. remainder)
638                {
639                    tmp = (tmp << (E.sizeof * 8)) | magnitude.front;
640                    magnitude.popFront();
641                }
642                newDigits[$-1] = tmp;
643            }
644            // Assemble full digits from most to least significant.
645            foreach_reverse (ref digit; newDigits[0 .. $ - int(remainder != 0)])
646            {
647                BigDigit tmp;
648                static foreach (n; 0 .. elementsPerDigit)
649                {
650                    tmp |= cast(BigDigit) magnitude.front <<
651                        ((BigDigit.sizeof - (E.sizeof * (n + 1))) * 8);
652                    magnitude.popFront();
653                }
654                digit = tmp;
655            }
656        }
657        else static if (E.sizeof > BigDigit.sizeof)
658        {
659            enum digitsPerElement = E.sizeof / BigDigit.sizeof;
660            newDigits = new BigDigit[inputLen * digitsPerElement];
661            size_t i = newDigits.length - 1;
662            foreach (element; magnitude)
663            {
664                static foreach (n; 0 .. digitsPerElement)
665                    newDigits[i - n] =
666                        cast(BigDigit) (element >> ((E.sizeof - (BigDigit.sizeof * (n + 1))) * 8));
667                i -= digitsPerElement;
668            }
669            while (newDigits[$-1] == 0)
670                newDigits = newDigits[0 .. $-1];
671        }
672        else
673            static assert(0);
674        this.data = trustedAssumeUnique(newDigits);
675        return;
676    }
677
678    nothrow pure @safe unittest
679    {
680        immutable BigDigit[] referenceData = [BigDigit(0x2003_4005), 0x6007_8009, 0xABCD];
681        // Internal representation is most-significant-last but `fromMagnitude`
682        // argument is most-significant-first.
683        immutable BigDigit[] referenceMagnitude = [BigDigit(0xABCD), 0x6007_8009, 0x2003_4005];
684        BigUint b;
685        // Test with reference magnitude.
686        b.fromMagnitude(referenceMagnitude);
687        assert(b.data == referenceData);
688        // Test ubyte array.
689        import std.bitmanip : nativeToBigEndian;
690        ubyte[] ubyteMagnitude = nativeToBigEndian(referenceMagnitude[0]) ~
691            nativeToBigEndian(referenceMagnitude[1]) ~
692            nativeToBigEndian(referenceMagnitude[2]);
693        b.data = ZERO;
694        b.fromMagnitude(ubyteMagnitude);
695        assert(b.data == referenceData);
696        // Test ulong array.
697        static if (BigDigit.sizeof == uint.sizeof)
698            immutable(ulong)[] ulongMagnitude = [ulong(referenceMagnitude[0]),
699                ((cast(ulong) referenceMagnitude[1]) << 32) | referenceMagnitude[2],
700            ];
701        else static if (BigDigit.sizeof == ulong.sizeof)
702            alias ulongMagnitude = referenceMagnitude;
703        b.data = ZERO;
704        b.fromMagnitude(ulongMagnitude);
705        assert(b.data == referenceData);
706    }
707
708    ////////////////////////
709    //
710    // All of these member functions create a new BigUint.
711
712    // return x >> y
713    BigUint opBinary(string op, Tulong)(Tulong y) pure nothrow @safe const return scope
714        if (op == ">>" && is (Tulong == ulong))
715    {
716        assert(y > 0, "Can not right shift BigUint by 0");
717        uint bits = cast(uint) y & BIGDIGITSHIFTMASK;
718        if ((y >> LG2BIGDIGITBITS) >= data.length) return BigUint(ZERO);
719        uint words = cast(uint)(y >> LG2BIGDIGITBITS);
720        if (bits == 0)
721        {
722            return BigUint(data[words..$]);
723        }
724        else
725        {
726            uint [] result = new BigDigit[data.length - words];
727            multibyteShr(result, data[words..$], bits);
728
729            if (result.length > 1 && result[$-1] == 0)
730                return BigUint(trustedAssumeUnique(result[0 .. $-1]));
731            else
732                return BigUint(trustedAssumeUnique(result));
733        }
734    }
735
736    // return x << y
737    BigUint opBinary(string op, Tulong)(Tulong y) pure nothrow @safe const scope
738        if (op == "<<" && is (Tulong == ulong))
739    {
740        assert(y > 0, "Can not left shift BigUint by 0");
741        if (isZero()) return this;
742        uint bits = cast(uint) y & BIGDIGITSHIFTMASK;
743        assert((y >> LG2BIGDIGITBITS) < cast(ulong)(uint.max),
744                "Shift result exceeds temporary store");
745        uint words = cast(uint)(y >> LG2BIGDIGITBITS);
746        BigDigit [] result = new BigDigit[data.length + words+1];
747        result[0 .. words] = 0;
748        if (bits == 0)
749        {
750            result[words .. words+data.length] = data[];
751            return BigUint(trustedAssumeUnique(result[0 .. words+data.length]));
752        }
753        else
754        {
755            immutable c = multibyteShl(result[words .. words+data.length], data, bits);
756            if (c == 0) return BigUint(trustedAssumeUnique(result[0 .. words+data.length]));
757            result[$-1] = c;
758            return BigUint(trustedAssumeUnique(result));
759        }
760    }
761
762    // If wantSub is false, return x + y, leaving sign unchanged
763    // If wantSub is true, return abs(x - y), negating sign if x < y
764    static BigUint addOrSubInt(Tulong)(const scope BigUint x, Tulong y,
765            bool wantSub, ref bool sign) pure nothrow @safe if (is(Tulong == ulong))
766    {
767        BigUint r;
768        if (wantSub)
769        {   // perform a subtraction
770            if (x.data.length > 2)
771            {
772                // subInt returns GC allocated array, can be safely cast to immutable
773                r.data = (() @trusted => cast(immutable) subInt(x.data, y))();
774            }
775            else
776            {   // could change sign!
777                ulong xx = x.data[0];
778                if (x.data.length > 1)
779                    xx += (cast(ulong) x.data[1]) << 32;
780                ulong d;
781                if (xx <= y)
782                {
783                    d = y - xx;
784                    sign = !sign;
785                }
786                else
787                {
788                    d = xx - y;
789                }
790                if (d == 0)
791                {
792                    r = 0UL;
793                    sign = false;
794                    return r;
795                }
796                if (d > uint.max)
797                {
798                    r.data = [cast(uint)(d & 0xFFFF_FFFF), cast(uint)(d >> 32)];
799                }
800                else
801                {
802                    r.data = [cast(uint)(d & 0xFFFF_FFFF)];
803                }
804            }
805        }
806        else
807        {
808            // addInt returns GC allocated array, can be safely cast to immutable
809            r.data = (() @trusted => cast(immutable) addInt(x.data, y))();
810        }
811        return r;
812    }
813
814    // If wantSub is false, return x + y, leaving sign unchanged.
815    // If wantSub is true, return abs(x - y), negating sign if x<y
816    static BigUint addOrSub(scope BigUint x, scope BigUint y, bool wantSub, ref bool sign)
817        pure nothrow @safe
818    {
819        BigUint r;
820        if (wantSub)
821        {   // perform a subtraction
822            bool negative;
823            // sub returns GC allocated array, can be safely cast to immutable
824            r.data = (() @trusted => cast(immutable) sub(x.data, y.data, &negative))();
825            sign ^= negative;
826            if (r.isZero())
827            {
828                sign = false;
829            }
830        }
831        else
832        {
833            // add returns GC allocated array, can be safely cast to immutable
834            r.data = (() @trusted => cast(immutable) add(x.data, y.data))();
835        }
836        return r;
837    }
838
839
840    //  return x*y.
841    static BigUint mulInt(T = ulong)(BigUint x, T y) pure nothrow @safe
842    {
843        if (y == 0 || x == 0) return BigUint(ZERO);
844        static if (T.sizeof * 8 <= 32)
845            uint hi = 0;
846        else
847            uint hi = cast(uint) (y >>> 32);
848        uint lo = cast(uint) (y & 0xFFFF_FFFF);
849        uint [] result = new BigDigit[x.data.length+1+(hi != 0)];
850        result[x.data.length] = multibyteMul(result[0 .. x.data.length], x.data, lo, 0);
851        if (hi != 0)
852        {
853            result[x.data.length+1] = multibyteMulAdd!('+')(result[1 .. x.data.length+1],
854                x.data, hi, 0);
855        }
856        return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
857    }
858
859    /*  return x * y.
860     */
861    static BigUint mul(scope BigUint x, scope BigUint y) pure nothrow @safe
862    {
863        if (y == 0 || x == 0)
864            return BigUint(ZERO);
865        auto len = x.data.length + y.data.length;
866        BigDigit [] result = new BigDigit[len];
867        if (y.data.length > x.data.length)
868        {
869            mulInternal(result, y.data, x.data);
870        }
871        else
872        {
873            if (x.data[]==y.data[]) squareInternal(result, x.data);
874            else mulInternal(result, x.data, y.data);
875        }
876        // the highest element could be zero,
877        // in which case we need to reduce the length
878        return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
879    }
880
881    // return x / y
882    static BigUint divInt(T)(return scope BigUint x, T y_) pure nothrow @safe
883    if ( is(immutable T == immutable uint) )
884    {
885        uint y = y_;
886        if (y == 1)
887            return x;
888        uint [] result = new BigDigit[x.data.length];
889        if ((y&(-y))==y)
890        {
891            assert(y != 0, "BigUint division by zero");
892            // perfect power of 2
893            uint b = 0;
894            for (;y != 1; y>>=1)
895            {
896                ++b;
897            }
898            multibyteShr(result, x.data, b);
899        }
900        else
901        {
902            result[] = x.data[];
903            cast(void) multibyteDivAssign(result, y, 0);
904        }
905        return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
906    }
907
908    static BigUint divInt(T)(scope BigUint x, T y) pure nothrow @safe
909    if ( is(immutable T == immutable ulong) )
910    {
911        if (y <= uint.max)
912            return divInt!uint(x, cast(uint) y);
913        if (x.data.length < 2)
914            return BigUint(ZERO);
915        uint hi = cast(uint)(y >>> 32);
916        uint lo = cast(uint)(y & 0xFFFF_FFFF);
917        immutable uint[2] z = [lo, hi];
918        BigDigit[] result = new BigDigit[x.data.length - z.length + 1];
919        divModInternal(result, null, x.data, z[]);
920        return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
921    }
922
923    // return x % y
924    static uint modInt(T)(scope BigUint x, T y_) pure if ( is(immutable T == immutable uint) )
925    {
926        import core.memory : GC;
927        uint y = y_;
928        assert(y != 0, "% 0 not allowed");
929        if ((y&(-y)) == y)
930        {   // perfect power of 2
931            return x.data[0] & (y-1);
932        }
933        else
934        {
935            // horribly inefficient - malloc, copy, & store are unnecessary.
936            uint [] wasteful = new BigDigit[x.data.length];
937            wasteful[] = x.data[];
938            immutable rem = multibyteDivAssign(wasteful, y, 0);
939            () @trusted { GC.free(wasteful.ptr); } ();
940            return rem;
941        }
942    }
943
944    // return x / y
945    static BigUint div(return scope BigUint x, scope BigUint y) pure nothrow @safe
946    {
947        if (y.data.length > x.data.length)
948            return BigUint(ZERO);
949        if (y.data.length == 1)
950            return divInt(x, y.data[0]);
951        BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1];
952           divModInternal(result, null, x.data, y.data);
953        return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
954    }
955
956    // return x % y
957    static BigUint mod(return scope BigUint x, scope BigUint y) pure nothrow @safe
958    {
959        if (y.data.length > x.data.length) return x;
960        if (y.data.length == 1)
961        {
962            return BigUint([modInt(x, y.data[0])]);
963        }
964        BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1];
965        BigDigit [] rem = new BigDigit[y.data.length];
966        divModInternal(result, rem, x.data, y.data);
967        return BigUint(removeLeadingZeros(trustedAssumeUnique(rem)));
968    }
969
970    // Return x / y in quotient, x % y in remainder
971    static void divMod(BigUint x, scope BigUint y,
972                       out BigUint quotient, out BigUint remainder) pure nothrow @safe
973    {
974        /* TODO Qualify parameter `x` as `return` when it applies to `out` parameters */
975        if (y.data.length > x.data.length)
976        {
977            quotient = 0uL;
978            remainder = x;
979        }
980        else if (y.data.length == 1)
981        {
982            quotient = divInt(x, y.data[0]);
983            remainder = BigUint([modInt(x, y.data[0])]);
984        }
985        else
986        {
987            BigDigit[] quot = new BigDigit[x.data.length - y.data.length + 1];
988            BigDigit[] rem = new BigDigit[y.data.length];
989            divModInternal(quot, rem, x.data, y.data);
990            quotient = BigUint(removeLeadingZeros(trustedAssumeUnique(quot)));
991            remainder = BigUint(removeLeadingZeros(trustedAssumeUnique(rem)));
992        }
993    }
994
995    // return x op y
996    static BigUint bitwiseOp(string op)(scope BigUint x, scope BigUint y, bool xSign, bool ySign, ref bool resultSign)
997    pure nothrow @safe if (op == "|" || op == "^" || op == "&")
998    {
999        auto d1 = includeSign(x.data, y.uintLength, xSign);
1000        auto d2 = includeSign(y.data, x.uintLength, ySign);
1001
1002        foreach (i; 0 .. d1.length)
1003        {
1004            mixin("d1[i] " ~ op ~ "= d2[i];");
1005        }
1006
1007        mixin("resultSign = xSign " ~ op ~ " ySign;");
1008
1009        if (resultSign)
1010        {
1011            twosComplement(d1, d1);
1012        }
1013
1014        return BigUint(removeLeadingZeros(trustedAssumeUnique(d1)));
1015    }
1016
1017    /**
1018     * Return a BigUint which is x raised to the power of y.
1019     * Method: Powers of 2 are removed from x, then left-to-right binary
1020     * exponentiation is used.
1021     * Memory allocation is minimized: at most one temporary BigUint is used.
1022     */
1023    static BigUint pow(return scope BigUint x, ulong y) pure nothrow @safe
1024    {
1025        // Deal with the degenerate cases first.
1026        if (y == 0) return BigUint(ONE);
1027        if (y == 1) return x;
1028        if (x == 0 || x == 1) return x;
1029
1030        BigUint result;
1031
1032        // Simplify, step 1: Remove all powers of 2.
1033        uint firstnonzero = firstNonZeroDigit(x.data);
1034        // Now we know x = x[firstnonzero..$] * (2^^(firstnonzero*BigDigitBits))
1035        // where BigDigitBits = BigDigit.sizeof * 8
1036
1037        // See if x[firstnonzero..$] can now fit into a single digit.
1038        bool singledigit = ((x.data.length - firstnonzero) == 1);
1039        // If true, then x0 is that digit
1040        // and the result will be (x0 ^^ y) * (2^^(firstnonzero*y*BigDigitBits))
1041        BigDigit x0 = x.data[firstnonzero];
1042        assert(x0 != 0, "pow(0, y) not allowed");
1043        // Length of the non-zero portion
1044        size_t nonzerolength = x.data.length - firstnonzero;
1045        ulong y0;
1046        uint evenbits = 0; // number of even bits in the bottom of x
1047        while (!(x0 & 1))
1048        {
1049            x0 >>= 1;
1050            ++evenbits;
1051        }
1052
1053        if (x.data.length- firstnonzero == 2)
1054        {
1055            // Check for a single digit straddling a digit boundary
1056            const BigDigit x1 = x.data[firstnonzero+1];
1057            if ((x1 >> evenbits) == 0)
1058            {
1059                x0 |= (x1 << (BigDigit.sizeof * 8 - evenbits));
1060                singledigit = true;
1061            }
1062        }
1063        // Now if (singledigit), x^^y  = (x0 ^^ y) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits))
1064
1065        uint evenshiftbits = 0; // Total powers of 2 to shift by, at the end
1066
1067        // Simplify, step 2: For singledigits, see if we can trivially reduce y
1068
1069        BigDigit finalMultiplier = 1UL;
1070
1071        if (singledigit)
1072        {
1073            // x fits into a single digit. Raise it to the highest power we can
1074            // that still fits into a single digit, then reduce the exponent accordingly.
1075            // We're quite likely to have a residual multiply at the end.
1076            // For example, 10^^100 = (((5^^13)^^7) * 5^^9) * 2^^100.
1077            // and 5^^13 still fits into a uint.
1078            evenshiftbits  = cast(uint)( (evenbits * y) & BIGDIGITSHIFTMASK);
1079            if (x0 == 1)
1080            {   // Perfect power of 2
1081                result = 1UL;
1082                return result << (evenbits + firstnonzero * 8 * BigDigit.sizeof) * y;
1083            }
1084            immutable p = highestPowerBelowUintMax(x0);
1085            if (y <= p)
1086            {   // Just do it with pow
1087                result = cast(ulong) intpow(x0, y);
1088                if (evenbits + firstnonzero == 0)
1089                    return result;
1090                return result << (evenbits + firstnonzero * 8 * BigDigit.sizeof) * y;
1091            }
1092            y0 = y / p;
1093            finalMultiplier = intpow(x0, y - y0*p);
1094            x0 = intpow(x0, p);
1095            // Result is x0
1096            nonzerolength = 1;
1097        }
1098        // Now if (singledigit), x^^y  = finalMultiplier * (x0 ^^ y0) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits))
1099
1100        // Perform a crude check for overflow and allocate result buffer.
1101        // The length required is y * lg2(x) bits.
1102        // which will always fit into y*x.length digits. But this is
1103        // a gross overestimate if x is small (length 1 or 2) and the highest
1104        // digit is nearly empty.
1105        // A better estimate is:
1106        //   y * lg2(x[$-1]/BigDigit.max) + y * (x.length - 1) digits,
1107        //  and the first term is always between
1108        //  y * (bsr(x.data[$-1]) + 1) / BIGDIGITBITS and
1109        //  y * (bsr(x.data[$-1]) + 2) / BIGDIGITBITS
1110        // For single digit payloads, we already have
1111        //   x^^y  = finalMultiplier * (x0 ^^ y0) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits))
1112        // and x0 is almost a full digit, so it's a tight estimate.
1113        // Number of digits is therefore 1 + x0.length*y0 + (evenbits*y)/BIGDIGIT + firstnonzero*y
1114        // Note that the divisions must be rounded up.
1115
1116        // Estimated length in BigDigits
1117        immutable estimatelength = singledigit
1118            ? 1 + y0 + ((evenbits*y  + BigDigit.sizeof * 8 - 1) / (BigDigit.sizeof *8)) + firstnonzero*y
1119            :  x.data.length * y;
1120        // Imprecise check for overflow. Makes the extreme cases easier to debug
1121        // (less extreme overflow will result in an out of memory error).
1122        if (estimatelength > uint.max/(4*BigDigit.sizeof))
1123            assert(0, "Overflow in BigInt.pow");
1124
1125        // The result buffer includes space for all the trailing zeros
1126        BigDigit [] resultBuffer = new BigDigit[cast(size_t) estimatelength];
1127
1128        // Do all the powers of 2!
1129        size_t result_start = cast(size_t)( firstnonzero * y
1130            + (singledigit ? ((evenbits * y) >> LG2BIGDIGITBITS) : 0));
1131
1132        resultBuffer[0 .. result_start] = 0;
1133        BigDigit [] t1 = resultBuffer[result_start..$];
1134        BigDigit [] r1;
1135
1136        if (singledigit)
1137        {
1138            r1 = t1[0 .. 1];
1139            r1[0] = x0;
1140            y = y0;
1141        }
1142        else
1143        {
1144            // It's not worth right shifting by evenbits unless we also shrink the length after each
1145            // multiply or squaring operation. That might still be worthwhile for large y.
1146            r1 = t1[0 .. x.data.length - firstnonzero];
1147            r1[0..$] = x.data[firstnonzero..$];
1148        }
1149
1150        if (y>1)
1151        {   // Set r1 = r1 ^^ y.
1152            // The secondary buffer only needs space for the multiplication results
1153            BigDigit [] t2 = new BigDigit[resultBuffer.length - result_start];
1154            BigDigit [] r2;
1155
1156            int shifts = 63; // num bits in a long
1157            while (!(y & 0x8000_0000_0000_0000L))
1158            {
1159                y <<= 1;
1160                --shifts;
1161            }
1162            y <<=1;
1163
1164            while (y != 0)
1165            {
1166                // For each bit of y: Set r1 =  r1 * r1
1167                // If the bit is 1, set r1 = r1 * x
1168                // Eg, if y is 0b101, result = ((x^^2)^^2)*x == x^^5.
1169                // Optimization opportunity: if more than 2 bits in y are set,
1170                // it's usually possible to reduce the number of multiplies
1171                // by caching odd powers of x. eg for y = 54,
1172                // (0b110110), set u = x^^3, and result is ((u^^8)*u)^^2
1173                r2 = t2[0 .. r1.length*2];
1174                squareInternal(r2, r1);
1175                if (y & 0x8000_0000_0000_0000L)
1176                {
1177                    r1 = t1[0 .. r2.length + nonzerolength];
1178                    if (singledigit)
1179                    {
1180                        r1[$-1] = multibyteMul(r1[0 .. $-1], r2, x0, 0);
1181                    }
1182                    else
1183                    {
1184                        mulInternal(r1, r2, x.data[firstnonzero..$]);
1185                    }
1186                }
1187                else
1188                {
1189                    r1 = t1[0 .. r2.length];
1190                    r1[] = r2[];
1191                }
1192                y <<=1;
1193                shifts--;
1194            }
1195            while (shifts>0)
1196            {
1197                r2 = t2[0 .. r1.length * 2];
1198                squareInternal(r2, r1);
1199                r1 = t1[0 .. r2.length];
1200                r1[] = r2[];
1201                --shifts;
1202            }
1203        }
1204
1205        if (finalMultiplier != 1)
1206        {
1207            const BigDigit carry = multibyteMul(r1, r1, finalMultiplier, 0);
1208            if (carry)
1209            {
1210                r1 = t1[0 .. r1.length + 1];
1211                r1[$-1] = carry;
1212            }
1213        }
1214        if (evenshiftbits)
1215        {
1216            const BigDigit carry = multibyteShl(r1, r1, evenshiftbits);
1217            if (carry != 0)
1218            {
1219                r1 = t1[0 .. r1.length + 1];
1220                r1[$ - 1] = carry;
1221            }
1222        }
1223        while (r1[$ - 1]==0)
1224        {
1225            r1=r1[0 .. $ - 1];
1226        }
1227        return BigUint(trustedAssumeUnique(resultBuffer[0 .. result_start + r1.length]));
1228    }
1229
1230    // Implement toHash so that BigUint works properly as an AA key.
1231    size_t toHash() const @nogc nothrow pure @safe scope
1232    {
1233        return .hashOf(data);
1234    }
1235
1236} // end BigUint
1237
1238@safe pure nothrow unittest
1239{
1240    // ulong comparison test
1241    BigUint a = [1];
1242    assert(a == 1);
1243    // https://issues.dlang.org/show_bug.cgi?id=9548
1244    assert(a < 0x8000_0000_0000_0000UL);
1245
1246    // https://issues.dlang.org/show_bug.cgi?id=12234
1247    BigUint z = [0];
1248    assert(z == 0UL);
1249    assert(!(z > 0UL));
1250    assert(!(z < 0UL));
1251}
1252
1253// https://issues.dlang.org/show_bug.cgi?id=16223
1254@system pure nothrow unittest
1255{
1256    BigUint a = [3];
1257    int b = 5;
1258    assert(BigUint.mulInt(a,b) == 15);
1259}
1260
1261// Remove leading zeros from x, to restore the BigUint invariant
1262inout(BigDigit) [] removeLeadingZeros(return scope inout(BigDigit) [] x) pure nothrow @safe
1263{
1264    size_t k = x.length;
1265    while (k>1 && x[k - 1]==0) --k;
1266    return x[0 .. k];
1267}
1268
1269pure @safe unittest
1270{
1271   BigUint r = BigUint([5]);
1272   BigUint t = BigUint([7]);
1273   BigUint s = BigUint.mod(r, t);
1274   assert(s == 5);
1275}
1276
1277
1278@safe pure unittest
1279{
1280    BigUint r;
1281    r = 5UL;
1282    assert(r.peekUlong(0) == 5UL);
1283    assert(r.peekUint(0) == 5U);
1284    r = 0x1234_5678_9ABC_DEF0UL;
1285    assert(r.peekUlong(0) == 0x1234_5678_9ABC_DEF0UL);
1286    assert(r.peekUint(0) == 0x9ABC_DEF0U);
1287}
1288
1289
1290// Pow tests
1291pure @safe unittest
1292{
1293    BigUint r, s;
1294    r.fromHexString("80000000_00000001");
1295    s = BigUint.pow(r, 5);
1296    r.fromHexString("08000000_00000000_50000000_00000001_40000000_00000002_80000000"
1297      ~ "_00000002_80000000_00000001");
1298    assert(s == r);
1299    s = 10UL;
1300    s = BigUint.pow(s, 39);
1301    r.fromDecimalString("1000000000000000000000000000000000000000");
1302    assert(s == r);
1303    r.fromHexString("1_E1178E81_00000000");
1304    s = BigUint.pow(r, 15); // Regression test: this used to overflow array bounds
1305
1306    r.fromDecimalString("000_000_00");
1307    assert(r == 0);
1308    r.fromDecimalString("0007");
1309    assert(r == 7);
1310    r.fromDecimalString("0");
1311    assert(r == 0);
1312}
1313
1314// Radix conversion tests
1315@safe pure unittest
1316{
1317    BigUint r;
1318    r.fromHexString("1_E1178E81_00000000");
1319    assert(r.toHexString(0, '_', 0) == "1_E1178E81_00000000");
1320    assert(r.toHexString(0, '_', 20) == "0001_E1178E81_00000000");
1321    assert(r.toHexString(0, '_', 16+8) == "00000001_E1178E81_00000000");
1322    assert(r.toHexString(0, '_', 16+9) == "0_00000001_E1178E81_00000000");
1323    assert(r.toHexString(0, '_', 16+8+8) ==   "00000000_00000001_E1178E81_00000000");
1324    assert(r.toHexString(0, '_', 16+8+8+1) ==      "0_00000000_00000001_E1178E81_00000000");
1325    assert(r.toHexString(0, '_', 16+8+8+1, ' ') == "                  1_E1178E81_00000000");
1326    assert(r.toHexString(0, 0, 16+8+8+1) == "00000000000000001E1178E8100000000");
1327    r = 0UL;
1328    assert(r.toHexString(0, '_', 0) == "0");
1329    assert(r.toHexString(0, '_', 7) == "0000000");
1330    assert(r.toHexString(0, '_', 7, ' ') == "      0");
1331    assert(r.toHexString(0, '#', 9) == "0#00000000");
1332    assert(r.toHexString(0, 0, 9) == "000000000");
1333}
1334
1335//
1336@safe pure unittest
1337{
1338    BigUint r;
1339    r.fromHexString("1_E1178E81_00000000");
1340    assert(r.toHexString(0, '_', 0, '0', LetterCase.lower) == "1_e1178e81_00000000");
1341    assert(r.toHexString(0, '_', 20, '0', LetterCase.lower) == "0001_e1178e81_00000000");
1342    assert(r.toHexString(0, '_', 16+8, '0', LetterCase.lower) == "00000001_e1178e81_00000000");
1343    assert(r.toHexString(0, '_', 16+9, '0', LetterCase.lower) == "0_00000001_e1178e81_00000000");
1344    assert(r.toHexString(0, '_', 16+8+8, '0', LetterCase.lower) ==   "00000000_00000001_e1178e81_00000000");
1345    assert(r.toHexString(0, '_', 16+8+8+1, '0', LetterCase.lower) == "0_00000000_00000001_e1178e81_00000000");
1346    assert(r.toHexString(0, '_', 16+8+8+1, ' ', LetterCase.lower) == "                  1_e1178e81_00000000");
1347    assert(r.toHexString(0, 0, 16+8+8+1, '0', LetterCase.lower) == "00000000000000001e1178e8100000000");
1348    r = 0UL;
1349    assert(r.toHexString(0, '_', 0, '0', LetterCase.lower) == "0");
1350    assert(r.toHexString(0, '_', 7, '0', LetterCase.lower) == "0000000");
1351    assert(r.toHexString(0, '_', 7, ' ', LetterCase.lower) == "      0");
1352    assert(r.toHexString(0, '#', 9, '0', LetterCase.lower) == "0#00000000");
1353    assert(r.toHexString(0, 'Z', 9, '0', LetterCase.lower) == "0Z00000000");
1354    assert(r.toHexString(0, 0, 9, '0', LetterCase.lower) == "000000000");
1355}
1356
1357
1358private:
1359void twosComplement(const(BigDigit) [] x, BigDigit[] result)
1360pure nothrow @safe
1361{
1362    foreach (i; 0 .. x.length)
1363    {
1364        result[i] = ~x[i];
1365    }
1366    result[x.length..$] = BigDigit.max;
1367
1368    foreach (i; 0 .. result.length)
1369    {
1370        if (result[i] == BigDigit.max)
1371        {
1372            result[i] = 0;
1373        }
1374        else
1375        {
1376            result[i] += 1;
1377            break;
1378        }
1379    }
1380}
1381
1382// Encode BigInt as BigDigit array (sign and 2's complement)
1383BigDigit[] includeSign(scope const(BigDigit) [] x, size_t minSize, bool sign)
1384pure nothrow @safe
1385{
1386    size_t length = (x.length > minSize) ? x.length : minSize;
1387    BigDigit [] result = new BigDigit[length];
1388    if (sign)
1389    {
1390        twosComplement(x, result);
1391    }
1392    else
1393    {
1394        result[0 .. x.length] = x;
1395    }
1396    return result;
1397}
1398
1399// works for any type
1400T intpow(T)(T x, ulong n) pure nothrow @safe
1401{
1402    T p;
1403
1404    switch (n)
1405    {
1406    case 0:
1407        p = 1;
1408        break;
1409
1410    case 1:
1411        p = x;
1412        break;
1413
1414    case 2:
1415        p = x * x;
1416        break;
1417
1418    default:
1419        p = 1;
1420        while (1)
1421        {
1422            if (n & 1)
1423                p *= x;
1424            n >>= 1;
1425            if (!n)
1426                break;
1427            x *= x;
1428        }
1429        break;
1430    }
1431    return p;
1432}
1433
1434
1435//  returns the maximum power of x that will fit in a uint.
1436int highestPowerBelowUintMax(uint x) pure nothrow @safe
1437{
1438     assert(x > 1, "x must be greater than 1");
1439     static immutable ubyte [22] maxpwr = [ 31, 20, 15, 13, 12, 11, 10, 10, 9, 9,
1440                                          8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7];
1441     if (x<24) return maxpwr[x-2];
1442     if (x<41) return 6;
1443     if (x<85) return 5;
1444     if (x<256) return 4;
1445     if (x<1626) return 3;
1446     if (x<65_536) return 2;
1447     return 1;
1448}
1449
1450//  returns the maximum power of x that will fit in a ulong.
1451int highestPowerBelowUlongMax(uint x) pure nothrow @safe
1452{
1453     assert(x > 1, "x must be greater than 1");
1454     static immutable ubyte [39] maxpwr = [ 63, 40, 31, 27, 24, 22, 21, 20, 19, 18,
1455                                         17, 17, 16, 16, 15, 15, 15, 15, 14, 14,
1456                                         14, 14, 13, 13, 13, 13, 13, 13, 13, 12,
1457                                         12, 12, 12, 12, 12, 12, 12, 12, 12];
1458     if (x<41) return maxpwr[x-2];
1459     if (x<57) return 11;
1460     if (x<85) return 10;
1461     if (x<139) return 9;
1462     if (x<256) return 8;
1463     if (x<566) return 7;
1464     if (x<1626) return 6;
1465     if (x<7132) return 5;
1466     if (x<65_536) return 4;
1467     if (x<2_642_246) return 3;
1468     return 2;
1469}
1470
1471version (StdUnittest)
1472{
1473
1474private int slowHighestPowerBelowUintMax(uint x) pure nothrow @safe
1475{
1476     int pwr = 1;
1477     for (ulong q = x;x*q < cast(ulong) uint.max; )
1478     {
1479         q*=x; ++pwr;
1480     }
1481     return pwr;
1482}
1483
1484@safe pure unittest
1485{
1486    assert(highestPowerBelowUintMax(10)==9);
1487    for (int k=82; k<88; ++k)
1488    {
1489        assert(highestPowerBelowUintMax(k)== slowHighestPowerBelowUintMax(k));
1490    }
1491}
1492
1493}
1494
1495
1496/*  General unsigned subtraction routine for bigints.
1497 *  Sets result = x - y. If the result is negative, negative will be true.
1498 * Returns:
1499 *    unique memory
1500 */
1501BigDigit [] sub(const scope BigDigit [] x, const scope BigDigit [] y, bool *negative)
1502pure nothrow @safe
1503{
1504    if (x.length == y.length)
1505    {
1506        // There's a possibility of cancellation, if x and y are almost equal.
1507        ptrdiff_t last = highestDifferentDigit(x, y);
1508        BigDigit [] result = new BigDigit[last+1];
1509        if (x[last] < y[last])
1510        {   // we know result is negative
1511            multibyteSub(result[0 .. last+1], y[0 .. last+1], x[0 .. last+1], 0);
1512            *negative = true;
1513        }
1514        else
1515        {   // positive or zero result
1516            multibyteSub(result[0 .. last+1], x[0 .. last+1], y[0 .. last+1], 0);
1517            *negative = false;
1518        }
1519        while (result.length > 1 && result[$-1] == 0)
1520        {
1521            result = result[0..$-1];
1522        }
1523//        if (result.length >1 && result[$-1]==0) return result[0..$-1];
1524        return result;
1525    }
1526    // Lengths are different
1527    const(BigDigit) [] large, small;
1528    if (x.length < y.length)
1529    {
1530        *negative = true;
1531        large = y; small = x;
1532    }
1533    else
1534    {
1535        *negative = false;
1536        large = x; small = y;
1537    }
1538    // result.length will be equal to larger length, or could decrease by 1.
1539
1540
1541    BigDigit [] result = new BigDigit[large.length];
1542    BigDigit carry = multibyteSub(result[0 .. small.length], large[0 .. small.length], small, 0);
1543    result[small.length..$] = large[small.length..$];
1544    if (carry)
1545    {
1546        multibyteIncrementAssign!('-')(result[small.length..$], carry);
1547    }
1548    while (result.length > 1 && result[$-1] == 0)
1549    {
1550        result = result[0..$-1];
1551    }
1552    return result;
1553}
1554
1555
1556/*
1557 * return a + b
1558 * Returns:
1559 *    unique memory
1560 */
1561BigDigit [] add(const scope BigDigit [] a, const scope BigDigit [] b) pure nothrow @safe
1562{
1563    const(BigDigit) [] x, y;
1564    if (a.length < b.length)
1565    {
1566        x = b; y = a;
1567    }
1568    else
1569    {
1570        x = a; y = b;
1571    }
1572    // now we know x.length > y.length
1573    // create result. add 1 in case it overflows
1574    BigDigit [] result = new BigDigit[x.length + 1];
1575
1576    BigDigit carry = multibyteAdd(result[0 .. y.length], x[0 .. y.length], y, 0);
1577    if (x.length != y.length)
1578    {
1579        result[y.length..$-1]= x[y.length..$];
1580        carry  = multibyteIncrementAssign!('+')(result[y.length..$-1], carry);
1581    }
1582    if (carry)
1583    {
1584        result[$-1] = carry;
1585        return result;
1586    }
1587    else
1588        return result[0..$-1];
1589}
1590
1591/**  return x + y
1592 */
1593BigDigit [] addInt(const BigDigit[] x, ulong y) @safe pure nothrow
1594{
1595    uint hi = cast(uint)(y >>> 32);
1596    uint lo = cast(uint)(y& 0xFFFF_FFFF);
1597    auto len = x.length;
1598    if (x.length < 2 && hi != 0) ++len;
1599    BigDigit [] result = new BigDigit[len+1];
1600    result[0 .. x.length] = x[];
1601    if (x.length < 2 && hi != 0)
1602    {
1603        result[1]=hi;
1604        hi=0;
1605    }
1606    uint carry = multibyteIncrementAssign!('+')(result[0..$-1], lo);
1607    if (hi != 0) carry += multibyteIncrementAssign!('+')(result[1..$-1], hi);
1608    if (carry)
1609    {
1610        result[$-1] = carry;
1611        return result;
1612    }
1613    else
1614        return result[0..$-1];
1615}
1616
1617/** Return x - y.
1618 *  x must be greater than y.
1619 */
1620BigDigit [] subInt(const BigDigit[] x, ulong y) pure nothrow @safe
1621{
1622    uint hi = cast(uint)(y >>> 32);
1623    uint lo = cast(uint)(y & 0xFFFF_FFFF);
1624    BigDigit [] result = new BigDigit[x.length];
1625    result[] = x[];
1626    multibyteIncrementAssign!('-')(result[], lo);
1627    if (hi)
1628        multibyteIncrementAssign!('-')(result[1..$], hi);
1629    if (result[$-1] == 0)
1630        return result[0..$-1];
1631    else
1632        return result;
1633}
1634
1635/**  General unsigned multiply routine for bigints.
1636 *  Sets result = x * y.
1637 *
1638 *  The length of y must not be larger than the length of x.
1639 *  Different algorithms are used, depending on the lengths of x and y.
1640 *  TODO: "Modern Computer Arithmetic" suggests the OddEvenKaratsuba algorithm for the
1641 *  unbalanced case. (But I doubt it would be faster in practice).
1642 *
1643 */
1644void mulInternal(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y)
1645    pure nothrow @safe
1646{
1647    import core.memory : GC;
1648    assert( result.length == x.length + y.length,
1649            "result array must have enough space to store computed result");
1650    assert( y.length > 0, "y must not be empty");
1651    assert( x.length >= y.length, "x must be greater or equal than y");
1652    if (y.length <= KARATSUBALIMIT)
1653    {
1654        // Small multiplier, we'll just use the asm classic multiply.
1655        if (y.length == 1)
1656        {   // Trivial case, no cache effects to worry about
1657            result[x.length] = multibyteMul(result[0 .. x.length], x, y[0], 0);
1658            return;
1659        }
1660
1661        immutable CACHELIMIT = getCacheLimit;
1662        if (x.length + y.length < CACHELIMIT)
1663            return mulSimple(result, x, y);
1664
1665        // If x is so big that it won't fit into the cache, we divide it into chunks
1666        // Every chunk must be greater than y.length.
1667        // We make the first chunk shorter, if necessary, to ensure this.
1668
1669        auto chunksize = CACHELIMIT / y.length;
1670        immutable residual  =  x.length % chunksize;
1671        if (residual < y.length)
1672        {
1673            chunksize -= y.length;
1674        }
1675
1676        // Use schoolbook multiply.
1677        mulSimple(result[0 .. chunksize + y.length], x[0 .. chunksize], y);
1678        auto done = chunksize;
1679
1680        while (done < x.length)
1681        {
1682            // result[done .. done+ylength] already has a value.
1683            chunksize = (done + (CACHELIMIT / y.length) < x.length) ? (CACHELIMIT / y.length) :  x.length - done;
1684            BigDigit [KARATSUBALIMIT] partial;
1685            partial[0 .. y.length] = result[done .. done+y.length];
1686            mulSimple(result[done .. done+chunksize+y.length], x[done .. done+chunksize], y);
1687            addAssignSimple(result[done .. done+chunksize + y.length], partial[0 .. y.length]);
1688            done += chunksize;
1689        }
1690        return;
1691    }
1692
1693    immutable half = (x.length >> 1) + (x.length & 1);
1694    if (2*y.length*y.length <= x.length*x.length)
1695    {
1696        // UNBALANCED MULTIPLY
1697        // Use school multiply to cut into quasi-squares of Karatsuba-size
1698        // or larger. The ratio of the two sides of the 'square' must be
1699        // between 1.414:1 and 1:1. Use Karatsuba on each chunk.
1700        //
1701        // For maximum performance, we want the ratio to be as close to
1702        // 1:1 as possible. To achieve this, we can either pad x or y.
1703        // The best choice depends on the modulus x%y.
1704        auto numchunks = x.length / y.length;
1705        auto chunksize = y.length;
1706        auto extra =  x.length % y.length;
1707        auto maxchunk = chunksize + extra;
1708        bool paddingY; // true = we're padding Y, false = we're padding X.
1709        bool isExtraSmall = extra * extra * 2 < y.length * y.length;
1710        if (numchunks == 1 && isExtraSmall)
1711        {
1712            // We divide (x_first_half * y) and (x_last_half * y)
1713            // between 1.414:1 and 1.707:1 (1.707 = 1+1/sqrt(2)).
1714            // (1.414 ~ 1.707)/2:1 is balanced.
1715            BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(y.length) + y.length];
1716            BigDigit [] partial = scratchbuff[$ - y.length .. $];
1717            scratchbuff = scratchbuff[0 .. $ - y.length];
1718            mulKaratsuba(result[0 .. half + y.length], y, x[0 .. half], scratchbuff);
1719            partial[] = result[half .. half + y.length];
1720            mulKaratsuba(result[half .. $], y, x[half .. $], scratchbuff);
1721            BigDigit c = addAssignSimple(result[half .. half + y.length], partial);
1722            if (c) multibyteIncrementAssign!('+')(result[half + y.length..$], c);
1723            () @trusted { GC.free(scratchbuff.ptr); } ();
1724        }
1725        else
1726        {
1727            if (isExtraSmall)
1728            {
1729                // The leftover bit is small enough that it should be incorporated
1730                // in the existing chunks.
1731                // Make all the chunks a tiny bit bigger
1732                // (We're padding y with zeros)
1733                chunksize += extra / numchunks;
1734                extra = x.length - chunksize*numchunks;
1735                // there will probably be a few left over.
1736                // Every chunk will either have size chunksize, or chunksize+1.
1737                maxchunk = chunksize + 1;
1738                paddingY = true;
1739                assert(chunksize + extra + chunksize *(numchunks-1) == x.length,
1740                    "Unexpected size");
1741            }
1742            else
1743            {
1744                // the extra bit is large enough that it's worth making a new chunk.
1745                // (This means we're padding x with zeros, when doing the first one).
1746                maxchunk = chunksize;
1747                ++numchunks;
1748                paddingY = false;
1749                assert(extra + chunksize *(numchunks-1) == x.length,
1750                    "Unexpected size");
1751            }
1752            // We make the buffer a bit bigger so we have space for the partial sums.
1753            BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(maxchunk) + y.length];
1754            BigDigit [] partial = scratchbuff[$ - y.length .. $];
1755            scratchbuff = scratchbuff[0 .. $ - y.length];
1756            size_t done; // how much of X have we done so far?
1757            if (paddingY)
1758            {
1759                // If the first chunk is bigger, do it first. We're padding y.
1760                mulKaratsuba(result[0 .. y.length + chunksize + (extra > 0 ? 1 : 0 )],
1761                    x[0 .. chunksize + (extra>0?1:0)], y, scratchbuff);
1762                done = chunksize + (extra > 0 ? 1 : 0);
1763                if (extra) --extra;
1764            }
1765            else
1766            {   // We're padding X. Begin with the extra bit.
1767                mulKaratsuba(result[0 .. y.length + extra], y, x[0 .. extra], scratchbuff);
1768                done = extra;
1769                extra = 0;
1770            }
1771            immutable basechunksize = chunksize;
1772            while (done < x.length)
1773            {
1774                chunksize = basechunksize + (extra > 0 ? 1 : 0);
1775                if (extra) --extra;
1776                partial[] = result[done .. done+y.length];
1777                mulKaratsuba(result[done .. done + y.length + chunksize],
1778                        x[done .. done+chunksize], y, scratchbuff);
1779                addAssignSimple(result[done .. done + y.length + chunksize], partial);
1780                done += chunksize;
1781            }
1782            () @trusted { GC.free(scratchbuff.ptr); } ();
1783        }
1784    }
1785    else
1786    {
1787        // Balanced. Use Karatsuba directly.
1788        BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)];
1789        mulKaratsuba(result, x, y, scratchbuff);
1790        () @trusted { GC.free(scratchbuff.ptr); } ();
1791    }
1792}
1793
1794// https://issues.dlang.org/show_bug.cgi?id=20493
1795@safe unittest
1796{
1797    // the bug report has a testcase with very large numbers (~10^3800 and ~10^2300)
1798    // the number itself isn't important, only the amount of digits, so we do a simpler
1799    // multiplication of the same size, analogous to:
1800    // 11111111 * 11111111 = 0123456787654321
1801    // but instead of base 10, it's in base `BigDigit`
1802
1803    BigDigit[398] x = 1;
1804    BigDigit[236] y = 1;
1805    BigDigit[x.length + y.length] result;
1806    mulInternal(result[], x[], y[]);
1807
1808    // create an array of the form [1, 2, ..., y.length, ..., y.length, y.length-1, ..., 1, 0]
1809    BigDigit[x.length + y.length] expected = y.length;
1810    foreach (BigDigit i; 0 .. y.length)
1811    {
1812        expected[i] = i+1;
1813        expected[$-1-i] = i;
1814    }
1815
1816    assert(result == expected);
1817}
1818
1819/**  General unsigned squaring routine for BigInts.
1820 *   Sets result = x*x.
1821 *   NOTE: If the highest half-digit of x is zero, the highest digit of result will
1822 *   also be zero.
1823 */
1824void squareInternal(BigDigit[] result, const BigDigit[] x) pure nothrow @safe
1825{
1826  import core.memory : GC;
1827  // Squaring is potentially half a multiply, plus add the squares of
1828  // the diagonal elements.
1829  assert(result.length == 2*x.length,
1830     "result needs to have twice the capacity of x");
1831  if (x.length <= KARATSUBASQUARELIMIT)
1832  {
1833      if (x.length == 1)
1834      {
1835         result[1] = multibyteMul(result[0 .. 1], x, x[0], 0);
1836         return;
1837      }
1838      return squareSimple(result, x);
1839  }
1840  // The nice thing about squaring is that it always stays balanced
1841  BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)];
1842  squareKaratsuba(result, x, scratchbuff);
1843  () @trusted { GC.free(scratchbuff.ptr); } ();
1844}
1845
1846
1847import core.bitop : bsr;
1848
1849/// if remainder is null, only calculate quotient.
1850void divModInternal(BigDigit [] quotient, BigDigit[] remainder, const BigDigit [] u,
1851        const BigDigit [] v) pure nothrow @safe
1852{
1853    import core.memory : GC;
1854    assert(quotient.length == u.length - v.length + 1,
1855        "Invalid quotient length");
1856    assert(remainder == null || remainder.length == v.length,
1857        "Invalid remainder");
1858    assert(v.length > 1, "v must have more than 1 element");
1859    assert(u.length >= v.length, "u must be as longer or longer than v");
1860
1861    // Normalize by shifting v left just enough so that
1862    // its high-order bit is on, and shift u left the
1863    // same amount. The highest bit of u will never be set.
1864
1865    BigDigit [] vn = new BigDigit[v.length];
1866    BigDigit [] un = new BigDigit[u.length + 1];
1867    // How much to left shift v, so that its MSB is set.
1868    uint s = BIGDIGITSHIFTMASK - bsr(v[$-1]);
1869    if (s != 0)
1870    {
1871        multibyteShl(vn, v, s);
1872        un[$-1] = multibyteShl(un[0..$-1], u, s);
1873    }
1874    else
1875    {
1876        vn[] = v[];
1877        un[0..$-1] = u[];
1878        un[$-1] = 0;
1879    }
1880    if (quotient.length<FASTDIVLIMIT)
1881    {
1882        schoolbookDivMod(quotient, un, vn);
1883    }
1884    else
1885    {
1886        blockDivMod(quotient, un, vn);
1887    }
1888
1889    // Unnormalize remainder, if required.
1890    if (remainder != null)
1891    {
1892        if (s == 0) remainder[] = un[0 .. vn.length];
1893        else multibyteShr(remainder, un[0 .. vn.length+1], s);
1894    }
1895    () @trusted { GC.free(un.ptr); GC.free(vn.ptr); } ();
1896}
1897
1898pure @safe unittest
1899{
1900    immutable(uint) [] u = [0, 0xFFFF_FFFE, 0x8000_0000];
1901    immutable(uint) [] v = [0xFFFF_FFFF, 0x8000_0000];
1902    uint [] q = new uint[u.length - v.length + 1];
1903    uint [] r = new uint[2];
1904    divModInternal(q, r, u, v);
1905    assert(q[]==[0xFFFF_FFFFu, 0]);
1906    assert(r[]==[0xFFFF_FFFFu, 0x7FFF_FFFF]);
1907    u = [0, 0xFFFF_FFFE, 0x8000_0001];
1908    v = [0xFFFF_FFFF, 0x8000_0000];
1909    divModInternal(q, r, u, v);
1910}
1911
1912
1913// Converts a big uint to a hexadecimal string.
1914//
1915// Optionally, a separator character (eg, an underscore) may be added between
1916// every 8 digits.
1917// buff.length must be data.length*8 if separator is zero,
1918// or data.length*9 if separator is non-zero. It will be completely filled.
1919char [] biguintToHex(return scope char [] buff, const scope BigDigit [] data, char separator=0,
1920        LetterCase letterCase = LetterCase.upper) pure nothrow @safe
1921{
1922    int x=0;
1923    for (ptrdiff_t i=data.length - 1; i >= 0; --i)
1924    {
1925        toHexZeroPadded(buff[x .. x+8], data[i], letterCase);
1926        x+=8;
1927        if (separator)
1928        {
1929            if (i>0) buff[x] = separator;
1930            ++x;
1931        }
1932    }
1933    return buff;
1934}
1935
1936/**
1937 * Convert a big uint into an octal string.
1938 *
1939 * Params:
1940 *  buff = The destination buffer for the octal string. Must be large enough to
1941 *      store the result, including leading zeroes, which is
1942 *      ceil(data.length * BigDigitBits / 3) characters.
1943 *      The buffer is filled from back to front, starting from `buff[$-1]`.
1944 *  data = The biguint to be converted.
1945 *
1946 * Returns: The index of the leading non-zero digit in `buff`. Will be
1947 * `buff.length - 1` if the entire big uint is zero.
1948 */
1949size_t biguintToOctal(char[] buff, const(BigDigit)[] data)
1950    pure nothrow @safe @nogc
1951{
1952    ubyte carry = 0;
1953    int shift = 0;
1954    size_t penPos = buff.length - 1;
1955    size_t lastNonZero = buff.length - 1;
1956
1957    pragma(inline) void output(uint digit) @nogc nothrow
1958    {
1959        if (digit != 0)
1960            lastNonZero = penPos;
1961        buff[penPos--] = cast(char)('0' + digit);
1962    }
1963
1964    foreach (bigdigit; data)
1965    {
1966        if (shift < 0)
1967        {
1968            // Some bits were carried over from previous word.
1969            assert(shift > -3, "shift must be greater than -3");
1970            output(((bigdigit << -shift) | carry) & 0b111);
1971            shift += 3;
1972            assert(shift > 0, "shift must be 1 or greater");
1973        }
1974
1975        while (shift <= BigDigitBits - 3)
1976        {
1977            output((bigdigit >>> shift) & 0b111);
1978            shift += 3;
1979        }
1980
1981        if (shift < BigDigitBits)
1982        {
1983            // Some bits need to be carried forward.
1984            carry = (bigdigit >>> shift) & 0b11;
1985        }
1986        shift -= BigDigitBits;
1987        assert(shift >= -2 && shift <= 0, "shift must in [-2,0]");
1988    }
1989
1990    if (shift < 0)
1991    {
1992        // Last word had bits that haven't been output yet.
1993        assert(shift > -3, "Shift must be greater than -3");
1994        output(carry);
1995    }
1996
1997    return lastNonZero;
1998}
1999
2000/** Convert a big uint into a decimal string.
2001 *
2002 * Params:
2003 *  data    The biguint to be converted. Will be destroyed.
2004 *  buff    The destination buffer for the decimal string. Must be
2005 *          large enough to store the result, including leading zeros.
2006 *          Will be filled backwards, starting from buff[$-1].
2007 *
2008 * buff.length must be >= (data.length*32)/log2(10) = 9.63296 * data.length.
2009 * Returns:
2010 *    the lowest index of buff which was used.
2011 */
2012size_t biguintToDecimal(char [] buff, BigDigit [] data) pure nothrow @safe
2013{
2014    ptrdiff_t sofar = buff.length;
2015    // Might be better to divide by (10^38/2^32) since that gives 38 digits for
2016    // the price of 3 divisions and a shr; this version only gives 27 digits
2017    // for 3 divisions.
2018    while (data.length>1)
2019    {
2020        uint rem = multibyteDivAssign(data, 10_0000_0000, 0);
2021        itoaZeroPadded(buff[sofar-9 .. sofar], rem);
2022        sofar -= 9;
2023        if (data[$-1] == 0 && data.length > 1)
2024        {
2025            data.length = data.length - 1;
2026        }
2027    }
2028    itoaZeroPadded(buff[sofar-10 .. sofar], data[0]);
2029    sofar -= 10;
2030    // and strip off the leading zeros
2031    while (sofar != buff.length-1 && buff[sofar] == '0')
2032        sofar++;
2033    return sofar;
2034}
2035
2036/** Convert a decimal string into a big uint.
2037 *
2038 * Params:
2039 *  data    The biguint to be receive the result. Must be large enough to
2040 *          store the result.
2041 *  s       The decimal string. May contain _ or 0 .. 9
2042 *
2043 * The required length for the destination buffer is slightly less than
2044 *  1 + s.length/log2(10) = 1 + s.length/3.3219.
2045 *
2046 * Returns:
2047 *    the highest index of data which was used.
2048 */
2049int biguintFromDecimal(Range)(BigDigit[] data, Range s)
2050if (
2051    isInputRange!Range &&
2052    isSomeChar!(ElementType!Range) &&
2053    !isInfinite!Range)
2054in
2055{
2056    static if (hasLength!Range)
2057        assert((data.length >= 2) || (data.length == 1 && s.length == 1),
2058            "data has a invalid length");
2059}
2060do
2061{
2062    import std.conv : ConvException;
2063
2064    // Convert to base 1e19 = 10_000_000_000_000_000_000.
2065    // (this is the largest power of 10 that will fit into a long).
2066    // The length will be less than 1 + s.length/log2(10) = 1 + s.length/3.3219.
2067    // 485 bits will only just fit into 146 decimal digits.
2068    // As we convert the string, we record the number of digits we've seen in base 19:
2069    // hi is the number of digits/19, lo is the extra digits (0 to 18).
2070    // TODO: This is inefficient for very large strings (it is O(n^^2)).
2071    // We should take advantage of fast multiplication once the numbers exceed
2072    // Karatsuba size.
2073    uint lo = 0; // number of powers of digits, 0 .. 18
2074    uint x = 0;
2075    ulong y = 0;
2076    uint hi = 0; // number of base 1e19 digits
2077    data[0] = 0; // initially number is 0.
2078    if (data.length > 1)
2079        data[1] = 0;
2080
2081    foreach (character; s)
2082    {
2083        if (character == '_')
2084            continue;
2085
2086        if (character < '0' || character > '9')
2087            throw new ConvException("invalid digit");
2088        x *= 10;
2089        x += character - '0';
2090        ++lo;
2091        if (lo == 9)
2092        {
2093            y = x;
2094            x = 0;
2095        }
2096        if (lo == 18)
2097        {
2098            y *= 10_0000_0000;
2099            y += x;
2100            x = 0;
2101        }
2102        if (lo == 19)
2103        {
2104            y *= 10;
2105            y += x;
2106            x = 0;
2107            // Multiply existing number by 10^19, then add y1.
2108            if (hi>0)
2109            {
2110                data[hi] = multibyteMul(data[0 .. hi], data[0 .. hi], 1_220_703_125*2u, 0); // 5^13*2 = 0x9184_E72A
2111                ++hi;
2112                data[hi] = multibyteMul(data[0 .. hi], data[0 .. hi], 15_625*262_144u, 0); // 5^6*2^18 = 0xF424_0000
2113                ++hi;
2114            }
2115            else
2116                hi = 2;
2117            uint c = multibyteIncrementAssign!('+')(data[0 .. hi], cast(uint)(y&0xFFFF_FFFF));
2118            c += multibyteIncrementAssign!('+')(data[1 .. hi], cast(uint)(y >> 32));
2119            if (c != 0)
2120            {
2121                data[hi]=c;
2122                ++hi;
2123            }
2124            y = 0;
2125            lo = 0;
2126        }
2127    }
2128    // Now set y = all remaining digits.
2129    if (lo >= 18)
2130    {
2131    }
2132    else if (lo >= 9)
2133    {
2134        for (int k=9; k<lo; ++k) y*=10;
2135        y+=x;
2136    }
2137    else
2138    {
2139        for (int k=0; k<lo; ++k) y*=10;
2140        y+=x;
2141    }
2142    if (lo != 0)
2143    {
2144        if (hi == 0)
2145        {
2146            data[0] = cast(uint) y;
2147            if (data.length == 1)
2148            {
2149                hi = 1;
2150            }
2151            else
2152            {
2153                data[1] = cast(uint)(y >>> 32);
2154                hi=2;
2155            }
2156        }
2157        else
2158        {
2159            while (lo>0)
2160            {
2161                immutable c = multibyteMul(data[0 .. hi], data[0 .. hi], 10, 0);
2162                if (c != 0)
2163                {
2164                    data[hi]=c;
2165                    ++hi;
2166                }
2167                --lo;
2168            }
2169            uint c = multibyteIncrementAssign!('+')(data[0 .. hi], cast(uint)(y&0xFFFF_FFFF));
2170            if (y > 0xFFFF_FFFFL)
2171            {
2172                c += multibyteIncrementAssign!('+')(data[1 .. hi], cast(uint)(y >> 32));
2173            }
2174            if (c != 0)
2175            {
2176                data[hi]=c;
2177                ++hi;
2178            }
2179        }
2180    }
2181    while (hi>1 && data[hi-1]==0)
2182        --hi;
2183    return hi;
2184}
2185
2186
2187// ------------------------
2188// These in-place functions are only for internal use; they are incompatible
2189// with COW.
2190
2191// Classic 'schoolbook' multiplication.
2192void mulSimple(BigDigit[] result, const(BigDigit) [] left,
2193        const(BigDigit)[] right) pure nothrow @safe
2194in
2195{
2196    assert(result.length == left.length + right.length,
2197        "Result must be able to store left + right");
2198    assert(right.length>1, "right must not be empty");
2199}
2200do
2201{
2202    result[left.length] = multibyteMul(result[0 .. left.length], left, right[0], 0);
2203    multibyteMultiplyAccumulate(result[1..$], left, right[1..$]);
2204}
2205
2206// Classic 'schoolbook' squaring
2207void squareSimple(BigDigit[] result, const(BigDigit) [] x) pure nothrow @safe
2208in
2209{
2210    assert(result.length == 2*x.length, "result must be twice as long as x");
2211    assert(x.length>1, "x must not be empty");
2212}
2213do
2214{
2215    multibyteSquare(result, x);
2216}
2217
2218
2219// add two uints of possibly different lengths. Result must be as long
2220// as the larger length.
2221// Returns carry (0 or 1).
2222uint addSimple(BigDigit[] result, const BigDigit [] left, const BigDigit [] right)
2223pure nothrow @safe
2224in
2225{
2226    assert(result.length == left.length,
2227        "result and left must be of the same length");
2228    assert(left.length >= right.length,
2229        "left must be longer or of equal length to right");
2230    assert(right.length > 0, "right must not be empty");
2231}
2232do
2233{
2234    uint carry = multibyteAdd(result[0 .. right.length],
2235            left[0 .. right.length], right, 0);
2236    if (right.length < left.length)
2237    {
2238        result[right.length .. left.length] = left[right.length .. $];
2239        carry = multibyteIncrementAssign!('+')(result[right.length..$], carry);
2240    }
2241    return carry;
2242}
2243
2244//  result = left - right
2245// returns carry (0 or 1)
2246BigDigit subSimple(BigDigit [] result,const(BigDigit) [] left,
2247        const(BigDigit) [] right) pure nothrow
2248in
2249{
2250    assert(result.length == left.length,
2251        "result and left must be of the same length");
2252    assert(left.length >= right.length,
2253        "left must be longer or of equal length to right");
2254    assert(right.length > 0, "right must not be empty");
2255}
2256do
2257{
2258    BigDigit carry = multibyteSub(result[0 .. right.length],
2259            left[0 .. right.length], right, 0);
2260    if (right.length < left.length)
2261    {
2262        result[right.length .. left.length] = left[right.length .. $];
2263        carry = multibyteIncrementAssign!('-')(result[right.length..$], carry);
2264    } //else if (result.length == left.length+1) { result[$-1] = carry; carry=0; }
2265    return carry;
2266}
2267
2268
2269/* result = result - right
2270 * Returns carry = 1 if result was less than right.
2271*/
2272BigDigit subAssignSimple(BigDigit [] result, const(BigDigit) [] right)
2273pure nothrow @safe
2274{
2275    assert(result.length >= right.length,
2276       "result must be longer or of equal length to right");
2277    uint c = multibyteSub(result[0 .. right.length], result[0 .. right.length], right, 0);
2278    if (c && result.length > right.length)
2279        c = multibyteIncrementAssign!('-')(result[right.length .. $], c);
2280    return c;
2281}
2282
2283/* result = result + right
2284*/
2285BigDigit addAssignSimple(BigDigit [] result, const(BigDigit) [] right)
2286pure nothrow @safe
2287{
2288    assert(result.length >= right.length,
2289       "result must be longer or of equal length to right");
2290    uint c = multibyteAdd(result[0 .. right.length], result[0 .. right.length], right, 0);
2291    if (c && result.length > right.length)
2292       c = multibyteIncrementAssign!('+')(result[right.length .. $], c);
2293    return c;
2294}
2295
2296/* performs result += wantSub? - right : right;
2297*/
2298BigDigit addOrSubAssignSimple(BigDigit [] result, const(BigDigit) [] right,
2299        bool wantSub) pure nothrow @safe
2300{
2301    if (wantSub)
2302        return subAssignSimple(result, right);
2303    else
2304        return addAssignSimple(result, right);
2305}
2306
2307
2308// return true if x<y, considering leading zeros
2309bool less(const(BigDigit)[] x, const(BigDigit)[] y) pure nothrow @safe
2310{
2311    assert(x.length >= y.length,
2312       "x must be longer or of equal length to y");
2313    auto k = x.length-1;
2314    while (x[k]==0 && k >= y.length)
2315        --k;
2316    if (k >= y.length)
2317        return false;
2318    while (k>0 && x[k]==y[k])
2319        --k;
2320    return x[k] < y[k];
2321}
2322
2323// Set result = abs(x-y), return true if result is negative(x<y), false if x <= y.
2324bool inplaceSub(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y)
2325    pure nothrow @safe
2326{
2327    assert(result.length == ((x.length >= y.length) ? x.length : y.length),
2328        "result must capable to store the maximum of x and y");
2329
2330    size_t minlen;
2331    bool negative;
2332    if (x.length >= y.length)
2333    {
2334        minlen = y.length;
2335        negative = less(x, y);
2336    }
2337    else
2338    {
2339       minlen = x.length;
2340       negative = !less(y, x);
2341    }
2342    const (BigDigit)[] large, small;
2343    if (negative)
2344    {
2345        large = y; small = x;
2346    }
2347    else
2348    {
2349        large = x; small = y;
2350    }
2351
2352    BigDigit carry = multibyteSub(result[0 .. minlen], large[0 .. minlen], small[0 .. minlen], 0);
2353    if (x.length != y.length)
2354    {
2355        result[minlen .. large.length]= large[minlen..$];
2356        result[large.length..$] = 0;
2357        if (carry)
2358            multibyteIncrementAssign!('-')(result[minlen..$], carry);
2359    }
2360    return negative;
2361}
2362
2363/* Determine how much space is required for the temporaries
2364 * when performing a Karatsuba multiplication.
2365 * TODO: determining a tight bound is non-trivial and depends on KARATSUBALIMIT, see:
2366 * https://issues.dlang.org/show_bug.cgi?id=20493
2367 */
2368size_t karatsubaRequiredBuffSize(size_t xlen) pure nothrow @safe
2369{
2370    return xlen <= KARATSUBALIMIT ? 0 : (xlen * 9) / 4;
2371}
2372
2373/* Sets result = x*y, using Karatsuba multiplication.
2374* x must be longer or equal to y.
2375* Valid only for balanced multiplies, where x is not shorter than y.
2376* It is superior to schoolbook multiplication if and only if
2377*    sqrt(2)*y.length > x.length > y.length.
2378* Karatsuba multiplication is O(n^1.59), whereas schoolbook is O(n^2)
2379* The maximum allowable length of x and y is uint.max; but better algorithms
2380* should be used far before that length is reached.
2381* Params:
2382* scratchbuff      An array long enough to store all the temporaries. Will be destroyed.
2383*/
2384void mulKaratsuba(BigDigit [] result, const(BigDigit) [] x,
2385        const(BigDigit)[] y, BigDigit [] scratchbuff) pure nothrow @safe
2386{
2387    assert(x.length >= y.length, "x must be greater or equal to y");
2388    assert(result.length < uint.max, "Operands too large");
2389    assert(result.length == x.length + y.length,
2390        "result must be as large as x + y");
2391    if (x.length <= KARATSUBALIMIT)
2392    {
2393        return mulSimple(result, x, y);
2394    }
2395    // Must be almost square (otherwise, a schoolbook iteration is better)
2396    assert(2L * y.length * y.length > (x.length-1) * (x.length-1),
2397        "Bigint Internal Error: Asymmetric Karatsuba");
2398
2399    // The subtractive version of Karatsuba multiply uses the following result:
2400    // (Nx1 + x0)*(Ny1 + y0) = (N*N)*x1y1 + x0y0 + N * (x0y0 + x1y1 - mid)
2401    // where mid = (x0-x1)*(y0-y1)
2402    // requiring 3 multiplies of length N, instead of 4.
2403    // The advantage of the subtractive over the additive version is that
2404    // the mid multiply cannot exceed length N. But there are subtleties:
2405    // (x0-x1),(y0-y1) may be negative or zero. To keep it simple, we
2406    // retain all of the leading zeros in the subtractions
2407
2408    // half length, round up.
2409    auto half = (x.length >> 1) + (x.length & 1);
2410
2411    const(BigDigit) [] x0 = x[0 .. half];
2412    const(BigDigit) [] x1 = x[half .. $];
2413    const(BigDigit) [] y0 = y[0 .. half];
2414    const(BigDigit) [] y1 = y[half .. $];
2415    BigDigit [] mid = scratchbuff[0 .. half*2];
2416    BigDigit [] newscratchbuff = scratchbuff[half*2 .. $];
2417    BigDigit [] resultLow = result[0 .. 2*half];
2418    BigDigit [] resultHigh = result[2*half .. $];
2419     // initially use result to store temporaries
2420    BigDigit [] xdiff= result[0 .. half];
2421    BigDigit [] ydiff = result[half .. half*2];
2422
2423    // First, we calculate mid, and sign of mid
2424    immutable bool midNegative = inplaceSub(xdiff, x0, x1)
2425                      ^ inplaceSub(ydiff, y0, y1);
2426    mulKaratsuba(mid, xdiff, ydiff, newscratchbuff);
2427
2428    // Low half of result gets x0 * y0. High half gets x1 * y1
2429
2430    mulKaratsuba(resultLow, x0, y0, newscratchbuff);
2431
2432    if (2L * y1.length * y1.length < x1.length * x1.length)
2433    {
2434        // an asymmetric situation has been created.
2435        // Worst case is if x:y = 1.414 : 1, then x1:y1 = 2.41 : 1.
2436        // Applying one schoolbook multiply gives us two pieces each 1.2:1
2437        if (y1.length <= KARATSUBALIMIT)
2438            mulSimple(resultHigh, x1, y1);
2439        else
2440        {
2441            // divide x1 in two, then use schoolbook multiply on the two pieces.
2442            auto quarter = (x1.length >> 1) + (x1.length & 1);
2443            immutable ysmaller = (quarter >= y1.length);
2444            mulKaratsuba(resultHigh[0 .. quarter+y1.length], ysmaller ? x1[0 .. quarter] : y1,
2445                ysmaller ? y1 : x1[0 .. quarter], newscratchbuff);
2446            // Save the part which will be overwritten.
2447            immutable ysmaller2 = ((x1.length - quarter) >= y1.length);
2448            newscratchbuff[0 .. y1.length] = resultHigh[quarter .. quarter + y1.length];
2449            mulKaratsuba(resultHigh[quarter..$], ysmaller2 ? x1[quarter..$] : y1,
2450                ysmaller2 ? y1 : x1[quarter..$], newscratchbuff[y1.length..$]);
2451
2452            resultHigh[quarter..$].addAssignSimple(newscratchbuff[0 .. y1.length]);
2453        }
2454    }
2455    else
2456        mulKaratsuba(resultHigh, x1, y1, newscratchbuff);
2457
2458    /* We now have result = x0y0 + (N*N)*x1y1
2459       Before adding or subtracting mid, we must calculate
2460       result += N * (x0y0 + x1y1)
2461       We can do this with three half-length additions. With a = x0y0, b = x1y1:
2462                      aHI aLO
2463        +       aHI   aLO
2464        +       bHI   bLO
2465        +  bHI  bLO
2466        =  R3   R2    R1   R0
2467        R1 = aHI + bLO + aLO
2468        R2 = aHI + bLO + aHI + carry_from_R1
2469        R3 = bHi + carry_from_R2
2470
2471     It might actually be quicker to do it in two full-length additions:
2472     newscratchbuff[2*half] = addSimple(newscratchbuff[0 .. 2*half], result[0 .. 2*half], result[2*half..$]);
2473     addAssignSimple(result[half..$], newscratchbuff[0 .. 2*half+1]);
2474   */
2475    BigDigit[] R1 = result[half .. half*2];
2476    BigDigit[] R2 = result[half*2 .. half*3];
2477    BigDigit[] R3 = result[half*3..$];
2478    BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1
2479    BigDigit c2 = multibyteAdd(R1, R2, result[0 .. half], 0); // c2:R1 = R2 + R1 + R0
2480    BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3
2481    if (c1+c2)
2482        multibyteIncrementAssign!('+')(result[half*2..$], c1+c2);
2483    if (c1+c3)
2484        multibyteIncrementAssign!('+')(R3, c1+c3);
2485
2486    // And finally we subtract mid
2487    addOrSubAssignSimple(result[half..$], mid, !midNegative);
2488}
2489
2490void squareKaratsuba(BigDigit [] result, const BigDigit [] x,
2491        BigDigit [] scratchbuff) pure nothrow @safe
2492{
2493    // See mulKaratsuba for implementation comments.
2494    // Squaring is simpler, since it never gets asymmetric.
2495    assert(result.length < uint.max, "Operands too large");
2496    assert(result.length == 2*x.length,
2497        "result must be twice the length of x");
2498    if (x.length <= KARATSUBASQUARELIMIT)
2499    {
2500        return squareSimple(result, x);
2501    }
2502    // half length, round up.
2503    auto half = (x.length >> 1) + (x.length & 1);
2504
2505    const(BigDigit)[] x0 = x[0 .. half];
2506    const(BigDigit)[] x1 = x[half .. $];
2507    BigDigit [] mid = scratchbuff[0 .. half*2];
2508    BigDigit [] newscratchbuff = scratchbuff[half*2 .. $];
2509     // initially use result to store temporaries
2510    BigDigit [] xdiff= result[0 .. half];
2511    const BigDigit [] ydiff = result[half .. half*2];
2512
2513    // First, we calculate mid. We don't need its sign
2514    inplaceSub(xdiff, x0, x1);
2515    squareKaratsuba(mid, xdiff, newscratchbuff);
2516
2517    // Set result = x0x0 + (N*N)*x1x1
2518    squareKaratsuba(result[0 .. 2*half], x0, newscratchbuff);
2519    squareKaratsuba(result[2*half .. $], x1, newscratchbuff);
2520
2521    /* result += N * (x0x0 + x1x1)
2522       Do this with three half-length additions. With a = x0x0, b = x1x1:
2523        R1 = aHI + bLO + aLO
2524        R2 = aHI + bLO + aHI + carry_from_R1
2525        R3 = bHi + carry_from_R2
2526    */
2527    BigDigit[] R1 = result[half .. half*2];
2528    BigDigit[] R2 = result[half*2 .. half*3];
2529    BigDigit[] R3 = result[half*3..$];
2530    BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1
2531    BigDigit c2 = multibyteAdd(R1, R2, result[0 .. half], 0); // c2:R1 = R2 + R1 + R0
2532    BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3
2533    if (c1+c2) multibyteIncrementAssign!('+')(result[half*2..$], c1+c2);
2534    if (c1+c3) multibyteIncrementAssign!('+')(R3, c1+c3);
2535
2536    // And finally we subtract mid, which is always positive
2537    subAssignSimple(result[half..$], mid);
2538}
2539
2540/* Knuth's Algorithm D, as presented in
2541 * H.S. Warren, "Hacker's Delight", Addison-Wesley Professional (2002).
2542 * Also described in "Modern Computer Arithmetic" 0.2, Exercise 1.8.18.
2543 * Given u and v, calculates  quotient  = u / v, u = u % v.
2544 * v must be normalized (ie, the MSB of v must be 1).
2545 * The most significant words of quotient and u may be zero.
2546 * u[0 .. v.length] holds the remainder.
2547 */
2548void schoolbookDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v)
2549    pure nothrow @safe
2550{
2551    assert(quotient.length == u.length - v.length,
2552        "quotient has wrong length");
2553    assert(v.length > 1, "v must not be empty");
2554    assert(u.length >= v.length, "u must be larger or equal to v");
2555    assert((v[$ - 1] & 0x8000_0000) != 0, "Invalid value at v[$ - 1]");
2556    assert(u[$ - 1] < v[$ - 1], "u[$ - 1] must be less than v[$ - 1]");
2557    // BUG: This code only works if BigDigit is uint.
2558    uint vhi = v[$-1];
2559    uint vlo = v[$-2];
2560
2561    for (ptrdiff_t j = u.length - v.length - 1; j >= 0; j--)
2562    {
2563        // Compute estimate of quotient[j],
2564        // qhat = (three most significant words of u)/(two most sig words of v).
2565        uint qhat;
2566        if (u[j + v.length] == vhi)
2567        {
2568            // uu/vhi could exceed uint.max (it will be 0x8000_0000 or 0x8000_0001)
2569            qhat = uint.max;
2570        }
2571        else
2572        {
2573            uint ulo = u[j + v.length - 2];
2574            version (D_InlineAsm_X86)
2575            {
2576                // Note: On DMD, this is only ~10% faster than the non-asm code.
2577                uint *p = &u[j + v.length - 1];
2578                asm pure nothrow @trusted
2579                {
2580                    mov EAX, p;
2581                    mov EDX, [EAX+4];
2582                    mov EAX, [EAX];
2583                    div dword ptr [vhi];
2584                    mov qhat, EAX;
2585                    mov ECX, EDX;
2586div3by2correction:
2587                    mul dword ptr [vlo]; // EDX:EAX = qhat * vlo
2588                    sub EAX, ulo;
2589                    sbb EDX, ECX;
2590                    jbe div3by2done;
2591                    mov EAX, qhat;
2592                    dec EAX;
2593                    mov qhat, EAX;
2594                    add ECX, dword ptr [vhi];
2595                    jnc div3by2correction;
2596div3by2done:    ;
2597                }
2598            }
2599            else
2600            { // version (InlineAsm)
2601                ulong uu = (cast(ulong)(u[j + v.length]) << 32) | u[j + v.length - 1];
2602                immutable bigqhat = uu / vhi;
2603                ulong rhat =  uu - bigqhat * vhi;
2604                qhat = cast(uint) bigqhat;
2605again:
2606                if (cast(ulong) qhat * vlo > ((rhat << 32) + ulo))
2607                {
2608                    --qhat;
2609                    rhat += vhi;
2610                    if (!(rhat & 0xFFFF_FFFF_0000_0000L))
2611                        goto again;
2612                }
2613            } // version (InlineAsm)
2614        }
2615        // Multiply and subtract.
2616        uint carry = multibyteMulAdd!('-')(u[j .. j + v.length], v, qhat, 0);
2617
2618        if (u[j+v.length] < carry)
2619        {
2620            // If we subtracted too much, add back
2621            --qhat;
2622            carry -= multibyteAdd(u[j .. j + v.length],u[j .. j + v.length], v, 0);
2623        }
2624        quotient[j] = qhat;
2625        u[j + v.length] = u[j + v.length] - carry;
2626    }
2627}
2628
2629// TODO: Replace with a library call
2630void itoaZeroPadded(char[] output, uint value)
2631    pure nothrow @safe @nogc
2632{
2633    for (auto i = output.length; i--;)
2634    {
2635        if (value < 10)
2636        {
2637            output[i] = cast(char)(value + '0');
2638            value = 0;
2639        }
2640        else
2641        {
2642            output[i] = cast(char)(value % 10 + '0');
2643            value /= 10;
2644        }
2645    }
2646}
2647
2648void toHexZeroPadded(char[] output, uint value,
2649        LetterCase letterCase = LetterCase.upper) pure nothrow @safe
2650{
2651    ptrdiff_t x = output.length - 1;
2652    static immutable string upperHexDigits = "0123456789ABCDEF";
2653    static immutable string lowerHexDigits = "0123456789abcdef";
2654    for ( ; x >= 0; --x)
2655    {
2656        if (letterCase == LetterCase.upper)
2657        {
2658            output[x] = upperHexDigits[value & 0xF];
2659        }
2660        else
2661        {
2662            output[x] = lowerHexDigits[value & 0xF];
2663        }
2664        value >>= 4;
2665    }
2666}
2667
2668// Returns the highest value of i for which left[i]!=right[i],
2669// or 0 if left[] == right[]
2670size_t highestDifferentDigit(const BigDigit [] left, const BigDigit [] right)
2671pure nothrow @nogc @safe
2672{
2673    assert(left.length == right.length,
2674        "left have a length equal to that of right");
2675    for (ptrdiff_t i = left.length - 1; i>0; --i)
2676    {
2677        if (left[i] != right[i])
2678            return i;
2679    }
2680    return 0;
2681}
2682
2683// Returns the lowest value of i for which x[i]!=0.
2684int firstNonZeroDigit(const BigDigit [] x) pure nothrow @nogc @safe
2685{
2686    int k = 0;
2687    while (x[k]==0)
2688    {
2689        ++k;
2690        assert(k < x.length, "k must be less than x.length");
2691    }
2692    return k;
2693}
2694
2695/*
2696    Calculate quotient and remainder of u / v using fast recursive division.
2697    v must be normalised, and must be at least half as long as u.
2698    Given u and v, v normalised, calculates  quotient  = u/v, u = u%v.
2699    scratch is temporary storage space, length must be >= quotient + 1.
2700
2701Returns:
2702    u[0 .. v.length] is the remainder. u[v.length..$] is corrupted.
2703
2704    Implements algorithm 1.8 from MCA.
2705    This algorithm has an annoying special case. After the first recursion, the
2706    highest bit of the quotient may be set. This means that in the second
2707    recursive call, the 'in' contract would be violated. (This happens only
2708    when the top quarter of u is equal to the top half of v. A base 10
2709    equivalent example of this situation is 5517/56; the first step gives
2710    55/5 = 11). To maintain the in contract, we pad a zero to the top of both
2711    u and the quotient. 'mayOverflow' indicates that that the special case
2712    has occurred.
2713    (In MCA, a different strategy is used: the in contract is weakened, and
2714    schoolbookDivMod is more general: it allows the high bit of u to be set).
2715    See also:
2716    - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022,
2717      Max-Planck Institute fuer Informatik, (Oct 1998).
2718*/
2719void recursiveDivMod(BigDigit[] quotient, BigDigit[] u, const(BigDigit)[] v,
2720                     BigDigit[] scratch, bool mayOverflow = false)
2721                     pure nothrow @safe
2722in
2723{
2724    // v must be normalized
2725    assert(v.length > 1, "v must not be empty");
2726    assert((v[$ - 1] & 0x8000_0000) != 0, "Invalid value at v[$ - 1]");
2727    assert(!(u[$ - 1] & 0x8000_0000), "Invalid value at u[$ - 1]");
2728    assert(quotient.length == u.length - v.length,
2729        "quotient must be of equal length of u - v");
2730    if (mayOverflow)
2731    {
2732        assert(u[$-1] == 0, "Invalid value at u[$ - 1]");
2733        assert(u[$-2] & 0x8000_0000, "Invalid value at u[$ - 2]");
2734    }
2735
2736    // Must be symmetric. Use block schoolbook division if not.
2737    assert((mayOverflow ? u.length-1 : u.length) <= 2 * v.length,
2738        "Invalid length of u");
2739    assert((mayOverflow ? u.length-1 : u.length) >= v.length,
2740        "Invalid length of u");
2741    assert(scratch.length >= quotient.length + (mayOverflow ? 0 : 1),
2742        "Invalid quotient length");
2743}
2744do
2745{
2746    if (quotient.length < FASTDIVLIMIT)
2747    {
2748        return schoolbookDivMod(quotient, u, v);
2749    }
2750
2751    // Split quotient into two halves, but keep padding in the top half
2752    auto k = (mayOverflow ?  quotient.length - 1 : quotient.length) >> 1;
2753
2754    // RECURSION 1: Calculate the high half of the quotient
2755
2756    // Note that if u and quotient were padded, they remain padded during
2757    // this call, so in contract is satisfied.
2758    recursiveDivMod(quotient[k .. $], u[2 * k .. $], v[k .. $],
2759        scratch, mayOverflow);
2760
2761    // quotient[k..$] is our guess at the high quotient.
2762    // u[2*k .. 2.*k + v.length - k = k + v.length] is the high part of the
2763    // first remainder. u[0 .. 2*k] is the low part.
2764
2765    // Calculate the full first remainder to be
2766    //    remainder - highQuotient * lowDivisor
2767    // reducing highQuotient until the remainder is positive.
2768    // The low part of the remainder, u[0 .. k], cannot be altered by this.
2769
2770    adjustRemainder(quotient[k .. $], u[k .. k + v.length], v, k,
2771            scratch[0 .. quotient.length], mayOverflow);
2772
2773    // RECURSION 2: Calculate the low half of the quotient
2774    // The full first remainder is now in u[0 .. k + v.length].
2775
2776    if (u[k + v.length - 1] & 0x8000_0000)
2777    {
2778        // Special case. The high quotient is 0x1_00...000 or 0x1_00...001.
2779        // This means we need an extra quotient word for the next recursion.
2780        // We need to restore the invariant for the recursive calls.
2781        // We do this by padding both u and quotient. Extending u is trivial,
2782        // because the higher words will not be used again. But for the
2783        // quotient, we're clobbering the low word of the high quotient,
2784        // so we need save it, and add it back in after the recursive call.
2785
2786        auto clobberedQuotient = quotient[k];
2787        u[k+v.length] = 0;
2788
2789        recursiveDivMod(quotient[0 .. k+1], u[k .. k + v.length+1],
2790            v[k .. $], scratch, true);
2791        adjustRemainder(quotient[0 .. k+1], u[0 .. v.length], v, k,
2792            scratch[0 .. 2 * k+1], true);
2793
2794        // Now add the quotient word that got clobbered earlier.
2795        multibyteIncrementAssign!('+')(quotient[k..$], clobberedQuotient);
2796    }
2797    else
2798    {
2799        // The special case has NOT happened.
2800        recursiveDivMod(quotient[0 .. k], u[k .. k + v.length], v[k .. $],
2801            scratch, false);
2802
2803        // high remainder is in u[k .. k+(v.length-k)] == u[k .. v.length]
2804
2805        adjustRemainder(quotient[0 .. k], u[0 .. v.length], v, k,
2806            scratch[0 .. 2 * k]);
2807    }
2808}
2809
2810// rem -= quot * v[0 .. k].
2811// If would make rem negative, decrease quot until rem is >= 0.
2812// Needs (quot.length * k) scratch space to store the result of the multiply.
2813void adjustRemainder(BigDigit[] quot, BigDigit[] rem, const(BigDigit)[] v,
2814        ptrdiff_t k,
2815        BigDigit[] scratch, bool mayOverflow = false) pure nothrow @safe
2816{
2817    assert(rem.length == v.length, "rem must be as long as v");
2818    mulInternal(scratch, quot, v[0 .. k]);
2819    uint carry = 0;
2820    if (mayOverflow)
2821        carry = scratch[$-1] + subAssignSimple(rem, scratch[0..$-1]);
2822    else
2823        carry = subAssignSimple(rem, scratch);
2824    while (carry)
2825    {
2826        multibyteIncrementAssign!('-')(quot, 1); // quot--
2827        carry -= multibyteAdd(rem, rem, v, 0);
2828    }
2829}
2830
2831// Cope with unbalanced division by performing block schoolbook division.
2832void blockDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v)
2833pure nothrow @safe
2834{
2835    import core.memory : GC;
2836    assert(quotient.length == u.length - v.length,
2837        "quotient must be of equal length of u - v");
2838    assert(v.length > 1, "v must not be empty");
2839    assert(u.length >= v.length, "u must be longer or of equal length as v");
2840    assert((v[$-1] & 0x8000_0000)!=0, "Invalid value at v[$ - 1]");
2841    assert((u[$-1] & 0x8000_0000)==0, "Invalid value at u[$ - 1]");
2842    BigDigit [] scratch = new BigDigit[v.length + 1];
2843
2844    // Perform block schoolbook division, with 'v.length' blocks.
2845    auto m = u.length - v.length;
2846    while (m > v.length)
2847    {
2848        immutable mayOverflow = (u[m + v.length -1 ] & 0x8000_0000)!=0;
2849        BigDigit saveq;
2850        if (mayOverflow)
2851        {
2852            u[m + v.length] = 0;
2853            saveq = quotient[m];
2854        }
2855        recursiveDivMod(quotient[m-v.length .. m + (mayOverflow? 1: 0)],
2856            u[m - v.length .. m + v.length + (mayOverflow? 1: 0)], v, scratch, mayOverflow);
2857        if (mayOverflow)
2858        {
2859            assert(quotient[m] == 0, "quotient must not be 0");
2860            quotient[m] = saveq;
2861        }
2862        m -= v.length;
2863    }
2864    recursiveDivMod(quotient[0 .. m], u[0 .. m + v.length], v, scratch);
2865    () @trusted { GC.free(scratch.ptr); } ();
2866}
2867
2868@system unittest
2869{
2870    version (none)
2871    {
2872        import core.stdc.stdio;
2873
2874        void printBiguint(const uint [] data)
2875        {
2876            char [] buff = biguintToHex(new char[data.length*9], data, '_');
2877            printf("%.*s\n", cast(int) buff.length, buff.ptr);
2878        }
2879
2880        void printDecimalBigUint(BigUint data)
2881        {
2882            auto str = data.toDecimalString(0);
2883            printf("%.*s\n", cast(int) str.length, str.ptr);
2884        }
2885    }
2886
2887    uint [] a, b;
2888    a = new uint[43];
2889    b = new uint[179];
2890    for (int i=0; i<a.length; ++i) a[i] = 0x1234_B6E9 + i;
2891    for (int i=0; i<b.length; ++i) b[i] = 0x1BCD_8763 - i*546;
2892
2893    a[$-1] |= 0x8000_0000;
2894    uint [] r = new uint[a.length];
2895    uint [] q = new uint[b.length-a.length+1];
2896
2897    divModInternal(q, r, b, a);
2898    q = q[0..$-1];
2899    uint [] r1 = r.dup;
2900    uint [] q1 = q.dup;
2901    blockDivMod(q, b, a);
2902    r = b[0 .. a.length];
2903    assert(r[] == r1[]);
2904    assert(q[] == q1[]);
2905}
2906
2907// biguintToOctal
2908@safe unittest
2909{
2910    enum bufSize = 5 * BigDigitBits / 3 + 1;
2911    auto buf = new char[bufSize];
2912    size_t i;
2913    BigDigit[] data = [ 342391 ];
2914
2915    // Basic functionality with single word
2916    i = biguintToOctal(buf, data);
2917    assert(i == bufSize - 7 && buf[i .. $] == "1234567");
2918
2919    // Test carrying bits between words
2920    data = [ 0x77053977, 0x39770539, 0x00000005 ];
2921    i = biguintToOctal(buf, data);
2922    assert(i == bufSize - 23 && buf[i .. $] == "12345670123456701234567");
2923
2924    // Test carried bits in the last word
2925    data = [ 0x80000000 ];
2926    i = biguintToOctal(buf, data);
2927    assert(buf[i .. $] == "20000000000");
2928
2929    // Test boundary between 3rd and 4th word where the number of bits is
2930    // divisible by 3 and no bits should be carried.
2931    //
2932    // The 0xC0000000's are for "poisoning" the carry to be non-zero when the
2933    // rollover happens, so that if any bugs happen in wrongly adding the carry
2934    // to the next word, non-zero bits will show up in the output.
2935    data = [ 0xC0000000, 0xC0000000, 0xC0000000, 0x00000010 ];
2936    i = biguintToOctal(buf, data);
2937    assert(buf[i .. $] == "2060000000001400000000030000000000");
2938
2939    // Boundary case: 0
2940    data = [ 0 ];
2941    i = biguintToOctal(buf, data);
2942    assert(buf[i .. $] == "0");
2943}
2944