1
2/*
3 *  M_APM  -  mapmfmul.c
4 *
5 *  Copyright (C) 1999 - 2007   Michael C. Ring
6 *
7 *  Permission to use, copy, and distribute this software and its
8 *  documentation for any purpose with or without fee is hereby granted,
9 *  provided that the above copyright notice appear in all copies and
10 *  that both that copyright notice and this permission notice appear
11 *  in supporting documentation.
12 *
13 *  Permission to modify the software is granted. Permission to distribute
14 *  the modified code is granted. Modifications are to be distributed by
15 *  using the file 'license.txt' as a template to modify the file header.
16 *  'license.txt' is available in the official MAPM distribution.
17 *
18 *  This software is provided "as is" without express or implied warranty.
19 */
20
21/*
22 *      $Id: mapmfmul.c,v 1.33 2007/12/03 01:52:22 mike Exp $
23 *
24 *      This file contains the divide-and-conquer FAST MULTIPLICATION
25 *	function as well as its support functions.
26 *
27 *      $Log: mapmfmul.c,v $
28 *      Revision 1.33  2007/12/03 01:52:22  mike
29 *      Update license
30 *
31 *      Revision 1.32  2004/02/18 03:16:15  mike
32 *      optimize 4 byte multiply (when FFT is disabled)
33 *
34 *      Revision 1.31  2003/12/04 01:14:06  mike
35 *      redo math on 'borrow'
36 *
37 *      Revision 1.30  2003/07/21 20:34:18  mike
38 *      Modify error messages to be in a consistent format.
39 *
40 *      Revision 1.29  2003/03/31 21:55:07  mike
41 *      call generic error handling function
42 *
43 *      Revision 1.28  2002/11/03 22:38:15  mike
44 *      Updated function parameters to use the modern style
45 *
46 *      Revision 1.27  2002/02/14 19:53:32  mike
47 *      add conditional compiler option to disable use
48 *      of FFT multiply if the user so chooses.
49 *
50 *      Revision 1.26  2001/07/26 20:56:38  mike
51 *      fix comment, no code change
52 *
53 *      Revision 1.25  2001/07/16 19:43:45  mike
54 *      add function M_free_all_fmul
55 *
56 *      Revision 1.24  2001/02/11 22:34:47  mike
57 *      modify parameters to REALLOC
58 *
59 *      Revision 1.23  2000/10/20 19:23:26  mike
60 *      adjust power_of_2 function so it should work with
61 *      64 bit processors and beyond.
62 *
63 *      Revision 1.22  2000/08/23 22:27:34  mike
64 *      no real code change, re-named 2 local functions
65 *      so they make more sense.
66 *
67 *      Revision 1.21  2000/08/01 22:24:38  mike
68 *      use sizeof(int) function call to stop some
69 *      compilers from complaining.
70 *
71 *      Revision 1.20  2000/07/19 17:12:00  mike
72 *      lower the number of bytes that the FFT can handle. worst case
73 *      testing indicated math overflow when >= 1048576
74 *
75 *      Revision 1.19  2000/07/08 18:29:03  mike
76 *      increase define so FFT can handle bigger numbers
77 *
78 *      Revision 1.18  2000/07/06 23:20:12  mike
79 *      changed my mind. use static local MAPM numbers
80 *      for temp data storage
81 *
82 *      Revision 1.17  2000/07/06 20:52:34  mike
83 *      use init function to get local writable copies
84 *      instead of using the stack
85 *
86 *      Revision 1.16  2000/07/04 17:25:09  mike
87 *      guarantee 16 bit compilers still work OK
88 *
89 *      Revision 1.15  2000/07/04 15:40:02  mike
90 *      add call to use FFT algorithm
91 *
92 *      Revision 1.14  2000/05/05 21:10:46  mike
93 *      add comment indicating availability of assembly language
94 *      version of M_4_byte_multiply for Linux on x86 platforms.
95 *
96 *      Revision 1.13  2000/04/20 19:30:45  mike
97 *      minor optimization to 4 byte multiply
98 *
99 *      Revision 1.12  2000/04/14 15:39:30  mike
100 *      optimize the fast multiply function. don't re-curse down
101 *      to a size of 1. recurse down to a size of '4' and then
102 *      call a special 4 byte multiply function.
103 *
104 *      Revision 1.11  2000/02/03 23:02:13  mike
105 *      put in RCS for real...
106 *
107 *      Revision 1.10  2000/02/03 22:59:08  mike
108 *      remove the extra recursive function. not needed any
109 *      longer since all current compilers should not have
110 *      any problem with true recursive calls.
111 *
112 *      Revision 1.9  2000/02/03 22:47:39  mike
113 *      use MAPM_* generic memory function
114 *
115 *      Revision 1.8  1999/09/19 21:13:44  mike
116 *      eliminate unneeded local int in _split
117 *
118 *      Revision 1.7  1999/08/12 22:36:23  mike
119 *      move the 3 'simple' function to the top of file
120 *      so GCC can in-line the code.
121 *
122 *      Revision 1.6  1999/08/12 22:01:14  mike
123 *      more minor optimizations
124 *
125 *      Revision 1.5  1999/08/12 02:02:06  mike
126 *      minor optimization
127 *
128 *      Revision 1.4  1999/08/10 22:51:59  mike
129 *      minor tweak
130 *
131 *      Revision 1.3  1999/08/10 00:45:47  mike
132 *      added more comments and a few minor tweaks
133 *
134 *      Revision 1.2  1999/08/09 02:50:02  mike
135 *      add some comments
136 *
137 *      Revision 1.1  1999/08/08 18:27:57  mike
138 *      Initial revision
139 */
140
141#include "m_apm_lc.h"
142
143static int M_firsttimef = TRUE;
144
145/*
146 *      specify the max size the FFT routine can handle
147 *      (in MAPM, #digits = 2 * #bytes)
148 *
149 *      this number *must* be an exact power of 2.
150 *
151 *      **WORST** case input numbers (all 9's) has shown that
152 *	the FFT math will overflow if the #define here is
153 *      >= 1048576. On my system, 524,288 worked OK. I will
154 *      factor down another factor of 2 to safeguard against
155 *	other computers have less precise floating point math.
156 *	If you are confident in your system, 524288 will
157 *	theoretically work fine.
158 *
159 *      the define here allows the FFT algorithm to multiply two
160 *      524,288 digit numbers yielding a 1,048,576 digit result.
161 */
162
163#define MAX_FFT_BYTES 262144
164
165/*
166 *      the Divide-and-Conquer multiplication kicks in when the size of
167 *	the numbers exceed the capability of the FFT (#define just above).
168 *
169 *	#bytes    D&C call depth
170 *	------    --------------
171 *       512K           1
172 *        1M            2
173 *        2M            3
174 *        4M            4
175 *       ...           ...
176 *    2.1990E+12       23
177 *
178 *	the following stack sizes are sized to meet the
179 *      above 2.199E+12 example, though I wouldn't want to
180 *	wait for it to finish...
181 *
182 *      Each call requires 7 stack variables to be saved so
183 *	we need a stack depth of 23 * 7 + PAD.  (we use 164)
184 *
185 *      For 'exp_stack', 3 integers also are required to be saved
186 *      for each recursive call so we need a stack depth of
187 *      23 * 3 + PAD. (we use 72)
188 *
189 *
190 *      If the FFT multiply is disabled, resize the arrays
191 *	as follows:
192 *
193 *      the following stack sizes are sized to meet the
194 *      worst case expected assuming we are multiplying
195 *      numbers with 2.14E+9 (2 ^ 31) digits.
196 *
197 *      For sizeof(int) == 4 (32 bits) there may be up to 32 recursive
198 *      calls. Each call requires 7 stack variables so we need a
199 *      stack depth of 32 * 7 + PAD.  (we use 240)
200 *
201 *      For 'exp_stack', 3 integers also are required to be saved
202 *      for each recursive call so we need a stack depth of
203 *      32 * 3 + PAD. (we use 100)
204 */
205
206#ifdef NO_FFT_MULTIPLY
207#define M_STACK_SIZE 240
208#define M_ISTACK_SIZE 100
209#else
210#define M_STACK_SIZE 164
211#define M_ISTACK_SIZE 72
212#endif
213
214static int    exp_stack[M_ISTACK_SIZE];
215static int    exp_stack_ptr;
216
217static UCHAR  *mul_stack_data[M_STACK_SIZE];
218static int    mul_stack_data_size[M_STACK_SIZE];
219static int    M_mul_stack_ptr;
220
221static UCHAR  *fmul_a1, *fmul_a0, *fmul_a9, *fmul_b1, *fmul_b0,
222	      *fmul_b9, *fmul_t0;
223
224static int    size_flag, bit_limit, stmp, itmp, mii;
225
226static M_APM  M_ain;
227static M_APM  M_bin;
228
229static char   *M_stack_ptr_error_msg = "\'M_get_stack_ptr\', Out of memory";
230
231extern void   M_fast_multiply(M_APM, M_APM, M_APM);
232extern void   M_fmul_div_conq(UCHAR *, UCHAR *, UCHAR *, int);
233extern void   M_fmul_add(UCHAR *, UCHAR *, int, int);
234extern int    M_fmul_subtract(UCHAR *, UCHAR *, UCHAR *, int);
235extern void   M_fmul_split(UCHAR *, UCHAR *, UCHAR *, int);
236extern int    M_next_power_of_2(int);
237extern int    M_get_stack_ptr(int);
238extern void   M_push_mul_int(int);
239extern int    M_pop_mul_int(void);
240
241#ifdef NO_FFT_MULTIPLY
242extern void   M_4_byte_multiply(UCHAR *, UCHAR *, UCHAR *);
243#else
244extern void   M_fast_mul_fft(UCHAR *, UCHAR *, UCHAR *, int);
245#endif
246
247/*
248 *      the following algorithm is used in this fast multiply routine
249 *	(sometimes called the divide-and-conquer technique.)
250 *
251 *	assume we have 2 numbers (a & b) with 2N digits.
252 *
253 *      let : a = (2^N) * A1 + A0 , b = (2^N) * B1 + B0
254 *
255 *	where 'A1' is the 'most significant half' of 'a' and
256 *      'A0' is the 'least significant half' of 'a'. Same for
257 *	B1 and B0.
258 *
259 *	Now use the identity :
260 *
261 *               2N   N            N                    N
262 *	ab  =  (2  + 2 ) A1B1  +  2 (A1-A0)(B0-B1)  + (2 + 1)A0B0
263 *
264 *
265 *      The original problem of multiplying 2 (2N) digit numbers has
266 *	been reduced to 3 multiplications of N digit numbers plus some
267 *	additions, subtractions, and shifts.
268 *
269 *	The fast multiplication algorithm used here uses the above
270 *	identity in a recursive process. This algorithm results in
271 *	O(n ^ 1.585) growth.
272 */
273
274
275/****************************************************************************/
276void	M_free_all_fmul()
277{
278int	k;
279
280if (M_firsttimef == FALSE)
281  {
282   m_apm_free(M_ain);
283   m_apm_free(M_bin);
284
285   for (k=0; k < M_STACK_SIZE; k++)
286     {
287      if (mul_stack_data_size[k] != 0)
288        {
289         MAPM_FREE(mul_stack_data[k]);
290	}
291     }
292
293   M_firsttimef = TRUE;
294  }
295}
296/****************************************************************************/
297void	M_push_mul_int(int val)
298{
299exp_stack[++exp_stack_ptr] = val;
300}
301/****************************************************************************/
302int	M_pop_mul_int()
303{
304return(exp_stack[exp_stack_ptr--]);
305}
306/****************************************************************************/
307void   	M_fmul_split(UCHAR *x1, UCHAR *x0, UCHAR *xin, int nbytes)
308{
309memcpy(x1, xin, nbytes);
310memcpy(x0, (xin + nbytes), nbytes);
311}
312/****************************************************************************/
313void	M_fast_multiply(M_APM rr, M_APM aa, M_APM bb)
314{
315void	*vp;
316int	ii, k, nexp, sign;
317
318if (M_firsttimef)
319  {
320   M_firsttimef = FALSE;
321
322   for (k=0; k < M_STACK_SIZE; k++)
323     mul_stack_data_size[k] = 0;
324
325   size_flag = M_get_sizeof_int();
326   bit_limit = 8 * size_flag + 1;
327
328   M_ain = m_apm_init();
329   M_bin = m_apm_init();
330  }
331
332exp_stack_ptr   = -1;
333M_mul_stack_ptr = -1;
334
335m_apm_copy(M_ain, aa);
336m_apm_copy(M_bin, bb);
337
338sign = M_ain->m_apm_sign * M_bin->m_apm_sign;
339nexp = M_ain->m_apm_exponent + M_bin->m_apm_exponent;
340
341if (M_ain->m_apm_datalength >= M_bin->m_apm_datalength)
342  ii = M_ain->m_apm_datalength;
343else
344  ii = M_bin->m_apm_datalength;
345
346ii = (ii + 1) >> 1;
347ii = M_next_power_of_2(ii);
348
349/* Note: 'ii' must be >= 4 here. this is guaranteed
350   by the caller: m_apm_multiply
351*/
352
353k = 2 * ii;                   /* required size of result, in bytes  */
354
355M_apm_pad(M_ain, k);          /* fill out the data so the number of */
356M_apm_pad(M_bin, k);          /* bytes is an exact power of 2       */
357
358if (k > rr->m_apm_malloclength)
359  {
360   if ((vp = MAPM_REALLOC(rr->m_apm_data, (k + 32))) == NULL)
361     {
362      /* fatal, this does not return */
363
364      M_apm_log_error_msg(M_APM_FATAL, "\'M_fast_multiply\', Out of memory");
365     }
366
367   rr->m_apm_malloclength = k + 28;
368   rr->m_apm_data = (UCHAR *)vp;
369  }
370
371#ifdef NO_FFT_MULTIPLY
372
373M_fmul_div_conq(rr->m_apm_data, M_ain->m_apm_data,
374                                M_bin->m_apm_data, ii);
375#else
376
377/*
378 *     if the numbers are *really* big, use the divide-and-conquer
379 *     routine first until the numbers are small enough to be handled
380 *     by the FFT algorithm. If the numbers are already small enough,
381 *     call the FFT multiplication now.
382 *
383 *     Note that 'ii' here is (and must be) an exact power of 2.
384 */
385
386if (size_flag == 2)   /* if still using 16 bit compilers .... */
387  {
388   M_fast_mul_fft(rr->m_apm_data, M_ain->m_apm_data,
389                                  M_bin->m_apm_data, ii);
390  }
391else                  /* >= 32 bit compilers */
392  {
393   if (ii > (MAX_FFT_BYTES + 2))
394     {
395      M_fmul_div_conq(rr->m_apm_data, M_ain->m_apm_data,
396                                      M_bin->m_apm_data, ii);
397     }
398   else
399     {
400      M_fast_mul_fft(rr->m_apm_data, M_ain->m_apm_data,
401                                     M_bin->m_apm_data, ii);
402     }
403  }
404
405#endif
406
407rr->m_apm_sign       = sign;
408rr->m_apm_exponent   = nexp;
409rr->m_apm_datalength = 4 * ii;
410
411M_apm_normalize(rr);
412}
413/****************************************************************************/
414/*
415 *      This is the recursive function to perform the multiply. The
416 *      design intent here is to have no local variables. Any local
417 *      data that needs to be saved is saved on one of the two stacks.
418 */
419void	M_fmul_div_conq(UCHAR *rr, UCHAR *aa, UCHAR *bb, int sz)
420{
421
422#ifdef NO_FFT_MULTIPLY
423
424if (sz == 4)                /* multiply 4x4 yielding an 8 byte result */
425  {
426   M_4_byte_multiply(rr, aa, bb);
427   return;
428  }
429
430#else
431
432/*
433 *  if the numbers are now small enough, let the FFT algorithm
434 *  finish up.
435 */
436
437if (sz == MAX_FFT_BYTES)
438  {
439   M_fast_mul_fft(rr, aa, bb, sz);
440   return;
441  }
442
443#endif
444
445memset(rr, 0, (2 * sz));    /* zero out the result */
446mii = sz >> 1;
447
448itmp = M_get_stack_ptr(mii);
449M_push_mul_int(itmp);
450
451fmul_a1 = mul_stack_data[itmp];
452
453itmp    = M_get_stack_ptr(mii);
454fmul_a0 = mul_stack_data[itmp];
455
456itmp    = M_get_stack_ptr(2 * sz);
457fmul_a9 = mul_stack_data[itmp];
458
459itmp    = M_get_stack_ptr(mii);
460fmul_b1 = mul_stack_data[itmp];
461
462itmp    = M_get_stack_ptr(mii);
463fmul_b0 = mul_stack_data[itmp];
464
465itmp    = M_get_stack_ptr(2 * sz);
466fmul_b9 = mul_stack_data[itmp];
467
468itmp    = M_get_stack_ptr(2 * sz);
469fmul_t0 = mul_stack_data[itmp];
470
471M_fmul_split(fmul_a1, fmul_a0, aa, mii);
472M_fmul_split(fmul_b1, fmul_b0, bb, mii);
473
474stmp  = M_fmul_subtract(fmul_a9, fmul_a1, fmul_a0, mii);
475stmp *= M_fmul_subtract(fmul_b9, fmul_b0, fmul_b1, mii);
476
477M_push_mul_int(stmp);
478M_push_mul_int(mii);
479
480M_fmul_div_conq(fmul_t0, fmul_a0, fmul_b0, mii);
481
482mii  = M_pop_mul_int();
483stmp = M_pop_mul_int();
484itmp = M_pop_mul_int();
485
486M_push_mul_int(itmp);
487M_push_mul_int(stmp);
488M_push_mul_int(mii);
489
490/*   to restore all stack variables ...
491fmul_a1 = mul_stack_data[itmp];
492fmul_a0 = mul_stack_data[itmp+1];
493fmul_a9 = mul_stack_data[itmp+2];
494fmul_b1 = mul_stack_data[itmp+3];
495fmul_b0 = mul_stack_data[itmp+4];
496fmul_b9 = mul_stack_data[itmp+5];
497fmul_t0 = mul_stack_data[itmp+6];
498*/
499
500fmul_a1 = mul_stack_data[itmp];
501fmul_b1 = mul_stack_data[itmp+3];
502fmul_t0 = mul_stack_data[itmp+6];
503
504memcpy((rr + sz), fmul_t0, sz);    /* first 'add', result is now zero */
505				   /* so we just copy in the bytes    */
506M_fmul_add(rr, fmul_t0, mii, sz);
507
508M_fmul_div_conq(fmul_t0, fmul_a1, fmul_b1, mii);
509
510mii  = M_pop_mul_int();
511stmp = M_pop_mul_int();
512itmp = M_pop_mul_int();
513
514M_push_mul_int(itmp);
515M_push_mul_int(stmp);
516M_push_mul_int(mii);
517
518fmul_a9 = mul_stack_data[itmp+2];
519fmul_b9 = mul_stack_data[itmp+5];
520fmul_t0 = mul_stack_data[itmp+6];
521
522M_fmul_add(rr, fmul_t0, 0, sz);
523M_fmul_add(rr, fmul_t0, mii, sz);
524
525if (stmp != 0)
526  M_fmul_div_conq(fmul_t0, fmul_a9, fmul_b9, mii);
527
528mii  = M_pop_mul_int();
529stmp = M_pop_mul_int();
530itmp = M_pop_mul_int();
531
532fmul_t0 = mul_stack_data[itmp+6];
533
534/*
535 *  if the sign of (A1 - A0)(B0 - B1) is positive, ADD to
536 *  the result. if it is negative, SUBTRACT from the result.
537 */
538
539if (stmp < 0)
540  {
541   fmul_a9 = mul_stack_data[itmp+2];
542   fmul_b9 = mul_stack_data[itmp+5];
543
544   memset(fmul_b9, 0, (2 * sz));
545   memcpy((fmul_b9 + mii), fmul_t0, sz);
546   M_fmul_subtract(fmul_a9, rr, fmul_b9, (2 * sz));
547   memcpy(rr, fmul_a9, (2 * sz));
548  }
549
550if (stmp > 0)
551  M_fmul_add(rr, fmul_t0, mii, sz);
552
553M_mul_stack_ptr -= 7;
554}
555/****************************************************************************/
556/*
557 *	special addition function for use with the fast multiply operation
558 */
559void    M_fmul_add(UCHAR *r, UCHAR *a, int offset, int sz)
560{
561int	i, j;
562UCHAR   carry;
563
564carry = 0;
565j = offset + sz;
566i = sz;
567
568while (TRUE)
569  {
570   r[--j] += carry + a[--i];
571
572   if (r[j] >= 100)
573     {
574      r[j] -= 100;
575      carry = 1;
576     }
577   else
578      carry = 0;
579
580   if (i == 0)
581     break;
582  }
583
584if (carry)
585  {
586   while (TRUE)
587     {
588      r[--j] += 1;
589
590      if (r[j] < 100)
591        break;
592
593      r[j] -= 100;
594     }
595  }
596}
597/****************************************************************************/
598/*
599 *	special subtraction function for use with the fast multiply operation
600 */
601int     M_fmul_subtract(UCHAR *r, UCHAR *a, UCHAR *b, int sz)
602{
603int	k, jtmp, sflag, nb, borrow;
604
605nb    = sz;
606sflag = 0;      /* sign flag: assume the numbers are equal */
607
608/*
609 *   find if a > b (so we perform a-b)
610 *   or      a < b (so we perform b-a)
611 */
612
613for (k=0; k < nb; k++)
614  {
615   if (a[k] < b[k])
616     {
617      sflag = -1;
618      break;
619     }
620
621   if (a[k] > b[k])
622     {
623      sflag = 1;
624      break;
625     }
626  }
627
628if (sflag == 0)
629  {
630   memset(r, 0, nb);            /* zero out the result */
631  }
632else
633  {
634   k = nb;
635   borrow = 0;
636
637   while (TRUE)
638     {
639      k--;
640
641      if (sflag == 1)
642        jtmp = (int)a[k] - ((int)b[k] + borrow);
643      else
644        jtmp = (int)b[k] - ((int)a[k] + borrow);
645
646      if (jtmp >= 0)
647        {
648         r[k] = (UCHAR)jtmp;
649         borrow = 0;
650        }
651      else
652        {
653         r[k] = (UCHAR)(100 + jtmp);
654         borrow = 1;
655        }
656
657      if (k == 0)
658        break;
659     }
660  }
661
662return(sflag);
663}
664/****************************************************************************/
665int     M_next_power_of_2(int n)
666{
667int     ct, k;
668
669if (n <= 2)
670  return(n);
671
672k  = 2;
673ct = 0;
674
675while (TRUE)
676  {
677   if (k >= n)
678     break;
679
680   k = k << 1;
681
682   if (++ct == bit_limit)
683     {
684      /* fatal, this does not return */
685
686      M_apm_log_error_msg(M_APM_FATAL,
687               "\'M_next_power_of_2\', ERROR :sizeof(int) too small ??");
688     }
689  }
690
691return(k);
692}
693/****************************************************************************/
694int	M_get_stack_ptr(int sz)
695{
696int	i, k;
697UCHAR   *cp;
698
699k = ++M_mul_stack_ptr;
700
701/* if size is 0, just need to malloc and return */
702if (mul_stack_data_size[k] == 0)
703  {
704   if ((i = sz) < 16)
705     i = 16;
706
707   if ((cp = (UCHAR *)MAPM_MALLOC(i + 4)) == NULL)
708     {
709      /* fatal, this does not return */
710
711      M_apm_log_error_msg(M_APM_FATAL, M_stack_ptr_error_msg);
712     }
713
714   mul_stack_data[k]      = cp;
715   mul_stack_data_size[k] = i;
716  }
717else        /* it has been malloc'ed, see if it's big enough */
718  {
719   if (sz > mul_stack_data_size[k])
720     {
721      cp = mul_stack_data[k];
722
723      if ((cp = (UCHAR *)MAPM_REALLOC(cp, (sz + 4))) == NULL)
724        {
725         /* fatal, this does not return */
726
727         M_apm_log_error_msg(M_APM_FATAL, M_stack_ptr_error_msg);
728        }
729
730      mul_stack_data[k]      = cp;
731      mul_stack_data_size[k] = sz;
732     }
733  }
734
735return(k);
736}
737/****************************************************************************/
738
739#ifdef NO_FFT_MULTIPLY
740
741/*
742 *      multiply a 4 byte number by a 4 byte number
743 *      yielding an 8 byte result. each byte contains
744 *      a base 100 'digit', i.e.: range from 0-99.
745 *
746 *             MSB         LSB
747 *
748 *      a,b    [0] [1] [2] [3]
749 *   result    [0]  .....  [7]
750 */
751
752void	M_4_byte_multiply(UCHAR *r, UCHAR *a, UCHAR *b)
753{
754int	      jj;
755unsigned int  *ip, t1, rr[8];
756
757memset(rr, 0, (8 * sizeof(int)));        /* zero out result */
758jj = 3;
759ip = rr + 5;
760
761/*
762 *   loop for one number [b], un-roll the inner 'loop' [a]
763 *
764 *   accumulate partial sums in UINT array, release carries
765 *   and convert back to base 100 at the end
766 */
767
768while (1)
769  {
770   t1  = (unsigned int)b[jj];
771   ip += 2;
772
773   *ip-- += t1 * a[3];
774   *ip-- += t1 * a[2];
775   *ip-- += t1 * a[1];
776   *ip   += t1 * a[0];
777
778   if (jj-- == 0)
779     break;
780  }
781
782jj = 7;
783
784while (1)
785  {
786   t1 = rr[jj] / 100;
787   r[jj] = (UCHAR)(rr[jj] - 100 * t1);
788
789   if (jj == 0)
790     break;
791
792   rr[--jj] += t1;
793  }
794}
795
796#endif
797
798/****************************************************************************/
799