1
2/*
3 *  M_APM  -  mapmfact.c
4 *
5 *  Copyright (C) 1999 - 2007   Michael C. Ring
6 *
7 *  Permission to use, copy, and distribute this software and its
8 *  documentation for any purpose with or without fee is hereby granted,
9 *  provided that the above copyright notice appear in all copies and
10 *  that both that copyright notice and this permission notice appear
11 *  in supporting documentation.
12 *
13 *  Permission to modify the software is granted. Permission to distribute
14 *  the modified code is granted. Modifications are to be distributed by
15 *  using the file 'license.txt' as a template to modify the file header.
16 *  'license.txt' is available in the official MAPM distribution.
17 *
18 *  This software is provided "as is" without express or implied warranty.
19 */
20
21/*
22 *      $Id: mapmfact.c,v 1.11 2007/12/03 01:51:50 mike Exp $
23 *
24 *      This file contains the FACTORIAL function.
25 *
26 *      $Log: mapmfact.c,v $
27 *      Revision 1.11  2007/12/03 01:51:50  mike
28 *      Update license
29 *
30 *      Revision 1.10  2003/07/21 21:05:31  mike
31 *      facilitate 'gcov' code coverage tool by making an array smaller
32 *
33 *      Revision 1.9  2002/11/03 21:27:28  mike
34 *      Updated function parameters to use the modern style
35 *
36 *      Revision 1.8  2000/06/14 20:36:16  mike
37 *      increase size of DOS array
38 *
39 *      Revision 1.7  2000/05/29 13:15:59  mike
40 *      minor tweaks, fixed comment
41 *
42 *      Revision 1.6  2000/05/26 16:39:03  mike
43 *      minor update to comments and code
44 *
45 *      Revision 1.5  2000/05/25 23:12:53  mike
46 *      change 'nd' calculation
47 *
48 *      Revision 1.4  2000/05/25 22:17:45  mike
49 *      implement new algorithm for speed. approx 5 - 10X
50 *      faster on my PC when N! is large (> 6000)
51 *
52 *      Revision 1.3  1999/06/19 21:25:21  mike
53 *      changed local static variables to MAPM stack variables
54 *
55 *      Revision 1.2  1999/05/23 18:21:12  mike
56 *      minor variable name tweaks
57 *
58 *      Revision 1.1  1999/05/15 21:06:11  mike
59 *      Initial revision
60 */
61
62/*
63 *      Brief explanation of the factorial algorithm.
64 *      ----------------------------------------------
65 *
66 *      The old algorithm simply multiplied N * (N-1) * (N-2) etc, until
67 *	the number counted down to '2'. So one term of the multiplication
68 *	kept getting bigger while multiplying by the next number in the
69 *	sequence.
70 *
71 *      The new algorithm takes advantage of the fast multiplication
72 *	algorithm. The "ideal" setup for fast multiplication is when
73 *	both numbers have approx the same number of significant digits
74 *	and the number of digits is very near (but not over) an exact
75 *	power of 2.
76 *
77 *	So, we will multiply N * (N-1) * (N-2), etc until the number of
78 *	significant digits is approx 256.
79 *
80 *	Store this temp product into an array.
81 *
82 *	Then we will multiply the next sequence until the number of
83 *	significant digits is approx 256.
84 *
85 *	Store this temp product into the next element of the array.
86 *
87 *	Continue until we've counted down to 2.
88 *
89 *	We now have an array of numbers with approx the same number
90 *	of digits (except for the last element, depending on where it
91 *	ended.) Now multiply each of the array elements together to
92 *	get the final product.
93 *
94 *      The array multiplies are done as follows (assume we used 11
95 *	array elements for this example, indicated by [0] - [10] ) :
96 *
97 *	initial    iter-1     iter-2       iter-3     iter-4
98 *
99 *	  [0]
100 *	     *  ->  [0]
101 *	  [1]
102 *                      * ->    [0]
103 *
104 *	  [2]
105 *	     *  ->  [1]
106 *	  [3]
107 *                                   * ->   [0]
108 *
109 *	  [4]
110 *	     *  ->  [2]
111 *	  [5]
112 *
113 *                      * ->    [1]
114 *
115 *	  [6]
116 *	     *  ->  [3]                           *  ->  [0]
117 *	  [7]
118 *
119 *
120 *	  [8]
121 *	     *  ->  [4]
122 *	  [9]
123 *                      * ->    [2]    ->   [1]
124 *
125 *
126 *	  [10]  ->  [5]
127 *
128 */
129
130#include "m_apm_lc.h"
131
132/* define size of local array for temp storage */
133
134#define NDIM 32
135
136/****************************************************************************/
137void	m_apm_factorial(M_APM moutput, M_APM minput)
138{
139int     ii, nmul, ndigits, nd, jj, kk, mm, ct;
140M_APM   array[NDIM];
141M_APM   iprod1, iprod2, tmp1, tmp2;
142
143/* return 1 for any input <= 1 */
144
145if (m_apm_compare(minput, MM_One) <= 0)
146  {
147   m_apm_copy(moutput, MM_One);
148   return;
149  }
150
151ct       = 0;
152mm       = NDIM - 2;
153ndigits  = 256;
154nd       = ndigits - 20;
155tmp1     = m_apm_init();
156tmp2     = m_apm_init();
157iprod1   = m_apm_init();
158iprod2   = m_apm_init();
159array[0] = m_apm_init();
160
161m_apm_copy(tmp2, minput);
162
163/* loop until multiply count-down has reached '2' */
164
165while (TRUE)
166  {
167   m_apm_copy(iprod1, MM_One);
168
169   /*
170    *   loop until the number of significant digits in this
171    *   partial result is slightly less than 256
172    */
173
174   while (TRUE)
175     {
176      m_apm_multiply(iprod2, iprod1, tmp2);
177
178      m_apm_subtract(tmp1, tmp2, MM_One);
179
180      m_apm_multiply(iprod1, iprod2, tmp1);
181
182      /*
183       *  I know, I know.  There just isn't a *clean* way
184       *  to break out of 2 nested loops.
185       */
186
187      if (m_apm_compare(tmp1, MM_Two) <= 0)
188        goto PHASE2;
189
190      m_apm_subtract(tmp2, tmp1, MM_One);
191
192      if (iprod1->m_apm_datalength > nd)
193        break;
194     }
195
196   if (ct == (NDIM - 1))
197     {
198      /*
199       *    if the array has filled up, start multiplying
200       *    some of the partial products now.
201       */
202
203      m_apm_copy(tmp1, array[mm]);
204      m_apm_multiply(array[mm], iprod1, tmp1);
205
206      if (mm == 0)
207        {
208         mm = NDIM - 2;
209	 ndigits = ndigits << 1;
210         nd = ndigits - 20;
211	}
212      else
213         mm--;
214     }
215   else
216     {
217      /*
218       *    store this partial product in the array
219       *    and allocate the next array element
220       */
221
222      m_apm_copy(array[ct], iprod1);
223      array[++ct] = m_apm_init();
224     }
225  }
226
227PHASE2:
228
229m_apm_copy(array[ct], iprod1);
230
231kk = ct;
232
233while (kk != 0)
234  {
235   ii = 0;
236   jj = 0;
237   nmul = (kk + 1) >> 1;
238
239   while (TRUE)
240     {
241      /* must use tmp var when ii,jj point to same element */
242
243      if (ii == 0)
244        {
245         m_apm_copy(tmp1, array[ii]);
246         m_apm_multiply(array[jj], tmp1, array[ii+1]);
247        }
248      else
249         m_apm_multiply(array[jj], array[ii], array[ii+1]);
250
251      if (++jj == nmul)
252        break;
253
254      ii += 2;
255     }
256
257   if ((kk & 1) == 0)
258     {
259      jj = kk >> 1;
260      m_apm_copy(array[jj], array[kk]);
261     }
262
263   kk = kk >> 1;
264  }
265
266m_apm_copy(moutput, array[0]);
267
268for (ii=0; ii <= ct; ii++)
269  {
270   m_apm_free(array[ii]);
271  }
272
273m_apm_free(tmp1);
274m_apm_free(tmp2);
275m_apm_free(iprod1);
276m_apm_free(iprod2);
277}
278/****************************************************************************/
279