1/**************************************************************
2 *
3 *  giants.c
4 *
5 *  Library for large-integer arithmetic.
6 *
7 *  The large-gcd implementation is due to J. P. Buhler.
8 *  Special mod routines use ideas of R. McIntosh.
9 *  Contributions from G. Woltman, A. Powell acknowledged.
10 *
11 *  Updates:
12 *      18 Jul 99   REC  Added routine fer_mod(), for use when Fermat
13                         giant itself is available.
14 *      17 Jul 99   REC  Fixed sign bug in fermatmod()
15 *      17 Apr 99   REC  Fixed various comment/line wraps
16 *      25 Mar 99   REC  G. Woltman/A. Powell fixes Karat. routines
17 *      05 Mar 99   REC  Moved invaux, binvaux giants to stack
18 *      05 Mar 99   REC  Moved gread/gwrite giants to stack
19 *      05 Mar 99   REC  No static on cur_den, cur_recip (A. Powell)
20 *      28 Feb 99   REC  Error detection added atop newgiant().
21 *      27 Feb 99   REC  Reduced wasted work in addsignal().
22 *      27 Feb 99   REC  Reduced wasted work in FFTmulg().
23 *      19 Feb 99   REC  Generalized iaddg() per R. Mcintosh.
24 *       2 Feb 99   REC  Fixed comments.
25 *       6 Dec 98   REC  Fixed yet another Karatsuba glitch.
26 *       1 Dec 98   REC  Fixed errant case of addg().
27 *      28 Nov 98   REC  Installed A. Powell's (correct) variant of
28						 Karatsuba multiply.
29 *      15 May 98   REC  Modified gwrite() to handle huge integers.
30 *      13 May 98   REC  Changed to static stack declarations
31 *      11 May 98   REC  Installed Karatsuba multiply, to handle
32 *                       medregion 'twixt grammar- and FFT-multiply.
33 *       1 May 98   JF   gshifts now handle bits < 0 correctly.
34 *      30 Apr 98   JF   68k assembler code removed,
35 *                       stack giant size now invariant and based
36 *                           on first call of newgiant(),
37 *                       stack memory leaks fixed.
38 *      29 Apr 98   JF   function prototyes cleaned up,
39 *                       GCD no longer uses personal stack,
40 *                       optimized shifts for bits%16 == 0.
41 *      27 Apr 98   JF   scratch giants now replaced with stack
42 *      20 Apr 98   JF   grammarsquareg fixed for asize == 0.
43 *                       scratch giants now static.
44 *      29 Jan 98   JF   Corrected out-of-range errors in
45 *                       mersennemod and fermatmod.
46 *      23 Dec 97   REC  Sped up divide routines via split-shift.
47 *      18 Nov 97   JF   Improved mersennemod, fermatmod.
48 *       9 Nov 97   JF   Sped up grammarsquareg.
49 *      20 May 97   RDW  Fixed Win32 compiler warnings.
50 *      18 May 97   REC  Installed new, fast divide.
51 *      17 May 97   REC  Reduced memory for FFT multiply.
52 *      26 Apr 97   REC  Creation.
53 *
54 *  c. 1997,1998 Perfectly Scientific, Inc.
55 *  All Rights Reserved.
56 *
57 **************************************************************/
58
59
60/* Include Files. */
61
62#include <stdio.h>
63#include <stdlib.h>
64#include <string.h>
65#include <math.h>
66#include "giants.h"
67
68
69/* Compiler options. */
70
71#ifdef _WIN32
72#pragma warning( disable : 4127 4706 ) /* disable conditional is constant warning */
73#endif
74
75
76/* Global variables. */
77
78int				error = 0;
79int				mulmode = AUTO_MUL;
80int				cur_prec = 0;
81int				cur_shift = 0;
82static int		cur_stack_size = 0;
83static int		cur_stack_elem = 0;
84static int		stack_glen = 0;
85static giant	*stack;
86giant			cur_den = NULL,
87				cur_recip = NULL;
88int				current_max_size = 0,
89				cur_run = 0;
90double *		sinCos=NULL;
91int				checkFFTerror = 0;
92double			maxFFTerror;
93static giant	u0=NULL, u1=NULL, v0=NULL, v1=NULL;
94static double	*z = NULL,
95				*z2 = NULL;
96
97/* stack handling functions. */
98static giant	popg(void);
99static void		pushg(int);
100
101
102/* Private function prototypes. */
103
104int 		gerr(void);
105double		gfloor(double);
106int			radixdiv(int, int, giant);
107void		columnwrite(FILE *, short *, char *, short, int);
108
109void		normal_addg(giant, giant);
110void		normal_subg(giant, giant);
111void		reverse_subg(giant, giant);
112int			binvaux(giant, giant);
113int 		invaux(giant, giant);
114int 		allzeros(int, int, giant);
115void 		auxmulg(giant a, giant b);
116void 		karatmulg(giant a, giant b);
117void 		karatsquareg(giant b);
118void 		grammarmulg(giant a, giant b);
119void		grammarsquareg(giant b);
120
121int 		lpt(int, int *);
122void 		addsignal(giant, double *, int);
123void 		FFTsquareg(giant x);
124void 		FFTmulg(giant y, giant x);
125void 		scramble_real();
126void 		fft_real_to_hermitian(double *z, int n);
127void 		fftinv_hermitian_to_real(double *z, int n);
128void 		mul_hermitian(double *a, double *b, int n);
129void 		square_hermitian(double *b, int n);
130void 		giant_to_double(giant x, int sizex, double *z, int L);
131void 		gswap(giant *, giant *);
132void 		onestep(giant, giant, gmatrix);
133void 		mulvM(gmatrix, giant, giant);
134void 		mulmM(gmatrix, gmatrix);
135void 		writeM(gmatrix);
136static void	punch(giant, gmatrix);
137static void	dotproduct(giant, giant, giant, giant);
138void		fix(giant *, giant *);
139void		hgcd(int, giant, giant, gmatrix);
140void		shgcd(int, int, gmatrix);
141
142
143
144/**************************************************************
145 *
146 *	Functions
147 *
148 **************************************************************/
149
150
151/**************************************************************
152 *
153 * Initialization and utility functions
154 *
155 **************************************************************/
156
157double
158gfloor(
159	double 	f
160)
161{
162	return floor(f);
163}
164
165
166void
167init_sinCos(
168	int 		n
169)
170{
171	int 		j;
172	double 	e = TWOPI/n;
173
174	if (n<=cur_run)
175		return;
176	cur_run = n;
177	if (sinCos)
178		free(sinCos);
179	sinCos = (double *)malloc(sizeof(double)*(1+(n>>2)));
180	for (j=0;j<=(n>>2);j++)
181	{
182		sinCos[j] = sin(e*j);
183	}
184}
185
186
187double
188s_sin(
189	int 	n
190)
191{
192	int 	seg = n/(cur_run>>2);
193
194	switch (seg)
195	{
196		case 0: return(sinCos[n]);
197		case 1: return(sinCos[(cur_run>>1)-n]);
198		case 2: return(-sinCos[n-(cur_run>>1)]);
199		case 3: return(-sinCos[cur_run-n]);
200	}
201	return 0;
202}
203
204
205double
206s_cos(
207	int 	n
208)
209{
210	int 	quart = (cur_run>>2);
211
212	if (n < quart)
213		return(s_sin(n+quart));
214	return(-s_sin(n-quart));
215}
216
217
218int
219gerr(void)
220{
221	return(error);
222}
223
224
225giant
226popg (
227	void
228)
229{
230	int i;
231
232	if (current_max_size <= 0) current_max_size = MAX_SHORTS;
233
234	if (cur_stack_size == 0) {
235/* Initialize the stack if we're just starting out.
236 * Note that all stack giants will be whatever current_max_size is
237 * when newgiant() is first called. */
238		cur_stack_size = STACK_GROW;
239		stack = (giant *) malloc (cur_stack_size * sizeof(giant));
240		for(i = 0; i < STACK_GROW; i++)
241			stack[i] = NULL;
242		if (stack_glen == 0) stack_glen = current_max_size;
243	} else if (cur_stack_elem >= cur_stack_size) {
244/* Expand the stack if we need to. */
245		i = cur_stack_size;
246		cur_stack_size += STACK_GROW;
247		stack = (giant *) realloc (stack,cur_stack_size * sizeof(giant));
248		for (; i < cur_stack_size; i++)
249			stack[i] = NULL;
250	} else if (cur_stack_elem < cur_stack_size - 2*STACK_GROW) {
251/* Prune the stack if it's too big. Disabled, so the stack can only expand */
252		/* cur_stack_size -= STACK_GROW;
253		for (i = cur_stack_size - STACK_GROW; i < cur_stack_size; i++)
254			free(stack[i]);
255		stack = (giant *) realloc (stack,cur_stack_size * sizeof(giant)); */
256	}
257
258/* Malloc our giant. */
259	if (stack[cur_stack_elem] == NULL)
260		stack[cur_stack_elem] = malloc(stack_glen*sizeof(short)+sizeof(int));
261	stack[cur_stack_elem]->sign = 0;
262
263	return(stack[cur_stack_elem++]);
264}
265
266
267void
268pushg (
269	int a
270)
271{
272	if (a < 0) return;
273	cur_stack_elem -= a;
274	if (cur_stack_elem < 0) cur_stack_elem = 0;
275}
276
277
278giant
279newgiant(
280	int 		numshorts
281)
282{
283	int 		size;
284	giant 		thegiant;
285
286    if (numshorts > MAX_SHORTS) {
287		fprintf(stderr, "Requested giant too big.\n");
288		fflush(stderr);
289	}
290	if (numshorts<=0)
291		numshorts = MAX_SHORTS;
292	size = numshorts*sizeof(short)+sizeof(int);
293	thegiant = (giant)malloc(size);
294	thegiant->sign = 0;
295
296	if (newmin(2*numshorts,MAX_SHORTS) > current_max_size)
297		current_max_size = newmin(2*numshorts,MAX_SHORTS);
298
299/* If newgiant() is being called for the first time, set the
300 * size of the stack giants. */
301	if (stack_glen == 0) stack_glen = current_max_size;
302
303	return(thegiant);
304}
305
306
307gmatrix
308newgmatrix(
309	void
310)
311/* Allocates space for a gmatrix struct, but does not
312 * allocate space for the giants. */
313{
314	return((gmatrix) malloc (4*sizeof(giant)));
315}
316
317int
318bitlen(
319	giant		n
320)
321{
322	int 		b = 16, c = 1<<15, w;
323
324	if (isZero(n))
325		return(0);
326	w = n->n[abs(n->sign) - 1];
327	while ((w&c) == 0)
328	{
329		b--;
330		c >>= 1;
331	}
332	return (16*(abs(n->sign)-1) + b);
333}
334
335
336int
337bitval(
338	giant 	n,
339	int 	pos
340)
341{
342	int 	i = abs(pos)>>4, c = 1 << (pos&15);
343
344	return ((n->n[i]) & c);
345}
346
347
348int
349isone(
350	giant	g
351)
352{
353	return((g->sign==1)&&(g->n[0]==1));
354}
355
356
357int isZero(
358	giant thegiant
359)
360/* Returns TR if thegiant == 0. */
361{
362	register int 				count;
363	int			 				length = abs(thegiant->sign);
364	register unsigned short	*	numpointer = thegiant->n;
365
366	if (length)
367	{
368		for(count = 0; count<length; ++count,++numpointer)
369		{
370			if (*numpointer != 0 )
371				return(FA);
372		}
373	}
374	return(TR);
375}
376
377
378void
379gtog(
380	giant		srcgiant,
381	giant		destgiant
382)
383/* destgiant becomes equal to srcgiant. */
384{
385	int 		numbytes = sizeof(int) + abs(srcgiant->sign)*sizeof(short);
386
387	memcpy((char *)destgiant,(char *)srcgiant,numbytes);
388}
389
390
391void
392itog(
393	int				i,
394	giant			g
395)
396/* The giant g becomes set to the integer value i. */
397{
398	unsigned int	j = abs(i);
399
400	if (i==0)
401	{
402		g->sign = 0;
403		g->n[0] = 0;
404		return;
405	}
406	g->n[0] = (unsigned short)(j & 0xFFFF);
407	j >>= 16;
408	if (j)
409	{
410		g->n[1] = (unsigned short)j;
411		g->sign = 2;
412	}
413	else
414	{
415		g->sign = 1;
416	}
417	if (i<0)
418		g->sign = -(g->sign);
419}
420
421
422signed int
423gtoi(
424	giant 			x
425)
426/* Calculate the value of an int-sized giant NOT exceeding 31 bits. */
427{
428	register int 	size = abs(x->sign);
429	register int 	sign = (x->sign < 0) ? -1 : 1;
430
431	switch(size)
432	{
433		case 0:
434			break;
435		case 1:
436			return sign * x->n[0];
437		case 2:
438			return sign * (x->n[0]+((x->n[1])<<16));
439		default:
440			fprintf(stderr,"Giant too large for gtoi\n");
441			break;
442	}
443	return 0;
444}
445
446
447int
448gsign(
449	giant 	g
450)
451/* Returns the sign of g. */
452{
453	if (isZero(g))
454		return(0);
455	if (g->sign >0)
456		return(1);
457	return(-1);
458}
459
460
461#if 0
462int gcompg(a,b)
463/* Returns -1,0,1 if a<b, a=b, a>b, respectively. */
464	giant a,b;
465{
466	int size = abs(a->sign);
467
468	if(isZero(a)) size = 0;
469	if (size == 0) {
470		if (isZero(b)) return(0); else return(-gsign(b));
471	}
472
473	if (b->sign == 0) return(gsign(a));
474	if (gsign(a)!=gsign(b)) return(gsign(a));
475	if (size>abs(b->sign)) return(gsign(a));
476	if (size<abs(b->sign)) return(-gsign(a));
477
478	do {
479		--size;
480		if (a->n[size] > b->n[size]) return(gsign(a));
481		if (a->n[size] < b->n[size]) return(-gsign(a));
482	 } while(size>0);
483
484	return(0);
485}
486#else
487
488int
489gcompg(
490	giant		a,
491	giant		b
492)
493/* Returns -1,0,1 if a<b, a=b, a>b, respectively. */
494{
495	int 		sa = a->sign, j, sb = b->sign, va, vb, sgn;
496
497	if(sa > sb)
498		return(1);
499	if(sa < sb)
500		return(-1);
501	if(sa < 0)
502	{
503		sa = -sa; /* Take absolute value of sa. */
504		sgn = -1;
505	}
506	else
507	{
508		sgn = 1;
509	}
510	for(j = sa-1; j >= 0; j--)
511	{
512		va = a->n[j];
513		vb = b->n[j];
514		if (va > vb)
515			return(sgn);
516		if (va < vb)
517			return(-sgn);
518	}
519	return(0);
520}
521#endif
522
523
524void
525setmulmode(
526	int 	mode
527)
528{
529	mulmode = mode;
530}
531
532
533/**************************************************************
534 *
535 * Private I/O Functions
536 *
537 **************************************************************/
538
539
540int
541radixdiv(
542	int				base,
543	int				divisor,
544	giant			thegiant
545)
546/* Divides giant of arbitrary base by divisor.
547 * Returns remainder. Used by idivg and gread. */
548{
549	int				first = TR;
550	int				finalsize = abs(thegiant->sign);
551	int				j = finalsize-1;
552	unsigned short	*digitpointer=&thegiant->n[j];
553	unsigned int 	num,rem=0;
554
555	if (divisor == 0)
556	{
557		error = DIVIDEBYZERO;
558		exit(error);
559	}
560
561	while (j>=0)
562	{
563		num=rem*base + *digitpointer;
564		*digitpointer = (unsigned short)(num/divisor);
565		if (first)
566		{
567			if (*digitpointer == 0)
568				--finalsize;
569			else
570				first = FA;
571		}
572		rem = num % divisor;
573		--digitpointer;
574		--j;
575	}
576
577	if ((divisor<0) ^ (thegiant->sign<0))
578		finalsize=-finalsize;
579	thegiant->sign=finalsize;
580	return(rem);
581}
582
583
584void
585columnwrite(
586	FILE 	*filepointer,
587	short 	*column,
588	char 	*format,
589	short 	arg,
590	int 	newlines
591)
592/* Used by gwriteln. */
593{
594	char 	outstring[10];
595	short 	i;
596
597	sprintf(outstring,format,arg);
598	for (i=0; outstring[i]!=0; ++i)
599	{
600		if (newlines)
601		{
602			if (*column >= COLUMNWIDTH)
603			{
604				fputs("\\\n",filepointer);
605				*column = 0;
606			}
607		}
608		fputc(outstring[i],filepointer);
609		++*column;
610	}
611}
612
613
614void
615gwrite(
616	giant			thegiant,
617	FILE			*filepointer,
618	int				newlines
619)
620/* Outputs thegiant to filepointer. Output is terminated by a newline. */
621{
622	short			column;
623	unsigned int 	i;
624	unsigned short	*numpointer;
625	giant	garbagegiant, basetengrand;
626
627	basetengrand = popg();
628    garbagegiant = popg();
629
630	if (isZero(thegiant))
631	{
632		fputs("0",filepointer);
633	}
634	else
635	{
636		numpointer = basetengrand->n;
637		gtog(thegiant,garbagegiant);
638
639		basetengrand->sign = 0;
640		do
641		{
642			*numpointer = (unsigned short)idivg(10000,garbagegiant);
643			++numpointer;
644			if (++basetengrand->sign >= current_max_size)
645			{
646				error = OVFLOW;
647				exit(error);
648			}
649		} 	while (!isZero(garbagegiant));
650
651		if (!error)
652		{
653			i = basetengrand->sign-1;
654			column = 0;
655			if (thegiant->sign<0 && basetengrand->n[i]!=0)
656				columnwrite(filepointer,&column,"-",0, newlines);
657			columnwrite(filepointer,&column,"%d",basetengrand->n[i],newlines);
658			for( ; i>0; )
659			{
660				--i;
661				columnwrite(filepointer,&column,"%04d",basetengrand->n[i],newlines);
662			}
663		}
664	}
665   pushg(2);
666}
667
668
669void
670gwriteln(
671	giant		theg,
672	FILE		*filepointer
673)
674{
675	gwrite(theg, filepointer, 1);
676	fputc('\n',filepointer);
677}
678
679
680void
681gread(
682	giant 			theg,
683	FILE 			*filepointer
684)
685{
686	char 			currentchar;
687	int 			isneg,size,backslash=FA,numdigits=0;
688	unsigned short	*numpointer;
689	giant	        basetenthousand;
690	static char		*inbuf = NULL;
691
692    basetenthousand = popg();
693	if (inbuf == NULL)
694		inbuf = (char*)malloc(MAX_DIGITS);
695
696	currentchar = (char)fgetc(filepointer);
697	if (currentchar=='-')
698	{
699		isneg=TR;
700	}
701	else
702	{
703		isneg=FA;
704		if (currentchar!='+')
705			ungetc(currentchar,filepointer);
706	}
707
708	do
709	{
710		currentchar = (char)fgetc(filepointer);
711		if ((currentchar>='0') && (currentchar<='9'))
712		{
713			inbuf[numdigits]=currentchar;
714			if(++numdigits==MAX_DIGITS)
715				break;
716			backslash=FA;
717		}
718		else
719		{
720			if (currentchar=='\\')
721				backslash=TR;
722		}
723	} while(((currentchar!=' ') && (currentchar!='\n') &&
724				(currentchar!='\t')) || (backslash) );
725	if (numdigits)
726	{
727		size = 0;
728		do
729		{
730			inbuf[numdigits] = 0;
731			numdigits-=4;
732			if (numdigits<0)
733				numdigits=0;
734			basetenthousand->n[size] = (unsigned short)strtol(&inbuf[numdigits],NULL,10);
735			++size;
736		} while (numdigits>0);
737
738		basetenthousand->sign = size;
739		theg->sign = 0;
740		numpointer = theg->n;
741		do
742		{
743			*numpointer = (unsigned short)
744			radixdiv(10000,1<<(8*sizeof(short)),basetenthousand);
745			++numpointer;
746			if (++theg->sign >= current_max_size)
747			{
748				error = OVFLOW;
749				exit(error);
750			}
751		} while (!isZero(basetenthousand));
752
753		if (isneg)
754			theg->sign = -theg->sign;
755	}
756    pushg(1);
757}
758
759
760
761/**************************************************************
762 *
763 * Private Math Functions
764 *
765 **************************************************************/
766
767
768void
769negg(
770	giant	g
771)
772/* g becomes -g. */
773{
774	g->sign = -g->sign;
775}
776
777
778void
779absg(
780	giant g
781)
782{
783	/* g becomes the absolute value of g. */
784	if (g->sign <0)
785		g->sign=-g->sign;
786}
787
788
789void
790iaddg(
791	int		i,
792	giant	g
793)
794/* Giant g becomes g + (int)i. */
795{
796	int 	w,j=0,carry = 0, size = abs(g->sign);
797    giant	tmp;
798
799	if (isZero(g))
800	{
801		itog(i,g);
802	}
803	else if(g->sign < 0) {
804		tmp = popg();
805		itog(i, tmp);
806	    addg(tmp, g);
807		pushg(1);
808		return;
809    }
810	else
811	{
812		w = g->n[0]+i;
813		do
814		{
815			g->n[j] = (unsigned short) (w & 65535L);
816			carry = w >> 16;
817			w = g->n[++j]+carry;
818		} while ((carry!=0) && (j<size));
819	}
820	if (carry)
821	{
822		++g->sign;
823		g->n[size] = (unsigned short)carry;
824	}
825}
826
827
828/* New subtract routines.
829	The basic subtract "subg()" uses the following logic table:
830
831     a      b          if(b > a)           if(a > b)
832
833     +      +          b := b - a          b := -(a - b)
834     -      +          b := b + (-a)       N.A.
835     +      -          N.A.                b := -((-b) + a)
836	  -      -          b := (-a) - (-b)    b := -((-b) - (-a))
837
838   The basic addition routine "addg()" uses:
839
840	  a      b          if(b > -a)          if(-a > b)
841
842     +      +          b := b + a          N.A.
843     -      +          b := b - (-a)       b := -((-a) - b)
844     +      -          b := a - (-b)       b := -((-b) - a)
845     -      -          N.A.                b := -((-b) + (-a))
846
847   In this way, internal routines "normal_addg," "normal_subg,"
848	and "reverse_subg;" each of which assumes non-negative
849	operands and a non-negative result, are now used for greater
850	efficiency.
851 */
852
853void
854normal_addg(
855	giant			a,
856	giant			b
857)
858/* b := a + b, both a,b assumed non-negative. */
859{
860	int 			carry = 0;
861	int 			asize = a->sign, bsize = b->sign;
862	long 			k;
863	int				j=0;
864	unsigned short	*aptr = a->n, *bptr = b->n;
865
866	if (asize < bsize)
867	{
868		for (j=0; j<asize; j++)
869		{
870			k = *aptr++ + *bptr + carry;
871			carry = 0;
872			if (k >= 65536L)
873			{
874				k -= 65536L;
875				++carry;
876			}
877			*bptr++ = (unsigned short)k;
878		}
879		for (j=asize; j<bsize; j++)
880		{
881			k = *bptr + carry;
882			carry = 0;
883			if (k >= 65536L)
884			{
885				k -= 65536L;
886				++carry;
887			}
888			*bptr++ = (unsigned short)k;
889		}
890	}
891	else
892	{
893		for (j=0; j<bsize; j++)
894		{
895			k = *aptr++ + *bptr + carry;
896			carry = 0;
897			if (k >= 65536L)
898			{
899				k -= 65536L;
900				++carry;
901			}
902			*bptr++ = (unsigned short)k;
903		}
904		for (j=bsize; j<asize; j++)
905		{
906			k = *aptr++ + carry;
907			carry = 0;
908			if (k >= 65536L)
909			{
910				k -= 65536L;
911				++carry;
912			}
913			*bptr++ = (unsigned short)k;
914		}
915	}
916	if (carry)
917	{
918		*bptr = 1; ++j;
919	}
920	b->sign = j;
921}
922
923
924void
925normal_subg(
926	giant			a,
927	giant			b
928)
929/* b := b - a; requires b, a non-negative and b >= a. */
930{
931	int 			j, size = b->sign;
932	unsigned int	k;
933
934	if (a->sign == 0)
935		return;
936
937	k = 0;
938	for (j=0; j<a->sign; ++j)
939	{
940		k += 0xffff - a->n[j] + b->n[j];
941		b->n[j] = (unsigned short)(k & 0xffff);
942		k >>= 16;
943	}
944	for (j=a->sign; j<size; ++j)
945	{
946		k += 0xffff + b->n[j];
947		b->n[j] = (unsigned short)(k & 0xffff);
948		k >>= 16;
949	}
950
951	if (b->n[0] == 0xffff)
952		iaddg(1,b);
953	else
954		++b->n[0];
955
956	while ((size-- > 0) && (b->n[size]==0));
957
958	b->sign = (b->n[size]==0) ? 0 : size+1;
959}
960
961
962void
963reverse_subg(
964	giant			a,
965	giant			b
966)
967/* b := a - b; requires b, a non-negative and a >= b. */
968{
969	int 			j, size = a->sign;
970	unsigned int	k;
971
972	k = 0;
973	for (j=0; j<b->sign; ++j)
974	{
975		k += 0xffff - b->n[j] + a->n[j];
976		b->n[j] = (unsigned short)(k & 0xffff);
977		k >>= 16;
978	}
979	for (j=b->sign; j<size; ++j)
980	{
981		k += 0xffff + a->n[j];
982		b->n[j] = (unsigned short)(k & 0xffff);
983		k >>= 16;
984	}
985
986	b->sign = size; /* REC, 21 Apr 1996. */
987	if (b->n[0] == 0xffff)
988		iaddg(1,b);
989	else
990		++b->n[0];
991
992	while (!b->n[--size]);
993
994	b->sign = size+1;
995}
996
997void
998addg(
999	giant		a,
1000	giant		b
1001)
1002/* b := b + a, any signs any result. */
1003{
1004	int 		asgn = a->sign, bsgn = b->sign;
1005
1006	if (asgn == 0)
1007		return;
1008	if (bsgn == 0)
1009	{
1010		gtog(a,b);
1011		return;
1012	}
1013	if ((asgn < 0) == (bsgn < 0))
1014	{
1015		if (bsgn > 0)
1016		{
1017			normal_addg(a,b);
1018			return;
1019		}
1020		absg(b);
1021		if(a != b) absg(a);
1022		normal_addg(a,b);
1023		negg(b);
1024		if(a != b) negg(a);
1025		return;
1026	}
1027	if(bsgn > 0)
1028	{
1029		negg(a);
1030		if (gcompg(b,a) >= 0)
1031		{
1032			normal_subg(a,b);
1033			negg(a);
1034			return;
1035		}
1036		reverse_subg(a,b);
1037		negg(a);
1038		negg(b);
1039		return;
1040	}
1041	negg(b);
1042	if(gcompg(b,a) < 0)
1043	{
1044		reverse_subg(a,b);
1045		return;
1046	}
1047	normal_subg(a,b);
1048	negg(b);
1049	return;
1050}
1051
1052void
1053subg(
1054	giant		a,
1055	giant		b
1056)
1057/* b := b - a, any signs, any result. */
1058{
1059	int 		asgn = a->sign, bsgn = b->sign;
1060
1061	if (asgn == 0)
1062		return;
1063	if (bsgn == 0)
1064	{
1065		gtog(a,b);
1066		negg(b);
1067		return;
1068	}
1069	if ((asgn < 0) != (bsgn < 0))
1070	{
1071		if (bsgn > 0)
1072		{
1073			negg(a);
1074			normal_addg(a,b);
1075			negg(a);
1076			return;
1077		}
1078		negg(b);
1079		normal_addg(a,b);
1080		negg(b);
1081		return;
1082	}
1083	if (bsgn > 0)
1084	{
1085		if (gcompg(b,a) >= 0)
1086		{
1087			normal_subg(a,b);
1088			return;
1089		}
1090		reverse_subg(a,b);
1091		negg(b);
1092		return;
1093	}
1094	negg(a);
1095	negg(b);
1096	if (gcompg(b,a) >= 0)
1097	{
1098		normal_subg(a,b);
1099		negg(a);
1100		negg(b);
1101		return;
1102	}
1103	reverse_subg(a,b);
1104	negg(a);
1105	return;
1106}
1107
1108
1109int
1110numtrailzeros(
1111	giant					g
1112)
1113/* Returns the number of trailing zero bits in g. */
1114{
1115	register int 			numshorts = abs(g->sign), j, bcount=0;
1116	register unsigned short gshort, c;
1117
1118	for (j=0;j<numshorts;j++)
1119	{
1120		gshort = g->n[j];
1121		c = 1;
1122		for (bcount=0;bcount<16; bcount++)
1123		{
1124			if (c & gshort)
1125				break;
1126			c <<= 1;
1127		}
1128		if (bcount<16)
1129			break;
1130	}
1131	return(bcount + 16*j);
1132}
1133
1134
1135void
1136bdivg(
1137	giant		v,
1138	giant		u
1139)
1140/* u becomes greatest power of two not exceeding u/v. */
1141{
1142	int 		diff = bitlen(u) - bitlen(v);
1143	giant		scratch7;
1144
1145	if (diff<0)
1146	{
1147		itog(0,u);
1148		return;
1149	}
1150	scratch7 = popg();
1151	gtog(v, scratch7);
1152	gshiftleft(diff,scratch7);
1153	if (gcompg(u,scratch7) < 0)
1154		diff--;
1155	if (diff<0)
1156	{
1157		itog(0,u);
1158		pushg(1);
1159		return;
1160	}
1161	itog(1,u);
1162	gshiftleft(diff,u);
1163
1164	pushg(1);
1165}
1166
1167
1168int
1169binvaux(
1170	giant 	p,
1171	giant 	x
1172)
1173/* Binary inverse method. Returns zero if no inverse exists,
1174 * in which case x becomes GCD(x,p). */
1175{
1176
1177	giant scratch7, u0, u1, v0, v1;
1178
1179	if (isone(x))
1180		return(1);
1181	u0 = popg();
1182    u1 = popg();
1183    v0 = popg();
1184    v1 = popg();
1185	itog(1, v0);
1186	gtog(x, v1);
1187	itog(0,x);
1188	gtog(p, u1);
1189
1190	scratch7 = popg();
1191	while(!isZero(v1))
1192	{
1193		gtog(u1, u0);
1194		bdivg(v1, u0);
1195		gtog(x, scratch7);
1196		gtog(v0, x);
1197		mulg(u0, v0);
1198		subg(v0,scratch7);
1199		gtog(scratch7, v0);
1200
1201		gtog(u1, scratch7);
1202		gtog(v1, u1);
1203		mulg(u0, v1);
1204		subg(v1,scratch7);
1205		gtog(scratch7, v1);
1206	}
1207
1208	pushg(1);
1209
1210	if (!isone(u1))
1211	{
1212		gtog(u1,x);
1213		if(x->sign<0) addg(p, x);
1214		pushg(4);
1215	    return(0);
1216	}
1217	if(x->sign<0)
1218		addg(p, x);
1219    pushg(4);
1220	return(1);
1221}
1222
1223
1224int
1225binvg(
1226	giant 	p,
1227	giant 	x
1228)
1229{
1230	modg(p, x);
1231	return(binvaux(p,x));
1232}
1233
1234
1235int
1236invg(
1237	giant 	p,
1238	giant 	x
1239)
1240{
1241	modg(p, x);
1242	return(invaux(p,x));
1243}
1244
1245int
1246invaux(
1247	giant 	p,
1248	giant 	x
1249)
1250/* Returns zero if no inverse exists, in which case x becomes
1251 * GCD(x,p). */
1252{
1253
1254	giant scratch7, u0, u1, v0, v1;
1255
1256	if ((x->sign==1)&&(x->n[0]==1))
1257		return(1);
1258
1259    u0 = popg();
1260    u1 = popg();
1261    v0 = popg();
1262    v1 = popg();
1263
1264	itog(1,u1);
1265	gtog(p, v0);
1266	gtog(x, v1);
1267	itog(0,x);
1268
1269	scratch7 = popg();
1270	while (!isZero(v1))
1271	{
1272		gtog(v0, u0);
1273		divg(v1, u0);
1274		gtog(u0, scratch7);
1275		mulg(v1, scratch7);
1276		subg(v0, scratch7);
1277		negg(scratch7);
1278		gtog(v1, v0);
1279		gtog(scratch7, v1);
1280		gtog(u1, scratch7);
1281		mulg(u0, scratch7);
1282		subg(x, scratch7);
1283		negg(scratch7);
1284		gtog(u1,x);
1285		gtog(scratch7, u1);
1286	}
1287	pushg(1);
1288
1289	if ((v0->sign!=1)||(v0->n[0]!=1))
1290	{
1291		gtog(v0,x);
1292        pushg(4);
1293		return(0);
1294	}
1295	if(x->sign<0)
1296		addg(p, x);
1297	pushg(4);
1298	return(1);
1299}
1300
1301
1302int
1303mersenneinvg(
1304	int		q,
1305	giant 	x
1306)
1307{
1308	int		k;
1309    giant u0, u1, v1;
1310
1311    u0 = popg();
1312    u1 = popg();
1313    v1 = popg();
1314
1315	itog(1, u0);
1316	itog(0, u1);
1317	itog(1, v1);
1318	gshiftleft(q, v1);
1319	subg(u0, v1);
1320	mersennemod(q, x);
1321	while (1)
1322	{
1323		k = -1;
1324		if (isZero(x))
1325		{
1326			gtog(v1, x);
1327            pushg(3);
1328			return(0);
1329		}
1330		while (bitval(x, ++k) == 0);
1331
1332		gshiftright(k, x);
1333		if (k)
1334		{
1335			gshiftleft(q-k, u0);
1336			mersennemod(q, u0);
1337		}
1338		if (isone(x))
1339			break;
1340		addg(u1, u0);
1341		mersennemod(q, u0);
1342		negg(u1);
1343		addg(u0, u1);
1344		mersennemod(q, u1);
1345		if (!gcompg(v1,x)) {
1346			pushg(3);
1347			return(0);
1348        }
1349		addg(v1, x);
1350		negg(v1);
1351		addg(x, v1);
1352		mersennemod(q, v1);
1353	}
1354	gtog(u0, x);
1355	mersennemod(q,x);
1356    pushg(3);
1357	return(1);
1358}
1359
1360
1361void
1362cgcdg(
1363	giant 	a,
1364	giant 	v
1365)
1366/* Classical Euclid GCD. v becomes gcd(a, v). */
1367{
1368	giant 	u, r;
1369
1370	v->sign = abs(v->sign);
1371	if (isZero(a))
1372		return;
1373
1374	u = popg();
1375	r = popg();
1376	gtog(a, u);
1377	u->sign = abs(u->sign);
1378	while (!isZero(v))
1379	{
1380		gtog(u, r);
1381		modg(v, r);
1382		gtog(v, u);
1383		gtog(r, v);
1384	}
1385	gtog(u,v);
1386	pushg(2);
1387}
1388
1389
1390void
1391gcdg(
1392	giant		x,
1393	giant		y
1394)
1395{
1396	if (bitlen(y)<= GCDLIMIT)
1397		bgcdg(x,y);
1398	else
1399		ggcd(x,y);
1400}
1401
1402void
1403bgcdg(
1404	giant 	a,
1405	giant 	b
1406)
1407/* Binary form of the gcd. b becomes the gcd of a,b. */
1408{
1409	int		k = isZero(b), m = isZero(a);
1410	giant 	u, v, t;
1411
1412	if (k || m)
1413	{
1414		if (m)
1415		{
1416			if (k)
1417				itog(1,b);
1418			return;
1419		}
1420		if (k)
1421		{
1422			if (m)
1423				itog(1,b);
1424			else
1425				gtog(a,b);
1426			return;
1427		}
1428	}
1429
1430	u = popg();
1431	v = popg();
1432	t = popg();
1433
1434	/* Now neither a nor b is zero. */
1435	gtog(a, u);
1436	u->sign = abs(a->sign);
1437	gtog(b, v);
1438	v->sign = abs(b->sign);
1439	k = numtrailzeros(u);
1440	m = numtrailzeros(v);
1441	if (k>m)
1442		k = m;
1443	gshiftright(k,u);
1444	gshiftright(k,v);
1445	if (u->n[0] & 1)
1446	{
1447		gtog(v, t);
1448		negg(t);
1449	}
1450	else
1451	{
1452		gtog(u,t);
1453	}
1454
1455	while (!isZero(t))
1456	{
1457		m = numtrailzeros(t);
1458		gshiftright(m, t);
1459		if (t->sign > 0)
1460		{
1461			gtog(t, u);
1462			subg(v,t);
1463		}
1464		else
1465		{
1466			gtog(t, v);
1467			negg(v);
1468			addg(u,t);
1469		}
1470	}
1471	gtog(u,b);
1472	gshiftleft(k, b);
1473	pushg(3);
1474}
1475
1476
1477void
1478powerg(
1479	int		m,
1480	int		n,
1481	giant 	g
1482)
1483/* g becomes m^n, NO mod performed. */
1484{
1485	giant scratch2 = popg();
1486
1487	itog(1, g);
1488	itog(m, scratch2);
1489	while (n)
1490	{
1491		if (n & 1)
1492			mulg(scratch2, g);
1493		n >>= 1;
1494		if (n)
1495			squareg(scratch2);
1496	}
1497	pushg(1);
1498}
1499
1500#if 0
1501void
1502jtest(
1503	giant 	n
1504)
1505{
1506	if (n->sign)
1507	{
1508		if (n->n[n->sign-1] == 0)
1509		{
1510			fprintf(stderr,"%d %d tilt",n->sign, (int)(n->n[n->sign-1]));
1511			exit(7);
1512		}
1513	}
1514}
1515#endif
1516
1517
1518void
1519make_recip(
1520	giant 	d,
1521	giant 	r
1522)
1523/* r becomes the steady-state reciprocal
1524 * 2^(2b)/d, where b = bit-length of d-1. */
1525{
1526	int		b;
1527	giant 	tmp, tmp2;
1528
1529	if (isZero(d) || (d->sign < 0))
1530	{
1531		exit(SIGN);
1532	}
1533	tmp = popg();
1534	tmp2 = popg();
1535	itog(1, r);
1536	subg(r, d);
1537	b = bitlen(d);
1538	addg(r, d);
1539	gshiftleft(b, r);
1540	gtog(r, tmp2);
1541	while (1)
1542	{
1543		gtog(r, tmp);
1544		squareg(tmp);
1545		gshiftright(b, tmp);
1546		mulg(d, tmp);
1547		gshiftright(b, tmp);
1548		addg(r, r);
1549		subg(tmp, r);
1550		if (gcompg(r, tmp2) <= 0)
1551			break;
1552		gtog(r, tmp2);
1553	}
1554	itog(1, tmp);
1555	gshiftleft(2*b, tmp);
1556	gtog(r, tmp2);
1557	mulg(d, tmp2);
1558	subg(tmp2, tmp);
1559	itog(1, tmp2);
1560	while (tmp->sign < 0)
1561	{
1562		subg(tmp2, r);
1563		addg(d, tmp);
1564	}
1565	pushg(2);
1566}
1567
1568void
1569divg_via_recip(
1570	giant 	d,
1571	giant 	r,
1572	giant 	n
1573)
1574/* n := n/d, where r is the precalculated
1575 * steady-state reciprocal of d. */
1576{
1577	int 	s = 2*(bitlen(r)-1), sign = gsign(n);
1578	giant 	tmp, tmp2;
1579
1580	if (isZero(d) || (d->sign < 0))
1581	{
1582		exit(SIGN);
1583	}
1584
1585	tmp = popg();
1586	tmp2 = popg();
1587
1588	n->sign = abs(n->sign);
1589	itog(0, tmp2);
1590	while (1)
1591	{
1592		gtog(n, tmp);
1593		mulg(r, tmp);
1594		gshiftright(s, tmp);
1595		addg(tmp, tmp2);
1596		mulg(d, tmp);
1597		subg(tmp, n);
1598		if (gcompg(n,d) >= 0)
1599		{
1600			subg(d,n);
1601			iaddg(1, tmp2);
1602		}
1603		if (gcompg(n,d) < 0)
1604			break;
1605	}
1606	gtog(tmp2, n);
1607	n->sign *= sign;
1608	pushg(2);
1609}
1610
1611#if 1
1612void
1613modg_via_recip(
1614	giant 	d,
1615	giant 	r,
1616	giant 	n
1617)
1618/* This is the fastest mod of the present collection.
1619 * n := n % d, where r is the precalculated
1620 * steady-state reciprocal of d. */
1621
1622{
1623	int		s = (bitlen(r)-1), sign = n->sign;
1624	giant 	tmp, tmp2;
1625
1626	if (isZero(d) || (d->sign < 0))
1627	{
1628		exit(SIGN);
1629	}
1630
1631	tmp = popg();
1632	tmp2 = popg();
1633
1634	n->sign = abs(n->sign);
1635	while (1)
1636	{
1637		gtog(n, tmp); gshiftright(s-1, tmp);
1638		mulg(r, tmp);
1639		gshiftright(s+1, tmp);
1640		mulg(d, tmp);
1641		subg(tmp, n);
1642		if (gcompg(n,d) >= 0)
1643			subg(d,n);
1644		if (gcompg(n,d) < 0)
1645			break;
1646	}
1647	if (sign >= 0)
1648		goto done;
1649	if (isZero(n))
1650		goto done;
1651	negg(n);
1652	addg(d,n);
1653done:
1654	pushg(2);
1655	return;
1656}
1657
1658#else
1659void
1660modg_via_recip(
1661	giant 	d,
1662	giant 	r,
1663	giant 	n
1664)
1665{
1666	int		s = 2*(bitlen(r)-1), sign = n->sign;
1667	giant 	tmp, tmp2;
1668
1669	if (isZero(d) || (d->sign < 0))
1670	{
1671		exit(SIGN);
1672	}
1673
1674	tmp = popg();
1675	tmp2 = popg()
1676
1677	n->sign = abs(n->sign);
1678	while (1)
1679	{
1680		gtog(n, tmp);
1681		mulg(r, tmp);
1682		gshiftright(s, tmp);
1683		mulg(d, tmp);
1684		subg(tmp, n);
1685		if (gcompg(n,d) >= 0)
1686			subg(d,n);
1687		if (gcompg(n,d) < 0)
1688			break;
1689	}
1690	if (sign >= 0)
1691		goto done;
1692	if (isZero(n))
1693		goto done;
1694	negg(n);
1695	addg(d,n);
1696done:
1697	pushg(2);
1698	return;
1699}
1700#endif
1701
1702void
1703modg(
1704	giant 	d,
1705	giant 	n
1706)
1707/* n becomes n%d. n is arbitrary, but the denominator d must be positive! */
1708{
1709	if (cur_recip == NULL) {
1710		cur_recip = newgiant(current_max_size);
1711		cur_den = newgiant(current_max_size);
1712		gtog(d, cur_den);
1713		make_recip(d, cur_recip);
1714	} else if (gcompg(d, cur_den)) {
1715		gtog(d, cur_den);
1716		make_recip(d, cur_recip);
1717	}
1718	modg_via_recip(d, cur_recip, n);
1719}
1720
1721
1722#if 0
1723int
1724feemulmod (
1725	giant a,
1726	giant b,
1727	int q,
1728	int k
1729)
1730/* a becomes (a*b) (mod 2^q-k) where q % 16 == 0 and k is "small" (0 < k < 65535).
1731 * Returns 0 if unsuccessful, otherwise 1. */
1732{
1733	giant			carry, kk, scratch;
1734	int				i, j;
1735	int 			asize = abs(a->sign), bsize = abs(b->sign);
1736	unsigned short 	*aptr,*bptr,*destptr;
1737	unsigned int	words;
1738	int				kpower, curk;
1739
1740	if ((q % 16) || (k <= 0) || (k >= 65535)) {
1741		return (0);
1742	}
1743
1744	carry = popg();
1745	kk = popg();
1746	scratch = popg();
1747
1748	for (i=0; i<asize+bsize; i++) scratch->n[i]=0;
1749
1750	words = q >> 4;
1751
1752	bptr = b->n;
1753	for (i = 0; i < bsize; i++) {
1754		mult = *bptr++;
1755		if (mult) {
1756			kpower = i/words;
1757
1758			if (kpower >= 1) itog (kpower,kk);
1759			for (j = 1; j < kpower; k++) smulg(kpower,kk);
1760
1761			itog(0,carry);
1762
1763			aptr = a->n;
1764			for (j = 0; j < bsize; b++) {
1765				gtog(kk,scratch);
1766				smulg(*aptr++,scratch);
1767				smulg(mult,scratch);
1768				iaddg(*destptr,scratch);
1769				addg(carry,scratch);
1770				*destptr++ = scratch->n[0];
1771				gshiftright(scratch,16);
1772				gtog(scratch,carry);
1773				if (destptr - scratch->n >= words) {
1774					smulg(k, carry);
1775					smulg(k, kk);
1776					destptr -= words;
1777				}
1778			}
1779		}
1780	}
1781
1782	int 			i,j,m;
1783	unsigned int 	prod,carry=0;
1784	int 			asize = abs(a->sign), bsize = abs(b->sign);
1785	unsigned short 	*aptr,*bptr,*destptr;
1786	unsigned short	mult;
1787	int				words, excess;
1788	int				temp;
1789	giant			scratch = popg(), scratch2 = popg(), scratch3 = popg();
1790	short			*carryptr = scratch->n;
1791	int				kpower,kpowerlimit, curk;
1792
1793	if ((q % 16) || (k <= 0) || (k >= 65535)) {
1794		return (0);
1795	}
1796
1797	scratch
1798
1799	for (i=0; i<asize+bsize; i++) scratch->n[i]=0;
1800
1801	words = q >> 4;
1802
1803	bptr = b->n;
1804	for (i=0; i<bsize; ++i)
1805	{
1806		mult = *bptr++;
1807		if (mult)
1808		{
1809			kpower = i/words;
1810			aptr = a->n;
1811			destptr = scratch->n + i;
1812
1813			if (kpower == 0) {
1814				carry = 0;
1815			} else if (kpower <= kpowerlimit) {
1816				carry = 0;
1817				curk = k;
1818				for (j = 1; j < kpower; j++) curk *= k;
1819			} else {
1820				itog (k,scratch);
1821				for (j = 1; j < kpower; j++) smulg(k,scratch);
1822				itog(0,scratch2);
1823			}
1824
1825			for (j = 0; j < asize; j++) {
1826				if(kpower == 0) {
1827					prod = *aptr++ * mult + *destptr + carry;
1828					*destptr++ = (unsigned short)(prod & 0xFFFF);
1829					carry = prod >> 16;
1830				} else if (kpower < kpowerlimit) {
1831					prod = kcur * *aptr++;
1832					temp = prod >> 16;
1833					prod &= 0xFFFF;
1834					temp *= mult;
1835					prod *= mult;
1836					temp += prod >> 16;
1837					prod &= 0xFFFF;
1838					prod += *destptr + carry;
1839					carry = prod >> 16 + temp;
1840					*destptr++ = (unsigned short)(prod & 0xFFFF);
1841				} else {
1842					gtog(scratch,scratch3);
1843					smulg(*aptr++,scratch3);
1844					smulg(mult,scratch3);
1845					iaddg(*destptr,scratch3);
1846					addg(scratch3,scratch2);
1847					*destptr++ = scratch2->n[0];
1848					memmove(scratch2->n,scratch2->n+1,2*(scratch2->size-1));
1849					scratch2->sign--;
1850				}
1851				if (destptr - scratch->n > words) {
1852					if (kpower == 0) {
1853						curk = k;
1854						carry *= k;
1855					} else if (kpower < kpowerlimit) {
1856						curk *= k;
1857						carry *= curk;
1858					} else if (kpower == kpowerlimit) {
1859						itog (k,scratch);
1860						for (j = 1; j < kpower; j++) smulg(k,scratch);
1861						itog(carry,scratch2);
1862						smulg(k,scratch2);
1863					} else {
1864						smulg(k,scratch);
1865						smulg(k,scratch2);
1866					}
1867					kpower++;
1868					destptr -= words;
1869				}
1870			}
1871
1872			/* Next, deal with the carry term. Needs to be improved to
1873			handle overflow carry cases. */
1874			if (kpower <= kpowerlimit) {
1875				iaddg(carry,scratch);
1876			} else {
1877				addg(scratch2,scratch);
1878			}
1879			while(scratch->sign > q)
1880				gtog(scratch,scratch2)
1881		}
1882	}
1883	scratch->sign = destptr - scratch->n;
1884	if (!carry)
1885		--(scratch->sign);
1886	scratch->sign *= gsign(a)*gsign(b);
1887	gtog(scratch,a);
1888	pushg(3);
1889	return (1);
1890}
1891#endif
1892
1893int
1894idivg(
1895	int		divisor,
1896	giant 	theg
1897)
1898{
1899	/* theg becomes theg/divisor. Returns remainder. */
1900	int 	n;
1901	int 	base = 1<<(8*sizeof(short));
1902
1903	n = radixdiv(base,divisor,theg);
1904	return(n);
1905}
1906
1907
1908void
1909divg(
1910	giant 	d,
1911	giant 	n
1912)
1913/* n becomes n/d. n is arbitrary, but the denominator d must be positive! */
1914{
1915	if (cur_recip == NULL) {
1916		cur_recip = newgiant(current_max_size);
1917		cur_den = newgiant(current_max_size);
1918		gtog(d, cur_den);
1919		make_recip(d, cur_recip);
1920	} else if (gcompg(d, cur_den)) {
1921		gtog(d, cur_den);
1922		make_recip(d, cur_recip);
1923	}
1924	divg_via_recip(d, cur_recip, n);
1925}
1926
1927
1928void
1929powermod(
1930	giant		x,
1931	int 		n,
1932	giant 		g
1933)
1934/* x becomes x^n (mod g). */
1935{
1936	giant scratch2 = popg();
1937	gtog(x, scratch2);
1938	itog(1, x);
1939	while (n)
1940	{
1941		if (n & 1)
1942		{
1943			mulg(scratch2, x);
1944			modg(g, x);
1945		}
1946		n >>= 1;
1947		if (n)
1948		{
1949			squareg(scratch2);
1950			modg(g, scratch2);
1951		}
1952	}
1953	pushg(1);
1954}
1955
1956
1957void
1958powermodg(
1959	giant		x,
1960	giant		n,
1961	giant		g
1962)
1963/* x becomes x^n (mod g). */
1964{
1965	int 		len, pos;
1966	giant		scratch2 = popg();
1967
1968	gtog(x, scratch2);
1969	itog(1, x);
1970	len = bitlen(n);
1971	pos = 0;
1972	while (1)
1973	{
1974		if (bitval(n, pos++))
1975		{
1976			mulg(scratch2, x);
1977			modg(g, x);
1978		}
1979		if (pos>=len)
1980			break;
1981		squareg(scratch2);
1982		modg(g, scratch2);
1983	}
1984	pushg(1);
1985}
1986
1987
1988void
1989fermatpowermod(
1990	giant 	x,
1991	int		n,
1992	int		q
1993)
1994/* x becomes x^n (mod 2^q+1). */
1995{
1996	giant scratch2 = popg();
1997
1998	gtog(x, scratch2);
1999	itog(1, x);
2000	while (n)
2001	{
2002		if (n & 1)
2003		{
2004			mulg(scratch2, x);
2005			fermatmod(q, x);
2006		}
2007		n >>= 1;
2008		if (n)
2009		{
2010			squareg(scratch2);
2011			fermatmod(q, scratch2);
2012		}
2013	}
2014	pushg(1);
2015}
2016
2017
2018void
2019fermatpowermodg(
2020	giant 	x,
2021	giant	n,
2022	int		q
2023)
2024/* x becomes x^n (mod 2^q+1). */
2025{
2026	int		len, pos;
2027	giant	scratch2 = popg();
2028
2029	gtog(x, scratch2);
2030	itog(1, x);
2031	len = bitlen(n);
2032	pos = 0;
2033	while (1)
2034	{
2035		if (bitval(n, pos++))
2036		{
2037			mulg(scratch2, x);
2038			fermatmod(q, x);
2039		}
2040		if (pos>=len)
2041			break;
2042		squareg(scratch2);
2043		fermatmod(q, scratch2);
2044	}
2045	pushg(1);
2046}
2047
2048
2049void
2050mersennepowermod(
2051	giant 	x,
2052	int		n,
2053	int		q
2054)
2055/* x becomes x^n (mod 2^q-1). */
2056{
2057	giant scratch2 = popg();
2058
2059	gtog(x, scratch2);
2060	itog(1, x);
2061	while (n)
2062	{
2063		if (n & 1)
2064		{
2065			mulg(scratch2, x);
2066			mersennemod(q, x);
2067		}
2068		n >>= 1;
2069		if (n)
2070		{
2071			squareg(scratch2);
2072			mersennemod(q, scratch2);
2073		}
2074	}
2075	pushg(1);
2076}
2077
2078
2079void
2080mersennepowermodg(
2081	giant 	x,
2082	giant	n,
2083	int		q
2084)
2085/* x becomes x^n (mod 2^q-1). */
2086{
2087	int		len, pos;
2088	giant	scratch2 = popg();
2089
2090	gtog(x, scratch2);
2091	itog(1, x);
2092	len = bitlen(n);
2093	pos = 0;
2094	while (1)
2095	{
2096		if (bitval(n, pos++))
2097		{
2098			mulg(scratch2, x);
2099			mersennemod(q, x);
2100		}
2101		if (pos>=len)
2102			break;
2103		squareg(scratch2);
2104		mersennemod(q, scratch2);
2105	}
2106	pushg(1);
2107}
2108
2109
2110void
2111gshiftleft(
2112	int				bits,
2113	giant			g
2114)
2115/* shift g left bits bits. Equivalent to g = g*2^bits. */
2116{
2117	int 			rem = bits&15, crem = 16-rem, words = bits>>4;
2118	int 			size = abs(g->sign), j, k, sign = gsign(g);
2119	unsigned short 	carry, dat;
2120
2121	if (!bits)
2122		return;
2123	if (!size)
2124		return;
2125	if (bits < 0) {
2126		gshiftright(-bits,g);
2127		return;
2128	}
2129	if (size+words+1 > current_max_size) {
2130		error = OVFLOW;
2131		exit(error);
2132	}
2133	if (rem == 0) {
2134		memmove(g->n + words, g->n, size * sizeof(short));
2135		for (j = 0; j < words; j++) g->n[j] = 0;
2136		g->sign += (g->sign < 0)?(-words):(words);
2137	} else {
2138		k = size+words;
2139		carry = 0;
2140		for (j=size-1; j>=0; j--) {
2141			dat = g->n[j];
2142			g->n[k--] = (unsigned short)((dat >> crem) | carry);
2143			carry = (unsigned short)(dat << rem);
2144		}
2145		do {
2146			g->n[k--] = carry;
2147			carry = 0;
2148		} while(k>=0);
2149
2150		k = size+words;
2151		if (g->n[k] == 0)
2152			--k;
2153		g->sign = sign*(k+1);
2154	}
2155}
2156
2157
2158void
2159gshiftright(
2160	int						bits,
2161	giant					g
2162)
2163/* shift g right bits bits. Equivalent to g = g/2^bits. */
2164{
2165	register int 			j,size=abs(g->sign);
2166	register unsigned int 	carry;
2167	int 					words = bits >> 4;
2168	int 					remain = bits & 15, cremain = (16-remain);
2169
2170	if (bits==0)
2171		return;
2172	if (isZero(g))
2173		return;
2174	if (bits < 0) {
2175		gshiftleft(-bits,g);
2176		return;
2177	}
2178	if (words >= size) {
2179		g->sign = 0;
2180		return;
2181	}
2182	if (remain == 0) {
2183		memmove(g->n,g->n + words,(size - words) * sizeof(short));
2184		g->sign += (g->sign < 0)?(words):(-words);
2185	} else {
2186		size -= words;
2187
2188		if (size)
2189		{
2190			for(j=0;j<size-1;++j)
2191			{
2192				carry = g->n[j+words+1] << cremain;
2193				g->n[j] = (unsigned short)((g->n[j+words] >> remain ) | carry);
2194			}
2195			g->n[size-1] = (unsigned short)(g->n[size-1+words] >> remain);
2196		}
2197
2198		if (g->n[size-1] == 0)
2199			--size;
2200
2201		if (g->sign > 0)
2202			g->sign = size;
2203		else
2204			g->sign = -size;
2205	}
2206}
2207
2208
2209void
2210extractbits(
2211	int				n,
2212	giant			src,
2213	giant			dest
2214)
2215/* dest becomes lowermost n bits of src. Equivalent to dest = src % 2^n. */
2216{
2217	register int 	words = n >> 4, numbytes = words*sizeof(short);
2218	register int 	bits = n & 15;
2219
2220	if (n<=0)
2221		return;
2222	if (words >= abs(src->sign))
2223		gtog(src,dest);
2224	else
2225	{
2226		memcpy((char *)(dest->n), (char *)(src->n), numbytes);
2227		if (bits)
2228		{
2229			dest->n[words] = (unsigned short)(src->n[words] & ((1<<bits)-1));
2230			++words;
2231		}
2232		while ((dest->n[words-1] == 0) && (words > 0))
2233		{
2234			--words;
2235		}
2236		if (src->sign<0)
2237			dest->sign = -words;
2238		else
2239			dest->sign = words;
2240	}
2241}
2242
2243
2244int
2245allzeros(
2246	int		shorts,
2247	int		bits,
2248	giant	g
2249)
2250{
2251	int		i=shorts;
2252
2253	while (i>0)
2254	{
2255		if (g->n[--i])
2256			return(0);
2257	}
2258	return((int)(!(g->n[shorts] & ((1<<bits)-1))));
2259}
2260
2261
2262void
2263fermatnegate(
2264	int					n,
2265	giant	 			g
2266)
2267/* negate g. g is mod 2^n+1. */
2268{
2269	register int		shorts = n>>4,
2270			 			bits = n & 15,
2271			 			i = shorts,
2272			 			mask = 1<<bits;
2273	register unsigned	carry,temp;
2274
2275	for (temp=(unsigned)shorts; (int)temp>g->sign-1; --temp)
2276	{
2277		g->n[temp] = 0;
2278	}
2279	if (g->n[shorts] & mask)
2280	{                 /* if high bit is set, -g = 1. */
2281		g->sign = 1;
2282		g->n[0] = 1;
2283		return;
2284	}
2285	if (allzeros(shorts,bits,g))
2286		return;       /* if g=0, -g = 0. */
2287
2288	while (i>0)
2289	{   --i;
2290		g->n[i] = (unsigned short)(~(g->n[i+1]));
2291	}
2292	g->n[shorts] ^= mask-1;
2293
2294	carry = 2;
2295	i = 0;
2296	while (carry)
2297	{
2298		temp = g->n[i]+carry;
2299		g->n[i++] = (unsigned short)(temp & 0xffff);
2300		carry = temp>>16;
2301	}
2302	while(!g->n[shorts])
2303	{
2304		--shorts;
2305	}
2306	g->sign = shorts+1;
2307}
2308
2309
2310void
2311mersennemod (
2312	int n,
2313	giant g
2314)
2315/* g := g (mod 2^n - 1) */
2316{
2317	int the_sign, s;
2318	giant scratch3 = popg(), scratch4 = popg();
2319
2320	if ((the_sign = gsign(g)) < 0) absg(g);
2321	while (bitlen(g) > n) {
2322		gtog(g,scratch3);
2323		gshiftright(n,scratch3);
2324		addg(scratch3,g);
2325		gshiftleft(n,scratch3);
2326		subg(scratch3,g);
2327	}
2328	if(!isZero(g)) {
2329		if ((s = gsign(g)) < 0) absg(g);
2330		itog(1,scratch3);
2331		gshiftleft(n,scratch3);
2332		itog(1,scratch4);
2333		subg(scratch4,scratch3);
2334		if(gcompg(g,scratch3) >= 0) subg(scratch3,g);
2335		if (s < 0) {
2336			g->sign = -g->sign;
2337			addg(scratch3,g);
2338		}
2339		if (the_sign < 0) {
2340			g->sign = -g->sign;
2341			addg(scratch3,g);
2342		}
2343	}
2344	pushg(2);
2345}
2346
2347void
2348fermatmod (
2349	int 			n,
2350	giant 			g
2351)
2352/* g := g (mod 2^n + 1), */
2353{
2354	int the_sign, s;
2355	giant scratch3 = popg();
2356
2357	if ((the_sign = gsign(g)) < 0) absg(g);
2358	while (bitlen(g) > n) {
2359		gtog(g,scratch3);
2360		gshiftright(n,scratch3);
2361		subg(scratch3,g);
2362		gshiftleft(n,scratch3);
2363		subg(scratch3,g);
2364	}
2365        if((bitlen(g) < n) && (the_sign * (g->sign) >= 0)) goto leave;
2366	if(!isZero(g)) {
2367		if ((s = gsign(g)) < 0) absg(g);
2368		itog(1,scratch3);
2369		gshiftleft(n,scratch3);
2370		iaddg(1,scratch3);
2371		if(gcompg(g,scratch3) >= 0) subg(scratch3,g);
2372		if (s * the_sign < 0) {
2373			g->sign = -g->sign;
2374			addg(scratch3,g);
2375		}
2376	}
2377leave:
2378	pushg(1);
2379
2380}
2381
2382void
2383fer_mod (
2384	int 			n,
2385	giant 			g,
2386	giant modulus
2387)
2388/* Same as fermatmod(), except modulus = 2^n should be passed
2389if available (i.e. if already allocated and set). */
2390{
2391	int the_sign, s;
2392	giant scratch3 = popg();
2393
2394	if ((the_sign = gsign(g)) < 0) absg(g);
2395	while (bitlen(g) > n) {
2396		gtog(g,scratch3);
2397		gshiftright(n,scratch3);
2398		subg(scratch3,g);
2399		gshiftleft(n,scratch3);
2400		subg(scratch3,g);
2401	}
2402        if((bitlen(g) < n) && (the_sign * (g->sign) >= 0)) goto leave;
2403	if(!isZero(g)) {
2404		if ((s = gsign(g)) < 0) absg(g);
2405		if(gcompg(g,modulus) >= 0) subg(modulus,g);
2406		if (s * the_sign < 0) {
2407			g->sign = -g->sign;
2408			addg(modulus,g);
2409		}
2410	}
2411leave:
2412	pushg(1);
2413}
2414
2415
2416void
2417smulg(
2418	unsigned short	i,
2419	giant 			g
2420)
2421/* g becomes g * i. */
2422{
2423	unsigned short	carry = 0;
2424	int				size = abs(g->sign);
2425	register int 	j,k,mul = abs(i);
2426	unsigned short 	*digit = g->n;
2427
2428	for (j=0; j<size; ++j)
2429	{
2430		k = *digit * mul + carry;
2431		carry = (unsigned short)(k>>16);
2432		*digit = (unsigned short)(k & 0xffff);
2433		++digit;
2434	}
2435	if (carry)
2436	{
2437		if (++j >= current_max_size)
2438		{
2439			error = OVFLOW;
2440			exit(error);
2441		}
2442		*digit = carry;
2443	}
2444
2445	if ((g->sign>0) ^ (i>0))
2446		g->sign = -j;
2447	else
2448		g->sign = j;
2449}
2450
2451
2452void
2453squareg(
2454	giant 	b
2455)
2456/* b becomes b^2. */
2457{
2458	auxmulg(b,b);
2459}
2460
2461
2462void
2463mulg(
2464	giant	a,
2465	giant	b
2466)
2467/* b becomes a*b. */
2468{
2469	auxmulg(a,b);
2470}
2471
2472
2473void
2474auxmulg(
2475	giant		a,
2476	giant		b
2477)
2478/* Optimized general multiply, b becomes a*b. Modes are:
2479 * AUTO_MUL: switch according to empirical speed criteria.
2480 * GRAMMAR_MUL: force grammar-school algorithm.
2481 * KARAT_MUL: force Karatsuba divide-conquer method.
2482 * FFT_MUL: force floating point FFT method. */
2483{
2484	float		grammartime;
2485	int 		square = (a==b);
2486	int 		sizea, sizeb;
2487
2488	switch (mulmode)
2489	{
2490		case GRAMMAR_MUL:
2491			if (square) grammarsquareg(b);
2492			else grammarmulg(a,b);
2493			break;
2494		case FFT_MUL:
2495			if (square)
2496				FFTsquareg(b);
2497			else
2498				FFTmulg(a,b);
2499			break;
2500		case KARAT_MUL:
2501			if (square) karatsquareg(b);
2502				else karatmulg(a,b);
2503			break;
2504		case AUTO_MUL:
2505			sizea = abs(a->sign);
2506			sizeb = abs(b->sign);
2507		    if((sizea > KARAT_BREAK) && (sizea <= FFT_BREAK) &&
2508			   (sizeb > KARAT_BREAK) && (sizeb <= FFT_BREAK)){
2509				if(square) karatsquareg(b);
2510					else karatmulg(a,b);
2511
2512			} else {
2513				grammartime  = (float)sizea;
2514				grammartime *= (float)sizeb;
2515			    if (grammartime < FFT_BREAK * FFT_BREAK)
2516			    {
2517				   if (square) grammarsquareg(b);
2518						else grammarmulg(a,b);
2519				}
2520				else
2521				{
2522				if (square) FFTsquareg(b);
2523					else FFTmulg(a,b);
2524				}
2525			}
2526			break;
2527	}
2528}
2529
2530void
2531justg(giant x) {
2532	int s = x->sign, sg = 1;
2533
2534	if(s<0) {
2535		sg = -1;
2536		s = -s;
2537	}
2538	--s;
2539	while(x->n[s] == 0) {
2540			--s;
2541			if(s < 0) break;
2542	}
2543	x->sign = sg*(s+1);
2544}
2545
2546/* Next, improved Karatsuba routines from A. Powell,
2547   improvements by G. Woltman. */
2548
2549void
2550karatmulg(giant x, giant y)
2551/* y becomes x*y. */
2552{
2553	int s = abs(x->sign), t = abs(y->sign), w, bits,
2554		sg = gsign(x)*gsign(y);
2555	giant a, b, c, d, e, f;
2556
2557	if((s <= KARAT_BREAK) || (t <= KARAT_BREAK)) {
2558		grammarmulg(x,y);
2559		return;
2560	}
2561	w = (s + t + 2)/4; bits = 16*w;
2562	a = popg(); b = popg(); c = popg();
2563    d = popg(); e = popg(); f = popg();
2564	gtog(x,a); absg(a); if (w <= s) {a->sign = w; justg(a);}
2565	gtog(x,b); absg(b);
2566	gshiftright(bits, b);
2567	gtog(y,c); absg(c); if (w <= t) {c->sign = w; justg(c);}
2568	gtog(y,d); absg(d);
2569	gshiftright(bits,d);
2570	gtog(a,e); normal_addg(b,e);	/* e := (a + b) */
2571	gtog(c,f); normal_addg(d,f);	/* f := (c + d) */
2572	karatmulg(e,f);			/* f := (a + b)(c + d) */
2573	karatmulg(c,a);			/* a := a c */
2574	karatmulg(d,b);			/* b := b d */
2575	normal_subg(a,f);
2576         /* f := (a + b)(c + d) - a c */
2577	normal_subg(b,f);
2578         /* f := (a + b)(c + d) - a c - b d */
2579	gshiftleft(bits, b);
2580	normal_addg(f, b);
2581	gshiftleft(bits, b);
2582	normal_addg(a, b);
2583	gtog(b, y); y->sign *= sg;
2584	pushg(6);
2585
2586	return;
2587}
2588
2589void
2590karatsquareg(giant x)
2591/* x becomes x^2. */
2592{
2593	int s = abs(x->sign), w, bits;
2594	giant a, b, c;
2595
2596	if(s <= KARAT_BREAK) {
2597		grammarsquareg(x);
2598		return;
2599	}
2600	w = (s+1)/2; bits = 16*w;
2601	a = popg(); b = popg(); c = popg();
2602	gtog(x, a); a->sign = w; justg(a);
2603	gtog(x, b); absg(b);
2604	gshiftright(bits, b);
2605	gtog(a,c); normal_addg(b,c);
2606	karatsquareg(c);
2607	karatsquareg(a);
2608	karatsquareg(b);
2609	normal_subg(b, c);
2610	normal_subg(a, c);
2611	gshiftleft(bits, b);
2612	normal_addg(c,b);
2613	gshiftleft(bits, b);
2614	normal_addg(a, b);
2615	gtog(b, x);
2616	pushg(3);
2617
2618	return;
2619}
2620
2621void
2622grammarmulg(
2623	giant			a,
2624	giant			b
2625)
2626/* b becomes a*b. */
2627{
2628	int 			i,j;
2629	unsigned int 	prod,carry=0;
2630	int 			asize = abs(a->sign), bsize = abs(b->sign);
2631	unsigned short 	*aptr,*bptr,*destptr;
2632	unsigned short	mult;
2633	giant scratch = popg();
2634
2635	for (i=0; i<asize+bsize; ++i)
2636	{
2637		scratch->n[i]=0;
2638	}
2639
2640	bptr = &(b->n[0]);
2641	for (i=0; i<bsize; ++i)
2642	{
2643		mult = *(bptr++);
2644		if (mult)
2645		{
2646			carry = 0;
2647			aptr = &(a->n[0]);
2648			destptr = &(scratch->n[i]);
2649			for (j=0; j<asize; ++j)
2650			{
2651				prod = *(aptr++) * mult + *destptr + carry;
2652				*(destptr++) = (unsigned short)(prod & 0xffff);
2653				carry = prod >> 16;
2654			}
2655			*destptr = (unsigned short)carry;
2656		}
2657	}
2658	bsize+=asize;
2659	if (!carry)
2660		--bsize;
2661	scratch->sign = gsign(a)*gsign(b)*bsize;
2662	gtog(scratch,b);
2663	pushg(1);
2664}
2665
2666
2667void
2668grammarsquareg (
2669	giant a
2670)
2671/* a := a^2. */
2672{
2673	unsigned int	cur_term;
2674	unsigned int	prod, carry=0, temp;
2675	int	asize = abs(a->sign), max = asize * 2 - 1;
2676	unsigned short	*ptr = a->n, *ptr1, *ptr2;
2677	giant scratch;
2678
2679	if(asize == 0) {
2680		itog(0,a);
2681		return;
2682	}
2683
2684	scratch = popg();
2685
2686	asize--;
2687
2688	temp = *ptr;
2689	temp *= temp;
2690	scratch->n[0] = temp;
2691	carry = temp >> 16;
2692
2693	for (cur_term = 1; cur_term < max; cur_term++) {
2694		ptr1 = ptr2 = ptr;
2695		if (cur_term <= asize) {
2696			ptr2 += cur_term;
2697		} else {
2698			ptr1 += cur_term - asize;
2699			ptr2 += asize;
2700		}
2701		prod = carry & 0xFFFF;
2702		carry >>= 16;
2703		while(ptr1 < ptr2) {
2704				temp = *ptr1++ * *ptr2--;
2705				prod += (temp << 1) & 0xFFFF;
2706				carry += (temp >> 15);
2707		}
2708		if (ptr1 == ptr2) {
2709				temp = *ptr1;
2710				temp *= temp;
2711				prod += temp & 0xFFFF;
2712				carry += (temp >> 16);
2713		}
2714		carry += prod >> 16;
2715		scratch->n[cur_term] = (unsigned short) (prod);
2716	}
2717	if (carry) {
2718		scratch->n[cur_term] = carry;
2719		scratch->sign = cur_term+1;
2720	} else scratch->sign = cur_term;
2721
2722	gtog(scratch,a);
2723	pushg(1);
2724}
2725
2726
2727/**************************************************************
2728 *
2729 * FFT multiply Functions
2730 *
2731 **************************************************************/
2732
2733int 		initL = 0;
2734
2735int
2736lpt(
2737	int	  			n,
2738	int				*lambda
2739)
2740/* Returns least power of two greater than n. */
2741{
2742	register int	i = 1;
2743
2744	*lambda = 0;
2745	while (i<n)
2746	{
2747		i<<=1;
2748		++(*lambda);
2749	}
2750	return(i);
2751}
2752
2753
2754void
2755addsignal(
2756	giant 				x,
2757	double 				*z,
2758	int 				n
2759)
2760{
2761	register int 		j, k, m, car, last;
2762	register double 	f, g,err;
2763
2764	maxFFTerror = 0;
2765    last = 0;
2766	for (j=0;j<n;j++)
2767	{
2768		f = gfloor(z[j]+0.5);
2769        if(f != 0.0) last = j;
2770		if (checkFFTerror)
2771		{
2772			err = fabs(f - z[j]);
2773			if (err > maxFFTerror)
2774				maxFFTerror = err;
2775		}
2776		z[j] =0;
2777		k = 0;
2778		do
2779		{
2780			g = gfloor(f*TWOM16);
2781			z[j+k] += f-g*TWO16;
2782			++k;
2783			f=g;
2784		} while(f != 0.0);
2785	}
2786	car = 0;
2787	for(j=0;j < last + 1;j++)
2788	{
2789		m = (int)(z[j]+car);
2790		x->n[j] = (unsigned short)(m & 0xffff);
2791		car = (m>>16);
2792	}
2793	if (car)
2794		x->n[j] = (unsigned short)car;
2795	else
2796		--j;
2797
2798	while(!(x->n[j])) --j;
2799
2800	x->sign = j+1;
2801}
2802
2803
2804void
2805FFTsquareg(
2806	giant 			x
2807)
2808{
2809	int 			j,size = abs(x->sign);
2810	register int 	L;
2811
2812	if (size<4)
2813	{
2814		grammarmulg(x,x);
2815		return;
2816	}
2817	L = lpt(size+size, &j);
2818	if(!z) z = (double *)malloc(MAX_SHORTS * sizeof(double));
2819	giant_to_double(x, size, z, L);
2820	fft_real_to_hermitian(z, L);
2821	square_hermitian(z, L);
2822	fftinv_hermitian_to_real(z, L);
2823	addsignal(x,z,L);
2824	x->sign = abs(x->sign);
2825}
2826
2827
2828void
2829FFTmulg(
2830	giant			y,
2831	giant			x
2832)
2833{
2834	/* x becomes y*x. */
2835	int 			lambda, sizex = abs(x->sign), sizey = abs(y->sign);
2836	int 			finalsign = gsign(x)*gsign(y);
2837	register int	L;
2838
2839	if ((sizex<=4)||(sizey<=4))
2840	{
2841		grammarmulg(y,x);
2842		return;
2843	}
2844	L = lpt(sizex+sizey, &lambda);
2845	if(!z) z = (double *)malloc(MAX_SHORTS * sizeof(double));
2846	if(!z2) z2 = (double *)malloc(MAX_SHORTS * sizeof(double));
2847
2848	giant_to_double(x, sizex, z, L);
2849	giant_to_double(y, sizey, z2, L);
2850	fft_real_to_hermitian(z, L);
2851	fft_real_to_hermitian(z2, L);
2852	mul_hermitian(z2, z, L);
2853	fftinv_hermitian_to_real(z, L);
2854	addsignal(x,z,L);
2855	x->sign = finalsign*abs(x->sign);
2856}
2857
2858
2859void
2860scramble_real(
2861	double 			*x,
2862	int 			n
2863)
2864{
2865	register int 	i,j,k;
2866	register double	tmp;
2867
2868	for (i=0,j=0;i<n-1;i++)
2869	{
2870		if (i<j)
2871		{
2872			tmp = x[j];
2873			x[j]=x[i];
2874			x[i]=tmp;
2875		}
2876		k = n/2;
2877		while (k<=j)
2878		{
2879			j -= k;
2880			k>>=1;
2881		}
2882		j += k;
2883	}
2884}
2885
2886
2887void
2888fft_real_to_hermitian(
2889	double 			*z,
2890	int 			n
2891)
2892/* Output is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]).
2893 *	This is a decimation-in-time, split-radix algorithm.
2894 */
2895{
2896	register double	cc1, ss1, cc3, ss3;
2897	register int 	is, id, i0, i1, i2, i3, i4, i5, i6, i7, i8,
2898					a, a3, b, b3, nminus = n-1, dil, expand;
2899	register double *x, e;
2900	int 			nn = n>>1;
2901	double 			t1, t2, t3, t4, t5, t6;
2902	register int 	n2, n4, n8, i, j;
2903
2904	init_sinCos(n);
2905	expand = cur_run/n;
2906	scramble_real(z, n);
2907	x = z-1; /* FORTRAN compatibility. */
2908	is = 1;
2909	id = 4;
2910	do
2911	{
2912		for (i0=is;i0<=n;i0+=id)
2913		{
2914			i1 = i0+1;
2915			e = x[i0];
2916			x[i0] = e + x[i1];
2917			x[i1] = e - x[i1];
2918		}
2919		is = (id<<1)-1;
2920		id <<= 2;
2921	} while(is<n);
2922
2923	n2 = 2;
2924	while(nn>>=1)
2925	{
2926		n2 <<= 1;
2927		n4 = n2>>2;
2928		n8 = n2>>3;
2929		is = 0;
2930		id = n2<<1;
2931		do
2932		{
2933			for (i=is;i<n;i+=id)
2934			{
2935				i1 = i+1;
2936				i2 = i1 + n4;
2937				i3 = i2 + n4;
2938				i4 = i3 + n4;
2939				t1 = x[i4]+x[i3];
2940				x[i4] -= x[i3];
2941				x[i3] = x[i1] - t1;
2942				x[i1] += t1;
2943				if (n4==1)
2944					continue;
2945				i1 += n8;
2946				i2 += n8;
2947				i3 += n8;
2948				i4 += n8;
2949				t1 = (x[i3]+x[i4])*SQRTHALF;
2950				t2 = (x[i3]-x[i4])*SQRTHALF;
2951				x[i4] = x[i2] - t1;
2952				x[i3] = -x[i2] - t1;
2953				x[i2] = x[i1] - t2;
2954				x[i1] += t2;
2955			}
2956			is = (id<<1) - n2;
2957			id <<= 2;
2958		} while(is<n);
2959		dil = n/n2;
2960		a = dil;
2961		for (j=2;j<=n8;j++)
2962		{
2963			a3 = (a+(a<<1))&nminus;
2964			b = a*expand;
2965			b3 = a3*expand;
2966			cc1 = s_cos(b);
2967			ss1 = s_sin(b);
2968			cc3 = s_cos(b3);
2969			ss3 = s_sin(b3);
2970			a = (a+dil)&nminus;
2971			is = 0;
2972			id = n2<<1;
2973			do
2974			{
2975				for(i=is;i<n;i+=id)
2976				{
2977					i1 = i+j;
2978					i2 = i1 + n4;
2979					i3 = i2 + n4;
2980					i4 = i3 + n4;
2981					i5 = i + n4 - j + 2;
2982					i6 = i5 + n4;
2983					i7 = i6 + n4;
2984					i8 = i7 + n4;
2985					t1 = x[i3]*cc1 + x[i7]*ss1;
2986					t2 = x[i7]*cc1 - x[i3]*ss1;
2987					t3 = x[i4]*cc3 + x[i8]*ss3;
2988					t4 = x[i8]*cc3 - x[i4]*ss3;
2989					t5 = t1 + t3;
2990					t6 = t2 + t4;
2991					t3 = t1 - t3;
2992					t4 = t2 - t4;
2993					t2 = x[i6] + t6;
2994					x[i3] = t6 - x[i6];
2995					x[i8] = t2;
2996					t2 = x[i2] - t3;
2997					x[i7] = -x[i2] - t3;
2998					x[i4] = t2;
2999					t1 = x[i1] + t5;
3000					x[i6] = x[i1] - t5;
3001					x[i1] = t1;
3002					t1 = x[i5] + t4;
3003					x[i5] -= t4;
3004					x[i2] = t1;
3005				}
3006				is = (id<<1) - n2;
3007				id <<= 2;
3008			} while(is<n);
3009		}
3010	}
3011}
3012
3013
3014void
3015fftinv_hermitian_to_real(
3016	double 				*z,
3017	int 				n
3018)
3019/* Input is {Re(z^[0]),...,Re(z^[n/2),Im(z^[n/2-1]),...,Im(z^[1]).
3020 * This is a decimation-in-frequency, split-radix algorithm.
3021 */
3022{
3023	register double 	cc1, ss1, cc3, ss3;
3024	register int 		is, id, i0, i1, i2, i3, i4, i5, i6, i7, i8,
3025						a, a3, b, b3, nminus = n-1, dil, expand;
3026	register double 	*x, e;
3027	int 				nn = n>>1;
3028	double 				t1, t2, t3, t4, t5;
3029	int 				n2, n4, n8, i, j;
3030
3031	init_sinCos(n);
3032	expand = cur_run/n;
3033	x = z-1;
3034	n2 = n<<1;
3035	while(nn >>= 1)
3036	{
3037		is = 0;
3038		id = n2;
3039		n2 >>= 1;
3040		n4 = n2>>2;
3041		n8 = n4>>1;
3042		do
3043		{
3044			for(i=is;i<n;i+=id)
3045			{
3046				i1 = i+1;
3047				i2 = i1 + n4;
3048				i3 = i2 + n4;
3049				i4 = i3 + n4;
3050				t1 = x[i1] - x[i3];
3051				x[i1] += x[i3];
3052				x[i2] += x[i2];
3053				x[i3] = t1 - 2.0*x[i4];
3054				x[i4] = t1 + 2.0*x[i4];
3055				if (n4==1)
3056					continue;
3057				i1 += n8;
3058				i2 += n8;
3059				i3 += n8;
3060				i4 += n8;
3061				t1 = (x[i2]-x[i1])*SQRTHALF;
3062				t2 = (x[i4]+x[i3])*SQRTHALF;
3063				x[i1] += x[i2];
3064				x[i2] = x[i4]-x[i3];
3065				x[i3] = -2.0*(t2+t1);
3066				x[i4] = 2.0*(t1-t2);
3067			}
3068			is = (id<<1) - n2;
3069			id <<= 2;
3070		} while (is<n-1);
3071		dil = n/n2;
3072		a = dil;
3073		for (j=2;j<=n8;j++)
3074		{
3075			a3 = (a+(a<<1))&nminus;
3076			b = a*expand;
3077			b3 = a3*expand;
3078			cc1 = s_cos(b);
3079			ss1 = s_sin(b);
3080			cc3 = s_cos(b3);
3081			ss3 = s_sin(b3);
3082			a = (a+dil)&nminus;
3083			is = 0;
3084			id = n2<<1;
3085			do
3086			{
3087				for(i=is;i<n;i+=id)
3088				{
3089					i1 = i+j;
3090					i2 = i1+n4;
3091					i3 = i2+n4;
3092					i4 = i3+n4;
3093					i5 = i+n4-j+2;
3094					i6 = i5+n4;
3095					i7 = i6+n4;
3096					i8 = i7+n4;
3097					t1 = x[i1] - x[i6];
3098					x[i1] += x[i6];
3099					t2 = x[i5] - x[i2];
3100					x[i5] += x[i2];
3101					t3 = x[i8] + x[i3];
3102					x[i6] = x[i8] - x[i3];
3103					t4 = x[i4] + x[i7];
3104					x[i2] = x[i4] - x[i7];
3105					t5 = t1 - t4;
3106					t1 += t4;
3107					t4 = t2 - t3;
3108					t2 += t3;
3109					x[i3] = t5*cc1 + t4*ss1;
3110					x[i7] = -t4*cc1 + t5*ss1;
3111					x[i4] = t1*cc3 - t2*ss3;
3112					x[i8] = t2*cc3 + t1*ss3;
3113				}
3114				is = (id<<1) - n2;
3115				id <<= 2;
3116			} while(is<n-1);
3117		}
3118	}
3119	is = 1;
3120	id = 4;
3121	do
3122	{
3123		for (i0=is;i0<=n;i0+=id)
3124		{
3125			i1 = i0+1;
3126			e = x[i0];
3127			x[i0] = e + x[i1];
3128			x[i1] = e - x[i1];
3129		}
3130		is = (id<<1) - 1;
3131		id <<= 2;
3132	} while(is<n);
3133	scramble_real(z, n);
3134	e = 1/(double)n;
3135	for (i=0;i<n;i++)
3136	{
3137		z[i] *= e;
3138	}
3139}
3140
3141
3142void
3143mul_hermitian(
3144	double 				*a,
3145	double 				*b,
3146	int 				n
3147)
3148{
3149	register int 		k, half = n>>1;
3150	register double 	aa, bb, am, bm;
3151
3152	b[0] *= a[0];
3153	b[half] *= a[half];
3154	for (k=1;k<half;k++)
3155	{
3156		aa = a[k];
3157		bb = b[k];
3158		am = a[n-k];
3159		bm = b[n-k];
3160		b[k] = aa*bb - am*bm;
3161		b[n-k] = aa*bm + am*bb;
3162	}
3163}
3164
3165
3166void
3167square_hermitian(
3168	double 				*b,
3169	int 				n
3170)
3171{
3172	register int 		k, half = n>>1;
3173	register double 	c, d;
3174
3175	b[0] *= b[0];
3176	b[half] *= b[half];
3177	for (k=1;k<half;k++)
3178	{
3179		c = b[k];
3180		d = b[n-k];
3181		b[n-k] = 2.0*c*d;
3182		b[k] = (c+d)*(c-d);
3183	}
3184}
3185
3186
3187void
3188giant_to_double
3189(
3190	giant 			x,
3191	int 			sizex,
3192	double 			*z,
3193	int 			L
3194)
3195{
3196	register int 	j;
3197
3198	for (j=sizex;j<L;j++)
3199	{
3200		z[j]=0.0;
3201	}
3202	for (j=0;j<sizex;j++)
3203	{
3204		 z[j] = x->n[j];
3205	}
3206}
3207
3208
3209void
3210gswap(
3211	giant 	*p,
3212	giant 	*q
3213)
3214{
3215	giant 	t;
3216
3217	t = *p;
3218	*p = *q;
3219	*q = t;
3220}
3221
3222
3223void
3224onestep(
3225	giant 		x,
3226	giant 		y,
3227	gmatrix 	A
3228)
3229/* Do one step of the euclidean algorithm and modify
3230 * the matrix A accordingly. */
3231{
3232	giant s4 = popg();
3233
3234	gtog(x,s4);
3235	gtog(y,x);
3236	gtog(s4,y);
3237	divg(x,s4);
3238	punch(s4,A);
3239	mulg(x,s4);
3240	subg(s4,y);
3241
3242	pushg(1);
3243}
3244
3245
3246void
3247mulvM(
3248	gmatrix 	A,
3249	giant 		x,
3250	giant 		y
3251)
3252/* Multiply vector by Matrix; changes x,y. */
3253{
3254	giant s0 = popg(), s1 = popg();
3255
3256	gtog(A->ur,s0);
3257	gtog( A->lr,s1);
3258	dotproduct(x,y,A->ul,s0);
3259	dotproduct(x,y,A->ll,s1);
3260	gtog(s0,x);
3261	gtog(s1,y);
3262
3263	pushg(2);
3264}
3265
3266
3267void
3268mulmM(
3269	gmatrix 	A,
3270	gmatrix 	B
3271)
3272/* Multiply matrix by Matrix; changes second matrix. */
3273{
3274	giant s0 = popg();
3275	giant s1 = popg();
3276	giant s2 = popg();
3277	giant s3 = popg();
3278
3279	gtog(B->ul,s0);
3280	gtog(B->ur,s1);
3281	gtog(B->ll,s2);
3282	gtog(B->lr,s3);
3283	dotproduct(A->ur,A->ul,B->ll,s0);
3284	dotproduct(A->ur,A->ul,B->lr,s1);
3285	dotproduct(A->ll,A->lr,B->ul,s2);
3286	dotproduct(A->ll,A->lr,B->ur,s3);
3287	gtog(s0,B->ul);
3288	gtog(s1,B->ur);
3289	gtog(s2,B->ll);
3290	gtog(s3,B->lr);
3291
3292	pushg(4);
3293}
3294
3295
3296void
3297writeM(
3298	gmatrix 	A
3299)
3300{
3301	printf("    ul:");
3302	gout(A->ul);
3303	printf("    ur:");
3304	gout(A->ur);
3305	printf("    ll:");
3306	gout(A->ll);
3307	printf("    lr:");
3308	gout(A->lr);
3309}
3310
3311
3312void
3313punch(
3314	giant 		q,
3315	gmatrix 	A
3316)
3317/* Multiply the matrix A on the left by [0,1,1,-q]. */
3318{
3319	giant s0 = popg();
3320
3321	gtog(A->ll,s0);
3322	mulg(q,A->ll);
3323	gswap(&A->ul,&A->ll);
3324	subg(A->ul,A->ll);
3325	gtog(s0,A->ul);
3326	gtog(A->lr,s0);
3327	mulg(q,A->lr);
3328	gswap(&A->ur,&A->lr);
3329	subg(A->ur,A->lr);
3330	gtog(s0,A->ur);
3331
3332	pushg(1);
3333}
3334
3335
3336static void
3337dotproduct(
3338	giant 	a,
3339	giant 	b,
3340	giant 	c,
3341	giant 	d
3342)
3343/* Replace last argument with the dot product of two 2-vectors. */
3344{
3345	giant s4 = popg();
3346
3347	gtog(c,s4);
3348	mulg(a, s4);
3349	mulg(b,d);
3350	addg(s4,d);
3351
3352	pushg(1);
3353}
3354
3355
3356void
3357ggcd(
3358	giant 	xx,
3359	giant 	yy
3360)
3361/* A giant gcd.  Modifies its arguments. */
3362{
3363	giant 	x = popg(), y = popg();
3364	gmatrix 	A = newgmatrix();
3365
3366	gtog(xx,x); gtog(yy,y);
3367	for(;;)
3368	{
3369		fix(&x,&y);
3370		if (bitlen(y) <= GCDLIMIT )
3371			break;
3372		A->ul = popg();
3373		A->ur = popg();
3374		A->ll = popg();
3375		A->lr = popg();
3376		itog(1,A->ul);
3377		itog(0,A->ur);
3378		itog(0,A->ll);
3379		itog(1,A->lr);
3380		hgcd(0,x,y,A);
3381		mulvM(A,x,y);
3382		pushg(4);
3383		fix(&x,&y);
3384		if (bitlen(y) <= GCDLIMIT )
3385			break;
3386		modg(y,x);
3387		gswap(&x,&y);
3388	}
3389	bgcdg(x,y);
3390	gtog(y,yy);
3391	pushg(2);
3392	free(A);
3393}
3394
3395
3396void
3397fix(
3398	giant 	*p,
3399	giant 	*q
3400)
3401/* Insure that x > y >= 0. */
3402{
3403	if( gsign(*p) < 0 )
3404		negg(*p);
3405	if( gsign(*q) < 0 )
3406		negg(*q);
3407	if( gcompg(*p,*q) < 0 )
3408		gswap(p,q);
3409}
3410
3411
3412void
3413hgcd(
3414	int 		n,
3415	giant	 	xx,
3416	giant 		yy,
3417	gmatrix 	A
3418)
3419/* hgcd(n,x,y,A) chops n bits off x and y and computes th
3420 * 2 by 2 matrix A such that A[x y] is the pair of terms
3421 * in the remainder sequence starting with x,y that is
3422 * half the size of x. Note that the argument A is modified
3423 * but that the arguments xx and yy are left unchanged.
3424 */
3425{
3426	giant 		x, y;
3427
3428	if (isZero(yy))
3429		return;
3430
3431	x = popg();
3432	y = popg();
3433	gtog(xx,x);
3434	gtog(yy,y);
3435	gshiftright(n,x);
3436	gshiftright(n,y);
3437	if (bitlen(x) <= INTLIMIT )
3438	{
3439		shgcd(gtoi(x),gtoi(y),A);
3440	}
3441	else
3442	{
3443		gmatrix 	B = newgmatrix();
3444		int 		m = bitlen(x)/2;
3445
3446		hgcd(m,x,y,A);
3447		mulvM(A,x,y);
3448		if (gsign(x) < 0)
3449		{
3450			negg(x); negg(A->ul); negg(A->ur);
3451		}
3452		if (gsign(y) < 0)
3453		{
3454			negg(y); negg(A->ll); negg(A->lr);
3455		}
3456		if (gcompg(x,y) < 0)
3457		{
3458			gswap(&x,&y);
3459			gswap(&A->ul,&A->ll);
3460			gswap(&A->ur,&A->lr);
3461		}
3462		if (!isZero(y))
3463		{
3464			onestep(x,y,A);
3465			m /= 2;
3466			B->ul = popg();
3467			B->ur = popg();
3468			B->ll = popg();
3469			B->lr = popg();
3470			itog(1,B->ul);
3471			itog(0,B->ur);
3472			itog(0,B->ll);
3473			itog(1,B->lr);
3474			hgcd(m,x,y,B);
3475			mulmM(B,A);
3476			pushg(4);
3477		}
3478		free(B);
3479	}
3480	pushg(2);
3481}
3482
3483
3484void
3485shgcd(
3486	register int	x,
3487	register int 	y,
3488	gmatrix 		A
3489)
3490/*
3491 * Do a half gcd on the integers a and b, putting the result in A
3492 * It is fairly easy to use the 2 by 2 matrix description of the
3493 * extended Euclidean algorithm to prove that the quantity q*t
3494 * never overflows.
3495 */
3496{
3497	register int 	q, t, start = x;
3498	int 			Aul = 1, Aur = 0, All = 0, Alr = 1;
3499
3500	while	(y != 0 && y > start/y)
3501	{
3502		q = x/y;
3503		t = y;
3504		y = x%y;
3505		x = t;
3506		t = All;
3507		All = Aul-q*t;
3508		Aul = t;
3509		t = Alr;
3510		Alr = Aur-q*t;
3511		Aur = t;
3512	}
3513	itog(Aul,A->ul);
3514	itog(Aur,A->ur);
3515	itog(All,A->ll);
3516	itog(Alr,A->lr);
3517}
3518