1/* schoof.c
2
3   Elliptic curve order calculator, for
4
5   y^2 = x^3 + a x + b (mod p)
6
7   Compile with:
8
9   % cc -O schoof.c giants.c tools.c ellproj.c -lm -o schoof
10
11   and run, entering p and the a,b parameters.
12   Eventually the curve order o is reported, together with
13   the twist order o'.  The sum of these two orders is always
14   2p + 2.  A final check is performed to verify that a
15   random point (x,y,z) enjoys the annihilation
16   o * (x,y,z) = point-at-infinity.  This is not absolutely
17   definitive, rather it is a necessary condition on the oder o
18   (i.e. it is a sanity check of sorts).
19
20 * Change history:
21
22     18 Nov 99   REC  fixed M. Scott bug (MAX_DIGS bumped by 2)
23      5 Jul 99   REC  Installed improved (base-4) power ladder
24      5 Jul 99   REC  Made use of binary segmentation square (per se)
25      5 Jul 99   REC  Improved memory usage
26      2 May 99   REC  Added initial primality check
27     30 Apr 99   REC  Added actual order-annihilation test
28     29 Apr 99   REC  Improvements due to A. Powell
29      2 Feb 99   REC  Added explicit CRT result
30     12 Jan 99   REC  Removed (hopefully) last of memory crashes
31     20 Jan 98   REC  Creation
32
33 *	c. 1998 Perfectly Scientific, Inc.
34 *	All Rights Reserved.
35 *
36 *
37 *************************************************************/
38
39#include <stdio.h>
40#include<assert.h>
41#include <math.h>
42#include <stdlib.h>
43#include "giants.h"
44#include "tools.h"
45#include "ellproj.h"
46
47#define P_BREAK 32
48
49#ifdef _WIN32
50#include <string.h>
51#define bzero(D, n) memset(D, 0x00, n)
52#define bcopy(S, D, n) memcpy(D, S, n)
53#endif
54
55#define Q_MAX 256       /* Bits in largest primes handled. */
56#define L_CEIL 100       /* Bound on Schoof L values (not all needed in general). */
57
58
59typedef struct
60         {
61         int deg;
62         giant *coe;
63         } polystruct;
64typedef polystruct *poly;
65
66extern int *pr;
67
68static int Q, L_MAX;
69static int MAX_DIGS, MAX_COEFFS;
70
71static giant *mcand, coe, tmp, err, aux, aux2, globx, globy, t1, t2,
72	t3, t4, t5;
73static poly qscratch, rscratch, sscratch, tscratch, pbuff,
74     pbuff2, precip, cubic, powx, powy, kxn, kyn, kxd, kyd,
75     txn, txd, tyn, tyd, txn1, txd1, tyn1, tyd1,
76     nx, dx, ny, dy, mn, md, tmp1, tmp2, tmp3, tmp4;
77static poly s[L_CEIL+2], smonic;
78static giant p, a, b;
79static int L;
80
81void quickmodg(giant g, giant x)
82{       int sgn = x->sign;
83
84        if(sgn == 0) return;
85        if(sgn > 0) {
86                if(gcompg(x, g) >= 0) subg(g, x);
87                return;
88        }
89        addg(g,x);
90        return;
91}
92
93int
94log_2(int n) {
95        int c = 1, d = -1;
96        while(c <= n) {
97                c <<= 1;
98                ++d;
99        }
100        return(d);
101}
102
103void
104justifyp(poly x) {
105        int j;
106        for(j = x->deg; j >= 0; j--) {
107                if(!is0(x->coe[j])) break;
108        }
109        x->deg = (j>0)? j : 0;
110}
111
112void
113polyrem(poly x) {
114        int j;
115   for(j=0; j <= x->deg; j++) {
116      modg(p, x->coe[j]);
117   }
118   justifyp(x);
119}
120
121
122giant *
123newa(int n) {
124        giant *p = (giant *)malloc(n*sizeof(giant));
125        int j;
126        for(j=0; j<n; j++) {
127                p[j] = newgiant(MAX_DIGS);
128        }
129        return(p);
130}
131
132poly
133newpoly(int coeffs) {
134        poly pol;
135        pol = (poly) malloc(sizeof(polystruct));
136        pol->coe = (giant *)newa(coeffs);
137        return(pol);
138}
139
140void
141init_all() {
142   int j;
143
144   j = (2*Q + log_2(MAX_COEFFS) + 32 + 15)/16;
145   j = j * MAX_COEFFS;
146   globx = newgiant(j);
147   globy = newgiant(j);
148   s[0] = newpoly(MAX_COEFFS);
149
150   for(j=1; j<=L_MAX+1; j++) {
151                s[j] = newpoly(j*(j+1));
152   }
153   smonic = newpoly(MAX_COEFFS);
154   powx = newpoly(MAX_COEFFS);
155   powy = newpoly(MAX_COEFFS);
156   kxn = newpoly(MAX_COEFFS);
157   kxd = newpoly(MAX_COEFFS);
158   kyn = newpoly(MAX_COEFFS);
159   kyd = newpoly(MAX_COEFFS);
160   txn = newpoly(MAX_COEFFS);
161   txd = newpoly(MAX_COEFFS);
162   tyn = newpoly(MAX_COEFFS);
163   tyd = newpoly(MAX_COEFFS);
164   txn1 = newpoly(MAX_COEFFS);
165   txd1 = newpoly(MAX_COEFFS);
166   tyn1 = newpoly(MAX_COEFFS);
167   tyd1 = newpoly(MAX_COEFFS);
168   nx = newpoly(MAX_COEFFS);
169   ny = newpoly(MAX_COEFFS);
170   dx = newpoly(MAX_COEFFS);
171   dy = newpoly(MAX_COEFFS);
172   mn = newpoly(MAX_COEFFS);
173   md = newpoly(MAX_COEFFS);
174   tmp1 = newpoly(MAX_COEFFS);
175   tmp2 = newpoly(MAX_COEFFS);
176   tmp3 = newpoly(MAX_COEFFS);
177   tmp4 = newpoly(MAX_COEFFS);
178   mcand = (giant *)newa(MAX_COEFFS);
179/* The next three need extra space for remaindering routines. */
180        qscratch = newpoly(2*MAX_COEFFS);
181        rscratch = newpoly(2*MAX_COEFFS);
182        pbuff = newpoly(2*MAX_COEFFS);
183        pbuff2 = newpoly(MAX_COEFFS);
184        sscratch = newpoly(MAX_COEFFS);
185        tscratch = newpoly(MAX_COEFFS);
186        tmp = newgiant(MAX_DIGS);
187        err = newgiant(MAX_DIGS);
188        coe = newgiant(MAX_DIGS);
189        aux = newgiant(MAX_DIGS);
190        aux2 = newgiant(MAX_DIGS);
191        t3 = newgiant(MAX_DIGS);
192        t4 = newgiant(MAX_DIGS);
193        t5 = newgiant(MAX_DIGS);
194   		cubic = newpoly(4);
195        precip = newpoly(MAX_COEFFS);
196}
197
198void
199atoa(giant *a, giant *b, int n) {
200        int j;
201        for(j=0; j<n; j++) gtog(a[j], b[j]);
202}
203
204void
205ptop(poly x, poly y)
206/* y := x. */
207{
208        y->deg = x->deg;
209        atoa(x->coe, y->coe, y->deg+1);
210}
211
212void
213negp(poly y)
214/* y := -y. */
215{       int j;
216        for(j=0; j <= y->deg; j++) {
217                negg(y->coe[j]);
218                quickmodg(p, y->coe[j]);
219        }
220}
221
222int
223iszer(giant a) {
224
225  if(a->sign == 0) return(1);
226  return(0);
227
228}
229
230int
231iszerop(poly x) {
232   if(x->deg == 0 && iszer(x->coe[0])) return 1;
233   return 0;
234}
235
236int
237is0(giant a) {
238        return(iszer(a));
239}
240
241int
242is1(giant a) {
243        return(isone(a));
244}
245
246
247void
248addp(poly x, poly y)
249/* y += x. */
250{
251        int d = x->deg, j;
252
253        if(y->deg > d) d = y->deg;
254        for(j = 0; j <= d; j++) {
255                if((j <= x->deg) && (j <= y->deg)) {
256                        addg(x->coe[j], y->coe[j]);
257                        quickmodg(p, y->coe[j]);
258                        continue;
259                }
260                if((j <= x->deg) && (j > y->deg)) {
261                        gtog(x->coe[j], y->coe[j]);
262                        quickmodg(p, y->coe[j]);
263                        continue;
264                }
265        }
266        y->deg = d;
267        justifyp(y);
268}
269
270void
271subp(poly x, poly y)
272/* y -= x. */
273{
274        int d = x->deg, j;
275
276        if(y->deg > d) d = y->deg;
277        for(j = 0; j <= d; j++) {
278                if((j <= x->deg) && (j <= y->deg)) {
279                        subg(x->coe[j], y->coe[j]);
280                        quickmodg(p, y->coe[j]);
281                        continue;
282                }
283                if((j <= x->deg) && (j > y->deg)) {
284                        gtog(x->coe[j], y->coe[j]);
285                        negg(y->coe[j]);
286                        quickmodg(p, y->coe[j]);
287                        continue;
288                }
289        }
290        y->deg = d;
291        justifyp(y);
292}
293
294
295void
296grammarmulp(poly a, poly b)
297/* b *= a. */
298{
299        int dega = a->deg, degb = b->deg, deg = dega + degb;
300        register int d, kk, first, diffa;
301
302        for(d=deg; d>=0; d--) {
303                diffa = d-dega;
304                itog(0, coe);
305                for(kk=0; kk<=d; kk++) {
306                        if((kk>degb)||(kk<diffa)) continue;
307                        gtog(b->coe[kk], tmp);
308                        mulg(a->coe[d-kk], tmp);
309                        modg(p, tmp);
310                        addg(tmp, coe);
311                        quickmodg(p, coe);
312                }
313                gtog(coe, mcand[d]);
314        }
315        atoa(mcand, b->coe, deg+1);
316        b->deg = deg;
317        justifyp(b);
318}
319
320void
321atop(giant *a, poly x, int deg)
322/* Copy array to poly, with monic option. */
323{
324        int adeg = abs(deg);
325        x->deg = adeg;
326        atoa(a, x->coe, adeg);
327        if(deg < 0) {
328           itog(1, x->coe[adeg]);
329        } else {
330           gtog(a[adeg], x->coe[adeg]);
331        }
332}
333
334void
335just(giant g) {
336   while((g->n[g->sign-1] == 0) && (g->sign > 0)) --g->sign;
337}
338
339void
340unstuff_partial(giant g, poly y, int words){
341        int j;
342        for(j=0; j < y->deg; j++) {
343                bcopy((g->n) + j*words, y->coe[j]->n, words*sizeof(short));
344      y->coe[j]->sign = words;
345      just(y->coe[j]);
346   }
347}
348
349void
350stuff(poly x, giant g, int words) {
351        int deg = x->deg, j, coedigs;
352
353   g->sign = words*(1 + deg);
354   for(j=0; j <= deg; j++) {
355                coedigs = (x->coe[j])->sign;
356                bcopy(x->coe[j]->n, (g->n) + j*words, coedigs*sizeof(short));
357      bzero((g->n) + (j*words+coedigs),
358                                sizeof(short)*(words-coedigs));
359        }
360   just(g);
361}
362
363int maxwords = 0;
364void
365
366binarysegmul(poly x, poly y) {
367        int bits = bitlen(p), xwords, ywords, words;
368
369        xwords = (2*bits + log_2(x->deg+1) + 32 + 15)/16;
370   ywords = (2*bits + log_2(y->deg+1) + 32 + 15)/16;
371   if(xwords > ywords) words = xwords; else words = ywords;
372   stuff(x, globx, words);
373   stuff(y, globy, words);
374   mulg(globx, globy);
375   gtog(y->coe[y->deg], globx);  /* Save high coeff. */
376   y->deg += x->deg;
377   gtog(globx, y->coe[y->deg]);  /* Move high coeff. */
378   unstuff_partial(globy, y, words);
379   mulg(x->coe[x->deg], y->coe[y->deg]); /* resolve high coeff. */
380   justifyp(y);
381}
382
383binarysegsquare(poly y) {
384        int bits = bitlen(p), words;
385      words = (2*bits + log_2(y->deg+1) + 32 + 15)/16;
386   stuff(y, globy, words);
387   squareg(globy);
388   gtog(y->coe[y->deg], globx);  /* Save high coeff. */
389   y->deg += y->deg;
390   gtog(globx, y->coe[y->deg]);  /* Move high coeff. */
391   unstuff_partial(globy, y, words);
392   mulg(y->coe[y->deg], y->coe[y->deg]); /* resolve high coeff. */
393   justifyp(y);
394}
395
396void
397assess(poly x, poly y){
398        int max = 0, j;
399   for(j=0; j <= x->deg; j++) {
400                if(bitlen(x->coe[j]) > max) max = bitlen(x->coe[j]);
401   }
402   max = 0;
403   for(j=0; j <= y->deg; j++) {
404                if(bitlen(y->coe[j]) > max) max = bitlen(y->coe[j]);
405   }
406}
407
408int
409pcompp(poly x, poly y) {
410    int j;
411    if(x->deg != y->deg) return 1;
412    for(j=0; j <= x->deg; j++) {
413                        if(gcompg(x->coe[j], y->coe[j])) return 1;
414    }
415    return 0;
416}
417
418/*
419int max_deg = 0;
420*/
421
422void
423mulp(poly x, poly y)
424/*  y *= x. */
425{
426        int n, degx = x->deg, degy = y->deg;
427
428/*
429if(degx > max_deg) {
430	max_deg = degx; printf("xdeg: %d\n", degx);
431}
432
433if(degy > max_deg) {
434	max_deg = degy; printf("ydeg: %d\n", degy);
435}
436*/
437        if((degx < P_BREAK) || (degy < P_BREAK)) {
438                grammarmulp(x,y);
439                justifyp(y);
440                return;
441        }
442   if(x==y) binarysegsquare(y);
443   	else binarysegmul(x, y);
444}
445
446void
447revp(poly x)
448/* Reverse the coefficients of x. */
449{       int j, deg = x->deg;
450
451        for(j=0; j <= deg/2; j++) {
452                gtog(x->coe[deg-j], tmp);
453                gtog(x->coe[j], x->coe[deg-j]);
454                gtog(tmp, x->coe[j]);
455        }
456   justifyp(x);
457}
458
459void
460recipp(poly f, int deg)
461/* f := 1/f, via newton method.  */
462{
463        int lim = deg + 1, prec = 1;
464
465        sscratch->deg = 0; itog(1, sscratch->coe[0]);
466        itog(1, aux);
467        while(prec < lim) {
468                prec <<= 1;
469                if(prec > lim) prec = lim;
470                f->deg = prec-1;
471                ptop(sscratch, tscratch);
472                mulp(f, tscratch);
473				tscratch->deg = prec-1;
474				polyrem(tscratch);
475                subg(aux, tscratch->coe[0]);
476                quickmodg(p, tscratch->coe[0]);
477                mulp(sscratch, tscratch);
478				tscratch->deg = prec-1;
479				polyrem(tscratch);
480                subp(tscratch, sscratch);
481                sscratch->deg = prec-1;
482				polyrem(sscratch);
483        }
484        justifyp(sscratch);
485        ptop(sscratch, f);
486}
487
488int
489left_justifyp(poly x, int start)
490/* Left-justify the polynomial, checking from position "start." */
491{
492        int j, shift = 0;
493
494        for(j = start; j <= x->deg; j++) {
495                if(!is0(x->coe[j])) {
496                     shift = start;
497                     break;
498                }
499        }
500        x->deg -= shift;
501        for(j=0; j<= x->deg; j++) {
502                gtog(x->coe[j+shift], x->coe[j]);
503        }
504        return(shift);
505}
506
507void
508remp(poly x, poly y, int mode)
509/* y := x (mod y).
510  mode = 0 is normal operation,
511       = 1 jams a fixed reciprocal,
512       = 2 uses the fixed reciprocal.
513 */
514{
515        int degx = x->deg, degy = y->deg, d, shift;
516
517        if(degy == 0) {
518                y->deg = 0;
519                itog(0, y->coe[0]);
520                return;
521        }
522        d = degx - degy;
523        if(d < 0) {
524                ptop(x, y);
525                return;
526        }
527        revp(x); revp(y);
528        ptop(y, rscratch);
529        switch(mode) {
530           case 0: recipp(rscratch, d);
531                   break;
532      case 1: recipp(rscratch, degy); /* degy -1. */
533                   ptop(rscratch, precip);
534                   rscratch->deg = d; justifyp(rscratch);
535                   break;
536           case 2: ptop(precip, rscratch);
537                   rscratch->deg = d; justifyp(rscratch);
538                   break;
539        }
540/* Next, a limited-precision multiply. */
541        if(d < degx) { x->deg = d; justifyp(x);}
542        mulp(x, rscratch);
543        rscratch->deg = d;
544		polyrem(rscratch);
545		justifyp(rscratch);
546        x->deg = degx; justifyp(x);
547        mulp(rscratch, y);
548        subp(x, y);
549        negp(y); polyrem(y);
550        shift = left_justifyp(y, d+1);
551   for(d = y->deg+1; d <= degx-shift; d++) itog(0, y->coe[d]);
552        y->deg = degx - shift;
553        revp(y);
554        revp(x);
555}
556
557fullmod(poly x) {
558   polyrem(x);
559   ptop(smonic, s[0]);
560   remp(x, s[0], 2);
561   ptop(s[0], x);
562   polyrem(x);
563}
564
565scalarmul(giant s, poly x) {
566        int j;
567   for(j=0; j <= x->deg; j++) {
568                mulg(s, x->coe[j]);
569      modg(p, x->coe[j]);
570   }
571}
572
573
574schain(int el) {
575   int j;
576
577        s[0]->deg = 0;
578   itog(0, s[0]->coe[0]);
579
580        s[1]->deg = 0;
581   itog(1, s[1]->coe[0]);
582        s[2]->deg = 0;
583   itog(2, s[2]->coe[0]);
584
585   s[3]->deg = 4;
586   gtog(a, aux); mulg(a, aux); negg(aux);
587   gtog(aux, s[3]->coe[0]);
588   gtog(b, aux); smulg(12, aux);
589   gtog(aux, s[3]->coe[1]);
590   gtog(a, aux); smulg(6, aux);
591   gtog(aux, s[3]->coe[2]);
592   itog(0, s[3]->coe[3]);
593   itog(3, s[3]->coe[4]);
594
595   s[4]->deg = 6;
596   gtog(a, aux); mulg(a, aux); mulg(a, aux);
597   gtog(b, tmp); mulg(b, tmp); smulg(8, tmp); addg(tmp, aux);
598   negg(aux);
599   gtog(aux, s[4]->coe[0]);
600   gtog(b, aux); mulg(a, aux); smulg(4, aux); negg(aux);
601   gtog(aux, s[4]->coe[1]);
602   gtog(a, aux); mulg(a, aux); smulg(5, aux); negg(aux);
603   gtog(aux, s[4]->coe[2]);
604   gtog(b, aux); smulg(20, aux);
605   gtog(aux, s[4]->coe[3]);
606   gtog(a, aux); smulg(5, aux);
607   gtog(aux, s[4]->coe[4]);
608   itog(0, s[4]->coe[5]);
609   itog(1, s[4]->coe[6]);
610   itog(4, aux);
611   scalarmul(aux, s[4]);
612   cubic->deg = 3;
613   itog(1, cubic->coe[3]);
614   itog(0, cubic->coe[2]);
615   gtog(a, cubic->coe[1]);
616   gtog(b, cubic->coe[0]);
617   for(j=5; j <= el; j++) {
618        if(j % 2 == 0) {
619                                ptop(s[j/2 + 2], s[j]); mulp(s[j/2-1], s[j]);
620            polyrem(s[j]); mulp(s[j/2-1], s[j]); polyrem(s[j]);
621            ptop(s[j/2-2], s[0]); mulp(s[j/2+1], s[0]); polyrem(s[0]);
622            mulp(s[j/2+1], s[0]); polyrem(s[0]);
623            subp(s[0], s[j]); mulp(s[j/2], s[j]); polyrem(s[j]);
624                           gtog(p, aux); iaddg(1, aux); gshiftright(1, aux);
625                                scalarmul(aux, s[j]);
626        } else {
627            ptop(s[(j-1)/2+2], s[j]);
628            mulp(s[(j-1)/2], s[j]);
629polyrem(s[j]);
630            mulp(s[(j-1)/2], s[j]);
631polyrem(s[j]);
632            mulp(s[(j-1)/2], s[j]);
633polyrem(s[j]);
634            ptop(s[(j-1)/2-1], s[0]);
635            mulp(s[(j-1)/2 + 1], s[0]); polyrem(s[0]);
636            mulp(s[(j-1)/2 + 1], s[0]); polyrem(s[0]);
637            mulp(s[(j-1)/2 + 1], s[0]); polyrem(s[0]);
638            if(((j-1)/2) % 2 == 1) {
639                                        mulp(cubic, s[0]); polyrem(s[0]);
640                                        mulp(cubic, s[0]); polyrem(s[0]);
641            } else {
642                                        mulp(cubic, s[j]); polyrem(s[j]);
643                                        mulp(cubic, s[j]); polyrem(s[j]);
644            }
645// polyout(s[1]); polyout(s[3]); polyout(s[0]); polyout(s[j]);
646            subp(s[0], s[j]);
647            polyrem(s[j]);
648        }
649   }
650}
651
652init_recip(int el) {
653   int j;
654   ptop(s[el], smonic);
655   if(el == 2) {
656		mulp(cubic, smonic); polyrem(smonic);
657   }
658   gtog(smonic->coe[smonic->deg], aux); /* High coeff. */
659   binvg(p, aux);
660   scalarmul(aux, smonic);  /* s is now monic. */
661   s[0]->deg = smonic->deg + 1;
662   for(j=0; j <= s[0]->deg; j++) itog(1, s[0]->coe[j]);
663   ptop(smonic, pbuff);
664   remp(s[0], pbuff, 1);  /* Initialize reciprocal of s as precip. */
665}
666
667/* void powerpoly(poly x, giant n)
668{       int len, pos;
669        ptop(x, pbuff);
670        x->deg = 0; itog(1, x->coe[0]);
671        len = bitlen(n);
672        pos = 0;
673        while(1) {
674                if(bitval(n, pos++)) {
675                        mulp(pbuff, x);
676                        fullmod(x);
677                }
678                if(pos>=len) break;
679                mulp(pbuff, pbuff);
680                fullmod(pbuff);
681        }
682}
683*/
684
685void powerpoly(poly x, giant n)
686/* Base-4 window. */
687{       int pos, code;
688        ptop(x, pbuff);  /* x. */
689	ptop(pbuff, pbuff2);
690	mulmod(pbuff2, pbuff2); mulmod(pbuff, pbuff2); /* x^3. */
691        pos = bitlen(n)-2;
692        while(pos >= 0) {
693		mulmod(x, x);
694		if(pos==0) {
695			if(bitval(n, pos) != 0) {
696				mulmod(pbuff, x);
697			}
698			break;
699		}
700		code = (bitval(n, pos) != 0) * 2 + (bitval(n, pos-1) != 0);
701		switch(code) {
702			case 0: mulmod(x,x); break;
703			case 1: mulmod(x,x);
704				mulmod(pbuff, x);
705				break;
706			case 2: mulmod(pbuff, x);
707				mulmod(x,x); break;
708			case 3: mulmod(x,x); mulmod(pbuff2, x); break;
709		}
710		pos -= 2;
711        }
712}
713
714mulmod(poly x, poly y) {
715        mulp(x, y); fullmod(y);
716}
717
718elldoublep(poly n1, poly d1, poly m1, poly c1, poly n0, poly d0,
719                poly m0, poly c0) {
720
721     ptop(n1, mn); mulmod(n1, mn);
722          ptop(mn, pbuff); addp(mn, mn); addp(pbuff, mn);
723     fullmod(mn);
724          ptop(d1, pbuff); mulmod(d1, pbuff);
725          scalarmul(a, pbuff); addp(pbuff, mn);
726          fullmod(mn);
727          mulmod(c1, mn);
728          ptop(m1, md); addp(md, md);
729          mulmod(d1, md); mulmod(d1, md); mulmod(cubic, md);
730
731     ptop(d1, n0); mulmod(mn, n0); mulmod(mn, n0);
732          mulmod(cubic, n0);
733          ptop(n1, pbuff); addp(pbuff, pbuff); fullmod(pbuff);
734          mulmod(md, pbuff); mulmod(md, pbuff);
735          subp(pbuff, n0); fullmod(n0);
736     ptop(md, d0); mulmod(md, d0); mulmod(d1, d0);
737
738     ptop(mn, m0); mulmod(c1, m0);
739          ptop(d0, pbuff); mulmod(n1, pbuff);
740          ptop(n0, c0); mulmod(d1, c0); subp(c0, pbuff);
741          fullmod(pbuff);
742          mulmod(pbuff, m0);
743          ptop(m1, pbuff); mulmod(md, pbuff);
744          mulmod(d1, pbuff); mulmod(d0, pbuff);
745          subp(pbuff, m0); fullmod(m0);
746
747     ptop(c1, c0); mulmod(md, c0); mulmod(d1, c0); mulmod(d0, c0);
748}
749
750elladdp(poly n1, poly d1, poly m1, poly c1, poly n2, poly d2, poly m2, poly c2, poly n0, poly d0, poly m0, poly c0) {
751        ptop(m2, mn); mulmod(c1, mn);
752   ptop(m1, pbuff); mulmod(c2, pbuff);
753   subp(pbuff, mn); fullmod(mn);
754   mulmod(d1, mn); mulmod(d2, mn);
755
756        ptop(n2, md); mulmod(d1, md);
757   ptop(n1, pbuff); mulmod(d2, pbuff);
758   subp(pbuff, md); fullmod(md);
759   mulmod(c1, md); mulmod(c2, md);
760
761   ptop(cubic, n0); mulmod(mn, n0); mulmod(mn, n0);
762   mulmod(d1, n0); mulmod(d2, n0);
763   ptop(n1, pbuff); mulmod(d2, pbuff);
764   ptop(n2, d0); mulmod(d1, d0);
765   addp(d0, pbuff); mulmod(md, pbuff); mulmod(md, pbuff);
766   subp(pbuff, n0); fullmod(n0);
767
768   ptop(md, d0); mulmod(md, d0); mulmod(d1, d0); mulmod(d2, d0);
769
770   ptop(mn, m0); mulmod(c1, m0);
771   ptop(d0, pbuff); mulmod(n1, pbuff);
772   ptop(d1, c0); mulmod(n0, c0);
773   subp(c0, pbuff); fullmod(pbuff);
774   mulmod(pbuff, m0);
775   ptop(m1, pbuff); mulmod(md, pbuff);
776   mulmod(d0, pbuff); mulmod(d1, pbuff);
777   subp(pbuff, m0); fullmod(m0);
778
779   ptop(md, c0); mulmod(c1, c0); mulmod(d0, c0); mulmod(d1, c0);
780
781}
782
783polyout(poly x) {
784   int j;
785   for(j=0; j <= x->deg; j++) {printf("%d: ",j); gout(x->coe[j]);}
786}
787
788main(int argc, char **argv) {
789      int j, ct = 0, el, xmatch, ymatch;
790      int k, t;
791      int T[L_CEIL], P[L_CEIL], LL[L_CEIL];
792      giant ss[L_CEIL];
793      unsigned int ord, ordminus;
794      point_proj pt, pt2;
795
796      p = newgiant(INFINITY);  /* Also sets up internal giants' stacks. */
797      j = ((Q_MAX+15)/16);
798      init_tools(2*j);
799	  a = newgiant(j);
800	  b = newgiant(j);
801
802entry:
803      printf("Give p > 3, a, b on separate lines:\n"); fflush(stdout);
804      gin(p);  /* Field prime. */
805      if((Q = bitlen(p)) > Q_MAX) {
806		fprintf(stderr, "p too large, larger than %d bits.\n", Q);
807		goto entry;
808	  }
809      if(!prime_probable(p)) {
810		fprintf(stderr, "p is not but must be prime.\n", Q);
811		goto entry;
812	  }
813      gin(a); gin(b); /* Curve parameters. */
814
815	  t1 = newgiant(2*j);
816	  t2 = newgiant(2*j);
817/* Next, discriminant test for legitimacy of curve. */
818	gtog(a, t1); squareg(t1); modg(p, t1); mulg(a, t1); modg(p, t1);
819    gshiftleft(2, t1);  /* 4 a^3 mod p. */
820    gtog(b, t2); squareg(t2); modg(p, t2); smulg(27, t2);
821    addg(t2, t1); modg(p, t1);
822    if(isZero(t1)) {
823		fprintf(stderr, "Discriminant FAILED\n");
824		goto entry;
825    }
826    printf("Discriminant PASSED\n"); fflush(stdout);
827
828/* Next, find an efficient prime power array such that
829   Prod[powers] >= 16 p. */
830
831 /* Create minimal prime power array such that Prod[powers]^2 > 16p. */
832
833    gtog(p, t2); gshiftleft(4, t2);   /* t2 := 16p. */
834
835    L_MAX = 3;
836    while(L_MAX <= L_CEIL-1) {
837		for(j=0; j <= L_MAX; j++) LL[j] = 0;
838   		for(j=2; j <= L_MAX; j++) {
839			if(primeq(j)) {
840				LL[j] = 1;
841		    	k = j;
842				while(1) {
843			    	k *= j;
844					if(k <= L_MAX) {
845						LL[k] = 1;
846						LL[k/j] = 0;
847					}
848					else break;
849				}
850			}
851		}
852    	itog(1, t1);
853    	for(j=2; j <= L_MAX; j++) {
854			if(LL[j]) { smulg(j, t1); smulg(j, t1); } /* Building up t1^2. */
855		}
856		if(gcompg(t1, t2) > 0) break;
857        ++L_MAX;
858	}
859
860   printf("Initializing for prime powers:\n");
861   for(j=2; j <= L_MAX; j++) {
862		if(LL[j]) printf("%d ", j);
863   }
864   printf("\n");
865   fflush(stdout);
866
867
868   MAX_DIGS = (2 + (Q+15)/8);  /* Size of (squared) coefficients. */
869   MAX_COEFFS = ((L_MAX+1)*(L_MAX+2));
870
871   init_all();
872   schain(L_MAX+1);
873
874for(L = 2; L <= L_MAX; L++) {
875      if(!LL[L]) continue;
876printf("Resolving Schoof L = %d...\n", L);
877      P[ct] = L;  /* Stuff another prime power. */
878      init_recip(L);
879// printf("s: "); polyout(s[L]);
880      gtog(p, aux2);
881      k = idivg(L, aux2);  /* p (mod L). */
882
883printf("power...\n");
884      txd->deg = 0; itog(1, txd->coe[0]);
885      tyd->deg = 0; itog(1, tyd->coe[0]);
886      txn->deg = 1; itog(0, txn->coe[0]); itog(1, txn->coe[1]);
887      ptop(txn, kxn);
888
889      gtog(p, aux2);
890      powerpoly(txn, aux2); /* x^p. */
891printf("x^p done...\n");
892      ptop(txn, powx);
893      powerpoly(powx, aux2);
894printf("x^p^2 done...\n");
895      ptop(cubic, tyn);
896      gtog(p, aux2); itog(1, aux); subg(aux, aux2);
897      gshiftright(1, aux2); /* aux2 := (p-1)/2. */
898      powerpoly(tyn, aux2); /* y^p. */
899printf("y^p done...\n");
900      ptop(tyn, powy); mulp(tyn, powy); fullmod(powy);
901      mulp(cubic, powy); fullmod(powy);
902      powerpoly(powy, aux2);
903      mulp(tyn, powy); fullmod(powy);
904printf("Powers done...\n");
905
906// printf("pow" ); polyout(powx); polyout(powy);
907      ptop(txn, txn1); ptop(txd, txd1);  /* Save t = 1 case. */
908      ptop(tyn, tyn1); ptop(tyd, tyd1);
909/* We now shall test
910     (powx, y powy) + k(kxn/kxd, y kyn/kyd) = t(txn/txd, y tyn/tyd)
911 */
912
913    if(k==1) { ptop(txd, kxd); ptop(txd, kyd);
914                              ptop(txd, kyn);
915    } else {
916                   ptop(s[k], kxd); mulp(s[k], kxd); fullmod(kxd);
917     if(k%2==0) { mulp(cubic, kxd); fullmod(kxd); }
918     mulp(kxd, kxn); fullmod(kxn);
919     ptop(s[k-1], pbuff); mulp(s[k+1], pbuff); fullmod(pbuff);
920     if(k%2==1) {mulp(cubic, pbuff); fullmod(pbuff); }
921     subp(pbuff, kxn); fullmod(kxn);
922
923     ptop(s[k+2], kyn); mulp(s[k-1], kyn); fullmod(kyn);
924          mulp(s[k-1], kyn); fullmod(kyn);
925     if(k > 2) {
926                ptop(s[k-2], pbuff); mulp(s[k+1], pbuff); fullmod(pbuff);
927          mulp(s[k+1], pbuff); fullmod(pbuff);
928     subp(pbuff, kyn); fullmod(kyn);
929     }
930     ptop(s[k], kyd); mulp(s[k], kyd); fullmod(kyd);
931          mulp(s[k], kyd); fullmod(kyd);
932     if(k%2==0) { mulp(cubic, kyd); fullmod(kyd);
933                        mulp(cubic, kyd); fullmod(kyd);}
934     itog(4, aux2); scalarmul(aux2, kyd);
935    }
936//printf("kP: "); polyout(kxn); polyout(kxd); polyout(kyn); polyout(kyd);
937/* Commence t = 0 check. */
938printf("Checking t = %d ...\n", 0);
939fflush(stdout);
940
941     ptop(powx, pbuff); mulp(kxd, pbuff);
942     subp(kxn, pbuff);
943     fullmod(pbuff);
944
945     xmatch = ymatch = 0;
946     if(iszerop(pbuff)) {
947		 xmatch = 1;
948         /* Now check y coords. */
949 		 if(L == 2) goto resolve;
950         ptop(powy, pbuff); mulp(kyd, pbuff);
951         addp(kyn, pbuff);
952         fullmod(pbuff);
953         if(iszerop(pbuff)) {
954               resolve:
955					printf("%d %d\n", L, 0);
956               		T[ct++] = 0;
957                    continue;
958         } else ymatch = 1;
959     }
960/* Combine pt1 and pt2. */
961   if((xmatch == 1) && (ymatch == 1))
962       elldoublep(powx, txd, powy, txd, nx, dx, ny, dy);
963       else
964       elladdp(powx, txd, powy, txd, kxn, kxd, kyn, kyd, nx, dx, ny, dy);
965/* Now {nx/dx, ny/dy} is (fixed) LHS. */
966// printf("add12: "); polyout(nx); polyout(dx); polyout(ny); polyout(dy);
967/* Commence t > 0 check. */
968    for(t=1; t <= L/2; t++) {
969printf("Checking t = %d ...\n", t);
970         if(t > 1) { /* Add (tx1, ty1) to (txn, tyn). */
971                                ptop(txn1, pbuff); mulmod(txd, pbuff);
972            ptop(txn, powx); mulmod(txd1, powx);
973                                subp(powx, pbuff); fullmod(pbuff);
974            if(!iszerop(pbuff))
975                                 elladdp(txn1, txd1, tyn1, tyd1, txn, txd, tyn, tyd,
976                                        tmp1, tmp2, tmp3, tmp4);
977                           else elldoublep(txn, txd, tyn, tyd,
978                                        tmp1, tmp2, tmp3, tmp4);
979            ptop(tmp1, txn); ptop(tmp2, txd);
980                           ptop(tmp3, tyn); ptop(tmp4, tyd);
981         }
982// printf("tQ: "); polyout(txn); polyout(txd); polyout(tyn); polyout(tyd);
983         /* Next, check {nx/dx, ny/dy} =? {txn/txd, tyn/tyd}. */
984                        ptop(nx, pbuff); mulmod(txd, pbuff);
985                   ptop(dx, powx); mulmod(txn, powx);
986                        subp(powx, pbuff); fullmod(pbuff);
987                   if(!iszerop(pbuff)) continue;
988         /* Next, check y. */
989                //      printf("y check!\n");
990                        ptop(ny, pbuff); mulmod(tyd, pbuff);
991                   ptop(dy, powx); mulmod(tyn, powx);
992                        subp(powx, pbuff); fullmod(pbuff);
993                   if(iszerop(pbuff)) {
994                        printf("%d %d\n", L, t);
995               			T[ct++] = t;
996                   }  else {
997                        printf("%d %d\n", L, L-t);
998               			T[ct++] = L-t;
999         			}
1000					fflush(stdout);
1001         			break;
1002   }
1003}
1004
1005/* Now, prime powers P[] and CRT residues T[] are intact. */
1006	printf("Prime powers L:\n");
1007	printf("{");
1008    for(j=0; j < ct-1; j++) {
1009		printf("%d, ", P[j]);
1010    }
1011    printf("%d }\n", P[ct-1]);
1012
1013	printf("Residues t (mod L):\n");
1014	printf("{");
1015    for(j=0; j < ct-1; j++) {
1016		printf("%d, ", T[j]);
1017    }
1018    printf("%d }\n", T[ct-1]);
1019
1020/* Mathematica algorithm for order:
1021plis = {2^5, 3^3, 5^2, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47};
1022tlis = {1,    26,   4, 2,  4, 11,  6,  5, 19, 22, 10, 16,  7, 22, 11};
1023prod = Apply[Times, plis];
1024prlis = prod/plis;
1025invlis = Table[PowerMod[prlis[[q]], -1, plis[[q]]],{q,1,Length[plis]}];
1026p = 2^127 - 1;
1027t = Mod[tlis . (prlis * invlis), prod];
1028ord = p + 1 - If[t^2 > 4p, t - prod, t]
1029*/
1030
1031  itog(1, t1);
1032  for(j=0; j < ct; j++) {
1033    free(s[j]);  /* Just to clear memory. */
1034	smulg(P[j], t1);
1035  }
1036
1037  for(j=0; j < 2*ct; j++) {
1038	   ss[j] = newgiant(MAX_DIGS);
1039  }
1040
1041  for(j=0; j < ct; j++) {
1042	gtog(t1, ss[j]);
1043    itog(P[j], t2);
1044    divg(t2, ss[j]);
1045  }
1046
1047  for(j=0; j < ct; j++) {
1048       gtog(ss[j], ss[j+ct]);
1049       itog(P[j], t2);
1050	   invg(t2, ss[j+ct]);
1051  }
1052
1053  itog(0, t4);
1054  for(j=0; j < ct; j++) {
1055	itog(T[j], t5);
1056	mulg(ss[j], t5);
1057	mulg(ss[j+ct], t5);
1058	addg(t5, t4);
1059  }
1060  modg(t1, t4);
1061  gtog(p, t5);
1062  iaddg(1, t5);
1063  gtog(t4, t2);
1064  squareg(t4);
1065  gtog(p, t3); gshiftleft(2, t3);
1066  if(gcompg(t4, t3) > 0) subg(t1, t2);
1067  subg(t2, t5);
1068  printf("Parameters:\n");
1069  printf("p = "); gout(p);
1070  printf("a = "); gout(a);
1071  printf("b = "); gout(b);
1072  printf("Curve order:\n");
1073  printf("o = "); gout(t5); gtog(t5, t3); /* Save order as t3. */
1074  printf("Twist order:\n");
1075  printf("o' = ");
1076  addg(t2, t5);
1077  addg(t2, t5);
1078  gout(t5);
1079/* Next, verify the order. */
1080  printf("Verifying order o:...\n");
1081  init_ell_proj(MAX_DIGS);
1082  pt = new_point_proj(MAX_DIGS);
1083  pt2 = new_point_proj(MAX_DIGS);
1084  itog(1,t2);
1085  find_point_proj(pt, t2, a, b, p);
1086  printf("A point on the curve y^2 = x^3 + a x + b (mod p) is:\n");
1087  printf("(x,y,z) = {\n"); gout(pt->x); printf(",");
1088  gout(pt->y); printf(","); gout(pt->z);
1089  printf("}\n");
1090  ell_mul_proj(pt, pt2, t3, a, p);
1091  printf("A multiple is:\n");
1092  printf("o * (x,y,z) = {\n");
1093  gout(pt2->x); printf(",");gout(pt2->y); printf(",");gout(pt2->z);
1094  printf("}\n");
1095  if(!isZero(pt2->z)) {
1096	printf("TILT: multiple should be point-at-infinity.\n");
1097	exit(1);
1098  }
1099  printf("VERIFIED: multiple is indeed point-at-infinity.\n");
1100}
1101