/* bn_asm.c revision 109998 */
/* crypto/bn/bn_asm.c */
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
 * All rights reserved.
 *
 * This package is an SSL implementation written
 * by Eric Young (eay@cryptsoft.com).
 * The implementation was written so as to conform with Netscapes SSL.
 *
 * This library is free for commercial and non-commercial use as long as
 * the following conditions are aheared to.  The following conditions
 * apply to all code found in this distribution, be it the RC4, RSA,
 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
 * included with this distribution is covered by the same copyright terms
 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
 *
 * Copyright remains Eric Young's, and as such any Copyright notices in
 * the code are not to be removed.
 * If this package is used in a product, Eric Young should be given attribution
 * as the author of the parts of the library used.
 * This can be in the form of a textual message at program startup or
 * in documentation (online or textual) provided with the package.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    "This product includes cryptographic software written by
 *     Eric Young (eay@cryptsoft.com)"
 *    The word 'cryptographic' can be left out if the rouines from the library
 *    being used are not cryptographic related :-).
 * 4. If you include any Windows specific code (or a derivative thereof) from
 *    the apps directory (application code) you must include an acknowledgement:
 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
 *
 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * The licence and distribution terms for any publically available version or
 * derivative of this code cannot be changed.  i.e. this code cannot simply be
 * copied and put under another distribution licence
 * [including the GNU Public Licence.]
5755714Skris */ 5855714Skris 5959191Skris#ifndef BN_DEBUG 6059191Skris# undef NDEBUG /* avoid conflicting definitions */ 6159191Skris# define NDEBUG 6259191Skris#endif 6359191Skris 6455714Skris#include <stdio.h> 6559191Skris#include <assert.h> 6655714Skris#include "cryptlib.h" 6755714Skris#include "bn_lcl.h" 6855714Skris 6959191Skris#if defined(BN_LLONG) || defined(BN_UMULT_HIGH) 7055714Skris 71109998SmarkmBN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) 7255714Skris { 7355714Skris BN_ULONG c1=0; 7455714Skris 7559191Skris assert(num >= 0); 7655714Skris if (num <= 0) return(c1); 7755714Skris 7859191Skris while (num&~3) 7955714Skris { 8055714Skris mul_add(rp[0],ap[0],w,c1); 8155714Skris mul_add(rp[1],ap[1],w,c1); 8255714Skris mul_add(rp[2],ap[2],w,c1); 8355714Skris mul_add(rp[3],ap[3],w,c1); 8459191Skris ap+=4; rp+=4; num-=4; 8555714Skris } 8659191Skris if (num) 8759191Skris { 8859191Skris mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1; 8959191Skris mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1; 9059191Skris mul_add(rp[2],ap[2],w,c1); return c1; 9159191Skris } 9255714Skris 9355714Skris return(c1); 9455714Skris } 9555714Skris 96109998SmarkmBN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) 9755714Skris { 9855714Skris BN_ULONG c1=0; 9955714Skris 10059191Skris assert(num >= 0); 10155714Skris if (num <= 0) return(c1); 10255714Skris 10359191Skris while (num&~3) 10455714Skris { 10555714Skris mul(rp[0],ap[0],w,c1); 10655714Skris mul(rp[1],ap[1],w,c1); 10755714Skris mul(rp[2],ap[2],w,c1); 10855714Skris mul(rp[3],ap[3],w,c1); 10959191Skris ap+=4; rp+=4; num-=4; 11055714Skris } 11159191Skris if (num) 11259191Skris { 11359191Skris mul(rp[0],ap[0],w,c1); if (--num == 0) return c1; 11459191Skris mul(rp[1],ap[1],w,c1); if (--num == 0) return c1; 11559191Skris mul(rp[2],ap[2],w,c1); 11659191Skris } 11755714Skris return(c1); 11855714Skris } 11955714Skris 120109998Smarkmvoid bn_sqr_words(BN_ULONG *r, const BN_ULONG 
*a, int n) 12155714Skris { 12259191Skris assert(n >= 0); 12355714Skris if (n <= 0) return; 12459191Skris while (n&~3) 12555714Skris { 12659191Skris sqr(r[0],r[1],a[0]); 12759191Skris sqr(r[2],r[3],a[1]); 12859191Skris sqr(r[4],r[5],a[2]); 12959191Skris sqr(r[6],r[7],a[3]); 13059191Skris a+=4; r+=8; n-=4; 13155714Skris } 13259191Skris if (n) 13359191Skris { 13459191Skris sqr(r[0],r[1],a[0]); if (--n == 0) return; 13559191Skris sqr(r[2],r[3],a[1]); if (--n == 0) return; 13659191Skris sqr(r[4],r[5],a[2]); 13759191Skris } 13855714Skris } 13955714Skris 14059191Skris#else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ 14155714Skris 142109998SmarkmBN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) 14355714Skris { 14455714Skris BN_ULONG c=0; 14555714Skris BN_ULONG bl,bh; 14655714Skris 14759191Skris assert(num >= 0); 14855714Skris if (num <= 0) return((BN_ULONG)0); 14955714Skris 15055714Skris bl=LBITS(w); 15155714Skris bh=HBITS(w); 15255714Skris 15355714Skris for (;;) 15455714Skris { 15555714Skris mul_add(rp[0],ap[0],bl,bh,c); 15655714Skris if (--num == 0) break; 15755714Skris mul_add(rp[1],ap[1],bl,bh,c); 15855714Skris if (--num == 0) break; 15955714Skris mul_add(rp[2],ap[2],bl,bh,c); 16055714Skris if (--num == 0) break; 16155714Skris mul_add(rp[3],ap[3],bl,bh,c); 16255714Skris if (--num == 0) break; 16355714Skris ap+=4; 16455714Skris rp+=4; 16555714Skris } 16655714Skris return(c); 16755714Skris } 16855714Skris 169109998SmarkmBN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) 17055714Skris { 17155714Skris BN_ULONG carry=0; 17255714Skris BN_ULONG bl,bh; 17355714Skris 17459191Skris assert(num >= 0); 17555714Skris if (num <= 0) return((BN_ULONG)0); 17655714Skris 17755714Skris bl=LBITS(w); 17855714Skris bh=HBITS(w); 17955714Skris 18055714Skris for (;;) 18155714Skris { 18255714Skris mul(rp[0],ap[0],bl,bh,carry); 18355714Skris if (--num == 0) break; 18455714Skris mul(rp[1],ap[1],bl,bh,carry); 18555714Skris if (--num 
== 0) break; 18655714Skris mul(rp[2],ap[2],bl,bh,carry); 18755714Skris if (--num == 0) break; 18855714Skris mul(rp[3],ap[3],bl,bh,carry); 18955714Skris if (--num == 0) break; 19055714Skris ap+=4; 19155714Skris rp+=4; 19255714Skris } 19355714Skris return(carry); 19455714Skris } 19555714Skris 196109998Smarkmvoid bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) 19755714Skris { 19859191Skris assert(n >= 0); 19955714Skris if (n <= 0) return; 20055714Skris for (;;) 20155714Skris { 20255714Skris sqr64(r[0],r[1],a[0]); 20355714Skris if (--n == 0) break; 20455714Skris 20555714Skris sqr64(r[2],r[3],a[1]); 20655714Skris if (--n == 0) break; 20755714Skris 20855714Skris sqr64(r[4],r[5],a[2]); 20955714Skris if (--n == 0) break; 21055714Skris 21155714Skris sqr64(r[6],r[7],a[3]); 21255714Skris if (--n == 0) break; 21355714Skris 21455714Skris a+=4; 21555714Skris r+=8; 21655714Skris } 21755714Skris } 21855714Skris 21959191Skris#endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ 22055714Skris 22155714Skris#if defined(BN_LLONG) && defined(BN_DIV2W) 22255714Skris 22355714SkrisBN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) 22455714Skris { 22555714Skris return((BN_ULONG)(((((BN_ULLONG)h)<<BN_BITS2)|l)/(BN_ULLONG)d)); 22655714Skris } 22755714Skris 22855714Skris#else 22955714Skris 23068651Skris/* Divide h,l by d and return the result. 
*/ 23155714Skris/* I need to test this some more :-( */ 23255714SkrisBN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) 23355714Skris { 23455714Skris BN_ULONG dh,dl,q,ret=0,th,tl,t; 23555714Skris int i,count=2; 23655714Skris 23755714Skris if (d == 0) return(BN_MASK2); 23855714Skris 23955714Skris i=BN_num_bits_word(d); 24068651Skris assert((i == BN_BITS2) || (h > (BN_ULONG)1<<i)); 24168651Skris 24255714Skris i=BN_BITS2-i; 24355714Skris if (h >= d) h-=d; 24455714Skris 24555714Skris if (i) 24655714Skris { 24755714Skris d<<=i; 24855714Skris h=(h<<i)|(l>>(BN_BITS2-i)); 24955714Skris l<<=i; 25055714Skris } 25155714Skris dh=(d&BN_MASK2h)>>BN_BITS4; 25255714Skris dl=(d&BN_MASK2l); 25355714Skris for (;;) 25455714Skris { 25555714Skris if ((h>>BN_BITS4) == dh) 25655714Skris q=BN_MASK2l; 25755714Skris else 25855714Skris q=h/dh; 25955714Skris 26055714Skris th=q*dh; 26155714Skris tl=dl*q; 26255714Skris for (;;) 26355714Skris { 26455714Skris t=h-th; 26555714Skris if ((t&BN_MASK2h) || 26655714Skris ((tl) <= ( 26755714Skris (t<<BN_BITS4)| 26855714Skris ((l&BN_MASK2h)>>BN_BITS4)))) 26955714Skris break; 27055714Skris q--; 27155714Skris th-=dh; 27255714Skris tl-=dl; 27355714Skris } 27455714Skris t=(tl>>BN_BITS4); 27555714Skris tl=(tl<<BN_BITS4)&BN_MASK2h; 27655714Skris th+=t; 27755714Skris 27855714Skris if (l < tl) th++; 27955714Skris l-=tl; 28055714Skris if (h < th) 28155714Skris { 28255714Skris h+=d; 28355714Skris q--; 28455714Skris } 28555714Skris h-=th; 28655714Skris 28755714Skris if (--count == 0) break; 28855714Skris 28955714Skris ret=q<<BN_BITS4; 29055714Skris h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2; 29155714Skris l=(l&BN_MASK2l)<<BN_BITS4; 29255714Skris } 29355714Skris ret|=q; 29455714Skris return(ret); 29555714Skris } 29659191Skris#endif /* !defined(BN_LLONG) && defined(BN_DIV2W) */ 29755714Skris 29855714Skris#ifdef BN_LLONG 299109998SmarkmBN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) 30055714Skris { 30155714Skris BN_ULLONG ll=0; 
30255714Skris 30359191Skris assert(n >= 0); 30455714Skris if (n <= 0) return((BN_ULONG)0); 30555714Skris 30655714Skris for (;;) 30755714Skris { 30855714Skris ll+=(BN_ULLONG)a[0]+b[0]; 30955714Skris r[0]=(BN_ULONG)ll&BN_MASK2; 31055714Skris ll>>=BN_BITS2; 31155714Skris if (--n <= 0) break; 31255714Skris 31355714Skris ll+=(BN_ULLONG)a[1]+b[1]; 31455714Skris r[1]=(BN_ULONG)ll&BN_MASK2; 31555714Skris ll>>=BN_BITS2; 31655714Skris if (--n <= 0) break; 31755714Skris 31855714Skris ll+=(BN_ULLONG)a[2]+b[2]; 31955714Skris r[2]=(BN_ULONG)ll&BN_MASK2; 32055714Skris ll>>=BN_BITS2; 32155714Skris if (--n <= 0) break; 32255714Skris 32355714Skris ll+=(BN_ULLONG)a[3]+b[3]; 32455714Skris r[3]=(BN_ULONG)ll&BN_MASK2; 32555714Skris ll>>=BN_BITS2; 32655714Skris if (--n <= 0) break; 32755714Skris 32855714Skris a+=4; 32955714Skris b+=4; 33055714Skris r+=4; 33155714Skris } 33255714Skris return((BN_ULONG)ll); 33355714Skris } 33459191Skris#else /* !BN_LLONG */ 335109998SmarkmBN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) 33655714Skris { 33755714Skris BN_ULONG c,l,t; 33855714Skris 33959191Skris assert(n >= 0); 34055714Skris if (n <= 0) return((BN_ULONG)0); 34155714Skris 34255714Skris c=0; 34355714Skris for (;;) 34455714Skris { 34555714Skris t=a[0]; 34655714Skris t=(t+c)&BN_MASK2; 34755714Skris c=(t < c); 34855714Skris l=(t+b[0])&BN_MASK2; 34955714Skris c+=(l < t); 35055714Skris r[0]=l; 35155714Skris if (--n <= 0) break; 35255714Skris 35355714Skris t=a[1]; 35455714Skris t=(t+c)&BN_MASK2; 35555714Skris c=(t < c); 35655714Skris l=(t+b[1])&BN_MASK2; 35755714Skris c+=(l < t); 35855714Skris r[1]=l; 35955714Skris if (--n <= 0) break; 36055714Skris 36155714Skris t=a[2]; 36255714Skris t=(t+c)&BN_MASK2; 36355714Skris c=(t < c); 36455714Skris l=(t+b[2])&BN_MASK2; 36555714Skris c+=(l < t); 36655714Skris r[2]=l; 36755714Skris if (--n <= 0) break; 36855714Skris 36955714Skris t=a[3]; 37055714Skris t=(t+c)&BN_MASK2; 37155714Skris c=(t < c); 37255714Skris l=(t+b[3])&BN_MASK2; 
37355714Skris c+=(l < t); 37455714Skris r[3]=l; 37555714Skris if (--n <= 0) break; 37655714Skris 37755714Skris a+=4; 37855714Skris b+=4; 37955714Skris r+=4; 38055714Skris } 38155714Skris return((BN_ULONG)c); 38255714Skris } 38359191Skris#endif /* !BN_LLONG */ 38455714Skris 385109998SmarkmBN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) 38655714Skris { 38755714Skris BN_ULONG t1,t2; 38855714Skris int c=0; 38955714Skris 39059191Skris assert(n >= 0); 39155714Skris if (n <= 0) return((BN_ULONG)0); 39255714Skris 39355714Skris for (;;) 39455714Skris { 39555714Skris t1=a[0]; t2=b[0]; 39655714Skris r[0]=(t1-t2-c)&BN_MASK2; 39755714Skris if (t1 != t2) c=(t1 < t2); 39855714Skris if (--n <= 0) break; 39955714Skris 40055714Skris t1=a[1]; t2=b[1]; 40155714Skris r[1]=(t1-t2-c)&BN_MASK2; 40255714Skris if (t1 != t2) c=(t1 < t2); 40355714Skris if (--n <= 0) break; 40455714Skris 40555714Skris t1=a[2]; t2=b[2]; 40655714Skris r[2]=(t1-t2-c)&BN_MASK2; 40755714Skris if (t1 != t2) c=(t1 < t2); 40855714Skris if (--n <= 0) break; 40955714Skris 41055714Skris t1=a[3]; t2=b[3]; 41155714Skris r[3]=(t1-t2-c)&BN_MASK2; 41255714Skris if (t1 != t2) c=(t1 < t2); 41355714Skris if (--n <= 0) break; 41455714Skris 41555714Skris a+=4; 41655714Skris b+=4; 41755714Skris r+=4; 41855714Skris } 41955714Skris return(c); 42055714Skris } 42155714Skris 42255714Skris#ifdef BN_MUL_COMBA 42355714Skris 42455714Skris#undef bn_mul_comba8 42555714Skris#undef bn_mul_comba4 42655714Skris#undef bn_sqr_comba8 42755714Skris#undef bn_sqr_comba4 42855714Skris 42959191Skris/* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */ 43059191Skris/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */ 43159191Skris/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ 43259191Skris/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ 43359191Skris 43455714Skris#ifdef BN_LLONG 43555714Skris#define 
mul_add_c(a,b,c0,c1,c2) \ 43655714Skris t=(BN_ULLONG)a*b; \ 43755714Skris t1=(BN_ULONG)Lw(t); \ 43855714Skris t2=(BN_ULONG)Hw(t); \ 43955714Skris c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ 44055714Skris c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; 44155714Skris 44255714Skris#define mul_add_c2(a,b,c0,c1,c2) \ 44355714Skris t=(BN_ULLONG)a*b; \ 44455714Skris tt=(t+t)&BN_MASK; \ 44555714Skris if (tt < t) c2++; \ 44655714Skris t1=(BN_ULONG)Lw(tt); \ 44755714Skris t2=(BN_ULONG)Hw(tt); \ 44855714Skris c0=(c0+t1)&BN_MASK2; \ 44955714Skris if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \ 45055714Skris c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; 45155714Skris 45255714Skris#define sqr_add_c(a,i,c0,c1,c2) \ 45355714Skris t=(BN_ULLONG)a[i]*a[i]; \ 45455714Skris t1=(BN_ULONG)Lw(t); \ 45555714Skris t2=(BN_ULONG)Hw(t); \ 45655714Skris c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ 45755714Skris c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; 45855714Skris 45955714Skris#define sqr_add_c2(a,i,j,c0,c1,c2) \ 46055714Skris mul_add_c2((a)[i],(a)[j],c0,c1,c2) 46159191Skris 46259191Skris#elif defined(BN_UMULT_HIGH) 46359191Skris 46459191Skris#define mul_add_c(a,b,c0,c1,c2) { \ 46559191Skris BN_ULONG ta=(a),tb=(b); \ 46659191Skris t1 = ta * tb; \ 46759191Skris t2 = BN_UMULT_HIGH(ta,tb); \ 46859191Skris c0 += t1; t2 += (c0<t1)?1:0; \ 46959191Skris c1 += t2; c2 += (c1<t2)?1:0; \ 47059191Skris } 47159191Skris 47259191Skris#define mul_add_c2(a,b,c0,c1,c2) { \ 47359191Skris BN_ULONG ta=(a),tb=(b),t0; \ 47459191Skris t1 = BN_UMULT_HIGH(ta,tb); \ 47559191Skris t0 = ta * tb; \ 47659191Skris t2 = t1+t1; c2 += (t2<t1)?1:0; \ 47759191Skris t1 = t0+t0; t2 += (t1<t0)?1:0; \ 47859191Skris c0 += t1; t2 += (c0<t1)?1:0; \ 47959191Skris c1 += t2; c2 += (c1<t2)?1:0; \ 48059191Skris } 48159191Skris 48259191Skris#define sqr_add_c(a,i,c0,c1,c2) { \ 48359191Skris BN_ULONG ta=(a)[i]; \ 48459191Skris t1 = ta * ta; \ 48559191Skris t2 = BN_UMULT_HIGH(ta,ta); \ 48659191Skris c0 += t1; t2 += (c0<t1)?1:0; \ 48759191Skris c1 += 
t2; c2 += (c1<t2)?1:0; \ 48859191Skris } 48959191Skris 49059191Skris#define sqr_add_c2(a,i,j,c0,c1,c2) \ 49159191Skris mul_add_c2((a)[i],(a)[j],c0,c1,c2) 49259191Skris 49359191Skris#else /* !BN_LLONG */ 49455714Skris#define mul_add_c(a,b,c0,c1,c2) \ 49555714Skris t1=LBITS(a); t2=HBITS(a); \ 49655714Skris bl=LBITS(b); bh=HBITS(b); \ 49755714Skris mul64(t1,t2,bl,bh); \ 49855714Skris c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ 49955714Skris c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; 50055714Skris 50155714Skris#define mul_add_c2(a,b,c0,c1,c2) \ 50255714Skris t1=LBITS(a); t2=HBITS(a); \ 50355714Skris bl=LBITS(b); bh=HBITS(b); \ 50455714Skris mul64(t1,t2,bl,bh); \ 50555714Skris if (t2 & BN_TBIT) c2++; \ 50655714Skris t2=(t2+t2)&BN_MASK2; \ 50755714Skris if (t1 & BN_TBIT) t2++; \ 50855714Skris t1=(t1+t1)&BN_MASK2; \ 50955714Skris c0=(c0+t1)&BN_MASK2; \ 51055714Skris if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \ 51155714Skris c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; 51255714Skris 51355714Skris#define sqr_add_c(a,i,c0,c1,c2) \ 51455714Skris sqr64(t1,t2,(a)[i]); \ 51555714Skris c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ 51655714Skris c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; 51755714Skris 51855714Skris#define sqr_add_c2(a,i,j,c0,c1,c2) \ 51955714Skris mul_add_c2((a)[i],(a)[j],c0,c1,c2) 52059191Skris#endif /* !BN_LLONG */ 52155714Skris 52255714Skrisvoid bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 52355714Skris { 52455714Skris#ifdef BN_LLONG 52555714Skris BN_ULLONG t; 52655714Skris#else 52755714Skris BN_ULONG bl,bh; 52855714Skris#endif 52955714Skris BN_ULONG t1,t2; 53055714Skris BN_ULONG c1,c2,c3; 53155714Skris 53255714Skris c1=0; 53355714Skris c2=0; 53455714Skris c3=0; 53555714Skris mul_add_c(a[0],b[0],c1,c2,c3); 53655714Skris r[0]=c1; 53755714Skris c1=0; 53855714Skris mul_add_c(a[0],b[1],c2,c3,c1); 53955714Skris mul_add_c(a[1],b[0],c2,c3,c1); 54055714Skris r[1]=c2; 54155714Skris c2=0; 54255714Skris mul_add_c(a[2],b[0],c3,c1,c2); 54355714Skris 
mul_add_c(a[1],b[1],c3,c1,c2); 54455714Skris mul_add_c(a[0],b[2],c3,c1,c2); 54555714Skris r[2]=c3; 54655714Skris c3=0; 54755714Skris mul_add_c(a[0],b[3],c1,c2,c3); 54855714Skris mul_add_c(a[1],b[2],c1,c2,c3); 54955714Skris mul_add_c(a[2],b[1],c1,c2,c3); 55055714Skris mul_add_c(a[3],b[0],c1,c2,c3); 55155714Skris r[3]=c1; 55255714Skris c1=0; 55355714Skris mul_add_c(a[4],b[0],c2,c3,c1); 55455714Skris mul_add_c(a[3],b[1],c2,c3,c1); 55555714Skris mul_add_c(a[2],b[2],c2,c3,c1); 55655714Skris mul_add_c(a[1],b[3],c2,c3,c1); 55755714Skris mul_add_c(a[0],b[4],c2,c3,c1); 55855714Skris r[4]=c2; 55955714Skris c2=0; 56055714Skris mul_add_c(a[0],b[5],c3,c1,c2); 56155714Skris mul_add_c(a[1],b[4],c3,c1,c2); 56255714Skris mul_add_c(a[2],b[3],c3,c1,c2); 56355714Skris mul_add_c(a[3],b[2],c3,c1,c2); 56455714Skris mul_add_c(a[4],b[1],c3,c1,c2); 56555714Skris mul_add_c(a[5],b[0],c3,c1,c2); 56655714Skris r[5]=c3; 56755714Skris c3=0; 56855714Skris mul_add_c(a[6],b[0],c1,c2,c3); 56955714Skris mul_add_c(a[5],b[1],c1,c2,c3); 57055714Skris mul_add_c(a[4],b[2],c1,c2,c3); 57155714Skris mul_add_c(a[3],b[3],c1,c2,c3); 57255714Skris mul_add_c(a[2],b[4],c1,c2,c3); 57355714Skris mul_add_c(a[1],b[5],c1,c2,c3); 57455714Skris mul_add_c(a[0],b[6],c1,c2,c3); 57555714Skris r[6]=c1; 57655714Skris c1=0; 57755714Skris mul_add_c(a[0],b[7],c2,c3,c1); 57855714Skris mul_add_c(a[1],b[6],c2,c3,c1); 57955714Skris mul_add_c(a[2],b[5],c2,c3,c1); 58055714Skris mul_add_c(a[3],b[4],c2,c3,c1); 58155714Skris mul_add_c(a[4],b[3],c2,c3,c1); 58255714Skris mul_add_c(a[5],b[2],c2,c3,c1); 58355714Skris mul_add_c(a[6],b[1],c2,c3,c1); 58455714Skris mul_add_c(a[7],b[0],c2,c3,c1); 58555714Skris r[7]=c2; 58655714Skris c2=0; 58755714Skris mul_add_c(a[7],b[1],c3,c1,c2); 58855714Skris mul_add_c(a[6],b[2],c3,c1,c2); 58955714Skris mul_add_c(a[5],b[3],c3,c1,c2); 59055714Skris mul_add_c(a[4],b[4],c3,c1,c2); 59155714Skris mul_add_c(a[3],b[5],c3,c1,c2); 59255714Skris mul_add_c(a[2],b[6],c3,c1,c2); 59355714Skris mul_add_c(a[1],b[7],c3,c1,c2); 
59455714Skris r[8]=c3; 59555714Skris c3=0; 59655714Skris mul_add_c(a[2],b[7],c1,c2,c3); 59755714Skris mul_add_c(a[3],b[6],c1,c2,c3); 59855714Skris mul_add_c(a[4],b[5],c1,c2,c3); 59955714Skris mul_add_c(a[5],b[4],c1,c2,c3); 60055714Skris mul_add_c(a[6],b[3],c1,c2,c3); 60155714Skris mul_add_c(a[7],b[2],c1,c2,c3); 60255714Skris r[9]=c1; 60355714Skris c1=0; 60455714Skris mul_add_c(a[7],b[3],c2,c3,c1); 60555714Skris mul_add_c(a[6],b[4],c2,c3,c1); 60655714Skris mul_add_c(a[5],b[5],c2,c3,c1); 60755714Skris mul_add_c(a[4],b[6],c2,c3,c1); 60855714Skris mul_add_c(a[3],b[7],c2,c3,c1); 60955714Skris r[10]=c2; 61055714Skris c2=0; 61155714Skris mul_add_c(a[4],b[7],c3,c1,c2); 61255714Skris mul_add_c(a[5],b[6],c3,c1,c2); 61355714Skris mul_add_c(a[6],b[5],c3,c1,c2); 61455714Skris mul_add_c(a[7],b[4],c3,c1,c2); 61555714Skris r[11]=c3; 61655714Skris c3=0; 61755714Skris mul_add_c(a[7],b[5],c1,c2,c3); 61855714Skris mul_add_c(a[6],b[6],c1,c2,c3); 61955714Skris mul_add_c(a[5],b[7],c1,c2,c3); 62055714Skris r[12]=c1; 62155714Skris c1=0; 62255714Skris mul_add_c(a[6],b[7],c2,c3,c1); 62355714Skris mul_add_c(a[7],b[6],c2,c3,c1); 62455714Skris r[13]=c2; 62555714Skris c2=0; 62655714Skris mul_add_c(a[7],b[7],c3,c1,c2); 62755714Skris r[14]=c3; 62855714Skris r[15]=c1; 62955714Skris } 63055714Skris 63155714Skrisvoid bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 63255714Skris { 63355714Skris#ifdef BN_LLONG 63455714Skris BN_ULLONG t; 63555714Skris#else 63655714Skris BN_ULONG bl,bh; 63755714Skris#endif 63855714Skris BN_ULONG t1,t2; 63955714Skris BN_ULONG c1,c2,c3; 64055714Skris 64155714Skris c1=0; 64255714Skris c2=0; 64355714Skris c3=0; 64455714Skris mul_add_c(a[0],b[0],c1,c2,c3); 64555714Skris r[0]=c1; 64655714Skris c1=0; 64755714Skris mul_add_c(a[0],b[1],c2,c3,c1); 64855714Skris mul_add_c(a[1],b[0],c2,c3,c1); 64955714Skris r[1]=c2; 65055714Skris c2=0; 65155714Skris mul_add_c(a[2],b[0],c3,c1,c2); 65255714Skris mul_add_c(a[1],b[1],c3,c1,c2); 65355714Skris mul_add_c(a[0],b[2],c3,c1,c2); 
65455714Skris r[2]=c3; 65555714Skris c3=0; 65655714Skris mul_add_c(a[0],b[3],c1,c2,c3); 65755714Skris mul_add_c(a[1],b[2],c1,c2,c3); 65855714Skris mul_add_c(a[2],b[1],c1,c2,c3); 65955714Skris mul_add_c(a[3],b[0],c1,c2,c3); 66055714Skris r[3]=c1; 66155714Skris c1=0; 66255714Skris mul_add_c(a[3],b[1],c2,c3,c1); 66355714Skris mul_add_c(a[2],b[2],c2,c3,c1); 66455714Skris mul_add_c(a[1],b[3],c2,c3,c1); 66555714Skris r[4]=c2; 66655714Skris c2=0; 66755714Skris mul_add_c(a[2],b[3],c3,c1,c2); 66855714Skris mul_add_c(a[3],b[2],c3,c1,c2); 66955714Skris r[5]=c3; 67055714Skris c3=0; 67155714Skris mul_add_c(a[3],b[3],c1,c2,c3); 67255714Skris r[6]=c1; 67355714Skris r[7]=c2; 67455714Skris } 67555714Skris 676109998Smarkmvoid bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) 67755714Skris { 67855714Skris#ifdef BN_LLONG 67955714Skris BN_ULLONG t,tt; 68055714Skris#else 68155714Skris BN_ULONG bl,bh; 68255714Skris#endif 68355714Skris BN_ULONG t1,t2; 68455714Skris BN_ULONG c1,c2,c3; 68555714Skris 68655714Skris c1=0; 68755714Skris c2=0; 68855714Skris c3=0; 68955714Skris sqr_add_c(a,0,c1,c2,c3); 69055714Skris r[0]=c1; 69155714Skris c1=0; 69255714Skris sqr_add_c2(a,1,0,c2,c3,c1); 69355714Skris r[1]=c2; 69455714Skris c2=0; 69555714Skris sqr_add_c(a,1,c3,c1,c2); 69655714Skris sqr_add_c2(a,2,0,c3,c1,c2); 69755714Skris r[2]=c3; 69855714Skris c3=0; 69955714Skris sqr_add_c2(a,3,0,c1,c2,c3); 70055714Skris sqr_add_c2(a,2,1,c1,c2,c3); 70155714Skris r[3]=c1; 70255714Skris c1=0; 70355714Skris sqr_add_c(a,2,c2,c3,c1); 70455714Skris sqr_add_c2(a,3,1,c2,c3,c1); 70555714Skris sqr_add_c2(a,4,0,c2,c3,c1); 70655714Skris r[4]=c2; 70755714Skris c2=0; 70855714Skris sqr_add_c2(a,5,0,c3,c1,c2); 70955714Skris sqr_add_c2(a,4,1,c3,c1,c2); 71055714Skris sqr_add_c2(a,3,2,c3,c1,c2); 71155714Skris r[5]=c3; 71255714Skris c3=0; 71355714Skris sqr_add_c(a,3,c1,c2,c3); 71455714Skris sqr_add_c2(a,4,2,c1,c2,c3); 71555714Skris sqr_add_c2(a,5,1,c1,c2,c3); 71655714Skris sqr_add_c2(a,6,0,c1,c2,c3); 71755714Skris r[6]=c1; 71855714Skris 
c1=0; 71955714Skris sqr_add_c2(a,7,0,c2,c3,c1); 72055714Skris sqr_add_c2(a,6,1,c2,c3,c1); 72155714Skris sqr_add_c2(a,5,2,c2,c3,c1); 72255714Skris sqr_add_c2(a,4,3,c2,c3,c1); 72355714Skris r[7]=c2; 72455714Skris c2=0; 72555714Skris sqr_add_c(a,4,c3,c1,c2); 72655714Skris sqr_add_c2(a,5,3,c3,c1,c2); 72755714Skris sqr_add_c2(a,6,2,c3,c1,c2); 72855714Skris sqr_add_c2(a,7,1,c3,c1,c2); 72955714Skris r[8]=c3; 73055714Skris c3=0; 73155714Skris sqr_add_c2(a,7,2,c1,c2,c3); 73255714Skris sqr_add_c2(a,6,3,c1,c2,c3); 73355714Skris sqr_add_c2(a,5,4,c1,c2,c3); 73455714Skris r[9]=c1; 73555714Skris c1=0; 73655714Skris sqr_add_c(a,5,c2,c3,c1); 73755714Skris sqr_add_c2(a,6,4,c2,c3,c1); 73855714Skris sqr_add_c2(a,7,3,c2,c3,c1); 73955714Skris r[10]=c2; 74055714Skris c2=0; 74155714Skris sqr_add_c2(a,7,4,c3,c1,c2); 74255714Skris sqr_add_c2(a,6,5,c3,c1,c2); 74355714Skris r[11]=c3; 74455714Skris c3=0; 74555714Skris sqr_add_c(a,6,c1,c2,c3); 74655714Skris sqr_add_c2(a,7,5,c1,c2,c3); 74755714Skris r[12]=c1; 74855714Skris c1=0; 74955714Skris sqr_add_c2(a,7,6,c2,c3,c1); 75055714Skris r[13]=c2; 75155714Skris c2=0; 75255714Skris sqr_add_c(a,7,c3,c1,c2); 75355714Skris r[14]=c3; 75455714Skris r[15]=c1; 75555714Skris } 75655714Skris 757109998Smarkmvoid bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) 75855714Skris { 75955714Skris#ifdef BN_LLONG 76055714Skris BN_ULLONG t,tt; 76155714Skris#else 76255714Skris BN_ULONG bl,bh; 76355714Skris#endif 76455714Skris BN_ULONG t1,t2; 76555714Skris BN_ULONG c1,c2,c3; 76655714Skris 76755714Skris c1=0; 76855714Skris c2=0; 76955714Skris c3=0; 77055714Skris sqr_add_c(a,0,c1,c2,c3); 77155714Skris r[0]=c1; 77255714Skris c1=0; 77355714Skris sqr_add_c2(a,1,0,c2,c3,c1); 77455714Skris r[1]=c2; 77555714Skris c2=0; 77655714Skris sqr_add_c(a,1,c3,c1,c2); 77755714Skris sqr_add_c2(a,2,0,c3,c1,c2); 77855714Skris r[2]=c3; 77955714Skris c3=0; 78055714Skris sqr_add_c2(a,3,0,c1,c2,c3); 78155714Skris sqr_add_c2(a,2,1,c1,c2,c3); 78255714Skris r[3]=c1; 78355714Skris c1=0; 78455714Skris 
sqr_add_c(a,2,c2,c3,c1); 78555714Skris sqr_add_c2(a,3,1,c2,c3,c1); 78655714Skris r[4]=c2; 78755714Skris c2=0; 78855714Skris sqr_add_c2(a,3,2,c3,c1,c2); 78955714Skris r[5]=c3; 79055714Skris c3=0; 79155714Skris sqr_add_c(a,3,c1,c2,c3); 79255714Skris r[6]=c1; 79355714Skris r[7]=c2; 79455714Skris } 79559191Skris#else /* !BN_MUL_COMBA */ 79655714Skris 79755714Skris/* hmm... is it faster just to do a multiply? */ 79855714Skris#undef bn_sqr_comba4 79955714Skrisvoid bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) 80055714Skris { 80155714Skris BN_ULONG t[8]; 80255714Skris bn_sqr_normal(r,a,4,t); 80355714Skris } 80455714Skris 80555714Skris#undef bn_sqr_comba8 80655714Skrisvoid bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) 80755714Skris { 80855714Skris BN_ULONG t[16]; 80955714Skris bn_sqr_normal(r,a,8,t); 81055714Skris } 81155714Skris 81255714Skrisvoid bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 81355714Skris { 81455714Skris r[4]=bn_mul_words( &(r[0]),a,4,b[0]); 81555714Skris r[5]=bn_mul_add_words(&(r[1]),a,4,b[1]); 81655714Skris r[6]=bn_mul_add_words(&(r[2]),a,4,b[2]); 81755714Skris r[7]=bn_mul_add_words(&(r[3]),a,4,b[3]); 81855714Skris } 81955714Skris 82055714Skrisvoid bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 82155714Skris { 82255714Skris r[ 8]=bn_mul_words( &(r[0]),a,8,b[0]); 82355714Skris r[ 9]=bn_mul_add_words(&(r[1]),a,8,b[1]); 82455714Skris r[10]=bn_mul_add_words(&(r[2]),a,8,b[2]); 82555714Skris r[11]=bn_mul_add_words(&(r[3]),a,8,b[3]); 82655714Skris r[12]=bn_mul_add_words(&(r[4]),a,8,b[4]); 82755714Skris r[13]=bn_mul_add_words(&(r[5]),a,8,b[5]); 82855714Skris r[14]=bn_mul_add_words(&(r[6]),a,8,b[6]); 82955714Skris r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]); 83055714Skris } 83155714Skris 83259191Skris#endif /* !BN_MUL_COMBA */ 833