bn_sqr.c revision 89837
131492Swollman/* crypto/bn/bn_sqr.c */
231492Swollman/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
331492Swollman * All rights reserved.
431492Swollman *
531492Swollman * This package is an SSL implementation written
631492Swollman * by Eric Young (eay@cryptsoft.com).
731492Swollman * The implementation was written so as to conform with Netscapes SSL.
831492Swollman *
931492Swollman * This library is free for commercial and non-commercial use as long as
1031492Swollman * the following conditions are aheared to.  The following conditions
1131492Swollman * apply to all code found in this distribution, be it the RC4, RSA,
1231492Swollman * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
1331492Swollman * included with this distribution is covered by the same copyright terms
1431492Swollman * except that the holder is Tim Hudson (tjh@cryptsoft.com).
1531492Swollman *
1631492Swollman * Copyright remains Eric Young's, and as such any Copyright notices in
1731492Swollman * the code are not to be removed.
1831492Swollman * If this package is used in a product, Eric Young should be given attribution
1931492Swollman * as the author of the parts of the library used.
2031492Swollman * This can be in the form of a textual message at program startup or
2131492Swollman * in documentation (online or textual) provided with the package.
2231492Swollman *
2331492Swollman * Redistribution and use in source and binary forms, with or without
2431492Swollman * modification, are permitted provided that the following conditions
2531492Swollman * are met:
2631492Swollman * 1. Redistributions of source code must retain the copyright
2731492Swollman *    notice, this list of conditions and the following disclaimer.
2831492Swollman * 2. Redistributions in binary form must reproduce the above copyright
2931492Swollman *    notice, this list of conditions and the following disclaimer in the
3031492Swollman *    documentation and/or other materials provided with the distribution.
3131492Swollman * 3. All advertising materials mentioning features or use of this software
3231492Swollman *    must display the following acknowledgement:
3331492Swollman *    "This product includes cryptographic software written by
3431492Swollman *     Eric Young (eay@cryptsoft.com)"
3531492Swollman *    The word 'cryptographic' can be left out if the rouines from the library
3631492Swollman *    being used are not cryptographic related :-).
3731492Swollman * 4. If you include any Windows specific code (or a derivative thereof) from
3831492Swollman *    the apps directory (application code) you must include an acknowledgement:
39117590Sgad *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40117590Sgad *
41117590Sgad * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42117590Sgad * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43117590Sgad * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44117590Sgad * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45117541Sgad * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46117541Sgad * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
4731492Swollman * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
4831492Swollman * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
4931492Swollman * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
5031492Swollman * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
5131492Swollman * SUCH DAMAGE.
5231492Swollman *
5331492Swollman * The licence and distribution terms for any publically available version or
5431492Swollman * derivative of this code cannot be changed.  i.e. this code cannot simply be
5531492Swollman * copied and put under another distribution licence
5631492Swollman * [including the GNU Public Licence.]
5731492Swollman */
5831492Swollman
5931492Swollman#include <stdio.h>
6031492Swollman#include "cryptlib.h"
6131492Swollman#include "bn_lcl.h"
6231492Swollman
6378146Sgad/* r must not be a */
6431492Swollman/* I've just gone over this and it is now %20 faster on x86 - eay - 27 Jun 96 */
6578146Sgadint BN_sqr(BIGNUM *r, BIGNUM *a, BN_CTX *ctx)
6678146Sgad	{
6778146Sgad	int max,al;
6878146Sgad	int ret = 0;
6978146Sgad	BIGNUM *tmp,*rr;
7078146Sgad
7178146Sgad#ifdef BN_COUNT
7278146Sgadprintf("BN_sqr %d * %d\n",a->top,a->top);
7331492Swollman#endif
7431492Swollman	bn_check_top(a);
7531492Swollman
7631492Swollman	al=a->top;
7731492Swollman	if (al <= 0)
7831492Swollman		{
7931492Swollman		r->top=0;
8031492Swollman		return(1);
8131492Swollman		}
8231492Swollman
8331492Swollman	BN_CTX_start(ctx);
8431492Swollman	rr=(a != r) ? r : BN_CTX_get(ctx);
8531492Swollman	tmp=BN_CTX_get(ctx);
8631492Swollman	if (tmp == NULL) goto err;
8731492Swollman
8831492Swollman	max=(al+al);
8931492Swollman	if (bn_wexpand(rr,max+1) == NULL) goto err;
9031492Swollman
9131492Swollman	r->neg=0;
9231492Swollman	if (al == 4)
9331492Swollman		{
9431492Swollman#ifndef BN_SQR_COMBA
9531492Swollman		BN_ULONG t[8];
9631492Swollman		bn_sqr_normal(rr->d,a->d,4,t);
9731492Swollman#else
9831492Swollman		bn_sqr_comba4(rr->d,a->d);
9931492Swollman#endif
10031492Swollman		}
10131492Swollman	else if (al == 8)
10231492Swollman		{
10331492Swollman#ifndef BN_SQR_COMBA
10431492Swollman		BN_ULONG t[16];
10531492Swollman		bn_sqr_normal(rr->d,a->d,8,t);
10631492Swollman#else
10731492Swollman		bn_sqr_comba8(rr->d,a->d);
10831492Swollman#endif
10931492Swollman		}
11031492Swollman	else
11131492Swollman		{
11231492Swollman#if defined(BN_RECURSION)
11331492Swollman		if (al < BN_SQR_RECURSIVE_SIZE_NORMAL)
11431492Swollman			{
11531492Swollman			BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL*2];
11631492Swollman			bn_sqr_normal(rr->d,a->d,al,t);
11731492Swollman			}
11831492Swollman		else
11931492Swollman			{
12031492Swollman			int j,k;
12131492Swollman
12231492Swollman			j=BN_num_bits_word((BN_ULONG)al);
12331492Swollman			j=1<<(j-1);
12431492Swollman			k=j+j;
12531492Swollman			if (al == j)
12631492Swollman				{
12731492Swollman				if (bn_wexpand(a,k*2) == NULL) goto err;
12831492Swollman				if (bn_wexpand(tmp,k*2) == NULL) goto err;
12931492Swollman				bn_sqr_recursive(rr->d,a->d,al,tmp->d);
13031492Swollman				}
13131492Swollman			else
13231492Swollman				{
13331492Swollman				if (bn_wexpand(tmp,max) == NULL) goto err;
13431492Swollman				bn_sqr_normal(rr->d,a->d,al,tmp->d);
13531492Swollman				}
13631492Swollman			}
13731492Swollman#else
13831492Swollman		if (bn_wexpand(tmp,max) == NULL) goto err;
13931492Swollman		bn_sqr_normal(rr->d,a->d,al,tmp->d);
14031492Swollman#endif
14131492Swollman		}
14231492Swollman
14331492Swollman	rr->top=max;
14431492Swollman	if ((max > 0) && (rr->d[max-1] == 0)) rr->top--;
14531492Swollman	if (rr != r) BN_copy(r,rr);
14631492Swollman	ret = 1;
14731492Swollman err:
14831492Swollman	BN_CTX_end(ctx);
14931492Swollman	return(ret);
15031492Swollman	}
15131492Swollman
15231492Swollman/* tmp must have 2*n words */
15331492Swollmanvoid bn_sqr_normal(BN_ULONG *r, BN_ULONG *a, int n, BN_ULONG *tmp)
15431492Swollman	{
15531492Swollman	int i,j,max;
15631492Swollman	BN_ULONG *ap,*rp;
15731492Swollman
15831492Swollman	max=n*2;
15931492Swollman	ap=a;
16031492Swollman	rp=r;
16131492Swollman	rp[0]=rp[max-1]=0;
16231492Swollman	rp++;
16331492Swollman	j=n;
16431492Swollman
16531492Swollman	if (--j > 0)
16631492Swollman		{
16731492Swollman		ap++;
16831492Swollman		rp[j]=bn_mul_words(rp,ap,j,ap[-1]);
16931492Swollman		rp+=2;
17031492Swollman		}
17131492Swollman
17231492Swollman	for (i=n-2; i>0; i--)
17331492Swollman		{
17431492Swollman		j--;
17531492Swollman		ap++;
17631492Swollman		rp[j]=bn_mul_add_words(rp,ap,j,ap[-1]);
17731492Swollman		rp+=2;
17831492Swollman		}
17931492Swollman
18031492Swollman	bn_add_words(r,r,r,max);
18131492Swollman
18231492Swollman	/* There will not be a carry */
18331492Swollman
18431492Swollman	bn_sqr_words(tmp,a,n);
18531492Swollman
18631492Swollman	bn_add_words(r,r,tmp,max);
18731492Swollman	}
18831492Swollman
18931492Swollman#ifdef BN_RECURSION
19031492Swollman/* r is 2*n words in size,
19131492Swollman * a and b are both n words in size.    (There's not actually a 'b' here ...)
19231492Swollman * n must be a power of 2.
19331492Swollman * We multiply and return the result.
19431492Swollman * t must be 2*n words in size
19531492Swollman * We calculate
19631492Swollman * a[0]*b[0]
19731492Swollman * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
19831492Swollman * a[1]*b[1]
19931492Swollman */
20031492Swollmanvoid bn_sqr_recursive(BN_ULONG *r, BN_ULONG *a, int n2, BN_ULONG *t)
20131492Swollman	{
20231492Swollman	int n=n2/2;
20331492Swollman	int zero,c1;
20431492Swollman	BN_ULONG ln,lo,*p;
20531492Swollman
20631492Swollman#ifdef BN_COUNT
20731492Swollmanprintf(" bn_sqr_recursive %d * %d\n",n2,n2);
20831492Swollman#endif
20931492Swollman	if (n2 == 4)
21031492Swollman		{
21131492Swollman#ifndef BN_SQR_COMBA
21231492Swollman		bn_sqr_normal(r,a,4,t);
21331492Swollman#else
21431492Swollman		bn_sqr_comba4(r,a);
21531492Swollman#endif
21631492Swollman		return;
21778146Sgad		}
21831492Swollman	else if (n2 == 8)
21931492Swollman		{
22046110Sjkh#ifndef BN_SQR_COMBA
22146110Sjkh		bn_sqr_normal(r,a,8,t);
22231492Swollman#else
22331492Swollman		bn_sqr_comba8(r,a);
22431492Swollman#endif
22531492Swollman		return;
22631492Swollman		}
22731492Swollman	if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL)
22831492Swollman		{
22931492Swollman		bn_sqr_normal(r,a,n2,t);
23031492Swollman		return;
23131492Swollman		}
23231492Swollman	/* r=(a[0]-a[1])*(a[1]-a[0]) */
23331492Swollman	c1=bn_cmp_words(a,&(a[n]),n);
23431492Swollman	zero=0;
23531492Swollman	if (c1 > 0)
23631492Swollman		bn_sub_words(t,a,&(a[n]),n);
23731492Swollman	else if (c1 < 0)
23831492Swollman		bn_sub_words(t,&(a[n]),a,n);
23931492Swollman	else
24031492Swollman		zero=1;
24131492Swollman
24231492Swollman	/* The result will always be negative unless it is zero */
24331492Swollman	p= &(t[n2*2]);
24431492Swollman
24531492Swollman	if (!zero)
24631492Swollman		bn_sqr_recursive(&(t[n2]),t,n,p);
24731492Swollman	else
24831492Swollman		memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
24931492Swollman	bn_sqr_recursive(r,a,n,p);
25031492Swollman	bn_sqr_recursive(&(r[n2]),&(a[n]),n,p);
25131492Swollman
25231492Swollman	/* t[32] holds (a[0]-a[1])*(a[1]-a[0]), it is negative or zero
25331492Swollman	 * r[10] holds (a[0]*b[0])
25431492Swollman	 * r[32] holds (b[1]*b[1])
25531492Swollman	 */
25668253Sgad
25768253Sgad	c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
25831492Swollman
25931492Swollman	/* t[32] is negative */
26031492Swollman	c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
26131492Swollman
26295293Sgad	/* t[32] holds (a[0]-a[1])*(a[1]-a[0])+(a[0]*a[0])+(a[1]*a[1])
26331492Swollman	 * r[10] holds (a[0]*a[0])
26431492Swollman	 * r[32] holds (a[1]*a[1])
26531492Swollman	 * c1 holds the carry bits
26631492Swollman	 */
26731492Swollman	c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
26831492Swollman	if (c1)
26931492Swollman		{
27032031Swollman		p= &(r[n+n2]);
27131492Swollman		lo= *p;
27231492Swollman		ln=(lo+c1)&BN_MASK2;
27346110Sjkh		*p=ln;
27446110Sjkh
27546110Sjkh		/* The overflow will stop before we over write
27646110Sjkh		 * words we should not overwrite */
27746110Sjkh		if (ln < (BN_ULONG)c1)
27846110Sjkh			{
27946110Sjkh			do	{
28046110Sjkh				p++;
28146110Sjkh				lo= *p;
28246110Sjkh				ln=(lo+1)&BN_MASK2;
28346110Sjkh				*p=ln;
28446110Sjkh				} while (ln == 0);
28546110Sjkh			}
28646110Sjkh		}
28731492Swollman	}
28831492Swollman#endif
28931492Swollman