Fixes to BN code.  Previously the default was to define BN_RECURSION
but the BN code had some problems that would cause failures when
doing certificate verification and some other functions.

Submitted by: Eric A Young from a C2Net version of SSLeay
Reviewed by: Mark J Cox
PR:
diff --git a/CHANGES b/CHANGES
index 40e3b9d..2f2b3b1 100644
--- a/CHANGES
+++ b/CHANGES
@@ -5,6 +5,11 @@
 
  Changes between 0.9.1c and 0.9.2
 
+  *) Fixes to BN code.  Previously the default was to define BN_RECURSION
+     but the BN code had some problems that would cause failures when
+     doing certificate verification and some other functions.
+     [Eric A. Young, (from changes to C2Net SSLeay, integrated by Mark Cox)]
+
   *) Add ASN1 and PEM code to support netscape certificate sequences.
      [Steve Henson]
 
diff --git a/crypto/bn/bn.org b/crypto/bn/bn.org
index 90b438e..d8904d7 100644
--- a/crypto/bn/bn.org
+++ b/crypto/bn/bn.org
@@ -566,20 +566,18 @@
 #define BN_F_BN_BLINDING_INVERT				 101
 #define BN_F_BN_BLINDING_NEW				 102
 #define BN_F_BN_BLINDING_UPDATE				 103
-#define BN_F_BN_BL_CTX_INIT				 104
-#define BN_F_BN_BL_CTX_NEW				 105
-#define BN_F_BN_BN2DEC					 106
-#define BN_F_BN_BN2HEX					 107
-#define BN_F_BN_CTX_NEW					 108
-#define BN_F_BN_DIV					 109
-#define BN_F_BN_EXPAND2					 110
-#define BN_F_BN_MOD_EXP_MONT				 111
-#define BN_F_BN_MOD_INVERSE				 112
-#define BN_F_BN_MOD_MUL_RECIPROCAL			 113
-#define BN_F_BN_MPI2BN					 114
-#define BN_F_BN_NEW					 115
-#define BN_F_BN_RAND					 116
-#define BN_F_BN_USUB					 117
+#define BN_F_BN_BN2DEC					 104
+#define BN_F_BN_BN2HEX					 105
+#define BN_F_BN_CTX_NEW					 106
+#define BN_F_BN_DIV					 107
+#define BN_F_BN_EXPAND2					 108
+#define BN_F_BN_MOD_EXP_MONT				 109
+#define BN_F_BN_MOD_INVERSE				 110
+#define BN_F_BN_MOD_MUL_RECIPROCAL			 111
+#define BN_F_BN_MPI2BN					 112
+#define BN_F_BN_NEW					 113
+#define BN_F_BN_RAND					 114
+#define BN_F_BN_USUB					 115
 
 /* Reason codes. */
 #define BN_R_ARG2_LT_ARG3				 100
diff --git a/crypto/bn/bn_add.c b/crypto/bn/bn_add.c
index 1279722..caea96f 100644
--- a/crypto/bn/bn_add.c
+++ b/crypto/bn/bn_add.c
@@ -176,9 +176,6 @@
 BIGNUM *b;
 	{
 	int max,min;
-#if 0
-	int ret=1;
-#endif
 	register BN_ULONG t1,t2,*ap,*bp,*rp;
 	int i,carry;
 #if defined(IRIX_CC_BUG) && !defined(LINT)
diff --git a/crypto/bn/bn_asm.c b/crypto/bn/bn_asm.c
index c9eb0e9..cdf20ef 100644
--- a/crypto/bn/bn_asm.c
+++ b/crypto/bn/bn_asm.c
@@ -447,7 +447,7 @@
 	return(c);
 	}
 
-#ifdef BN_COMBA
+#ifdef BN_MUL_COMBA
 
 #undef bn_mul_comba8
 #undef bn_mul_comba4
@@ -790,6 +790,7 @@
 #else
 
 /* hmm... is it faster just to do a multiply? */
+#undef bn_sqr_comba4
 void bn_sqr_comba4(r,a)
 BN_ULONG *r,*a;
 	{
@@ -797,6 +798,7 @@
 	bn_sqr_normal(r,a,4,t);
 	}
 
+#undef bn_sqr_comba8
 void bn_sqr_comba8(r,a)
 BN_ULONG *r,*a;
 	{
diff --git a/crypto/bn/bn_err.c b/crypto/bn/bn_err.c
index 773d0ef..4c29c1a 100644
--- a/crypto/bn/bn_err.c
+++ b/crypto/bn/bn_err.c
@@ -67,8 +67,6 @@
 {ERR_PACK(0,BN_F_BN_BLINDING_INVERT,0),	"BN_BLINDING_invert"},
 {ERR_PACK(0,BN_F_BN_BLINDING_NEW,0),	"BN_BLINDING_new"},
 {ERR_PACK(0,BN_F_BN_BLINDING_UPDATE,0),	"BN_BLINDING_update"},
-{ERR_PACK(0,BN_F_BN_BL_CTX_INIT,0),	"BN_BL_CTX_INIT"},
-{ERR_PACK(0,BN_F_BN_BL_CTX_NEW,0),	"BN_BL_CTX_NEW"},
 {ERR_PACK(0,BN_F_BN_BN2DEC,0),	"BN_bn2dec"},
 {ERR_PACK(0,BN_F_BN_BN2HEX,0),	"BN_bn2hex"},
 {ERR_PACK(0,BN_F_BN_CTX_NEW,0),	"BN_CTX_new"},
diff --git a/crypto/bn/bn_exp.c b/crypto/bn/bn_exp.c
index 44f47e7..cc45282 100644
--- a/crypto/bn/bn_exp.c
+++ b/crypto/bn/bn_exp.c
@@ -106,7 +106,7 @@
 
 	if (BN_is_odd(p))
 		{ if (BN_copy(r,a) == NULL) goto err; }
-	else	{ if (BN_one(r)) goto err; }
+	else	{ if (!BN_one(r)) goto err; }
 
 	for (i=1; i<bits; i++)
 		{
@@ -131,30 +131,35 @@
 BIGNUM *r,*a,*p;
 BN_CTX *ctx;
 	{
-	int i,bits,ret=0;
-	BIGNUM *v,*tmp;
+	int i,bits,ret=0,tos;
+	BIGNUM *v,*rr;
 
+	tos=ctx->tos;
 	v= &(ctx->bn[ctx->tos++]);
-	tmp= &(ctx->bn[ctx->tos++]);
+	if ((r == a) || (r == p))
+		rr= &(ctx->bn[ctx->tos++]);
+	else
+		rr=r;
 
 	if (BN_copy(v,a) == NULL) goto err;
 	bits=BN_num_bits(p);
 
 	if (BN_is_odd(p))
-		{ if (BN_copy(r,a) == NULL) goto err; }
-	else	{ if (BN_one(r)) goto err; }
+		{ if (BN_copy(rr,a) == NULL) goto err; }
+	else	{ if (!BN_one(rr)) goto err; }
 
 	for (i=1; i<bits; i++)
 		{
-		if (!BN_sqr(tmp,v,ctx)) goto err;
+		if (!BN_sqr(v,v,ctx)) goto err;
 		if (BN_is_bit_set(p,i))
 			{
-			if (!BN_mul(tmp,r,v,ctx)) goto err;
+			if (!BN_mul(rr,rr,v,ctx)) goto err;
 			}
 		}
 	ret=1;
 err:
-	ctx->tos-=2;
+	ctx->tos=tos;
+	if (r != rr) BN_copy(r,rr);
 	return(ret);
 	}
 
diff --git a/crypto/bn/bn_lcl.h b/crypto/bn/bn_lcl.h
index 8899943..306dd67 100644
--- a/crypto/bn/bn_lcl.h
+++ b/crypto/bn/bn_lcl.h
@@ -73,13 +73,18 @@
 #define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL	(32) /* 32 */
 #define BN_MONT_CTX_SET_SIZE_WORD		(64) /* 32 */
 
+#if 0
 #ifndef BN_MUL_COMBA
-#define bn_mul_comba8(r,a,b)	bn_mul_normal(r,a,8,b,8)
-#define bn_mul_comba4(r,a,b)	bn_mul_normal(r,a,4,b,4)
+/* #define bn_mul_comba8(r,a,b)	bn_mul_normal(r,a,8,b,8) */
+/* #define bn_mul_comba4(r,a,b)	bn_mul_normal(r,a,4,b,4) */
+#endif
+
+#ifndef BN_SQR_COMBA
 /* This is probably faster than using the C code - I need to check */
 #define bn_sqr_comba8(r,a)	bn_mul_normal(r,a,8,a,8)
 #define bn_sqr_comba4(r,a)	bn_mul_normal(r,a,4,a,4)
 #endif
+#endif
 
 /*************************************************************
  * Using the long long type
diff --git a/crypto/bn/bn_mul.c b/crypto/bn/bn_mul.c
index 83ff39a..0af6e96 100644
--- a/crypto/bn/bn_mul.c
+++ b/crypto/bn/bn_mul.c
@@ -390,7 +390,7 @@
 #ifdef BN_COUNT
 printf(" bn_mul_high %d * %d\n",n2,n2);
 #endif
-	n=(n2+1)/2;
+	n=n2/2;
 
 	/* Calculate (al-ah)*(bh-bl) */
 	neg=zero=0;
@@ -569,11 +569,12 @@
 BIGNUM *r,*a,*b;
 BN_CTX *ctx;
 	{
-	int top,i,j,k,al,bl;
+	int top,al,bl;
+	BIGNUM *rr;
+#ifdef BN_RECURSION
 	BIGNUM *t;
-
-	t=NULL;
-	i=j=k=0;
+	int i,j,k;
+#endif
 
 #ifdef BN_COUNT
 printf("BN_mul %d * %d\n",a->top,b->top);
@@ -593,22 +594,28 @@
 		return(1);
 		}
 	top=al+bl;
+
+	if ((r == a) || (r == b))
+		rr= &(ctx->bn[ctx->tos+1]);
+	else
+		rr=r;
+
 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
 	if (al == bl)
 		{
 #  ifdef BN_MUL_COMBA
 /*		if (al == 4)
 			{
-			if (bn_wexpand(r,8) == NULL) return(0);
+			if (bn_wexpand(rr,8) == NULL) return(0);
 			r->top=8;
-			bn_mul_comba4(r->d,a->d,b->d);
+			bn_mul_comba4(rr->d,a->d,b->d);
 			goto end;
 			}
 		else */ if (al == 8)
 			{
-			if (bn_wexpand(r,16) == NULL) return(0);
+			if (bn_wexpand(rr,16) == NULL) return(0);
 			r->top=16;
-			bn_mul_comba8(r->d,a->d,b->d);
+			bn_mul_comba8(rr->d,a->d,b->d);
 			goto end;
 			}
 		else
@@ -617,9 +624,9 @@
 		if (al < BN_MULL_SIZE_NORMAL)
 #endif
 			{
-			if (bn_wexpand(r,top) == NULL) return(0);
-			r->top=top;
-			bn_mul_normal(r->d,a->d,al,b->d,bl);
+			if (bn_wexpand(rr,top) == NULL) return(0);
+			rr->top=top;
+			bn_mul_normal(rr->d,a->d,al,b->d,bl);
 			goto end;
 			}
 #  ifdef BN_RECURSION
@@ -630,9 +637,9 @@
 #ifdef BN_RECURSION
 	else if ((al < BN_MULL_SIZE_NORMAL) || (bl < BN_MULL_SIZE_NORMAL))
 		{
-		if (bn_wexpand(r,top) == NULL) return(0);
-		r->top=top;
-		bn_mul_normal(r->d,a->d,al,b->d,bl);
+		if (bn_wexpand(rr,top) == NULL) return(0);
+		rr->top=top;
+		bn_mul_normal(rr->d,a->d,al,b->d,bl);
 		goto end;
 		}
 	else
@@ -656,9 +663,9 @@
 #endif
 
 	/* asymetric and >= 4 */ 
-	if (bn_wexpand(r,top) == NULL) return(0);
-	r->top=top;
-	bn_mul_normal(r->d,a->d,al,b->d,bl);
+	if (bn_wexpand(rr,top) == NULL) return(0);
+	rr->top=top;
+	bn_mul_normal(rr->d,a->d,al,b->d,bl);
 
 #ifdef BN_RECURSION
 	if (0)
@@ -673,26 +680,29 @@
 		if (al == j) /* exact multiple */
 			{
 			bn_wexpand(t,k*2);
-			bn_wexpand(r,k*2);
-			bn_mul_recursive(r->d,a->d,b->d,al,t->d);
+			bn_wexpand(rr,k*2);
+			bn_mul_recursive(rr->d,a->d,b->d,al,t->d);
 			}
 		else
 			{
 			bn_wexpand(a,k);
 			bn_wexpand(b,k);
 			bn_wexpand(t,k*4);
-			bn_wexpand(r,k*4);
+			bn_wexpand(rr,k*4);
 			for (i=a->top; i<k; i++)
 				a->d[i]=0;
 			for (i=b->top; i<k; i++)
 				b->d[i]=0;
-			bn_mul_part_recursive(r->d,a->d,b->d,al-j,j,t->d);
+			bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d);
 			}
-		r->top=top;
+		rr->top=top;
 		}
 #endif
+#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
 end:
-	bn_fix_top(r);
+#endif
+	bn_fix_top(rr);
+	if (r != rr) BN_copy(r,rr);
 	return(1);
 	}
 
diff --git a/crypto/bn/bn_sqr.c b/crypto/bn/bn_sqr.c
index 3166e6c..bcd9c3b 100644
--- a/crypto/bn/bn_sqr.c
+++ b/crypto/bn/bn_sqr.c
@@ -68,13 +68,14 @@
 BN_CTX *ctx;
 	{
 	int max,al;
-	BIGNUM *tmp;
+	BIGNUM *tmp,*rr;
 
 #ifdef BN_COUNT
 printf("BN_sqr %d * %d\n",a->top,a->top);
 #endif
 	bn_check_top(a);
 	tmp= &(ctx->bn[ctx->tos]);
+	rr=(a != r)?r: (&ctx->bn[ctx->tos+1]);
 
 	al=a->top;
 	if (al <= 0)
@@ -84,25 +85,25 @@
 		}
 
 	max=(al+al);
-	if (bn_wexpand(r,max+1) == NULL) return(0);
+	if (bn_wexpand(rr,max+1) == NULL) return(0);
 
 	r->neg=0;
 	if (al == 4)
 		{
 #ifndef BN_SQR_COMBA
 		BN_ULONG t[8];
-		bn_sqr_normal(r->d,a->d,4,t);
+		bn_sqr_normal(rr->d,a->d,4,t);
 #else
-		bn_sqr_comba4(r->d,a->d);
+		bn_sqr_comba4(rr->d,a->d);
 #endif
 		}
 	else if (al == 8)
 		{
 #ifndef BN_SQR_COMBA
 		BN_ULONG t[16];
-		bn_sqr_normal(r->d,a->d,8,t);
+		bn_sqr_normal(rr->d,a->d,8,t);
 #else
-		bn_sqr_comba8(r->d,a->d);
+		bn_sqr_comba8(rr->d,a->d);
 #endif
 		}
 	else 
@@ -111,21 +112,36 @@
 		if (al < BN_SQR_RECURSIVE_SIZE_NORMAL)
 			{
 			BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL*2];
-			bn_sqr_normal(r->d,a->d,al,t);
+			bn_sqr_normal(rr->d,a->d,al,t);
 			}
 		else
 			{
-			if (bn_wexpand(tmp,2*max+1) == NULL) return(0);
-			bn_sqr_recursive(r->d,a->d,al,tmp->d);
+			int j,k;
+
+			j=BN_num_bits_word((BN_ULONG)al);
+			j=1<<(j-1);
+			k=j+j;
+			if (al == j)
+				{
+				if (bn_wexpand(a,k*2) == NULL) return(0);
+				if (bn_wexpand(tmp,k*2) == NULL) return(0);
+				bn_sqr_recursive(rr->d,a->d,al,tmp->d);
+				}
+			else
+				{
+				if (bn_wexpand(tmp,max) == NULL) return(0);
+				bn_sqr_normal(rr->d,a->d,al,tmp->d);
+				}
 			}
 #else
 		if (bn_wexpand(tmp,max) == NULL) return(0);
-		bn_sqr_normal(r->d,a->d,al,tmp->d);
+		bn_sqr_normal(rr->d,a->d,al,tmp->d);
 #endif
 		}
 
-	r->top=max;
-	if ((max > 0) && (r->d[max-1] == 0)) r->top--;
+	rr->top=max;
+	if ((max > 0) && (rr->d[max-1] == 0)) rr->top--;
+	if (rr != r) BN_copy(r,rr);
 	return(1);
 	}
 
diff --git a/crypto/bn/bntest.c b/crypto/bn/bntest.c
index ec48bad..e84cf25 100644
--- a/crypto/bn/bntest.c
+++ b/crypto/bn/bntest.c
@@ -85,6 +85,7 @@
 int test_mod(BIO *bp,BN_CTX *ctx);
 int test_mod_mul(BIO *bp,BN_CTX *ctx);
 int test_mod_exp(BIO *bp,BN_CTX *ctx);
+int test_exp(BIO *bp,BN_CTX *ctx);
 int rand_neg(void);
 #else
 int test_add ();
@@ -100,6 +101,7 @@
 int test_mod ();
 int test_mod_mul ();
 int test_mod_exp ();
+int test_exp ();
 int rand_neg();
 #endif
 
@@ -214,6 +216,10 @@
 	if (!test_mod_exp(out,ctx)) goto err;
 	fflush(stdout);
 
+	fprintf(stderr,"test BN_exp\n");
+	if (!test_exp(out,ctx)) goto err;
+	fflush(stdout);
+
 /**/
 	exit(0);
 err:
@@ -699,6 +705,46 @@
 	return(1);
 	}
 
+int test_exp(bp,ctx)
+BIO *bp;
+BN_CTX *ctx;
+	{
+	BIGNUM *a,*b,*d,*e;
+	int i;
+
+	a=BN_new();
+	b=BN_new();
+	d=BN_new();
+	e=BN_new();
+
+	for (i=0; i<6; i++)
+		{
+		BN_rand(a,20+i*5,0,0); /**/
+		BN_rand(b,2+i,0,0); /**/
+
+		if (!BN_exp(d,a,b,ctx))
+			return(00);
+
+		if (bp != NULL)
+			{
+			if (!results)
+				{
+				BN_print(bp,a);
+				BIO_puts(bp," ^ ");
+				BN_print(bp,b);
+				BIO_puts(bp," - ");
+				}
+			BN_print(bp,d);
+			BIO_puts(bp,"\n");
+			}
+		}
+	BN_free(a);
+	BN_free(b);
+	BN_free(d);
+	BN_free(e);
+	return(1);
+	}
+
 int test_lshift(bp)
 BIO *bp;
 	{
diff --git a/crypto/bn/exp.c b/crypto/bn/exp.c
index 2427116..6a24fee 100644
--- a/crypto/bn/exp.c
+++ b/crypto/bn/exp.c
@@ -48,7 +48,7 @@
 			BN_mod_exp_mont(&r,&a,&b,&c,&ctx,&mont);
 			}
 		ms_time_get(end);
-		d=ms_time_diff(start,end) *50/33 /**/;
+		d=ms_time_diff(start,end)/* *50/33 /**/;
 		printf("%5d bit:%6.2f %6d %6.4f %4d m_set(%5.4f)\n",size,
 			d,num,d/num,(int)((d/num)*mod),md/10.0);
 		num/=8;