Change to mitigate branch prediction attacks

Submitted by: Matthew D Wood
Reviewed by: Bodo Moeller
diff --git a/CHANGES b/CHANGES
index 2081bf6..21b90ed 100644
--- a/CHANGES
+++ b/CHANGES
@@ -483,6 +483,43 @@
 
  Changes between 0.9.8e and 0.9.8f  [xx XXX xxxx]
 
+  *) Mitigate branch prediction attacks, which can be practical if a
+     single processor is shared, allowing a spy process to extract
+     information.  For detailed background information, see
+     http://eprint.iacr.org/2007/039 (O. Aciicmez, S. Gueron,
+     J.-P. Seifert, "New Branch Prediction Vulnerabilities in OpenSSL
+     and Necessary Software Countermeasures").  The core of the change
+     are new versions BN_div_no_branch() and
+     BN_mod_inverse_no_branch() of BN_div() and BN_mod_inverse(),
+     respectively, which are slower, but avoid the security-relevant
+     conditional branches.  These are automatically called by BN_div()
+     and BN_mod_inverse() if the flag BN_FLG_CONSTTIME is set for the
+     modulus.  Also, BN_is_bit_set() has been changed to remove a
+     conditional branch.
+
+     BN_FLG_CONSTTIME is the new name for the previous
+     BN_FLG_EXP_CONSTTIME flag, since it now affects more than just
+     modular exponentiation.  (Since OpenSSL 0.9.7h, setting this flag
+     in the exponent causes BN_mod_exp_mont() to use the alternative
+     implementation in BN_mod_exp_mont_consttime().)  The old name
+     remains as a deprecated alias.
+
+     Similary, RSA_FLAG_NO_EXP_CONSTTIME is replaced by a more general
+     RSA_FLAG_NO_CONSTTIME flag since the RSA implementation now uses
+     constant-time implementations for more than just exponentiation.
+     Here too the old name is kept as a deprecated alias.
+
+     BN_BLINDING_new() will now use BN_dup() for the modulus so that
+     the BN_BLINDING structure gets an independent copy of the
+     modulus.  This means that the previous "BIGNUM *m" argument to
+     BN_BLINDING_new() and to BN_BLINDING_create_param() now
+     essentially becomes "const BIGNUM *m", although we can't actually
+     change this in the header file before 0.9.9.  It allows
+     RSA_setup_blinding() to use BN_with_flags() on the modulus to
+     enable BN_FLG_CONSTTIME.
+
+     [Matthew D Wood (Intel Corp)]
+
   *) In the SSL/TLS server implementation, be strict about session ID
      context matching (which matters if an application uses a single
      external cache for different purposes).  Previously,
@@ -1702,7 +1739,7 @@
 
      BN_mod_exp_mont_consttime() is the new exponentiation implementation,
      and this is automatically used by BN_mod_exp_mont() if the new flag
-     BN_FLG_EXP_CONSTTIME is set for the exponent.  RSA, DSA, and DH
+     BN_FLG_EXP_CONSTTIME is set for the exponent.  RSA, DSA, and DH 
      will use this BN flag for private exponents unless the flag
      RSA_FLAG_NO_EXP_CONSTTIME, DSA_FLAG_NO_EXP_CONSTTIME, or
      DH_FLAG_NO_EXP_CONSTTIME, respectively, is set.
diff --git a/crypto/bn/bn.h b/crypto/bn/bn.h
index 6eb4d99..d69e37b 100644
--- a/crypto/bn/bn.h
+++ b/crypto/bn/bn.h
@@ -256,8 +256,18 @@
 
 #define BN_FLG_MALLOCED		0x01
 #define BN_FLG_STATIC_DATA	0x02
-#define BN_FLG_EXP_CONSTTIME	0x04 /* avoid leaking exponent information through timings
-                            	      * (BN_mod_exp_mont() will call BN_mod_exp_mont_consttime) */
+#define BN_FLG_CONSTTIME	0x04 /* avoid leaking exponent information through timing,
+                                      * BN_mod_exp_mont() will call BN_mod_exp_mont_consttime,
+                                      * BN_div() will call BN_div_no_branch,
+                                      * BN_mod_inverse() will call BN_mod_inverse_no_branch.
+                                      */
+
+#ifndef OPENSSL_NO_DEPRECATED
+#define BN_FLG_EXP_CONSTTIME BN_FLG_CONSTTIME /* deprecated name for the flag */
+                                      /* avoid leaking exponent information through timings
+                                      * (BN_mod_exp_mont() will call BN_mod_exp_mont_consttime) */
+#endif
+
 #ifndef OPENSSL_NO_DEPRECATED
 #define BN_FLG_FREE		0x8000	/* used for debuging */
 #endif
@@ -436,6 +446,8 @@
 
 int	BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
 	BN_CTX *ctx);
+int	BN_div_no_branch(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
+	const BIGNUM *d, BN_CTX *ctx);
 #define BN_mod(rem,m,d,ctx) BN_div(NULL,(rem),(m),(d),(ctx))
 int	BN_nnmod(BIGNUM *r, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx);
 int	BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx);
@@ -505,6 +517,8 @@
 int	BN_kronecker(const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx); /* returns -2 for error */
 BIGNUM *BN_mod_inverse(BIGNUM *ret,
 	const BIGNUM *a, const BIGNUM *n,BN_CTX *ctx);
+BIGNUM *BN_mod_inverse_no_branch(BIGNUM *ret,
+	const BIGNUM *A, const BIGNUM *n,BN_CTX *ctx);
 BIGNUM *BN_mod_sqrt(BIGNUM *ret,
 	const BIGNUM *a, const BIGNUM *n,BN_CTX *ctx);
 
diff --git a/crypto/bn/bn_blind.c b/crypto/bn/bn_blind.c
index ccecc63..e9b6173 100644
--- a/crypto/bn/bn_blind.c
+++ b/crypto/bn/bn_blind.c
@@ -153,7 +153,12 @@
 		{
 		if ((ret->Ai = BN_dup(Ai)) == NULL) goto err;
 		}
-	ret->mod = mod;
+
+	/* save a copy of mod in the BN_BLINDING structure */
+	if ((ret->mod = BN_dup(mod)) == NULL) goto err;
+	if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0)
+		BN_set_flags(ret->mod, BN_FLG_CONSTTIME);
+
 	ret->counter = BN_BLINDING_COUNTER;
 	return(ret);
 err:
@@ -169,6 +174,7 @@
 	if (r->A  != NULL) BN_free(r->A );
 	if (r->Ai != NULL) BN_free(r->Ai);
 	if (r->e  != NULL) BN_free(r->e );
+	if (r->mod != NULL) BN_free(r->mod); 
 	OPENSSL_free(r);
 	}
 
diff --git a/crypto/bn/bn_div.c b/crypto/bn/bn_div.c
index 2857f44..1fd0206 100644
--- a/crypto/bn/bn_div.c
+++ b/crypto/bn/bn_div.c
@@ -185,6 +185,11 @@
 	BN_ULONG d0,d1;
 	int num_n,div_n;
 
+	if (BN_get_flags(num, BN_FLG_CONSTTIME) != 0)
+		{
+		return BN_div_no_branch(dv, rm, num, divisor, ctx);
+		}
+
 	bn_check_top(dv);
 	bn_check_top(rm);
 	bn_check_top(num);
@@ -397,4 +402,228 @@
 	return(0);
 	}
 
+
+/* BN_div_no_branch is a special version of BN_div. It does not contain
+ * branches that may leak sensitive information.
+ */
+int BN_div_no_branch(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, 
+	const BIGNUM *divisor, BN_CTX *ctx)
+	{
+	int norm_shift,i,loop;
+	BIGNUM *tmp,wnum,*snum,*sdiv,*res;
+	BN_ULONG *resp,*wnump;
+	BN_ULONG d0,d1;
+	int num_n,div_n;
+
+	bn_check_top(dv);
+	bn_check_top(rm);
+	bn_check_top(num);
+	bn_check_top(divisor);
+
+	if (BN_is_zero(divisor))
+		{
+		BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO);
+		return(0);
+		}
+
+	BN_CTX_start(ctx);
+	tmp=BN_CTX_get(ctx);
+	snum=BN_CTX_get(ctx);
+	sdiv=BN_CTX_get(ctx);
+	if (dv == NULL)
+		res=BN_CTX_get(ctx);
+	else	res=dv;
+	if (sdiv == NULL || res == NULL) goto err;
+
+	/* First we normalise the numbers */
+	norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2);
+	if (!(BN_lshift(sdiv,divisor,norm_shift))) goto err;
+	sdiv->neg=0;
+	norm_shift+=BN_BITS2;
+	if (!(BN_lshift(snum,num,norm_shift))) goto err;
+	snum->neg=0;
+
+	/* Since we don't know whether snum is larger than sdiv,
+	 * we pad snum with enough zeroes without changing its
+	 * value. 
+	 */
+	if (snum->top <= sdiv->top+1) 
+		{
+		if (bn_wexpand(snum, sdiv->top + 2) == NULL) goto err;
+		for (i = snum->top; i < sdiv->top + 2; i++) snum->d[i] = 0;
+		snum->top = sdiv->top + 2;
+		}
+	else
+		{
+		if (bn_wexpand(snum, snum->top + 1) == NULL) goto err;
+		snum->d[snum->top] = 0;
+		snum->top ++;
+		}
+
+	div_n=sdiv->top;
+	num_n=snum->top;
+	loop=num_n-div_n;
+	/* Lets setup a 'window' into snum
+	 * This is the part that corresponds to the current
+	 * 'area' being divided */
+	wnum.neg   = 0;
+	wnum.d     = &(snum->d[loop]);
+	wnum.top   = div_n;
+	/* only needed when BN_ucmp messes up the values between top and max */
+	wnum.dmax  = snum->dmax - loop; /* so we don't step out of bounds */
+
+	/* Get the top 2 words of sdiv */
+	/* div_n=sdiv->top; */
+	d0=sdiv->d[div_n-1];
+	d1=(div_n == 1)?0:sdiv->d[div_n-2];
+
+	/* pointer to the 'top' of snum */
+	wnump= &(snum->d[num_n-1]);
+
+	/* Setup to 'res' */
+	res->neg= (num->neg^divisor->neg);
+	if (!bn_wexpand(res,(loop+1))) goto err;
+	res->top=loop-1;
+	resp= &(res->d[loop-1]);
+
+	/* space for temp */
+	if (!bn_wexpand(tmp,(div_n+1))) goto err;
+
+	/* if res->top == 0 then clear the neg value otherwise decrease
+	 * the resp pointer */
+	if (res->top == 0)
+		res->neg = 0;
+	else
+		resp--;
+
+	for (i=0; i<loop-1; i++, wnump--, resp--)
+		{
+		BN_ULONG q,l0;
+		/* the first part of the loop uses the top two words of
+		 * snum and sdiv to calculate a BN_ULONG q such that
+		 * | wnum - sdiv * q | < sdiv */
+#if defined(BN_DIV3W) && !defined(OPENSSL_NO_ASM)
+		BN_ULONG bn_div_3_words(BN_ULONG*,BN_ULONG,BN_ULONG);
+		q=bn_div_3_words(wnump,d1,d0);
+#else
+		BN_ULONG n0,n1,rem=0;
+
+		n0=wnump[0];
+		n1=wnump[-1];
+		if (n0 == d0)
+			q=BN_MASK2;
+		else 			/* n0 < d0 */
+			{
+#ifdef BN_LLONG
+			BN_ULLONG t2;
+
+#if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words)
+			q=(BN_ULONG)(((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0);
+#else
+			q=bn_div_words(n0,n1,d0);
+#ifdef BN_DEBUG_LEVITTE
+			fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
+X) -> 0x%08X\n",
+				n0, n1, d0, q);
+#endif
+#endif
+
+#ifndef REMAINDER_IS_ALREADY_CALCULATED
+			/*
+			 * rem doesn't have to be BN_ULLONG. The least we
+			 * know it's less that d0, isn't it?
+			 */
+			rem=(n1-q*d0)&BN_MASK2;
+#endif
+			t2=(BN_ULLONG)d1*q;
+
+			for (;;)
+				{
+				if (t2 <= ((((BN_ULLONG)rem)<<BN_BITS2)|wnump[-2]))
+					break;
+				q--;
+				rem += d0;
+				if (rem < d0) break; /* don't let rem overflow */
+				t2 -= d1;
+				}
+#else /* !BN_LLONG */
+			BN_ULONG t2l,t2h,ql,qh;
+
+			q=bn_div_words(n0,n1,d0);
+#ifdef BN_DEBUG_LEVITTE
+			fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
+X) -> 0x%08X\n",
+				n0, n1, d0, q);
+#endif
+#ifndef REMAINDER_IS_ALREADY_CALCULATED
+			rem=(n1-q*d0)&BN_MASK2;
+#endif
+
+#if defined(BN_UMULT_LOHI)
+			BN_UMULT_LOHI(t2l,t2h,d1,q);
+#elif defined(BN_UMULT_HIGH)
+			t2l = d1 * q;
+			t2h = BN_UMULT_HIGH(d1,q);
+#else
+			t2l=LBITS(d1); t2h=HBITS(d1);
+			ql =LBITS(q);  qh =HBITS(q);
+			mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */
+#endif
+
+			for (;;)
+				{
+				if ((t2h < rem) ||
+					((t2h == rem) && (t2l <= wnump[-2])))
+					break;
+				q--;
+				rem += d0;
+				if (rem < d0) break; /* don't let rem overflow */
+				if (t2l < d1) t2h--; t2l -= d1;
+				}
+#endif /* !BN_LLONG */
+			}
+#endif /* !BN_DIV3W */
+
+		l0=bn_mul_words(tmp->d,sdiv->d,div_n,q);
+		tmp->d[div_n]=l0;
+		wnum.d--;
+		/* ingore top values of the bignums just sub the two 
+		 * BN_ULONG arrays with bn_sub_words */
+		if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n+1))
+			{
+			/* Note: As we have considered only the leading
+			 * two BN_ULONGs in the calculation of q, sdiv * q
+			 * might be greater than wnum (but then (q-1) * sdiv
+			 * is less or equal than wnum)
+			 */
+			q--;
+			if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n))
+				/* we can't have an overflow here (assuming
+				 * that q != 0, but if q == 0 then tmp is
+				 * zero anyway) */
+				(*wnump)++;
+			}
+		/* store part of the result */
+		*resp = q;
+		}
+	bn_correct_top(snum);
+	if (rm != NULL)
+		{
+		/* Keep a copy of the neg flag in num because if rm==num
+		 * BN_rshift() will overwrite it.
+		 */
+		int neg = num->neg;
+		BN_rshift(rm,snum,norm_shift);
+		if (!BN_is_zero(rm))
+			rm->neg = neg;
+		bn_check_top(rm);
+		}
+	BN_CTX_end(ctx);
+	return(1);
+err:
+	bn_check_top(rm);
+	BN_CTX_end(ctx);
+	return(0);
+	}
+
 #endif
diff --git a/crypto/bn/bn_exp.c b/crypto/bn/bn_exp.c
index 8f8c694..70a33f0 100644
--- a/crypto/bn/bn_exp.c
+++ b/crypto/bn/bn_exp.c
@@ -122,9 +122,9 @@
 	int i,bits,ret=0;
 	BIGNUM *v,*rr;
 
-	if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
+	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
 		{
-		/* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */
+		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
 		BNerr(BN_F_BN_EXP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
 		return -1;
 		}
@@ -213,7 +213,7 @@
 	if (BN_is_odd(m))
 		{
 #  ifdef MONT_EXP_WORD
-		if (a->top == 1 && !a->neg && (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) == 0))
+		if (a->top == 1 && !a->neg && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0))
 			{
 			BN_ULONG A = a->d[0];
 			ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL);
@@ -245,9 +245,9 @@
 	BIGNUM *val[TABLE_SIZE];
 	BN_RECP_CTX recp;
 
-	if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
+	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
 		{
-		/* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */
+		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
 		BNerr(BN_F_BN_MOD_EXP_RECP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
 		return -1;
 		}
@@ -379,7 +379,7 @@
 	BIGNUM *val[TABLE_SIZE];
 	BN_MONT_CTX *mont=NULL;
 
-	if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
+	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
 		{
 		return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
 		}
@@ -745,9 +745,9 @@
 #define BN_TO_MONTGOMERY_WORD(r, w, mont) \
 		(BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
 
-	if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
+	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
 		{
-		/* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */
+		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
 		BNerr(BN_F_BN_MOD_EXP_MONT_WORD,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
 		return -1;
 		}
@@ -881,9 +881,9 @@
 	/* Table of variables obtained from 'ctx' */
 	BIGNUM *val[TABLE_SIZE];
 
-	if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
+	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
 		{
-		/* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */
+		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
 		BNerr(BN_F_BN_MOD_EXP_SIMPLE,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
 		return -1;
 		}
diff --git a/crypto/bn/bn_gcd.c b/crypto/bn/bn_gcd.c
index f02e6fc..9787a65 100644
--- a/crypto/bn/bn_gcd.c
+++ b/crypto/bn/bn_gcd.c
@@ -210,6 +210,11 @@
 	BIGNUM *ret=NULL;
 	int sign;
 
+	if (BN_get_flags(n, BN_FLG_CONSTTIME) != 0)
+		{
+		return BN_mod_inverse_no_branch(in, a, n, ctx);
+		}
+
 	bn_check_top(a);
 	bn_check_top(n);
 
@@ -491,3 +496,157 @@
 	bn_check_top(ret);
 	return(ret);
 	}
+
+
+/* BN_mod_inverse_no_branch is a special version of BN_mod_inverse. 
+ * It does not contain branches that may leak sensitive information.
+ */
+BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in,
+	const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx)
+	{
+	BIGNUM *A,*B,*X,*Y,*M,*D,*T,*R=NULL;
+	BIGNUM local_A, local_B;
+	BIGNUM *pA, *pB;
+	BIGNUM *ret=NULL;
+	int sign;
+
+	bn_check_top(a);
+	bn_check_top(n);
+
+	BN_CTX_start(ctx);
+	A = BN_CTX_get(ctx);
+	B = BN_CTX_get(ctx);
+	X = BN_CTX_get(ctx);
+	D = BN_CTX_get(ctx);
+	M = BN_CTX_get(ctx);
+	Y = BN_CTX_get(ctx);
+	T = BN_CTX_get(ctx);
+	if (T == NULL) goto err;
+
+	if (in == NULL)
+		R=BN_new();
+	else
+		R=in;
+	if (R == NULL) goto err;
+
+	BN_one(X);
+	BN_zero(Y);
+	if (BN_copy(B,a) == NULL) goto err;
+	if (BN_copy(A,n) == NULL) goto err;
+	A->neg = 0;
+
+	if (B->neg || (BN_ucmp(B, A) >= 0))
+		{
+		/* Turn BN_FLG_CONSTTIME flag on, so that when BN_div is invoked,
+	 	 * BN_div_no_branch will be called eventually.
+	 	 */
+		pB = &local_B;
+		BN_with_flags(pB, B, BN_FLG_CONSTTIME);	
+		if (!BN_nnmod(B, pB, A, ctx)) goto err;
+		}
+	sign = -1;
+	/* From  B = a mod |n|,  A = |n|  it follows that
+	 *
+	 *      0 <= B < A,
+	 *     -sign*X*a  ==  B   (mod |n|),
+	 *      sign*Y*a  ==  A   (mod |n|).
+	 */
+
+	while (!BN_is_zero(B))
+		{
+		BIGNUM *tmp;
+		
+		/*
+		 *      0 < B < A,
+		 * (*) -sign*X*a  ==  B   (mod |n|),
+		 *      sign*Y*a  ==  A   (mod |n|)
+		 */
+
+		/* Turn BN_FLG_CONSTTIME flag on, so that when BN_div is invoked,
+	 	 * BN_div_no_branch will be called eventually.
+	 	 */
+		pA = &local_A;
+		BN_with_flags(pA, A, BN_FLG_CONSTTIME);	
+		
+		/* (D, M) := (A/B, A%B) ... */		
+		if (!BN_div(D,M,pA,B,ctx)) goto err;
+		
+		/* Now
+		 *      A = D*B + M;
+		 * thus we have
+		 * (**)  sign*Y*a  ==  D*B + M   (mod |n|).
+		 */
+		
+		tmp=A; /* keep the BIGNUM object, the value does not matter */
+		
+		/* (A, B) := (B, A mod B) ... */
+		A=B;
+		B=M;
+		/* ... so we have  0 <= B < A  again */
+		
+		/* Since the former  M  is now  B  and the former  B  is now  A,
+		 * (**) translates into
+		 *       sign*Y*a  ==  D*A + B    (mod |n|),
+		 * i.e.
+		 *       sign*Y*a - D*A  ==  B    (mod |n|).
+		 * Similarly, (*) translates into
+		 *      -sign*X*a  ==  A          (mod |n|).
+		 *
+		 * Thus,
+		 *   sign*Y*a + D*sign*X*a  ==  B  (mod |n|),
+		 * i.e.
+		 *        sign*(Y + D*X)*a  ==  B  (mod |n|).
+		 *
+		 * So if we set  (X, Y, sign) := (Y + D*X, X, -sign),  we arrive back at
+		 *      -sign*X*a  ==  B   (mod |n|),
+		 *       sign*Y*a  ==  A   (mod |n|).
+		 * Note that  X  and  Y  stay non-negative all the time.
+		 */
+			
+		if (!BN_mul(tmp,D,X,ctx)) goto err;
+		if (!BN_add(tmp,tmp,Y)) goto err;
+
+		M=Y; /* keep the BIGNUM object, the value does not matter */
+		Y=X;
+		X=tmp;
+		sign = -sign;
+		}
+		
+	/*
+	 * The while loop (Euclid's algorithm) ends when
+	 *      A == gcd(a,n);
+	 * we have
+	 *       sign*Y*a  ==  A  (mod |n|),
+	 * where  Y  is non-negative.
+	 */
+
+	if (sign < 0)
+		{
+		if (!BN_sub(Y,n,Y)) goto err;
+		}
+	/* Now  Y*a  ==  A  (mod |n|).  */
+
+	if (BN_is_one(A))
+		{
+		/* Y*a == 1  (mod |n|) */
+		if (!Y->neg && BN_ucmp(Y,n) < 0)
+			{
+			if (!BN_copy(R,Y)) goto err;
+			}
+		else
+			{
+			if (!BN_nnmod(R,Y,n,ctx)) goto err;
+			}
+		}
+	else
+		{
+		BNerr(BN_F_BN_MOD_INVERSE,BN_R_NO_INVERSE);
+		goto err;
+		}
+	ret=R;
+err:
+	if ((ret == NULL) && (in == NULL)) BN_free(R);
+	BN_CTX_end(ctx);
+	bn_check_top(ret);
+	return(ret);
+	}
diff --git a/crypto/bn/bn_lib.c b/crypto/bn/bn_lib.c
index 244a430..63cdc34 100644
--- a/crypto/bn/bn_lib.c
+++ b/crypto/bn/bn_lib.c
@@ -763,7 +763,7 @@
 	i=n/BN_BITS2;
 	j=n%BN_BITS2;
 	if (a->top <= i) return 0;
-	return((a->d[i]&(((BN_ULONG)1)<<j))?1:0);
+	return(((a->d[i])>>j)&((BN_ULONG)1));
 	}
 
 int BN_mask_bits(BIGNUM *a, int n)
diff --git a/crypto/dh/dh_key.c b/crypto/dh/dh_key.c
index 37a2c1b..e7db440 100644
--- a/crypto/dh/dh_key.c
+++ b/crypto/dh/dh_key.c
@@ -150,7 +150,7 @@
 			{
 			BN_init(&local_prk);
 			prk = &local_prk;
-			BN_with_flags(prk, priv_key, BN_FLG_EXP_CONSTTIME);
+			BN_with_flags(prk, priv_key, BN_FLG_CONSTTIME);
 			}
 		else
 			prk = priv_key;
@@ -203,7 +203,7 @@
 		if ((dh->flags & DH_FLAG_NO_EXP_CONSTTIME) == 0)
 			{
 			/* XXX */
-			BN_set_flags(dh->priv_key, BN_FLG_EXP_CONSTTIME);
+			BN_set_flags(dh->priv_key, BN_FLG_CONSTTIME);
 			}
 		if (!mont)
 			goto err;
diff --git a/crypto/dsa/dsa_key.c b/crypto/dsa/dsa_key.c
index 0423f2e..c4aa86b 100644
--- a/crypto/dsa/dsa_key.c
+++ b/crypto/dsa/dsa_key.c
@@ -107,7 +107,7 @@
 			{
 			BN_init(&local_prk);
 			prk = &local_prk;
-			BN_with_flags(prk, priv_key, BN_FLG_EXP_CONSTTIME);
+			BN_with_flags(prk, priv_key, BN_FLG_CONSTTIME);
 			}
 		else
 			prk = priv_key;
diff --git a/crypto/dsa/dsa_ossl.c b/crypto/dsa/dsa_ossl.c
index 38004ec..4fead07 100644
--- a/crypto/dsa/dsa_ossl.c
+++ b/crypto/dsa/dsa_ossl.c
@@ -238,7 +238,7 @@
 	while (BN_is_zero(&k));
 	if ((dsa->flags & DSA_FLAG_NO_EXP_CONSTTIME) == 0)
 		{
-		BN_set_flags(&k, BN_FLG_EXP_CONSTTIME);
+		BN_set_flags(&k, BN_FLG_CONSTTIME);
 		}
 
 	if (dsa->flags & DSA_FLAG_CACHE_MONT_P)
diff --git a/crypto/rsa/rsa.h b/crypto/rsa/rsa.h
index 260fa43..898aaff 100644
--- a/crypto/rsa/rsa.h
+++ b/crypto/rsa/rsa.h
@@ -196,13 +196,27 @@
                                                 * default (ignoring RSA_FLAG_BLINDING),
                                                 * but other engines might not need it
                                                 */
-#define RSA_FLAG_NO_EXP_CONSTTIME	0x0100 /* new with 0.9.7h; the built-in RSA
+#define RSA_FLAG_NO_CONSTTIME		0x0100 /* new with 0.9.8f; the built-in RSA
+						* implementation now uses constant time
+						* operations by default in private key operations,
+						* e.g., constant time modular exponentiation, 
+                                                * modular inverse without leaking branches, 
+                                                * division without leaking branches. This 
+                                                * flag disables these constant time 
+                                                * operations and results in faster RSA 
+                                                * private key operations.
+                                                */ 
+#ifndef OPENSSL_NO_DEPRECATED
+#define RSA_FLAG_NO_EXP_CONSTTIME RSA_FLAG_NO_CONSTTIME /* deprecated name for the flag*/
+                                                /* new with 0.9.7h; the built-in RSA
                                                 * implementation now uses constant time
                                                 * modular exponentiation for secret exponents
                                                 * by default. This flag causes the
                                                 * faster variable sliding window method to
                                                 * be used for all exponents.
                                                 */
+#endif
+
 
 #define EVP_PKEY_CTX_set_rsa_padding(ctx, pad) \
 	EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, -1, EVP_PKEY_CTRL_RSA_PADDING, \
diff --git a/crypto/rsa/rsa_eay.c b/crypto/rsa/rsa_eay.c
index 385439c..15ebf5d 100644
--- a/crypto/rsa/rsa_eay.c
+++ b/crypto/rsa/rsa_eay.c
@@ -429,11 +429,11 @@
 		BIGNUM local_d;
 		BIGNUM *d = NULL;
 		
-		if (!(rsa->flags & RSA_FLAG_NO_EXP_CONSTTIME))
+		if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
 			{
 			BN_init(&local_d);
 			d = &local_d;
-			BN_with_flags(d, rsa->d, BN_FLG_EXP_CONSTTIME);
+			BN_with_flags(d, rsa->d, BN_FLG_CONSTTIME);
 			}
 		else
 			d = rsa->d;
@@ -551,10 +551,10 @@
 		BIGNUM local_d;
 		BIGNUM *d = NULL;
 		
-		if (!(rsa->flags & RSA_FLAG_NO_EXP_CONSTTIME))
+		if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
 			{
 			d = &local_d;
-			BN_with_flags(d, rsa->d, BN_FLG_EXP_CONSTTIME);
+			BN_with_flags(d, rsa->d, BN_FLG_CONSTTIME);
 			}
 		else
 			d = rsa->d;
@@ -715,8 +715,9 @@
 static int RSA_eay_mod_exp(BIGNUM *r0, const BIGNUM *I, RSA *rsa, BN_CTX *ctx)
 	{
 	BIGNUM *r1,*m1,*vrfy;
-	BIGNUM local_dmp1, local_dmq1;
-	BIGNUM *dmp1, *dmq1;
+	BIGNUM local_dmp1,local_dmq1,local_c,local_r1;
+	BIGNUM *dmp1,*dmq1,*c,*pr1;
+	int bn_flags;
 	int ret=0;
 
 	BN_CTX_start(ctx);
@@ -724,26 +725,72 @@
 	m1 = BN_CTX_get(ctx);
 	vrfy = BN_CTX_get(ctx);
 
+	/* Make sure mod_inverse in montgomerey intialization use correct 
+	 * BN_FLG_CONSTTIME flag.
+	 */
+	bn_flags = rsa->p->flags;
+	if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
+		{
+		rsa->p->flags |= BN_FLG_CONSTTIME;
+		}
 	MONT_HELPER(rsa, ctx, p, rsa->flags & RSA_FLAG_CACHE_PRIVATE, goto err);
+	/* We restore bn_flags back */
+	rsa->p->flags = bn_flags;
+
+        /* Make sure mod_inverse in montgomerey intialization use correct
+         * BN_FLG_CONSTTIME flag.
+         */
+	bn_flags = rsa->q->flags;
+	if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
+		{
+		rsa->q->flags |= BN_FLG_CONSTTIME;
+		}
 	MONT_HELPER(rsa, ctx, q, rsa->flags & RSA_FLAG_CACHE_PRIVATE, goto err);
+	/* We restore bn_flags back */
+	rsa->q->flags = bn_flags;	
+
 	MONT_HELPER(rsa, ctx, n, rsa->flags & RSA_FLAG_CACHE_PUBLIC, goto err);
 
-	if (!BN_mod(r1,I,rsa->q,ctx)) goto err;
-	if (!(rsa->flags & RSA_FLAG_NO_EXP_CONSTTIME))
+	/* compute I mod q */
+	if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
+		{
+		c = &local_c;
+		BN_with_flags(c, I, BN_FLG_CONSTTIME);
+		if (!BN_mod(r1,c,rsa->q,ctx)) goto err;
+		}
+	else
+		{
+		if (!BN_mod(r1,I,rsa->q,ctx)) goto err;
+		}
+
+	/* compute r1^dmq1 mod q */
+	if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
 		{
 		dmq1 = &local_dmq1;
-		BN_with_flags(dmq1, rsa->dmq1, BN_FLG_EXP_CONSTTIME);
+		BN_with_flags(dmq1, rsa->dmq1, BN_FLG_CONSTTIME);
 		}
 	else
 		dmq1 = rsa->dmq1;
 	if (!rsa->meth->bn_mod_exp(m1,r1,dmq1,rsa->q,ctx,
 		rsa->_method_mod_q)) goto err;
 
-	if (!BN_mod(r1,I,rsa->p,ctx)) goto err;
-	if (!(rsa->flags & RSA_FLAG_NO_EXP_CONSTTIME))
+	/* compute I mod p */
+	if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
+		{
+		c = &local_c;
+		BN_with_flags(c, I, BN_FLG_CONSTTIME);
+		if (!BN_mod(r1,c,rsa->p,ctx)) goto err;
+		}
+	else
+		{
+		if (!BN_mod(r1,I,rsa->p,ctx)) goto err;
+		}
+
+	/* compute r1^dmp1 mod p */
+	if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
 		{
 		dmp1 = &local_dmp1;
-		BN_with_flags(dmp1, rsa->dmp1, BN_FLG_EXP_CONSTTIME);
+		BN_with_flags(dmp1, rsa->dmp1, BN_FLG_CONSTTIME);
 		}
 	else
 		dmp1 = rsa->dmp1;
@@ -757,7 +804,17 @@
 		if (!BN_add(r0,r0,rsa->p)) goto err;
 
 	if (!BN_mul(r1,r0,rsa->iqmp,ctx)) goto err;
-	if (!BN_mod(r0,r1,rsa->p,ctx)) goto err;
+
+	/* Turn BN_FLG_CONSTTIME flag on before division operation */
+	if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
+		{
+		pr1 = &local_r1;
+		BN_with_flags(pr1, r1, BN_FLG_CONSTTIME);
+		}
+	else
+		pr1 = r1;
+	if (!BN_mod(r0,pr1,rsa->p,ctx)) goto err;
+
 	/* If p < q it is occasionally possible for the correction of
          * adding 'p' if r0 is negative above to leave the result still
 	 * negative. This can break the private key operations: the following
@@ -790,10 +847,10 @@
 			BIGNUM local_d;
 			BIGNUM *d = NULL;
 		
-			if (!(rsa->flags & RSA_FLAG_NO_EXP_CONSTTIME))
+			if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
 				{
 				d = &local_d;
-				BN_with_flags(d, rsa->d, BN_FLG_EXP_CONSTTIME);
+				BN_with_flags(d, rsa->d, BN_FLG_CONSTTIME);
 				}
 			else
 				d = rsa->d;
diff --git a/crypto/rsa/rsa_gen.c b/crypto/rsa/rsa_gen.c
index 742f8b1..767f7ab 100644
--- a/crypto/rsa/rsa_gen.c
+++ b/crypto/rsa/rsa_gen.c
@@ -85,6 +85,8 @@
 static int rsa_builtin_keygen(RSA *rsa, int bits, BIGNUM *e_value, BN_GENCB *cb)
 	{
 	BIGNUM *r0=NULL,*r1=NULL,*r2=NULL,*r3=NULL,*tmp;
+	BIGNUM local_r0,local_d,local_p;
+	BIGNUM *pr0,*d,*p;
 	int bitsp,bitsq,ok= -1,n=0;
 	BN_CTX *ctx=NULL;
 
@@ -165,16 +167,39 @@
 	if (!BN_sub(r1,rsa->p,BN_value_one())) goto err;	/* p-1 */
 	if (!BN_sub(r2,rsa->q,BN_value_one())) goto err;	/* q-1 */
 	if (!BN_mul(r0,r1,r2,ctx)) goto err;	/* (p-1)(q-1) */
-	if (!BN_mod_inverse(rsa->d,rsa->e,r0,ctx)) goto err;	/* d */
+	if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
+		{
+		  pr0 = &local_r0;
+		  BN_with_flags(pr0, r0, BN_FLG_CONSTTIME);
+		}
+	else
+	  pr0 = r0;
+	if (!BN_mod_inverse(rsa->d,rsa->e,pr0,ctx)) goto err;	/* d */
+
+	/* set up d for correct BN_FLG_CONSTTIME flag */
+	if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
+		{
+		d = &local_d;
+		BN_with_flags(d, rsa->d, BN_FLG_CONSTTIME);
+		}
+	else
+		d = rsa->d;
 
 	/* calculate d mod (p-1) */
-	if (!BN_mod(rsa->dmp1,rsa->d,r1,ctx)) goto err;
+	if (!BN_mod(rsa->dmp1,d,r1,ctx)) goto err;
 
 	/* calculate d mod (q-1) */
-	if (!BN_mod(rsa->dmq1,rsa->d,r2,ctx)) goto err;
+	if (!BN_mod(rsa->dmq1,d,r2,ctx)) goto err;
 
 	/* calculate inverse of q mod p */
-	if (!BN_mod_inverse(rsa->iqmp,rsa->q,rsa->p,ctx)) goto err;
+	if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
+		{
+		p = &local_p;
+		BN_with_flags(p, rsa->p, BN_FLG_CONSTTIME);
+		}
+	else
+		p = rsa->p;
+	if (!BN_mod_inverse(rsa->iqmp,rsa->q,p,ctx)) goto err;
 
 	ok=1;
 err:
diff --git a/crypto/rsa/rsa_lib.c b/crypto/rsa/rsa_lib.c
index c4b5b47..dd09609 100644
--- a/crypto/rsa/rsa_lib.c
+++ b/crypto/rsa/rsa_lib.c
@@ -361,7 +361,8 @@
 
 BN_BLINDING *RSA_setup_blinding(RSA *rsa, BN_CTX *in_ctx)
 {
-	BIGNUM *e;
+	BIGNUM local_n;
+	BIGNUM *e,*n;
 	BN_CTX *ctx;
 	BN_BLINDING *ret = NULL;
 
@@ -400,7 +401,16 @@
 		RAND_add(rsa->d->d, rsa->d->dmax * sizeof rsa->d->d[0], 0.0);
 		}
 
-	ret = BN_BLINDING_create_param(NULL, e, rsa->n, ctx,
+	if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
+		{
+		/* Set BN_FLG_CONSTTIME flag */
+		n = &local_n;
+		BN_with_flags(n, rsa->n, BN_FLG_CONSTTIME);
+		}
+	else
+		n = rsa->n;
+
+	ret = BN_BLINDING_create_param(NULL, e, n, ctx,
 			rsa->meth->bn_mod_exp, rsa->_method_mod_n);
 	if (ret == NULL)
 		{
diff --git a/crypto/rsa/rsa_test.c b/crypto/rsa/rsa_test.c
index 0f8059c..51135ea 100644
--- a/crypto/rsa/rsa_test.c
+++ b/crypto/rsa/rsa_test.c
@@ -242,7 +242,7 @@
 	clen = key3(key, ctext_ex);
 	break;
 	}
-	if (v/3 > 1) key->flags |= RSA_FLAG_NO_EXP_CONSTTIME;
+	if (v/3 >= 1) key->flags |= RSA_FLAG_NO_CONSTTIME;
 
 	num = RSA_public_encrypt(plen, ptext_ex, ctext, key,
 				 RSA_PKCS1_PADDING);
diff --git a/util/libeay.num b/util/libeay.num
index 4040ab5..534d264 100755
--- a/util/libeay.num
+++ b/util/libeay.num
@@ -3909,3 +3909,5 @@
 BIO_asn1_set_suffix                     4290	EXIST::FUNCTION:
 BIO_asn1_get_suffix                     4291	EXIST::FUNCTION:
 PKCS7_stream                            4292	EXIST::FUNCTION:
+BN_div_no_branch                        4293	EXIST::FUNCTION:
+BN_mod_inverse_no_branch                4294	EXIST::FUNCTION: