Move reduction step from BN_mod_exp to BN_mod_exp_mont_word.
Fix BN_mod_exp_simple for a==0 (mod m).
Skip useless round in BN_mod_sqrt (1 is always a square, no need
to test BN_kronecker for it).
diff --git a/crypto/bn/bn_exp.c b/crypto/bn/bn_exp.c
index f7e7ced..1739be6 100644
--- a/crypto/bn/bn_exp.c
+++ b/crypto/bn/bn_exp.c
@@ -191,6 +191,7 @@
 	 */
 
 #define MONT_MUL_MOD
+#define MONT_EXP_WORD
 #define RECP_MUL_MOD
 
 #ifdef MONT_MUL_MOD
@@ -202,14 +203,14 @@
 
 	if (BN_is_odd(m))
 		{
+#  ifdef MONT_EXP_WORD
 		if (a->top == 1 && !a->neg)
 			{
 			BN_ULONG A = a->d[0];
-			if (m->top == 1)
-				A %= m->d[0]; /* make sure that A is reduced */
 			ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL);
 			}
 		else
+#  endif
 			ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL);
 		}
 	else
@@ -505,11 +506,14 @@
 	bn_check_top(p);
 	bn_check_top(m);
 
-	if (!(m->d[0] & 1))
+	if (m->top == 0 || !(m->d[0] & 1))
 		{
 		BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS);
 		return(0);
 		}
+	if (m->top == 1)
+		a %= m->d[0]; /* make sure that 'a' is reduced */
+
 	bits = BN_num_bits(p);
 	if (bits == 0)
 		{
@@ -642,8 +646,8 @@
 	if (!BN_nnmod(&(val[0]),a,m,ctx)) goto err;		/* 1 */
 	if (BN_is_zero(&(val[0])))
 		{
-		ret = BN_one(r);
-		return ret;
+		ret = BN_zero(r);
+		goto err;
 		}
 
 	window = BN_window_bits_for_exponent_size(bits);
diff --git a/crypto/bn/bn_sqrt.c b/crypto/bn/bn_sqrt.c
index 5176772..2a72c18 100644
--- a/crypto/bn/bn_sqrt.c
+++ b/crypto/bn/bn_sqrt.c
@@ -140,13 +140,13 @@
 	
 	/* e > 1, so we really have to use the Tonelli/Shanks algorithm.
 	 * First, find some  y  that is not a square. */
-	i = 1;
+	i = 2;
 	do
 		{
 		/* For efficiency, try small numbers first;
 		 * if this fails, try random numbers.
 		 */
-		if (i < 20)
+		if (i < 22)
 			{
 			if (!BN_set_word(y, i)) goto end;
 			}
@@ -171,7 +171,7 @@
 			goto end;
 			}
 		}
-	while (r == 1 && i++ < 80);
+	while (r == 1 && ++i < 82);
 	
 	if (r != -1)
 		{