Simplify and fix ec_GFp_simple_points_make_affine
(which didn't always handle value 0 correctly).

Reviewed-by: emilia@openssl.org
diff --git a/CHANGES b/CHANGES
index 10d3eb2..9f444d3 100644
--- a/CHANGES
+++ b/CHANGES
@@ -310,6 +310,11 @@
 
  Changes between 1.0.1e and 1.0.2 [xx XXX xxxx]
 
+  *) Fix ec_GFp_simple_points_make_affine (thus, EC_POINTs_mul etc.)
+     for corner cases. (Certain input points at infinity could lead to
+     bogus results, with non-infinity inputs mapped to infinity too.)
+     [Bodo Moeller]
+
   *) Initial support for PowerISA 2.0.7, first implemented in POWER8.
      This covers AES, SHA256/512 and GHASH. "Initial" means that most
      common cases are optimized and there still is room for further
diff --git a/crypto/ec/ecp_smpl.c b/crypto/ec/ecp_smpl.c
index 16371e3..f2cd6f7 100644
--- a/crypto/ec/ecp_smpl.c
+++ b/crypto/ec/ecp_smpl.c
@@ -1175,9 +1175,8 @@
 int ec_GFp_simple_points_make_affine(const EC_GROUP *group, size_t num, EC_POINT *points[], BN_CTX *ctx)
 	{
 	BN_CTX *new_ctx = NULL;
-	BIGNUM *tmp0, *tmp1;
-	size_t pow2 = 0;
-	BIGNUM **heap = NULL;
+	BIGNUM *tmp, *tmp_Z;
+	BIGNUM **prod_Z = NULL;
 	size_t i;
 	int ret = 0;
 
@@ -1192,124 +1191,104 @@
 		}
 
 	BN_CTX_start(ctx);
-	tmp0 = BN_CTX_get(ctx);
-	tmp1 = BN_CTX_get(ctx);
-	if (tmp0  == NULL || tmp1 == NULL) goto err;
+	tmp = BN_CTX_get(ctx);
+	tmp_Z = BN_CTX_get(ctx);
+	if (tmp == NULL || tmp_Z == NULL) goto err;
 
-	/* Before converting the individual points, compute inverses of all Z values.
-	 * Modular inversion is rather slow, but luckily we can do with a single
-	 * explicit inversion, plus about 3 multiplications per input value.
-	 */
-
-	pow2 = 1;
-	while (num > pow2)
-		pow2 <<= 1;
-	/* Now pow2 is the smallest power of 2 satifsying pow2 >= num.
-	 * We need twice that. */
-	pow2 <<= 1;
-
-	heap = OPENSSL_malloc(pow2 * sizeof heap[0]);
-	if (heap == NULL) goto err;
-	
-	/* The array is used as a binary tree, exactly as in heapsort:
-	 *
-	 *                               heap[1]
-	 *                 heap[2]                     heap[3]
-	 *          heap[4]       heap[5]       heap[6]       heap[7]
-	 *   heap[8]heap[9] heap[10]heap[11] heap[12]heap[13] heap[14] heap[15]
-	 *
-	 * We put the Z's in the last line;
-	 * then we set each other node to the product of its two child-nodes (where
-	 * empty or 0 entries are treated as ones);
-	 * then we invert heap[1];
-	 * then we invert each other node by replacing it by the product of its
-	 * parent (after inversion) and its sibling (before inversion).
-	 */
-	heap[0] = NULL;
-	for (i = pow2/2 - 1; i > 0; i--)
-		heap[i] = NULL;
+	prod_Z = OPENSSL_malloc(num * sizeof prod_Z[0]);
+	if (prod_Z == NULL) goto err;
 	for (i = 0; i < num; i++)
-		heap[pow2/2 + i] = &points[i]->Z;
-	for (i = pow2/2 + num; i < pow2; i++)
-		heap[i] = NULL;
-	
-	/* set each node to the product of its children */
-	for (i = pow2/2 - 1; i > 0; i--)
 		{
-		heap[i] = BN_new();
-		if (heap[i] == NULL) goto err;
-		
-		if (heap[2*i] != NULL)
-			{
-			if ((heap[2*i + 1] == NULL) || BN_is_zero(heap[2*i + 1]))
-				{
-				if (!BN_copy(heap[i], heap[2*i])) goto err;
-				}
-			else
-				{
-				if (BN_is_zero(heap[2*i]))
-					{
-					if (!BN_copy(heap[i], heap[2*i + 1])) goto err;
-					}
-				else
-					{
-					if (!group->meth->field_mul(group, heap[i],
-						heap[2*i], heap[2*i + 1], ctx)) goto err;
-					}
-				}
-			}
+		prod_Z[i] = BN_new();
+		if (prod_Z[i] == NULL) goto err;
 		}
 
-	/* invert heap[1] */
-	if (!BN_is_zero(heap[1]))
-		{
-		if (!BN_mod_inverse(heap[1], heap[1], &group->field, ctx))
-			{
-			ECerr(EC_F_EC_GFP_SIMPLE_POINTS_MAKE_AFFINE, ERR_R_BN_LIB);
-			goto err;
-			}
-		}
-	if (group->meth->field_encode != 0)
-		{
-		/* in the Montgomery case, we just turned  R*H  (representing H)
-		 * into  1/(R*H),  but we need  R*(1/H)  (representing 1/H);
-		 * i.e. we have need to multiply by the Montgomery factor twice */
-		if (!group->meth->field_encode(group, heap[1], heap[1], ctx)) goto err;
-		if (!group->meth->field_encode(group, heap[1], heap[1], ctx)) goto err;
-		}
+	/* Set each prod_Z[i] to the product of points[0]->Z .. points[i]->Z,
+	 * skipping any zero-valued inputs (pretend that they're 1). */
 
-	/* set other heap[i]'s to their inverses */
-	for (i = 2; i < pow2/2 + num; i += 2)
+	if (!BN_is_zero(&points[0]->Z))
 		{
-		/* i is even */
-		if ((heap[i + 1] != NULL) && !BN_is_zero(heap[i + 1]))
+		if (!BN_copy(prod_Z[0], &points[0]->Z)) goto err;
+		}
+	else
+		{
+		if (group->meth->field_set_to_one != 0)
 			{
-			if (!group->meth->field_mul(group, tmp0, heap[i/2], heap[i + 1], ctx)) goto err;
-			if (!group->meth->field_mul(group, tmp1, heap[i/2], heap[i], ctx)) goto err;
-			if (!BN_copy(heap[i], tmp0)) goto err;
-			if (!BN_copy(heap[i + 1], tmp1)) goto err;
+			if (!group->meth->field_set_to_one(group, prod_Z[0], ctx)) goto err;
 			}
 		else
 			{
-			if (!BN_copy(heap[i], heap[i/2])) goto err;
+			if (!BN_one(prod_Z[0])) goto err;
 			}
 		}
 
-	/* we have replaced all non-zero Z's by their inverses, now fix up all the points */
+	for (i = 1; i < num; i++)
+		{
+		if (!BN_is_zero(&points[i]->Z))
+			{
+			if (!group->meth->field_mul(group, prod_Z[i], prod_Z[i - 1], &points[i]->Z, ctx)) goto err;
+			}
+		else
+			{
+			if (!BN_copy(prod_Z[i], prod_Z[i - 1])) goto err;
+			}
+		}
+
+	/* Now use a single explicit inversion to replace every
+	 * non-zero points[i]->Z by its inverse. */
+
+	if (!BN_mod_inverse(tmp, prod_Z[num - 1], &group->field, ctx))
+		{
+		ECerr(EC_F_EC_GFP_SIMPLE_POINTS_MAKE_AFFINE, ERR_R_BN_LIB);
+		goto err;
+		}
+	if (group->meth->field_encode != 0)
+		{
+		/* In the Montgomery case, we just turned  R*H  (representing H)
+		 * into  1/(R*H),  but we need  R*(1/H)  (representing 1/H);
+		 * i.e. we need to multiply by the Montgomery factor twice. */
+		if (!group->meth->field_encode(group, tmp, tmp, ctx)) goto err;
+		if (!group->meth->field_encode(group, tmp, tmp, ctx)) goto err;
+		}
+
+	for (i = num - 1; i > 0; --i)
+		{
+		/* Loop invariant: tmp is the product of the inverses of
+		 * points[0]->Z .. points[i]->Z (zero-valued inputs skipped). */
+		if (!BN_is_zero(&points[i]->Z))
+			{
+			/* Set tmp_Z to the inverse of points[i]->Z (as product
+			 * of Z inverses 0 .. i, Z values 0 .. i - 1). */
+			if (!group->meth->field_mul(group, tmp_Z, prod_Z[i - 1], tmp, ctx)) goto err;
+			/* Update tmp to satisfy the loop invariant for i - 1. */
+			if (!group->meth->field_mul(group, tmp, tmp, &points[i]->Z, ctx)) goto err;
+			/* Replace points[i]->Z by its inverse. */
+			if (!BN_copy(&points[i]->Z, tmp_Z)) goto err;
+			}
+		}
+
+	if (!BN_is_zero(&points[0]->Z))
+		{
+		/* Replace points[0]->Z by its inverse. */
+		if (!BN_copy(&points[0]->Z, tmp)) goto err;
+		}
+
+	/* Finally, fix up the X and Y coordinates for all points. */
+
 	for (i = 0; i < num; i++)
 		{
 		EC_POINT *p = points[i];
-		
+
 		if (!BN_is_zero(&p->Z))
 			{
 			/* turn  (X, Y, 1/Z)  into  (X/Z^2, Y/Z^3, 1) */
 
-			if (!group->meth->field_sqr(group, tmp1, &p->Z, ctx)) goto err;
-			if (!group->meth->field_mul(group, &p->X, &p->X, tmp1, ctx)) goto err;
+			if (!group->meth->field_sqr(group, tmp, &p->Z, ctx)) goto err;
+			if (!group->meth->field_mul(group, &p->X, &p->X, tmp, ctx)) goto err;
 
-			if (!group->meth->field_mul(group, tmp1, tmp1, &p->Z, ctx)) goto err;
-			if (!group->meth->field_mul(group, &p->Y, &p->Y, tmp1, ctx)) goto err;
-		
+			if (!group->meth->field_mul(group, tmp, tmp, &p->Z, ctx)) goto err;
+			if (!group->meth->field_mul(group, &p->Y, &p->Y, tmp, ctx)) goto err;
+
 			if (group->meth->field_set_to_one != 0)
 				{
 				if (!group->meth->field_set_to_one(group, &p->Z, ctx)) goto err;
@@ -1323,20 +1302,19 @@
 		}
 
 	ret = 1;
-		
+
  err:
 	BN_CTX_end(ctx);
 	if (new_ctx != NULL)
 		BN_CTX_free(new_ctx);
-	if (heap != NULL)
+	if (prod_Z != NULL)
 		{
-		/* heap[pow2/2] .. heap[pow2-1] have not been allocated locally! */
-		for (i = pow2/2 - 1; i > 0; i--)
+		for (i = 0; i < num; i++)
 			{
-			if (heap[i] != NULL)
-				BN_clear_free(heap[i]);
+			if (prod_Z[i] != NULL)
+				BN_clear_free(prod_Z[i]);
 			}
-		OPENSSL_free(heap);
+		OPENSSL_free(prod_Z);
 		}
 	return ret;
 	}
diff --git a/crypto/ec/ectest.c b/crypto/ec/ectest.c
index 102eaa9..82c8c8b 100644
--- a/crypto/ec/ectest.c
+++ b/crypto/ec/ectest.c
@@ -199,6 +199,7 @@
 	EC_POINT *P = EC_POINT_new(group);
 	EC_POINT *Q = EC_POINT_new(group);
 	BN_CTX *ctx = BN_CTX_new();
+	int i;
 
 	n1 = BN_new(); n2 = BN_new(); order = BN_new();
 	fprintf(stdout, "verify group order ...");
@@ -212,21 +213,55 @@
 	if (!EC_POINT_mul(group, Q, order, NULL, NULL, ctx)) ABORT;
 	if (!EC_POINT_is_at_infinity(group, Q)) ABORT;
 	fprintf(stdout, " ok\n");
-	fprintf(stdout, "long/negative scalar tests ... ");
-	if (!BN_one(n1)) ABORT;
-	/* n1 = 1 - order */
-	if (!BN_sub(n1, n1, order)) ABORT;
-	if(!EC_POINT_mul(group, Q, NULL, P, n1, ctx)) ABORT;
-	if (0 != EC_POINT_cmp(group, Q, P, ctx)) ABORT;
-	/* n2 = 1 + order */
-	if (!BN_add(n2, order, BN_value_one())) ABORT;
-	if(!EC_POINT_mul(group, Q, NULL, P, n2, ctx)) ABORT;
-	if (0 != EC_POINT_cmp(group, Q, P, ctx)) ABORT;
-	/* n2 = (1 - order) * (1 + order) */
-	if (!BN_mul(n2, n1, n2, ctx)) ABORT;
-	if(!EC_POINT_mul(group, Q, NULL, P, n2, ctx)) ABORT;
-	if (0 != EC_POINT_cmp(group, Q, P, ctx)) ABORT;
+	fprintf(stdout, "long/negative scalar tests ");
+        for (i = 1; i <= 2; i++)
+		{
+		const BIGNUM *scalars[6];
+		const EC_POINT *points[6];
+
+		fprintf(stdout, i == 1 ?
+			"allowing precomputation ... " :
+			"without precomputation ... ");
+		if (!BN_set_word(n1, i)) ABORT;
+		/* If i == 1, P will be the predefined generator for which
+		 * EC_GROUP_precompute_mult has set up precomputation. */
+		if (!EC_POINT_mul(group, P, n1, NULL, NULL, ctx)) ABORT;
+
+		if (!BN_one(n1)) ABORT;
+		/* n1 = 1 - order */
+		if (!BN_sub(n1, n1, order)) ABORT;
+		if (!EC_POINT_mul(group, Q, NULL, P, n1, ctx)) ABORT;
+		if (0 != EC_POINT_cmp(group, Q, P, ctx)) ABORT;
+
+		/* n2 = 1 + order */
+		if (!BN_add(n2, order, BN_value_one())) ABORT;
+		if (!EC_POINT_mul(group, Q, NULL, P, n2, ctx)) ABORT;
+		if (0 != EC_POINT_cmp(group, Q, P, ctx)) ABORT;
+
+		/* n2 = (1 - order) * (1 + order) = 1 - order^2 */
+		if (!BN_mul(n2, n1, n2, ctx)) ABORT;
+		if (!EC_POINT_mul(group, Q, NULL, P, n2, ctx)) ABORT;
+		if (0 != EC_POINT_cmp(group, Q, P, ctx)) ABORT;
+
+		/* n2 = order^2 - 1 */
+		BN_set_negative(n2, 0);
+		if (!EC_POINT_mul(group, Q, NULL, P, n2, ctx)) ABORT;
+		/* Add P to verify the result. */
+		if (!EC_POINT_add(group, Q, Q, P, ctx)) ABORT;
+		if (!EC_POINT_is_at_infinity(group, Q)) ABORT;
+
+		/* Exercise EC_POINTs_mul, including corner cases. */
+		scalars[0] = n1; points[0] = Q; /* => infinity */
+		scalars[1] = n2; points[1] = P; /* => -P */
+		scalars[2] = n1; points[2] = Q; /* => infinity */
+		scalars[3] = n2; points[3] = Q; /* => infinity */
+		scalars[4] = n1; points[4] = P; /* => P */
+		scalars[5] = n2; points[5] = Q; /* => infinity */
+		if (!EC_POINTs_mul(group, Q, NULL, 5, points, scalars, ctx)) ABORT;
+		if (!EC_POINT_is_at_infinity(group, Q)) ABORT;
+		}
 	fprintf(stdout, "ok\n");
+
 	EC_POINT_free(P);
 	EC_POINT_free(Q);
 	BN_free(n1);