Store canonical encodings of Name structures. Update X509_NAME_cmp() to use them.

commit: 450ea83495f8da9d9331da7a724514158d618a6f [log] [tgz]
author: Dr. Stephen Henson <steve@openssl.org> Tue Jul 18 12:36:19 2006 +0000
committer: Dr. Stephen Henson <steve@openssl.org> Tue Jul 18 12:36:19 2006 +0000
tree: 2ea54540139764fb5347f7722aac81e2820f3951
parent: af8c1d81a3366009fbf7b563ac629d6a33880012 [diff]
diff --git a/CHANGES b/CHANGES
index 89c016e..2c1dadd 100644
--- a/CHANGES
+++ b/CHANGES

@@ -4,6 +4,11 @@
 
  Changes between 0.9.8b and 0.9.9  [xx XXX xxxx]
 
+  *) Store a "canonical" representation of X509_NAME structure (ASN1 Name);
+     this maps equivalent X509_NAME structures into a consistent structure.
+     Name comparison can then be performed rapidly using memcmp().
+     [Steve Henson]
+
   *) Non-blocking OCSP request processing. Add -timeout option to ocsp 
      utility.
      [Steve Henson]

diff --git a/crypto/asn1/x_name.c b/crypto/asn1/x_name.c
index 8701c54..68fa34a 100644
--- a/crypto/asn1/x_name.c
+++ b/crypto/asn1/x_name.c

@@ -57,19 +57,26 @@
  */
 
 #include <stdio.h>
+#include <ctype.h>
 #include "cryptlib.h"
 #include <openssl/asn1t.h>
 #include <openssl/x509.h>
 #include "asn1_locl.h"
 
-static int x509_name_ex_d2i(ASN1_VALUE **val, const unsigned char **in, long len, const ASN1_ITEM *it,
-					int tag, int aclass, char opt, ASN1_TLC *ctx);
+static int x509_name_ex_d2i(ASN1_VALUE **val,
+				const unsigned char **in, long len,
+				const ASN1_ITEM *it,
+				int tag, int aclass, char opt, ASN1_TLC *ctx);
 
-static int x509_name_ex_i2d(ASN1_VALUE **val, unsigned char **out, const ASN1_ITEM *it, int tag, int aclass);
+static int x509_name_ex_i2d(ASN1_VALUE **val, unsigned char **out,
+				const ASN1_ITEM *it, int tag, int aclass);
 static int x509_name_ex_new(ASN1_VALUE **val, const ASN1_ITEM *it);
 static void x509_name_ex_free(ASN1_VALUE **val, const ASN1_ITEM *it);
 
 static int x509_name_encode(X509_NAME *a);
+static int x509_name_canon(X509_NAME *a);
+static int asn1_string_canon(ASN1_STRING *out, ASN1_STRING *in);
+static int i2d_name_canon(STACK *intname, unsigned char **in);
 
 
 static int x509_name_ex_print(BIO *out, ASN1_VALUE **pval,
@@ -126,6 +133,8 @@
 	if ((ret->entries=sk_X509_NAME_ENTRY_new_null()) == NULL)
 		goto memerr;
 	if((ret->bytes = BUF_MEM_new()) == NULL) goto memerr;
+	ret->canon_enc = NULL;
+	ret->canon_enclen = 0;
 	ret->modified=1;
 	*val = (ASN1_VALUE *)ret;
 	return 1;
@@ -150,6 +159,8 @@
 
 	BUF_MEM_free(a->bytes);
 	sk_X509_NAME_ENTRY_pop_free(a->entries,X509_NAME_ENTRY_free);
+	if (a->canon_enc)
+		OPENSSL_free(a->canon_enc);
 	OPENSSL_free(a);
 	*pval = NULL;
 }
@@ -164,8 +175,14 @@
 	sk_free(a);
 }
 
-static int x509_name_ex_d2i(ASN1_VALUE **val, const unsigned char **in, long len, const ASN1_ITEM *it,
-					int tag, int aclass, char opt, ASN1_TLC *ctx)
+static void canon_free(void *a)
+{	/* Free the X509_NAME_ENTRY stack 'a' and every entry in it; passed to sk_pop_free() by x509_name_canon() */
+	sk_X509_NAME_ENTRY_pop_free(a, X509_NAME_ENTRY_free);
+}
+
+static int x509_name_ex_d2i(ASN1_VALUE **val,
+			const unsigned char **in, long len, const ASN1_ITEM *it,
+				int tag, int aclass, char opt, ASN1_TLC *ctx)
 {
 	const unsigned char *p = *in, *q;
 	STACK *intname = NULL, **intname_pp = &intname;
@@ -200,6 +217,9 @@
 		sk_X509_NAME_ENTRY_free(entries);
 	}
 	sk_free(intname);
+	ret = x509_name_canon(nm);
+	if (!ret)
+		goto err;
 	nm->modified = 0;
 	*val = (ASN1_VALUE *)nm;
 	*in = p;
@@ -214,8 +234,12 @@
 	int ret;
 	X509_NAME *a = (X509_NAME *)*val;
 	if(a->modified) {
-		ret = x509_name_encode((X509_NAME *)a);
-		if(ret < 0) return ret;
+		ret = x509_name_encode(a);
+		if(ret < 0)
+			return ret;
+		ret = x509_name_canon(a);
+		if(ret < 0)
+			return ret;
 	}
 	ret = a->bytes->length;
 	if(out != NULL) {
@@ -271,6 +295,234 @@
 	return 2;
 	}
 
+/* This function generates the canonical encoding of the Name structure.
+ * In it all strings are converted to UTF8, leading, trailing and
+ * multiple spaces collapsed, converted to lower case and the leading
+ * SEQUENCE header removed.
+ *
+ * In future we could also normalize the UTF8 too.
+ *
+ * By doing this comparison of Name structures can be rapidly
+ * performed by just using memcmp() of the canonical encoding.
+ * By omitting the leading SEQUENCE name constraints of type
+ * dirName can also be checked with a simple memcmp().
+ *
+ * Any canonical encoding already cached in 'a' is released first.
+ * Returns 1 on success and 0 on failure; it never returns a negative
+ * value, so callers must test for zero.  An empty Name succeeds and
+ * leaves canon_enc == NULL with canon_enclen == 0.
+ */
+
+static int x509_name_canon(X509_NAME *a)
+	{
+	unsigned char *p;
+	STACK *intname = NULL;
+	STACK_OF(X509_NAME_ENTRY) *entries = NULL;
+	/* tmpentry must start out NULL: the err: path frees it if set */
+	X509_NAME_ENTRY *entry, *tmpentry = NULL;
+	int i, len, set = -1, ret = 0;
+	if (a->canon_enc)
+		{
+		OPENSSL_free(a->canon_enc);
+		a->canon_enc = NULL;
+		}
+	a->canon_enclen = 0;
+	/* Special case: an empty Name has an empty canonical encoding.
+	 * Nothing is allocated for it, so a zero length allocation is
+	 * never requested.
+	 */
+	if (sk_X509_NAME_ENTRY_num(a->entries) == 0)
+		return 1;
+	intname = sk_new_null();
+	if(!intname)
+		goto err;
+	for(i = 0; i < sk_X509_NAME_ENTRY_num(a->entries); i++)
+		{
+		entry = sk_X509_NAME_ENTRY_value(a->entries, i);
+		/* A change of 'set' value starts a new inner stack */
+		if(entry->set != set)
+			{
+			entries = sk_X509_NAME_ENTRY_new_null();
+			if(!entries)
+				goto err;
+			if(!sk_push(intname, (char *)entries))
+				{
+				/* Not owned by intname yet, so free it here */
+				sk_X509_NAME_ENTRY_free(entries);
+				goto err;
+				}
+			set = entry->set;
+			}
+		tmpentry = X509_NAME_ENTRY_new();
+		if (!tmpentry)
+			goto err;
+		tmpentry->object = OBJ_dup(entry->object);
+		if (!tmpentry->object)
+			goto err;
+		if (!asn1_string_canon(tmpentry->value, entry->value))
+			goto err;
+		if(!sk_X509_NAME_ENTRY_push(entries, tmpentry))
+			goto err;
+		/* Now owned by 'entries': must not be freed again at err: */
+		tmpentry = NULL;
+		}
+
+	/* Finally generate encoding */
+
+	len = i2d_name_canon(intname, NULL);
+	if (len <= 0)
+		goto err;
+
+	p = OPENSSL_malloc(len);
+
+	if (!p)
+		goto err;
+
+	/* Record the buffer before encoding: p is handed to the encoder
+	 * by address.
+	 */
+	a->canon_enc = p;
+	a->canon_enclen = len;
+
+	i2d_name_canon(intname, &p);
+
+	ret = 1;
+
+	err:
+
+	if (tmpentry)
+		X509_NAME_ENTRY_free(tmpentry);
+	if (intname)
+		sk_pop_free(intname, canon_free);
+	return ret;
+	}
+
+/* Bitmap of all the types of string that will be canonicalized. */
+
+#define ASN1_MASK_CANON	\
+	(B_ASN1_UTF8STRING | B_ASN1_BMPSTRING | B_ASN1_UNIVERSALSTRING \
+	| B_ASN1_PRINTABLESTRING | B_ASN1_T61STRING | B_ASN1_IA5STRING \
+	| B_ASN1_VISIBLESTRING)
+
+/* Write the canonical form of 'in' into 'out'.
+ *
+ * Types outside ASN1_MASK_CANON are copied unchanged, type included.
+ * All other types are converted to UTF8String, after which leading
+ * and trailing white space is dropped, each internal run of white
+ * space becomes a single ' ' and the remaining characters are passed
+ * through tolower().  Bytes with the top bit set are copied as is.
+ *
+ * Returns 1 on success, 0 on failure.
+ */
+
+static int asn1_string_canon(ASN1_STRING *out, ASN1_STRING *in)
+	{
+	unsigned char *to, *from;
+	int len, i;
+
+	/* If type not in bitmask just copy string across */
+	if (!(ASN1_tag2bit(in->type) & ASN1_MASK_CANON))
+		{
+		out->type = in->type;
+		if (!ASN1_STRING_set(out, in->data, in->length))
+			return 0;
+		/* Done: must not fall through to the UTF8 conversion */
+		return 1;
+		}
+
+	out->type = V_ASN1_UTF8STRING;
+	len = ASN1_STRING_to_UTF8(&out->data, in);
+	if (len < 0)
+		return 0;
+	out->length = len;
+
+	to = out->data;
+	from = to;
+
+	/* Convert string in place to canonical form.
+	 * Ultimately we may need to handle a wider range of characters
+	 * but for now ignore anything with MSB set and rely on the
+	 * isspace() and tolower() functions.
+	 */
+
+	/* Ignore leading spaces */
+	while((len > 0) && !(*from & 0x80) && isspace(*from))
+		{
+		from++;
+		len--;
+		}
+
+	/* Ignore trailing spaces. Index relative to 'from' so that no
+	 * pointer before the start of the buffer is formed when len is 0.
+	 */
+	while ((len > 0) && !(from[len - 1] & 0x80)
+			&& isspace(from[len - 1]))
+		len--;
+
+	i = 0;
+	while(i < len)
+		{
+		/* If MSB set just copy across */
+		if (*from & 0x80)
+			{
+			*to++ = *from++;
+			/* Every input byte consumed must be counted in i */
+			i++;
+			}
+		/* Collapse multiple spaces */
+		else if (isspace(*from))
+			{
+			/* Copy one space across */
+			*to++ = ' ';
+			/* Ignore subsequent spaces. Note: don't need to
+			 * check len here because we know the last
+			 * character is a non-space so we can't overflow.
+			 */
+			do
+				{
+				from++;
+				i++;
+				}
+			while(!(*from & 0x80) && isspace(*from));
+			}
+		else
+			{
+			*to++ = tolower(*from++);
+			i++;
+			}
+		}
+
+	out->length = to - out->data;
+
+	return 1;
+
+	}
+
+/* Encode each inner stack held in 'intname' in turn as an
+ * X509_NAME_ENTRIES item.  'in' is passed unchanged to
+ * ASN1_item_ex_i2d() as its output argument; x509_name_canon() calls
+ * this once with NULL to obtain the length and once with a buffer.
+ *
+ * Returns the sum of the individual lengths, or the first negative
+ * value returned by ASN1_item_ex_i2d().
+ */
+
+static int i2d_name_canon(STACK *intname, unsigned char **in)
+	{
+	int i, len, ltmp;
+	ASN1_VALUE *v;
+	len = 0;
+	for (i = 0; i < sk_num(intname); i++)
+		{
+		v = (ASN1_VALUE *)sk_value(intname, i);
+		ltmp = ASN1_item_ex_i2d(&v, in,
+			ASN1_ITEM_rptr(X509_NAME_ENTRIES), -1, -1);
+		if (ltmp < 0)
+			return ltmp;
+		len += ltmp;
+		}
+	return len;
+	}
 
 int X509_NAME_set(X509_NAME **xn, X509_NAME *name)
 	{

diff --git a/crypto/x509/x509.h b/crypto/x509/x509.h
index f9d7e56..b068632 100644
--- a/crypto/x509/x509.h
+++ b/crypto/x509/x509.h

@@ -190,6 +190,8 @@
 	char *bytes;
 #endif
 	unsigned long hash; /* Keep the hash around for lookups */
+	unsigned char *canon_enc;
+	int canon_enclen;
 	} /* X509_NAME */;
 
 DECLARE_STACK_OF(X509_NAME)

diff --git a/crypto/x509/x509_cmp.c b/crypto/x509/x509_cmp.c
index d04225a..4f157ba 100644
--- a/crypto/x509/x509_cmp.c
+++ b/crypto/x509/x509_cmp.c

@@ -162,159 +162,36 @@
 #endif
 
 
-/* Case insensitive string comparision */
-static int nocase_cmp(const ASN1_STRING *a, const ASN1_STRING *b)
-{
-	int i;
-
-	if (a->length != b->length)
-		return (a->length - b->length);
-
-	for (i=0; i<a->length; i++)
-	{
-		int ca, cb;
-
-		ca = tolower(a->data[i]);
-		cb = tolower(b->data[i]);
-
-		if (ca != cb)
-			return(ca-cb);
-	}
-	return 0;
-}
-
-/* Case insensitive string comparision with space normalization 
- * Space normalization - ignore leading, trailing spaces, 
- *       multiple spaces between characters are replaced by single space  
- */
-static int nocase_spacenorm_cmp(const ASN1_STRING *a, const ASN1_STRING *b)
-{
-	unsigned char *pa = NULL, *pb = NULL;
-	int la, lb;
-	
-	la = a->length;
-	lb = b->length;
-	pa = a->data;
-	pb = b->data;
-
-	/* skip leading spaces */
-	while (la > 0 && isspace(*pa))
-	{
-		la--;
-		pa++;
-	}
-	while (lb > 0 && isspace(*pb))
-	{
-		lb--;
-		pb++;
-	}
-
-	/* skip trailing spaces */
-	while (la > 0 && isspace(pa[la-1]))
-		la--;
-	while (lb > 0 && isspace(pb[lb-1]))
-		lb--;
-
-	/* compare strings with space normalization */
-	while (la > 0 && lb > 0)
-	{
-		int ca, cb;
-
-		/* compare character */
-		ca = tolower(*pa);
-		cb = tolower(*pb);
-		if (ca != cb)
-			return (ca - cb);
-
-		pa++; pb++;
-		la--; lb--;
-
-		if (la <= 0 || lb <= 0)
-			break;
-
-		/* is white space next character ? */
-		if (isspace(*pa) && isspace(*pb))
-		{
-			/* skip remaining white spaces */
-			while (la > 0 && isspace(*pa))
-			{
-				la--;
-				pa++;
-			}
-			while (lb > 0 && isspace(*pb))
-			{
-				lb--;
-				pb++;
-			}
-		}
-	}
-	if (la > 0 || lb > 0)
-		return la - lb;
-
-	return 0;
-}
-
-static int asn1_string_memcmp(ASN1_STRING *a, ASN1_STRING *b)
-	{
-	int j;
-	j = a->length - b->length;
-	if (j)
-		return j;
-	return memcmp(a->data, b->data, a->length);
-	}
-
-#define STR_TYPE_CMP (B_ASN1_PRINTABLESTRING|B_ASN1_T61STRING|B_ASN1_UTF8STRING)
-
 int X509_NAME_cmp(const X509_NAME *a, const X509_NAME *b)
 	{
-	int i,j;
-	X509_NAME_ENTRY *na,*nb;
+	int ret;
 
-	unsigned long nabit, nbbit;
+	/* Make sure each canonical encoding exists and is not stale; -2 is returned if re-encoding fails */
 
-	j = sk_X509_NAME_ENTRY_num(a->entries)
-		  - sk_X509_NAME_ENTRY_num(b->entries);
-	if (j)
-		return j;
-	for (i=sk_X509_NAME_ENTRY_num(a->entries)-1; i>=0; i--)
+	if (!a->canon_enc || a->modified)
 		{
-		na=sk_X509_NAME_ENTRY_value(a->entries,i);
-		nb=sk_X509_NAME_ENTRY_value(b->entries,i);
-		j=na->value->type-nb->value->type;
-		if (j)
-			{
-			nabit = ASN1_tag2bit(na->value->type);
-			nbbit = ASN1_tag2bit(nb->value->type);
-			if (!(nabit & STR_TYPE_CMP) ||
-				!(nbbit & STR_TYPE_CMP))
-				return j;
-			j = asn1_string_memcmp(na->value, nb->value);
-			}
-		else if (na->value->type == V_ASN1_PRINTABLESTRING)
-			j=nocase_spacenorm_cmp(na->value, nb->value);
-		else if (na->value->type == V_ASN1_IA5STRING
-			&& OBJ_obj2nid(na->object) == NID_pkcs9_emailAddress)
-			j=nocase_cmp(na->value, nb->value);
-		else
-			j = asn1_string_memcmp(na->value, nb->value);
-		if (j) return(j);
-		j=na->set-nb->set;
-		if (j) return(j);
+		ret = i2d_X509_NAME((X509_NAME *)a, NULL);
+		if (ret < 0)
+			return -2;
 		}
 
-	/* We will check the object types after checking the values
-	 * since the values will more often be different than the object
-	 * types. */
-	for (i=sk_X509_NAME_ENTRY_num(a->entries)-1; i>=0; i--)
+	if (!b->canon_enc || b->modified)
 		{
-		na=sk_X509_NAME_ENTRY_value(a->entries,i);
-		nb=sk_X509_NAME_ENTRY_value(b->entries,i);
-		j=OBJ_cmp(na->object,nb->object);
-		if (j) return(j);
+		ret = i2d_X509_NAME((X509_NAME *)b, NULL);
+		if (ret < 0)
+			return -2;
 		}
-	return(0);
+	/* Order by canonical length first, then by canonical content */
+	ret = a->canon_enclen - b->canon_enclen;
+	/* Two zero length encodings are equal: skip memcmp(), the buffers may be NULL */
+	if (ret || a->canon_enclen == 0)
+		return ret;
+
+	return memcmp(a->canon_enc, b->canon_enc, a->canon_enclen);
+
 	}
 
+
 #ifndef OPENSSL_NO_MD5
 /* I now DER encode the name and hash it.  Since I cache the DER encoding,
  * this is reasonably efficient. */
commit	450ea83495f8da9d9331da7a724514158d618a6f	[log] [tgz]
author	Dr. Stephen Henson <steve@openssl.org>	Tue Jul 18 12:36:19 2006 +0000
committer	Dr. Stephen Henson <steve@openssl.org>	Tue Jul 18 12:36:19 2006 +0000
tree	2ea54540139764fb5347f7722aac81e2820f3951
parent	af8c1d81a3366009fbf7b563ac629d6a33880012 [diff]