Profiling revealed that OPENSSL_cleanse consumes *more* CPU time than
sha1_block_data_order when hashing short messages. Move OPENSSL_cleanse
to "cpuid" assembler module and gain 2x.
diff --git a/crypto/mem.c b/crypto/mem.c
index 6635167..43d48ab 100644
--- a/crypto/mem.c
+++ b/crypto/mem.c
@@ -250,7 +250,6 @@
void *CRYPTO_malloc_locked(int num, const char *file, int line)
{
void *ret = NULL;
- extern unsigned char cleanse_ctr;
if (num <= 0) return NULL;
@@ -267,11 +266,15 @@
if (malloc_debug_func != NULL)
malloc_debug_func(ret, num, file, line, 1);
+#ifndef OPENSSL_CPUID_OBJ
/* Create a dependency on the value of 'cleanse_ctr' so our memory
* sanitisation function can't be optimised out. NB: We only do
* this for >2Kb so the overhead doesn't bother us. */
if(ret && (num > 2048))
+ { extern unsigned char cleanse_ctr;
((unsigned char *)ret)[0] = cleanse_ctr;
+ }
+#endif
return ret;
}
@@ -291,7 +294,6 @@
void *CRYPTO_malloc(int num, const char *file, int line)
{
void *ret = NULL;
- extern unsigned char cleanse_ctr;
if (num <= 0) return NULL;
@@ -308,11 +310,15 @@
if (malloc_debug_func != NULL)
malloc_debug_func(ret, num, file, line, 1);
+#ifndef OPENSSL_CPUID_OBJ
/* Create a dependency on the value of 'cleanse_ctr' so our memory
* sanitisation function can't be optimised out. NB: We only do
* this for >2Kb so the overhead doesn't bother us. */
if(ret && (num > 2048))
+ { extern unsigned char cleanse_ctr;
((unsigned char *)ret)[0] = cleanse_ctr;
+ }
+#endif
return ret;
}