ARM64 assembly pack: add ThunderX2 results.
Reviewed-by: Tim Hudson <tjh@openssl.org>
Reviewed-by: Richard Levitte <levitte@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/8776)
diff --git a/crypto/aes/asm/aesv8-armx.pl b/crypto/aes/asm/aesv8-armx.pl
index 81bc1cb..b708a61 100755
--- a/crypto/aes/asm/aesv8-armx.pl
+++ b/crypto/aes/asm/aesv8-armx.pl
@@ -36,6 +36,7 @@
# Denver 1.96 0.86 0.80
# Mongoose 1.33 1.20 1.20
# Kryo 1.26 0.94 1.00
+# ThunderX2 5.95 1.53 1.55
#
# (*) original 3.64/1.34/1.32 results were for r0p0 revision
# and are still same even for updated module;
diff --git a/crypto/aes/asm/vpaes-armv8.pl b/crypto/aes/asm/vpaes-armv8.pl
index f08ae58..c7839b3 100755
--- a/crypto/aes/asm/vpaes-armv8.pl
+++ b/crypto/aes/asm/vpaes-armv8.pl
@@ -30,6 +30,7 @@
# Denver(***) 16.6(**) 15.1/17.8(**) [8.80/9.93 ]
# Apple A7(***) 22.7(**) 10.9/14.3 [8.45/10.0 ]
# Mongoose(***) 26.3(**) 21.0/25.0(**) [13.3/16.8 ]
+# ThunderX2(***) 39.4(**) 33.8/48.6(**)
#
# (*) ECB denotes approximate result for parallelizable modes
# such as CBC decrypt, CTR, etc.;
diff --git a/crypto/chacha/asm/chacha-armv8.pl b/crypto/chacha/asm/chacha-armv8.pl
index 56ba1c3..dc38cbd 100755
--- a/crypto/chacha/asm/chacha-armv8.pl
+++ b/crypto/chacha/asm/chacha-armv8.pl
@@ -29,6 +29,7 @@
# X-Gene 9.50/+46% 8.82 8.89(*)
# Mongoose 8.00/+44% 3.64 3.25
# Kryo 8.17/+50% 4.83 4.65
+# ThunderX2 7.26/+48% 7.91 4.30
#
# (*) it's expected that doubling interleave factor doesn't help
# all processors, only those with higher NEON latency and
diff --git a/crypto/modes/asm/ghashv8-armx.pl b/crypto/modes/asm/ghashv8-armx.pl
index e891583..fbc49d1 100644
--- a/crypto/modes/asm/ghashv8-armx.pl
+++ b/crypto/modes/asm/ghashv8-armx.pl
@@ -42,6 +42,7 @@
# Denver 0.51 0.65 6.02
# Mongoose 0.65 1.10 8.06
# Kryo 0.76 1.16 8.00
+# ThunderX2 1.05
#
# (*) presented for reference/comparison purposes;
diff --git a/crypto/poly1305/asm/poly1305-armv8.pl b/crypto/poly1305/asm/poly1305-armv8.pl
index b7aa7dc..b5dd61e 100755
--- a/crypto/poly1305/asm/poly1305-armv8.pl
+++ b/crypto/poly1305/asm/poly1305-armv8.pl
@@ -29,6 +29,7 @@
# X-Gene 2.13/+68% 2.27
# Mongoose 1.77/+75% 1.12
# Kryo 2.70/+55% 1.13
+# ThunderX2 1.17/+95% 1.36
#
# (*) estimate based on resources availability is less than 1.0,
# i.e. measured result is worse than expected, presumably binary
diff --git a/crypto/sha/asm/keccak1600-armv8.pl b/crypto/sha/asm/keccak1600-armv8.pl
index bd15a52..dc72f18 100755
--- a/crypto/sha/asm/keccak1600-armv8.pl
+++ b/crypto/sha/asm/keccak1600-armv8.pl
@@ -51,6 +51,7 @@
# Kryo 12
# Denver 7.8
# Apple A7 7.2
+# ThunderX2 9.7
#
# (*) Corresponds to SHA3-256. No improvement coefficients are listed
# because they vary too much from compiler to compiler. Newer
diff --git a/crypto/sha/asm/sha1-armv8.pl b/crypto/sha/asm/sha1-armv8.pl
index 7a0cbf5..12403eb 100644
--- a/crypto/sha/asm/sha1-armv8.pl
+++ b/crypto/sha/asm/sha1-armv8.pl
@@ -27,6 +27,7 @@
# X-Gene 8.80 (+200%)
# Mongoose 2.05 6.50 (+160%)
# Kryo 1.88 8.00 (+90%)
+# ThunderX2 2.64 6.36 (+150%)
#
# (*) Software results are presented mostly for reference purposes.
# (**) Keep in mind that Denver relies on binary translation, which
diff --git a/crypto/sha/asm/sha512-armv8.pl b/crypto/sha/asm/sha512-armv8.pl
index f7c6721..b9ba05b 100644
--- a/crypto/sha/asm/sha512-armv8.pl
+++ b/crypto/sha/asm/sha512-armv8.pl
@@ -28,6 +28,7 @@
# X-Gene 20.0 (+100%) 12.8 (+300%(***))
# Mongoose 2.36 13.0 (+50%) 8.36 (+33%)
# Kryo 1.92 17.4 (+30%) 11.2 (+8%)
+# ThunderX2 2.54 13.2 (+40%) 8.40 (+18%)
#
# (*) Software SHA256 results are of lesser relevance, presented
# mostly for informational purposes.