s390x assembly pack: cache capability query results.

IBM argues that in certain scenarios capability query is really
expensive. At the same time it's asserted that query results can
be safely cached, because disabling CPACF is incompatible with
reboot-free operation.

Reviewed-by: Tim Hudson <tjh@openssl.org>
diff --git a/crypto/aes/asm/aes-s390x.pl b/crypto/aes/asm/aes-s390x.pl
index 4aacf1b..231a299 100644
--- a/crypto/aes/asm/aes-s390x.pl
+++ b/crypto/aes/asm/aes-s390x.pl
@@ -818,13 +818,9 @@
 	tmhl	%r0,0x4000	# check for message-security assist
 	jz	.Lekey_internal
 
-	lghi	%r0,0		# query capability vector
-	la	%r1,16($sp)
-	.long	0xb92f0042	# kmc %r4,%r2
-
-	llihh	%r1,0x8000
-	srlg	%r1,%r1,0(%r5)
-	ng	%r1,16($sp)
+	llihh	%r0,0x8000
+	srlg	%r0,%r0,0(%r5)
+	ng	%r0,48(%r1)	# check kmc capability vector
 	jz	.Lekey_internal
 
 	lmg	%r0,%r1,0($inp)	# just copy 128 bits...
@@ -1444,13 +1440,10 @@
 
 	llgfr	$s0,%r0
 	lgr	$s1,%r1
-	lghi	%r0,0
-	la	%r1,16($sp)
-	.long	0xb92d2042	# kmctr %r4,%r2,%r2
-
+	larl	%r1,OPENSSL_s390xcap_P
 	llihh	%r0,0x8000	# check if kmctr supports the function code
 	srlg	%r0,%r0,0($s0)
-	ng	%r0,16($sp)
+	ng	%r0,64(%r1)	# check kmctr capability vector
 	lgr	%r0,$s0
 	lgr	%r1,$s1
 	jz	.Lctr32_km_loop
@@ -1597,12 +1590,10 @@
 	llgfr	$s0,%r0			# put aside the function code
 	lghi	$s1,0x7f
 	nr	$s1,%r0
-	lghi	%r0,0			# query capability vector
-	la	%r1,$tweak-16($sp)
-	.long	0xb92e0042		# km %r4,%r2
-	llihh	%r1,0x8000
-	srlg	%r1,%r1,32($s1)		# check for 32+function code
-	ng	%r1,$tweak-16($sp)
+	larl	%r1,OPENSSL_s390xcap_P
+	llihh	%r0,0x8000
+	srlg	%r0,%r0,32($s1)		# check for 32+function code
+	ng	%r0,32(%r1)		# check km capability vector
 	lgr	%r0,$s0			# restore the function code
 	la	%r1,0($key1)		# restore $key1
 	jz	.Lxts_km_vanilla
@@ -2229,7 +2220,7 @@
 }
 $code.=<<___;
 .string	"AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
-.comm	OPENSSL_s390xcap_P,16,8
+.comm	OPENSSL_s390xcap_P,80,8
 ___
 
 $code =~ s/\`([^\`]*)\`/eval $1/gem;
diff --git a/crypto/modes/asm/ghash-s390x.pl b/crypto/modes/asm/ghash-s390x.pl
index a46f3eb..8c3ce69 100644
--- a/crypto/modes/asm/ghash-s390x.pl
+++ b/crypto/modes/asm/ghash-s390x.pl
@@ -85,9 +85,7 @@
 	tmhl	%r0,0x4000	# check for message-security-assist
 	jz	.Lsoft_gmult
 	lghi	%r0,0
-	la	%r1,16($sp)
-	.long	0xb93e0004	# kimd %r0,%r4
-	lg	%r1,24($sp)
+	lg	%r1,24(%r1)	# load second word of kimd capabilities vector
 	tmhh	%r1,0x4000	# check for function 65
 	jz	.Lsoft_gmult
 	stg	%r0,16($sp)	# arrange 16 bytes of zero input
diff --git a/crypto/s390xcpuid.S b/crypto/s390xcpuid.S
index 3402a24..3efad55 100644
--- a/crypto/s390xcpuid.S
+++ b/crypto/s390xcpuid.S
@@ -5,14 +5,46 @@
 .align	16
 OPENSSL_s390x_facilities:
 	lghi	%r0,0
-	larl	%r2,OPENSSL_s390xcap_P
-	stg	%r0,8(%r2)
-	.long	0xb2b02000	# stfle	0(%r2)
+	larl	%r4,OPENSSL_s390xcap_P
+	stg	%r0,8(%r4)	# wipe capability vectors
+	stg	%r0,16(%r4)
+	stg	%r0,24(%r4)
+	stg	%r0,32(%r4)
+	stg	%r0,40(%r4)
+	stg	%r0,48(%r4)
+	stg	%r0,56(%r4)
+	stg	%r0,64(%r4)
+	stg	%r0,72(%r4)
+
+	.long	0xb2b04000	# stfle	0(%r4)
 	brc	8,.Ldone
 	lghi	%r0,1
-	.long	0xb2b02000	# stfle 0(%r2)
+	.long	0xb2b04000	# stfle 0(%r4)
 .Ldone:
-	lg	%r2,0(%r2)
+	lmg	%r2,%r3,0(%r4)
+	tmhl	%r2,0x4000	# check for message-security-assist
+	jz	.Lret
+
+	lghi	%r0,0		# query kimd capabilities
+	la	%r1,16(%r4)
+	.long	0xb93e0002	# kimd %r0,%r2
+
+	lghi	%r0,0		# query km capability vector
+	la	%r1,32(%r4)
+	.long	0xb92e0042	# km %r4,%r2
+
+	lghi	%r0,0		# query kmc capability vector
+	la	%r1,48(%r4)
+	.long	0xb92f0042	# kmc %r4,%r2
+
+	tmhh	%r3,0x0004	# check for message-security-assist-4
+	jz	.Lret
+
+	lghi	%r0,0		# query kmctr capability vector
+	la	%r1,64(%r4)
+	.long	0xb92d2042	# kmctr %r4,%r2,%r2
+
+.Lret:
 	br	%r14
 .size	OPENSSL_s390x_facilities,.-OPENSSL_s390x_facilities
 
@@ -112,4 +144,4 @@
 .section	.init
 	brasl	%r14,OPENSSL_cpuid_setup
 
-.comm	OPENSSL_s390xcap_P,16,8
+.comm	OPENSSL_s390xcap_P,80,8
diff --git a/crypto/sha/asm/sha1-s390x.pl b/crypto/sha/asm/sha1-s390x.pl
index a62cc31..003ba76 100644
--- a/crypto/sha/asm/sha1-s390x.pl
+++ b/crypto/sha/asm/sha1-s390x.pl
@@ -168,10 +168,7 @@
 	lg	%r0,0(%r1)
 	tmhl	%r0,0x4000	# check for message-security assist
 	jz	.Lsoftware
-	lghi	%r0,0
-	la	%r1,`2*$SIZE_T`($sp)
-	.long	0xb93e0002	# kimd %r0,%r2
-	lg	%r0,`2*$SIZE_T`($sp)
+	lg	%r0,16(%r1)	# check kimd capabilities
 	tmhh	%r0,`0x8000>>$kimdfunc`
 	jz	.Lsoftware
 	lghi	%r0,$kimdfunc
@@ -238,7 +235,7 @@
 	br	%r14
 .size	sha1_block_data_order,.-sha1_block_data_order
 .string	"SHA1 block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>"
-.comm	OPENSSL_s390xcap_P,16,8
+.comm	OPENSSL_s390xcap_P,80,8
 ___
 
 $code =~ s/\`([^\`]*)\`/eval $1/gem;
diff --git a/crypto/sha/asm/sha512-s390x.pl b/crypto/sha/asm/sha512-s390x.pl
index 7780627..00cee39 100644
--- a/crypto/sha/asm/sha512-s390x.pl
+++ b/crypto/sha/asm/sha512-s390x.pl
@@ -240,10 +240,7 @@
 	lg	%r0,0(%r1)
 	tmhl	%r0,0x4000	# check for message-security assist
 	jz	.Lsoftware
-	lghi	%r0,0
-	la	%r1,`2*$SIZE_T`($sp)
-	.long	0xb93e0002	# kimd %r0,%r2
-	lg	%r0,`2*$SIZE_T`($sp)
+	lg	%r0,16(%r1)	# check kimd capabilities
 	tmhh	%r0,`0x8000>>$kimdfunc`
 	jz	.Lsoftware
 	lghi	%r0,$kimdfunc
@@ -311,7 +308,7 @@
 	br	%r14
 .size	$Func,.-$Func
 .string	"SHA${label} block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>"
-.comm	OPENSSL_s390xcap_P,16,8
+.comm	OPENSSL_s390xcap_P,80,8
 ___
 
 $code =~ s/\`([^\`]*)\`/eval $1/gem;