crypto/x86_64cpuid.pl: fix AVX512 capability masking.

Originally it was thought that it's possible to use AVX512VL+BW
instructions with XMM and YMM registers without kernel enabling
ZMM support, but it turned to be wrong assumption.

Reviewed-by: Rich Salz <rsalz@openssl.org>
diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl
index 7c8952e..d30928e 100644
--- a/crypto/x86_64cpuid.pl
+++ b/crypto/x86_64cpuid.pl
@@ -197,10 +197,11 @@
 	and	\$0xe6,%eax		# isolate XMM, YMM and ZMM state support
 	cmp	\$0xe6,%eax
 	je	.Ldone
-	andl	\$0xfffeffff,8(%rdi)	# clear AVX512F, ~(1<<16)
-					# note that we don't touch other AVX512
-					# extensions, because they can be used
-					# with YMM (without opmasking though)
+	andl	\$0x3fdeffff,8(%rdi)	# ~(1<<31|1<<30|1<<21|1<<16)
+					# clear AVX512F+BW+VL+FIMA, all of
+					# them are EVEX-encoded, which requires
+					# ZMM state support even if one uses
+					# only XMM and YMM :-(
 	and	\$6,%eax		# isolate XMM and YMM state support
 	cmp	\$6,%eax
 	je	.Ldone