Disable mask-based implementation when SVE CLZ is present, since that allows vectorizing the scalar implementation.

PiperOrigin-RevId: 588950163

commit: 4a67ce8a033f5b60fb90c82e0380f72f3390ac9d [log] [tgz]
author: Protobuf Team Bot <protobuf-github-bot@google.com> Thu Dec 07 16:50:55 2023 -0800
committer: Copybara-Service <copybara-worker@google.com> Thu Dec 07 16:52:57 2023 -0800
tree: 5c3e5bb28b25ef68ae4421170966a5c94393a26a
parent: 1a02cd47c8e3c1b99fe480d5b711333814858368 [diff]
diff --git a/src/google/protobuf/wire_format_lite.cc b/src/google/protobuf/wire_format_lite.cc
index 2fd3142..7789f92 100644
--- a/src/google/protobuf/wire_format_lite.cc
+++ b/src/google/protobuf/wire_format_lite.cc

@@ -685,11 +685,13 @@
   return sum;
 }
 
-// GCC does not recognize the vectorization opportunity
-// and other platforms are untested, in those cases using the optimized
-// varint size routine for each element is faster.
-// Hence we enable it only for clang
-#if (defined(__SSE__) || defined(__aarch64__)) && defined(__clang__)
+// On machines without a vector count-leading-zeros instruction such as SVE CLZ
+// on arm or VPLZCNT on x86, SSE or AVX2 instructions can allow vectorization of
+// the size calculation loop. GCC does not detect this autovectorization
+// opportunity, so only enable for clang.
+// When last tested, AVX512-vectorized lzcnt was slower than the SSE/AVX2
+// implementation, so __AVX512CD__ is not checked.
+#if defined(__SSE__) && defined(__clang__)
 size_t WireFormatLite::Int32Size(const RepeatedField<int32_t>& value) {
   return VarintSize<false, true>(value.data(), value.size());
 }
@@ -707,7 +709,7 @@
   return VarintSize<false, true>(value.data(), value.size());
 }
 
-#else  // !((defined(__SSE__) || defined(__aarch64__) && defined(__clang__))
+#else  // !(defined(__SSE__) && defined(__clang__))
 
 size_t WireFormatLite::Int32Size(const RepeatedField<int32_t>& value) {
   size_t out = 0;
commit	4a67ce8a033f5b60fb90c82e0380f72f3390ac9d	[log] [tgz]
author	Protobuf Team Bot <protobuf-github-bot@google.com>	Thu Dec 07 16:50:55 2023 -0800
committer	Copybara-Service <copybara-worker@google.com>	Thu Dec 07 16:52:57 2023 -0800
tree	5c3e5bb28b25ef68ae4421170966a5c94393a26a
parent	1a02cd47c8e3c1b99fe480d5b711333814858368 [diff]