Roll abseil_revision 1b52dcb350..4bf37d8e19 Change Log: https://chromium.googlesource.com/external/github.com/abseil/abseil-cpp/+log/1b52dcb350..4bf37d8e19 Full diff: https://chromium.googlesource.com/external/github.com/abseil/abseil-cpp/+/1b52dcb350..4bf37d8e19 Bug: 416394845 Change-Id: I529c041a89890b78c229ef536ff9f62bf80612bb Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6533125 Reviewed-by: Mirko Bonadei <mbonadei@chromium.org> Commit-Queue: Mirko Bonadei <mbonadei@chromium.org> Commit-Queue: Hans Wennborg <hans@chromium.org> Auto-Submit: Hans Wennborg <hans@chromium.org> Cr-Commit-Position: refs/heads/main@{#1458756} NOKEYCHECK=True GitOrigin-RevId: d813a3f88318094cd8010302cb65de73ee11a628
diff --git a/README.chromium b/README.chromium index 044587a..c9515f4 100644 --- a/README.chromium +++ b/README.chromium
@@ -4,7 +4,7 @@ License: Apache-2.0 License File: LICENSE Version: N/A -Revision: 1b52dcb350289b262a105471a75ef6c001beecae +Revision: 4bf37d8e19bbea7e2ac4cd15d85615d6c803573e Security Critical: yes Shipped: yes
diff --git a/absl/algorithm/container.h b/absl/algorithm/container.h index 913268d..6f9c193 100644 --- a/absl/algorithm/container.h +++ b/absl/algorithm/container.h
@@ -75,8 +75,8 @@ // An MSVC bug involving template parameter substitution requires us to use // decltype() here instead of just std::pair. template <typename C1, typename C2> -using ContainerIterPairType = - decltype(std::make_pair(ContainerIter<C1>(), ContainerIter<C2>())); +using ContainerIterPairType = decltype(std::make_pair( + std::declval<ContainerIter<C1>>(), std::declval<ContainerIter<C2>>())); template <typename C> using ContainerDifferenceType = decltype(std::distance(
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 3bc86d1..3effc44 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h
@@ -3607,7 +3607,7 @@ static constexpr PolicyFunctions value = { static_cast<uint32_t>(sizeof(key_type)), static_cast<uint32_t>(sizeof(value_type)), - static_cast<uint16_t>(sizeof(slot_type)), + static_cast<uint32_t>(sizeof(slot_type)), static_cast<uint16_t>(alignof(slot_type)), SooEnabled(), ShouldSampleHashtablezInfoForAlloc<CharAlloc>(), // TODO(b/328722020): try to type erase
diff --git a/absl/log/internal/log_message.cc b/absl/log/internal/log_message.cc index 07d17a0..3aed3a2 100644 --- a/absl/log/internal/log_message.cc +++ b/absl/log/internal/log_message.cc
@@ -27,7 +27,6 @@ #include <algorithm> #include <array> #include <atomic> -#include <cwchar> #include <ios> #include <memory> #include <ostream> @@ -425,8 +424,7 @@ CopyToEncodedBuffer<StringType::kNotLiteral>( absl::string_view(kCharNull.data(), kCharNull.size() - 1)); } else { - CopyToEncodedBuffer<StringType::kNotLiteral>( - std::wstring_view(v, wcsnlen(v, data_->encoded_remaining().size()))); + CopyToEncodedBuffer<StringType::kNotLiteral>(v); } return *this; }
diff --git a/absl/log/log_format_test.cc b/absl/log/log_format_test.cc index f4e33c9..6b7d1e5 100644 --- a/absl/log/log_format_test.cc +++ b/absl/log/log_format_test.cc
@@ -1009,7 +1009,7 @@ #undef ABSL_LOG_INTERNAL_WIDE_LITERAL #undef ABSL_LOG_INTERNAL_UTF8_LITERAL -TYPED_TEST(WideStringLogFormatTest, InvalidCharactersAreReplaced) { +TYPED_TEST(WideStringLogFormatTest, IsolatedLowSurrogatesAreReplaced) { absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); TypeParam value = L"AAA \xDC00 BBB"; @@ -1024,6 +1024,74 @@ LOG(INFO) << value; } +TYPED_TEST(WideStringLogFormatTest, + DISABLED_IsolatedHighSurrogatesAreReplaced) { + absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); + + TypeParam value = L"AAA \xD800 BBB"; + // NOLINTNEXTLINE(readability/utf8) + absl::string_view utf8_value = "AAA � BBB"; + // Currently, this is "AAA \xF0\x90 BBB". + + EXPECT_CALL(test_sink, Send(AllOf(TextMessage(Eq(utf8_value)), + ENCODED_MESSAGE(HasValues(ElementsAre( + ValueWithStr(Eq(utf8_value)))))))); + + test_sink.StartCapturingLogs(); + LOG(INFO) << value; +} + +TYPED_TEST(WideStringLogFormatTest, + DISABLED_ConsecutiveHighSurrogatesAreReplaced) { + absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); + + TypeParam value = L"AAA \xD800\xD800 BBB"; + // NOLINTNEXTLINE(readability/utf8) + absl::string_view utf8_value = "AAA �� BBB"; + // Currently, this is "AAA \xF0\x90\xF0\x90 BBB". + + EXPECT_CALL(test_sink, Send(AllOf(TextMessage(Eq(utf8_value)), + ENCODED_MESSAGE(HasValues(ElementsAre( + ValueWithStr(Eq(utf8_value)))))))); + + test_sink.StartCapturingLogs(); + LOG(INFO) << value; +} + +TYPED_TEST(WideStringLogFormatTest, + DISABLED_HighHighLowSurrogateSequencesAreReplaced) { + absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); + + TypeParam value = L"AAA \xD800\xD800\xDC00 BBB"; + // NOLINTNEXTLINE(readability/utf8) + absl::string_view utf8_value = "AAA �𐀀 BBB"; + // Currently, this is "AAA \xF0\x90𐀀 BBB". + + EXPECT_CALL(test_sink, Send(AllOf(TextMessage(Eq(utf8_value)), + ENCODED_MESSAGE(HasValues(ElementsAre( + ValueWithStr(Eq(utf8_value)))))))); + + test_sink.StartCapturingLogs(); + LOG(INFO) << value; +} + +TYPED_TEST(WideStringLogFormatTest, + DISABLED_TrailingHighSurrogatesAreReplaced) { + absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); + + TypeParam value = L"AAA \xD800"; + // NOLINTNEXTLINE(readability/utf8) + absl::string_view utf8_value = "AAA �"; + // Currently, this is "AAA \xF0\x90". + + EXPECT_CALL(test_sink, Send(AllOf(TextMessage(Eq(utf8_value)), + ENCODED_MESSAGE(HasValues(ElementsAre( + ValueWithStr(Eq(utf8_value)))))))); + + test_sink.StartCapturingLogs(); + LOG(INFO) << value; +} + TYPED_TEST(WideStringLogFormatTest, EmptyWideString) { absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected);
diff --git a/absl/meta/type_traits.h b/absl/meta/type_traits.h index ba57e52..02c1e63 100644 --- a/absl/meta/type_traits.h +++ b/absl/meta/type_traits.h
@@ -324,11 +324,17 @@ // absl::is_trivially_relocatable<T> // +// https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2024/p2786r11.html +// // Detects whether a type is known to be "trivially relocatable" -- meaning it // can be relocated from one place to another as if by memcpy/memmove. // This implies that its object representation doesn't depend on its address, // and also none of its special member functions do anything strange. // +// Note that when relocating the caller code should ensure that if the object is +// polymorphic, the dynamic type is of the most derived type. Padding bytes +// should not be copied. +// // This trait is conservative. If it's true then the type is definitely // trivially relocatable, but if it's false then the type may or may not be. For // example, std::vector<int> is trivially relocatable on every known STL @@ -346,11 +352,7 @@ // // Upstream documentation: // -// https://clang.llvm.org/docs/LanguageExtensions.html#:~:text=__is_trivially_relocatable - -// If the compiler offers a builtin that tells us the answer, we can use that. -// This covers all of the cases in the fallback below, plus types that opt in -// using e.g. [[clang::trivial_abi]]. +// https://clang.llvm.org/docs/LanguageExtensions.html#:~:text=__builtin_is_cpp_trivially_relocatable // // Clang on Windows has the builtin, but it falsely claims types with a // user-provided destructor are trivial (http://b/275003464). So we opt out @@ -375,15 +377,22 @@ // // According to https://github.com/abseil/abseil-cpp/issues/1479, this does not // work with NVCC either. -#if ABSL_HAVE_BUILTIN(__is_trivially_relocatable) && \ - (defined(__cpp_impl_trivially_relocatable) || \ - (!defined(__clang__) && !defined(__APPLE__) && !defined(__NVCC__))) +#if ABSL_HAVE_BUILTIN(__builtin_is_cpp_trivially_relocatable) +// https://github.com/llvm/llvm-project/pull/127636#pullrequestreview-2637005293 +// In the current implementation, __builtin_is_cpp_trivially_relocatable will +// only return true for types that are trivially relocatable according to the +// standard. Notably, this means that marking a type [[clang::trivial_abi]] aka +// ABSL_HAVE_ATTRIBUTE_TRIVIAL_ABI will have no effect on this trait. template <class T> struct is_trivially_relocatable - : std::integral_constant<bool, __is_trivially_relocatable(T)> {}; + : std::integral_constant<bool, __builtin_is_cpp_trivially_relocatable(T)> { +}; #elif ABSL_HAVE_BUILTIN(__is_trivially_relocatable) && defined(__clang__) && \ !(defined(_WIN32) || defined(_WIN64)) && !defined(__APPLE__) && \ !defined(__NVCC__) +// https://github.com/llvm/llvm-project/pull/139061 +// __is_trivially_relocatable is deprecated. +// TODO(b/325479096): Remove this case. template <class T> struct is_trivially_relocatable : std::integral_constant<
diff --git a/absl/meta/type_traits_test.cc b/absl/meta/type_traits_test.cc index 7c2dbbc..3d55a00 100644 --- a/absl/meta/type_traits_test.cc +++ b/absl/meta/type_traits_test.cc
@@ -333,51 +333,6 @@ static_assert(!absl::is_trivially_relocatable<S>::value, ""); } -// TODO(b/275003464): remove the opt-out for Clang on Windows once -// __is_trivially_relocatable is used there again. -// TODO(b/324278148): remove the opt-out for Apple once -// __is_trivially_relocatable is fixed there. -// TODO(b/325479096): remove the opt-out for Clang once -// __is_trivially_relocatable is fixed there. -#if defined(ABSL_HAVE_ATTRIBUTE_TRIVIAL_ABI) && \ - ABSL_HAVE_BUILTIN(__is_trivially_relocatable) && \ - (defined(__cpp_impl_trivially_relocatable) || \ - (!defined(__clang__) && !defined(__APPLE__) && !defined(__NVCC__))) -// A type marked with the "trivial ABI" attribute is trivially relocatable even -// if it has user-provided special members. -TEST(TriviallyRelocatable, TrivialAbi) { - struct ABSL_ATTRIBUTE_TRIVIAL_ABI S { - S(S&&) {} // NOLINT(modernize-use-equals-default) - S(const S&) {} // NOLINT(modernize-use-equals-default) - S& operator=(S&&) { return *this; } - S& operator=(const S&) { return *this; } - ~S() {} // NOLINT(modernize-use-equals-default) - }; - - static_assert(absl::is_trivially_relocatable<S>::value, ""); -} -#endif - -// TODO(b/275003464): remove the opt-out for Clang on Windows once -// __is_trivially_relocatable is used there again. -// TODO(b/324278148): remove the opt-out for Apple once -// __is_trivially_relocatable is fixed there. -#if defined(ABSL_HAVE_ATTRIBUTE_TRIVIAL_ABI) && \ - ABSL_HAVE_BUILTIN(__is_trivially_relocatable) && defined(__clang__) && \ - !(defined(_WIN32) || defined(_WIN64)) && !defined(__APPLE__) && \ - !defined(__NVCC__) -// A type marked with the "trivial ABI" attribute is trivially relocatable even -// if it has a user-provided copy constructor and a user-provided destructor. -TEST(TriviallyRelocatable, TrivialAbi_NoUserProvidedMove) { - struct ABSL_ATTRIBUTE_TRIVIAL_ABI S { - S(const S&) {} // NOLINT(modernize-use-equals-default) - ~S() {} // NOLINT(modernize-use-equals-default) - }; - - static_assert(absl::is_trivially_relocatable<S>::value, ""); -} -#endif - #ifdef ABSL_HAVE_CONSTANT_EVALUATED constexpr int64_t NegateIfConstantEvaluated(int64_t i) {
diff --git a/absl/strings/internal/utf8.cc b/absl/strings/internal/utf8.cc index 4370c7c..61945f5 100644 --- a/absl/strings/internal/utf8.cc +++ b/absl/strings/internal/utf8.cc
@@ -18,6 +18,7 @@ #include <cstddef> #include <cstdint> +#include <limits> #include "absl/base/config.h" @@ -25,7 +26,7 @@ ABSL_NAMESPACE_BEGIN namespace strings_internal { -size_t EncodeUTF8Char(char *buffer, char32_t utf8_char) { +size_t EncodeUTF8Char(char* buffer, char32_t utf8_char) { if (utf8_char <= 0x7F) { *buffer = static_cast<char>(utf8_char); return 1; @@ -53,45 +54,93 @@ } } -size_t WideToUtf8(wchar_t wc, char *buf, ShiftState &s) { - const auto v = static_cast<uint32_t>(wc); - if (v < 0x80) { - *buf = static_cast<char>(v); +size_t WideToUtf8(wchar_t wc, char* buf, ShiftState& s) { + // Reinterpret the output buffer `buf` as `unsigned char*` for subsequent + // bitwise operations. This ensures well-defined behavior for bit + // manipulations (avoiding issues with signed `char`) and is safe under C++ + // aliasing rules, as `unsigned char` can alias any type. + auto* ubuf = reinterpret_cast<unsigned char*>(buf); + const uint32_t v = static_cast<uint32_t>(wc); + constexpr size_t kError = static_cast<size_t>(-1); + + if (v <= 0x007F) { + // 1-byte sequence (U+0000 to U+007F). + // 0xxxxxxx. + ubuf[0] = (0b0111'1111 & v); + s = {}; // Reset surrogate state. return 1; - } else if (v < 0x800) { - *buf++ = static_cast<char>(0xc0 | (v >> 6)); - *buf = static_cast<char>(0x80 | (v & 0x3f)); + } else if (0x0080 <= v && v <= 0x07FF) { + // 2-byte sequence (U+0080 to U+07FF). + // 110xxxxx 10xxxxxx. + ubuf[0] = 0b1100'0000 | (0b0001'1111 & (v >> 6)); + ubuf[1] = 0b1000'0000 | (0b0011'1111 & v); + s = {}; // Reset surrogate state. return 2; - } else if (v < 0xd800 || (v - 0xe000) < 0x2000) { - *buf++ = static_cast<char>(0xe0 | (v >> 12)); - *buf++ = static_cast<char>(0x80 | ((v >> 6) & 0x3f)); - *buf = static_cast<char>(0x80 | (v & 0x3f)); + } else if ((0x0800 <= v && v <= 0xD7FF) || (0xE000 <= v && v <= 0xFFFF)) { + // 3-byte sequence (U+0800 to U+D7FF or U+E000 to U+FFFF). + // Excludes surrogate code points U+D800-U+DFFF. + // 1110xxxx 10xxxxxx 10xxxxxx. + ubuf[0] = 0b1110'0000 | (0b0000'1111 & (v >> 12)); + ubuf[1] = 0b1000'0000 | (0b0011'1111 & (v >> 6)); + ubuf[2] = 0b1000'0000 | (0b0011'1111 & v); + s = {}; // Reset surrogate state. return 3; - } else if ((v - 0x10000) < 0x100000) { - *buf++ = static_cast<char>(0xf0 | (v >> 18)); - *buf++ = static_cast<char>(0x80 | ((v >> 12) & 0x3f)); - *buf++ = static_cast<char>(0x80 | ((v >> 6) & 0x3f)); - *buf = static_cast<char>(0x80 | (v & 0x3f)); - return 4; - } else if (v < 0xdc00) { - s.saw_high_surrogate = true; - s.bits = static_cast<uint8_t>(v & 0x3); - const uint8_t high_bits = ((v >> 6) & 0xf) + 1; - *buf++ = static_cast<char>(0xf0 | (high_bits >> 2)); - *buf = - static_cast<char>(0x80 | static_cast<uint8_t>((high_bits & 0x3) << 4) | - static_cast<uint8_t>((v >> 2) & 0xf)); - return 2; - } else if (v < 0xe000 && s.saw_high_surrogate) { - *buf++ = static_cast<char>(0x80 | static_cast<uint8_t>(s.bits << 4) | - static_cast<uint8_t>((v >> 6) & 0xf)); - *buf = static_cast<char>(0x80 | (v & 0x3f)); - s.saw_high_surrogate = false; - s.bits = 0; - return 2; - } else { - return static_cast<size_t>(-1); + } else if (0xD800 <= v && v <= 0xDBFF) { + // High Surrogate (U+D800 to U+DBFF). + // This part forms the first two bytes of an eventual 4-byte UTF-8 sequence. + const unsigned char high_bits_val = (0b0000'1111 & (v >> 6)) + 1; + + // First byte of the 4-byte UTF-8 sequence (11110xxx). + ubuf[0] = 0b1111'0000 | (0b0000'0111 & (high_bits_val >> 2)); + // Second byte of the 4-byte UTF-8 sequence (10xxxxxx). + ubuf[1] = 0b1000'0000 | // + (0b0011'0000 & (high_bits_val << 4)) | // + (0b0000'1111 & (v >> 2)); + // Set state for high surrogate after writing to buffer. + s = {true, static_cast<unsigned char>(0b0000'0011 & v)}; + return 2; // Wrote 2 bytes, expecting 2 more from a low surrogate. + } else if (0xDC00 <= v && v <= 0xDFFF) { + // Low Surrogate (U+DC00 to U+DFFF). + // This part forms the last two bytes of a 4-byte UTF-8 sequence, + // using state from a preceding high surrogate. + if (!s.saw_high_surrogate) { + // Error: Isolated low surrogate without a preceding high surrogate. + // s remains in its current (problematic) state. + // Caller should handle error. + return kError; + } + + // Third byte of the 4-byte UTF-8 sequence (10xxxxxx). + ubuf[0] = 0b1000'0000 | // + (0b0011'0000 & (s.bits << 4)) | // + (0b0000'1111 & (v >> 6)); + // Fourth byte of the 4-byte UTF-8 sequence (10xxxxxx). + ubuf[1] = 0b1000'0000 | (0b0011'1111 & v); + + s = {}; // Reset surrogate state, pair complete. + return 2; // Wrote 2 more bytes, completing the 4-byte sequence. + } else if constexpr (0xFFFF < std::numeric_limits<wchar_t>::max()) { + // Conditionally compile the 4-byte direct conversion branch. + // This block is compiled only if wchar_t can represent values > 0xFFFF. + // It's placed after surrogate checks to ensure surrogates are handled by + // their specific logic. This inner 'if' is the runtime check for the 4-byte + // range. At this point, v is known not to be in the 1, 2, or 3-byte BMP + // ranges, nor is it a surrogate code point. + if (0x10000 <= v && v <= 0x10FFFF) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. + ubuf[0] = 0b1111'0000 | (0b0000'0111 & (v >> 18)); + ubuf[1] = 0b1000'0000 | (0b0011'1111 & (v >> 12)); + ubuf[2] = 0b1000'0000 | (0b0011'1111 & (v >> 6)); + ubuf[3] = 0b1000'0000 | (0b0011'1111 & v); + s = {}; // Reset surrogate state. + return 4; + } } + + // Invalid wchar_t value (e.g., out of Unicode range, or unhandled after all + // checks). + s = {}; // Reset surrogate state. + return kError; } } // namespace strings_internal
diff --git a/absl/strings/internal/utf8.h b/absl/strings/internal/utf8.h index f240408..ed1db11 100644 --- a/absl/strings/internal/utf8.h +++ b/absl/strings/internal/utf8.h
@@ -41,11 +41,11 @@ // characters into buffer, however never will more than kMaxEncodedUTF8Size // bytes be written, regardless of the value of utf8_char. enum { kMaxEncodedUTF8Size = 4 }; -size_t EncodeUTF8Char(char *buffer, char32_t utf8_char); +size_t EncodeUTF8Char(char* buffer, char32_t utf8_char); struct ShiftState { bool saw_high_surrogate = false; - uint8_t bits = 0; + unsigned char bits = 0; }; // Converts `wc` from UTF-16 or UTF-32 to UTF-8 and writes to `buf`. `buf` is @@ -55,7 +55,7 @@ // // This is basically std::wcrtomb(), but always outputting UTF-8 instead of // respecting the current locale. -size_t WideToUtf8(wchar_t wc, char *buf, ShiftState &s); +size_t WideToUtf8(wchar_t wc, char* buf, ShiftState& s); } // namespace strings_internal ABSL_NAMESPACE_END
diff --git a/absl/strings/internal/utf8_test.cc b/absl/strings/internal/utf8_test.cc index 62322dd..b88d7bb 100644 --- a/absl/strings/internal/utf8_test.cc +++ b/absl/strings/internal/utf8_test.cc
@@ -103,8 +103,21 @@ {"BMP_MaxBeforeSurrogates_D7FF", L'\uD7FF', "\xED\x9F\xBF", 3}, {"BMP_FFFF", L'\uFFFF', "\xEF\xBF\xBF", 3}, - {"IsolatedHighSurr_D800", L'\xD800', "\xF0\x90", 2, {true, 0}, {true, 0}}, - {"IsolatedHighSurr_DBFF", L'\xDBFF', "\xF4\x8F", 2, {true, 3}, {true, 3}}, + {"IsolatedHighSurr_D800", L'\xD800', "\xF0\x90", 2, {}, {true, 0}}, + {"IsolatedHighSurr_DBFF", L'\xDBFF', "\xF4\x8F", 2, {}, {true, 3}}, + + {"HighSurr_D800_after_HighD800", + L'\xD800', + "\xF0\x90", + 2, + {true, 0}, + {true, 0}}, + {"HighSurr_DBFF_after_HighDBFF", + L'\xDBFF', + "\xF4\x8F", + 2, + {true, 3}, + {true, 3}}, {"LowSurr_DC00_after_HighD800", L'\xDC00', "\x80\x80", 2, {true, 0}, {}}, {"LowSurr_DFFD_after_HighDBFF", L'\xDFFD', "\xBF\xBD", 2, {true, 3}, {}},