blob: ca585f2d77c915684a897ca45590bba24ce5d906 [file] [log] [blame]
// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#include "google/protobuf/lazy_repeated_field.h"
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <string>
#include <utility>
#include "absl/base/optimization.h"
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/strings/cord.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "google/protobuf/arena.h"
#include "google/protobuf/generated_message_util.h"
#include "google/protobuf/io/coded_stream.h"
#include "google/protobuf/io/zero_copy_stream_impl_lite.h"
#include "google/protobuf/message_lite.h"
#include "google/protobuf/parse_context.h"
#include "google/protobuf/port.h"
#include "google/protobuf/repeated_ptr_field.h"
// Must be included last.
// clang-format off
#include "google/protobuf/port_def.inc"
// clang-format on
namespace google {
namespace protobuf {
namespace internal {
namespace {} // namespace
namespace {
// Parses the bytes at `ptr` as a repeated message field and stores the
// resulting elements in `value`.
//
// The leading tag is read once here and forwarded, together with the position
// just past it, to LazyRepeatedPtrField::ParseToRepeatedMessage. Returns
// nullptr when the tag cannot be read; otherwise returns whatever
// ParseToRepeatedMessage returns.
inline const char* InternalParseRepeated(const char* ptr,
                                         ParseContext* local_ctx,
                                         RepeatedPtrFieldBase* value,
                                         const MessageLite* prototype) {
  uint32_t first_tag;
  const char* const after_tag = ReadTag(ptr, &first_tag);
  if (ABSL_PREDICT_FALSE(after_tag == nullptr)) {
    return nullptr;
  }

  // TODO: Try to optimize this. The tags and lengths are read again
  // which is a bit wasteful.
  return LazyRepeatedPtrField::ParseToRepeatedMessage<uint32_t>(
      after_tag, local_ctx, prototype, first_tag, value);
}
template <typename T>
inline bool ParseWithNullOuterContextImpl(const T& input,
RepeatedPtrFieldBase* value,
const MessageLite* prototype,
bool set_missing_required) {
// Null outer context means it's either already verified or unverified.
//
// If the payload is eagerly verified, the recursion limit was also verified
// and we don't need to repeat that. Also, users might have used a custom
// limit which is not known at this access.
//
// Unverified lazy fields may suffer from stack overflow with deeply nested
// data. We argue that it should be better than silent data corruption.
constexpr int kUnlimitedDepth = std::numeric_limits<int>::max();
const char* ptr;
ParseContext local_ctx(kUnlimitedDepth, false, &ptr, input);
if (set_missing_required) {
local_ctx.SetParentMissingRequiredFields();
}
// Unparsed data is already verified at parsing. Disable eager-verification.
(void)local_ctx.set_lazy_parse_mode(ParseContext::LazyParseMode::kLazy);
ptr = InternalParseRepeated(ptr, &local_ctx, value, prototype);
return ptr != nullptr &&
(local_ctx.EndedAtEndOfStream() || local_ctx.EndedAtLimit());
}
template <typename T>
inline bool ParseWithOuterContextImpl(const T& input, ParseContext* ctx,
RepeatedPtrFieldBase* value,
const MessageLite* prototype,
bool set_missing_required) {
if (ctx == nullptr) {
return ParseWithNullOuterContextImpl(input, value, prototype,
set_missing_required);
}
ABSL_DCHECK(!ctx->AliasingEnabled());
// set_missing_required => ctx == nullptr
ABSL_DCHECK(!set_missing_required);
// Create local context with depth.
const char* ptr;
ParseContext local_ctx(ParseContext::kSpawn, *ctx, &ptr, input);
if (set_missing_required) {
local_ctx.SetParentMissingRequiredFields();
}
if (ctx->lazy_parse_mode() == ParseContext::LazyParseMode::kEagerVerify) {
// Unparsed data is already verified at parsing. Disable eager-verification.
(void)local_ctx.set_lazy_parse_mode(ParseContext::LazyParseMode::kLazy);
}
ptr = InternalParseRepeated(ptr, &local_ctx, value, prototype);
if (local_ctx.missing_required_fields()) {
ctx->SetMissingRequiredFields();
}
return ptr != nullptr &&
(local_ctx.EndedAtEndOfStream() || local_ctx.EndedAtLimit());
}
class ByPrototype {
public:
explicit ByPrototype(const MessageLite* prototype) : prototype_(prototype) {}
MessageLite* New(Arena* arena) const { return prototype_->New(arena); }
const MessageLite& Default() const { return *prototype_; }
private:
const MessageLite* prototype_;
};
} // namespace
// Read accessor: forwards to GetGeneric with a prototype-backed strategy and
// returns its result unchanged.
const RepeatedPtrFieldBase* LazyRepeatedPtrField::GetByPrototype(
    const MessageLite* prototype, Arena* arena, ParseContext* ctx) const {
  const ByPrototype strategy(prototype);
  return GetGeneric(strategy, arena, ctx);
}
// Mutable accessor: forwards to MutableGeneric with a prototype-backed
// strategy and returns its result unchanged.
RepeatedPtrFieldBase* LazyRepeatedPtrField::MutableByPrototype(
    const MessageLite* prototype, Arena* arena, ParseContext* ctx) {
  const ByPrototype strategy(prototype);
  return MutableGeneric(strategy, arena, ctx);
}
// Clears the field through PerformTransition: if the transaction exposes a
// value it is cleared in place, and the transition always ends in
// RawState::kCleared.
void LazyRepeatedPtrField::Clear() {
  PerformTransition([](ExclusiveTxn& txn) {
    if (auto* parsed = txn.mutable_value()) {
      parsed->Clear<GenericTypeHandler<MessageLite>>();
    }
    return RawState::kCleared;
  });
}
bool LazyRepeatedPtrField::IsEagerSerializeSafe(const MessageLite* prototype,
int32_t number,
Arena* arena) const {
// "prototype" may be null if it is for dynamic messages. This is ok as
// dynamic extensions won't be lazy as they lack verify functions any way.
if (prototype == nullptr) return false;
for (;;) {
switch (GetLogicalState()) {
case LogicalState::kClear:
case LogicalState::kClearExposed:
case LogicalState::kDirty:
return true;
case LogicalState::kNoParseRequired: {
const auto* value = raw_.load(std::memory_order_relaxed).value();
size_t tag_size = WireFormatLite::TagSize(
number, WireFormatLite::FieldType::TYPE_MESSAGE);
size_t total_size = tag_size * value->size();
for (int i = 0; i < value->size(); i++) {
total_size += WireFormatLite::LengthDelimitedSize(
value->Get<GenericTypeHandler<MessageLite>>(i).ByteSizeLong());
}
return total_size == unparsed_.Size();
}
case LogicalState::kParseRequired: {
GetByPrototype(prototype, arena);
break; // reswitch
}
}
}
// Required for certain compiler configurations.
ABSL_LOG(FATAL) << "Not reachable";
return false;
}
// Exchanges the values held by `lhs` and `rhs` using relaxed operations:
// `rhs` is loaded, exchanged into `lhs`, and the previous `lhs` value is then
// stored into `rhs`. The three steps are individual atomic operations, not one
// combined atomic swap.
void LazyRepeatedPtrField::swap_atomics(std::atomic<MessageState>& lhs,
                                        std::atomic<MessageState>& rhs) {
  const auto rhs_before = rhs.load(std::memory_order_relaxed);
  const auto lhs_before = lhs.exchange(rhs_before, std::memory_order_relaxed);
  rhs.store(lhs_before, std::memory_order_relaxed);
}
// Swaps the contents of `lhs` (associated with `lhs_arena`) and `rhs`
// (associated with `rhs_arena`).
//
// The raw state and the unparsed payload are exchanged first without regard to
// arenas. Afterwards, if the two arenas differ, each side's swapped-in data is
// either re-created for its destination arena or handed over to it. If the
// arenas are equal nothing further happens, except that both sides are
// re-created when DebugHardenForceCopyInSwap() is on and the shared arena is
// null.
void LazyRepeatedPtrField::Swap(LazyRepeatedPtrField* lhs, Arena* lhs_arena,
LazyRepeatedPtrField* rhs, Arena* rhs_arena) {
// Re-creates the data held by `f` for `arena`:
//  - if `f` holds a value, a new RepeatedPtrFieldBase is created through
//    Arena::Create for `arena`, the old elements are merged into it when the
//    old value is non-empty, and the new pointer is stored back into `raw_`;
//  - the unparsed payload is re-initialized for `arena` via Visit.
// When `cleanup_old` is true the previous value is deleted and the previous
// unparsed payload is destroyed; when false both are left untouched.
static auto reallocate = [](LazyRepeatedPtrField* f, Arena* arena,
bool cleanup_old) {
auto raw = f->raw_.load(std::memory_order_relaxed);
if (raw.value() != nullptr) {
auto* new_value = Arena::Create<RepeatedPtrFieldBase>(arena);
if (!raw.value()->empty()) {
new_value->MergeFrom<MessageLite>(*raw.value());
}
if (cleanup_old) {
delete reinterpret_cast<const RepeatedPtrField<MessageLite>*>(
raw.value());
};
raw.set_value(new_value);
f->raw_.store(raw, std::memory_order_relaxed);
}
// Keep a copy of the current unparsed holder so it can still be destroyed
// after `f->unparsed_` has been re-initialized below.
auto old_unparsed = f->unparsed_;
// Visit receives three callbacks: a no-argument one that does nothing, one
// that receives a cord by reference and moves it into a fresh cord, and one
// that receives a view and copies it into a cord (null arena) or an array
// (non-null arena).
// NOTE(review): presumably these map to the empty / cord / array states of
// the unparsed holder -- confirm against its definition.
f->unparsed_.Visit(
[] {},
[&](auto& cord) { f->unparsed_.InitAsCord(arena, std::move(cord)); },
[&](auto view) {
if (arena == nullptr) {
f->unparsed_.InitAsCord(arena, view);
} else {
f->unparsed_.InitAndSetArray(arena, view);
}
});
if (cleanup_old) old_unparsed.Destroy();
};
// Makes `arena` responsible for the data currently held by `f`. In
// force-copy mode the data is re-created for `arena` and the old copies are
// cleaned up; otherwise the existing value is registered with `arena->Own`
// and the unparsed payload's heap ownership is transferred to `arena`.
// Both call sites below pass a non-null arena.
static auto take_ownership = [](LazyRepeatedPtrField* f, Arena* arena) {
if (internal::DebugHardenForceCopyInSwap()) {
reallocate(f, arena, true);
} else {
arena->Own(reinterpret_cast<RepeatedPtrField<MessageLite>*>(
f->raw_.load(std::memory_order_relaxed).mutable_value()));
f->unparsed_.TransferHeapOwnershipToArena(arena);
}
};
using std::swap; // Enable ADL with fallback
swap_atomics(lhs->raw_, rhs->raw_);
swap(lhs->unparsed_, rhs->unparsed_);
// At this point we are in a weird state. The messages have been swapped into
// their destination, but we have completely ignored the arenas, so the owning
// arena is actually on the opposite message. Now we straighten out our
// ownership by forcing reallocations/ownership changes as needed.
if (lhs_arena == rhs_arena) {
// Same arena (or both null): the swapped data is already owned correctly.
// Only the debug-hardening mode forces a copy, and only when the shared
// arena is null.
if (internal::DebugHardenForceCopyInSwap() && lhs_arena == nullptr) {
reallocate(lhs, lhs_arena, true);
reallocate(rhs, rhs_arena, true);
}
} else {
if (lhs_arena == nullptr) {
// `rhs` now holds what `lhs` held before the swap, and `lhs` has no arena:
// hand that data to `rhs_arena`, and re-create `lhs`'s data with a null
// arena without cleaning up the old copy.
take_ownership(rhs, rhs_arena);
reallocate(lhs, lhs_arena, false);
} else if (rhs_arena == nullptr) {
// Mirror image of the branch above.
take_ownership(lhs, lhs_arena);
reallocate(rhs, rhs_arena, false);
} else {
// Two distinct non-null arenas: re-create both sides for their own arena;
// the old copies are not cleaned up here.
reallocate(lhs, lhs_arena, false);
reallocate(rhs, rhs_arena, false);
}
}
}
// Exchanges the raw state and the unparsed payload of `lhs` and `rhs`. No
// arena is involved here, so no ownership fix-up is performed.
void LazyRepeatedPtrField::InternalSwap(
    LazyRepeatedPtrField* PROTOBUF_RESTRICT lhs,
    LazyRepeatedPtrField* PROTOBUF_RESTRICT rhs) {
  // Raw (atomic) state first ...
  swap_atomics(lhs->raw_, rhs->raw_);

  // ... then the unparsed payload.
  {
    using std::swap;  // Enable ADL with fallback
    swap(lhs->unparsed_, rhs->unparsed_);
  }
}
// Cord overload: parses `input` into `value`.
//
// When the cord can be viewed as one flat buffer (TryFlat() yields a value) it
// is parsed directly from that view; otherwise it is read through an
// io::CordInputStream. Returns the result of ParseWithOuterContextImpl.
bool LazyRepeatedPtrField::ParseWithOuterContext(RepeatedPtrFieldBase* value,
                                                 const absl::Cord& input,
                                                 ParseContext* ctx,
                                                 const MessageLite* prototype,
                                                 bool set_missing_required) {
  if (absl::optional<absl::string_view> contiguous = input.TryFlat()) {
    return ParseWithOuterContextImpl(*contiguous, ctx, value, prototype,
                                     set_missing_required);
  }

  // Not flat: stream the cord instead.
  io::CordInputStream cord_stream(&input);
  return ParseWithOuterContextImpl(&cord_stream, ctx, value, prototype,
                                   set_missing_required);
}
// string_view overload: parses the contiguous buffer `input` into `value` by
// forwarding to ParseWithOuterContextImpl, and returns its result.
bool LazyRepeatedPtrField::ParseWithOuterContext(RepeatedPtrFieldBase* value,
                                                 absl::string_view input,
                                                 ParseContext* ctx,
                                                 const MessageLite* prototype,
                                                 bool set_missing_required) {
  const bool parsed_ok = ParseWithOuterContextImpl(
      input, ctx, value, prototype, set_missing_required);
  return parsed_ok;
}
size_t LazyRepeatedPtrField::ByteSizeLong(size_t tag_size) const {
switch (GetLogicalState()) {
case LogicalState::kClear:
case LogicalState::kClearExposed:
case LogicalState::kNoParseRequired:
case LogicalState::kParseRequired:
return unparsed_.Size();
case LogicalState::kDirty:
const auto* value = raw_.load(std::memory_order_relaxed).value();
size_t total_size = tag_size * value->size();
for (int i = 0; i < value->size(); i++) {
total_size += WireFormatLite::LengthDelimitedSize(
value->Get<GenericTypeHandler<MessageLite>>(i).ByteSizeLong());
}
return total_size;
}
// Required for certain compiler configurations.
ABSL_LOG(FATAL) << "Not reachable";
return -1;
}
void LazyRepeatedPtrField::LogParseError(const RepeatedPtrFieldBase* value) {
const MessageLite* message =
&value->at<internal::GenericTypeHandler<MessageLite>>(0);
auto get_error_string = [&value]() {
std::string str;
for (int i = 0; i < value->size(); i++) {
absl::StrAppend(&str, "[", i, "]: ",
value->at<internal::GenericTypeHandler<MessageLite>>(i)
.InitializationErrorString(),
"\n");
}
return str;
};
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
// In fuzzing mode, we log less to speed up fuzzing.
ABSL_LOG_EVERY_N(INFO, 100000)
#else
ABSL_LOG_EVERY_N_SEC(INFO, 1)
#endif
<< "Lazy parsing failed for RepeatedPtrField<" << message->GetTypeName()
<< "> error=" << get_error_string() << " (N = " << COUNTER << ")";
}
} // namespace internal
} // namespace protobuf
} // namespace google
#include "google/protobuf/port_undef.inc"