Internal change
PiperOrigin-RevId: 560100839
diff --git a/WORKSPACE b/WORKSPACE
index f8f687e..cb1390c 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -7,6 +7,15 @@
path = "examples",
)
+# We will soon merge upb and protobuf into the same Bazel repository, but for
+# now we depend on the separate Bazel repo in the upb/ directory. This is
+# important to ensure that the CI tests exercise upb at head instead of relying
+# on a stale version from protobuf_deps.bzl.
+local_repository(
+ name = "upb",
+ path = "upb",
+)
+
# Load common dependencies first to ensure we use the correct version
load("//:protobuf_deps.bzl", "PROTOBUF_MAVEN_ARTIFACTS", "protobuf_deps")
diff --git a/upb/.bazelci/presubmit.yml b/upb/.bazelci/presubmit.yml
new file mode 100644
index 0000000..c763047
--- /dev/null
+++ b/upb/.bazelci/presubmit.yml
@@ -0,0 +1,16 @@
+---
+tasks:
+ ubuntu:
+ platform: ubuntu2004
+ shell_commands:
+ - "sudo apt -y update && sudo apt -y install libreadline-dev cmake rsync"
+ build_flags:
+ - "--incompatible_disallow_empty_glob"
+ test_targets:
+ - //...
+ macos:
+ platform: macos
+ build_flags:
+ - "--incompatible_disallow_empty_glob"
+ test_targets:
+ - //...
diff --git a/upb/.bazelignore b/upb/.bazelignore
new file mode 100644
index 0000000..e35d885
--- /dev/null
+++ b/upb/.bazelignore
@@ -0,0 +1 @@
+_build
diff --git a/upb/.bazelrc b/upb/.bazelrc
new file mode 100644
index 0000000..464bef5
--- /dev/null
+++ b/upb/.bazelrc
@@ -0,0 +1,63 @@
+# temporary fix for https://github.com/bazelbuild/bazel/issues/12905 on macOS
+build --features=-debug_prefix_map_pwd_is_dot
+
+# Pin to C++17
+build --cxxopt=-std=c++17 --host_cxxopt=-std=c++17
+build:cpp17_msvc --cxxopt=/std:c++17 --host_cxxopt=/std:c++17
+
+# Disallow empty globs
+build: --incompatible_disallow_empty_glob
+
+# Use our custom-configured c++ toolchain.
+
+build:m32 --copt=-m32 --linkopt=-m32
+build:asan --copt=-fsanitize=address --linkopt=-fsanitize=address
+build:msan --copt=-fsanitize=memory --linkopt=-fsanitize=memory
+
+# For Valgrind, we have to disable checks of "possible" leaks because the Python
+# interpreter does the sorts of things that flag Valgrind "possible" leak checks.
+# Ideally we could enforce a stricter check for the non-Python tests, but I don't
+# know of an easy way to do that.
+#
+# We also have to disable pymalloc to avoid triggering Valgrind.
+build:valgrind --run_under='valgrind --leak-check=full --track-origins=yes --trace-children=yes --show-leak-kinds=all --error-exitcode=1 --num-callers=500 ' --action_env=PYTHONMALLOC=malloc
+
+build:ubsan --copt=-fsanitize=undefined --linkopt=-fsanitize=undefined --action_env=UBSAN_OPTIONS=halt_on_error=1:print_stacktrace=1
+# Workaround for the fact that Bazel links with $CC, not $CXX
+# https://github.com/bazelbuild/bazel/issues/11122#issuecomment-613746748
+build:ubsan --copt=-fno-sanitize=function --copt=-fno-sanitize=vptr
+# Workaround for https://bugs.llvm.org/show_bug.cgi?id=16404
+build:ubsan --linkopt=--rtlib=compiler-rt --linkopt=-lunwind
+
+build:Werror --copt=-Werror
+build:Werror --per_file_copt=json/parser@-Wno-error
+build:Werror --per_file_copt=com_google_protobuf@-Wno-error
+
+# GCC's -fanalyzer, a deeper static analysis than normal warnings.
+build:analyzer --copt=-fanalyzer --copt=-Werror
+build:analyzer --per_file_copt=json/parser@-fno-analyzer
+build:analyzer --per_file_copt=com_google_protobuf@-fno-analyzer
+build:analyzer --per_file_copt=com_github_google_benchmark@-fno-analyzer
+
+# --config=asan-libfuzzer
+build:asan-libfuzzer --action_env=CC=clang
+build:asan-libfuzzer --action_env=CXX=clang++
+build:asan-libfuzzer --@rules_fuzzing//fuzzing:cc_engine=@rules_fuzzing//fuzzing/engines:libfuzzer
+build:asan-libfuzzer --@rules_fuzzing//fuzzing:cc_engine_instrumentation=libfuzzer
+build:asan-libfuzzer --@rules_fuzzing//fuzzing:cc_engine_sanitizer=asan
+
+# --config=msan-libfuzzer
+build:msan-libfuzzer --action_env=CC=clang
+build:msan-libfuzzer --action_env=CXX=clang++
+build:msan-libfuzzer --@rules_fuzzing//fuzzing:cc_engine=@rules_fuzzing//fuzzing/engines:libfuzzer
+build:msan-libfuzzer --@rules_fuzzing//fuzzing:cc_engine_instrumentation=libfuzzer
+build:msan-libfuzzer --@rules_fuzzing//fuzzing:cc_engine_sanitizer=msan
+
+# --config=ubsan-libfuzzer
+build:ubsan-libfuzzer --action_env=CC=clang
+build:ubsan-libfuzzer --action_env=CXX=clang++
+build:ubsan-libfuzzer --@rules_fuzzing//fuzzing:cc_engine=@rules_fuzzing//fuzzing/engines:libfuzzer
+build:ubsan-libfuzzer --@rules_fuzzing//fuzzing:cc_engine_instrumentation=libfuzzer
+build:ubsan-libfuzzer --copt=-fsanitize=undefined
+build:ubsan-libfuzzer --linkopt=-fsanitize=undefined
+build:ubsan-libfuzzer --linkopt=-fsanitize-link-c++-runtime
diff --git a/upb/.clang-format b/upb/.clang-format
new file mode 100644
index 0000000..24d97f2
--- /dev/null
+++ b/upb/.clang-format
@@ -0,0 +1,3 @@
+BasedOnStyle: Google
+DerivePointerAlignment: false
+PointerAlignment: Left
diff --git a/upb/.github/actions/setup-bazel-cache/action.yml b/upb/.github/actions/setup-bazel-cache/action.yml
new file mode 100644
index 0000000..ac9ea83
--- /dev/null
+++ b/upb/.github/actions/setup-bazel-cache/action.yml
@@ -0,0 +1,21 @@
+name: "Setup Bazel Caching"
+description: "Sets up Bazel caching"
+inputs:
+ cache_url:
+ description: "URL of the Bazel cache to read/write"
+ required: false
+ default: https://storage.googleapis.com/protobuf-bazel-cache/upb
+ read_only:
+ description: "If true, we can read from the cache but not write it."
+ required: false
+ default: ${{ github.event.pull_request.head.repo.full_name != 'protocolbuffers/upb' }}
+outputs:
+ cache_args:
+ description: "Caching related arguments to pass to 'bazel build"
+ value: --remote_cache=${{ inputs.cache_url }} ${{ steps.set_auth_arg.outputs.auth_arg }}
+runs:
+ using: "composite"
+ steps:
+ - id: set_auth_arg
+ run: echo auth_arg=${{ inputs.read_only == 'true' && '--remote_upload_local_results=false' || '--google_default_credentials' }} >> $GITHUB_OUTPUT
+ shell: bash
diff --git a/upb/.github/workflows/bazel_tests.yml b/upb/.github/workflows/bazel_tests.yml
new file mode 100644
index 0000000..851f0f0
--- /dev/null
+++ b/upb/.github/workflows/bazel_tests.yml
@@ -0,0 +1,107 @@
+name: Bazel Tests
+
+on:
+ push:
+ branches:
+ - main
+ - '[0-9]+.x'
+ pull_request:
+ branches:
+ - main
+ - '[0-9]+.x'
+ workflow_dispatch:
+
+jobs:
+
+ ubuntu:
+ runs-on: ${{ matrix.os }}
+
+ strategy:
+ fail-fast: false # Don't cancel all jobs if one fails.
+ matrix:
+ include:
+ - { NAME: "Fastbuild", BAZEL: bazel, CC: clang, os: ubuntu-20-large, flags: "" }
+ - { NAME: "Optimized", BAZEL: bazel, CC: clang, os: ubuntu-20-large, flags: "-c opt" } # Some warnings only fire with -c opt
+ - { NAME: "GCC Optimized", BAZEL: bazel, CC: gcc-12, os: ubuntu-22.04, flags: "-c opt" }
+ - { NAME: "FastTable", BAZEL: bazel, CC: clang, os: ubuntu-20-large, flags: "--//:fasttable_enabled=true -- -cmake:test_generated_files" }
+ - { NAME: "ASAN", BAZEL: bazel, CC: clang, os: ubuntu-20-large, flags: "--config=asan -c dbg -- -benchmarks:benchmark -python/..." }
+ - { NAME: "UBSAN", BAZEL: bazel, CC: clang, os: ubuntu-20-large, flags: "--config=ubsan -c dbg -- -benchmarks:benchmark -python/... -lua/...", install: "libunwind-dev" }
+ - { NAME: "32-bit", BAZEL: bazel, CC: clang, os: ubuntu-20-large, flags: "--copt=-m32 --linkopt=-m32 -- benchmarks:benchmark -python/...", install: "g++-multilib" }
+ - { NAME: "32-bit ASAN", BAZEL: bazel, CC: clang, os: ubuntu-20-large, flags: "--config=asan -c dbg --copt=-m32 --linkopt=-m32 -- -//benchmarks/... -//python/... -//upb/message:copy_test -//upb/message:promote_test -//upb/message:test -//upb/test:test_generated_code", install: "g++-multilib" }
+ - { NAME: "Windows", BAZEL: bazel, os: windows-2019, startup-flags: "--output_user_root=C:/tmp", flags: "--config=cpp17_msvc", targets: "upb/... upbc/... python/... protos/... protos_generator/..." }
+ - { NAME: "macOS", BAZEL: bazel, CC: clang, os: macos-11 }
+ # Current github runners are all Intel based, so just build/compile for Apple Silicon to detect issues there.
+ - { NAME: "macOS ARM (build only)", BAZEL: bazel, BAZEL_CMD: build, CC: clang, os: macos-11, flags: "--cpu=darwin_arm64"}
+ # We support two Bazel versions back per https://opensource.google/documentation/policies/cplusplus-support
+ - { NAME: "Bazel 5.3.0", BAZEL: bazel-5.3.0-linux-x86_64, CC: clang, os: ubuntu-20-large }
+ - { NAME: "Bazel 6.1.0", BAZEL: bazel-6.1.0-linux-x86_64, CC: clang, os: ubuntu-20-large }
+
+ name: ${{ matrix.NAME }}
+
+ steps:
+ - uses: actions/checkout@v2
+ - name: Set up Cloud SDK
+ uses: google-github-actions/auth@v0
+ with:
+ credentials_json: ${{ secrets.GOOGLE_CREDENTIALS }}
+ export_environment_variables: true
+ if: ${{ github.event.pull_request.head.repo.full_name == 'protocolbuffers/upb' }}
+ - name: Download historical Bazel version
+ run: |
+ FILENAME=$HOME/bin/${{ matrix.BAZEL }}
+ VERSION=$(echo ${{ matrix.BAZEL }} | cut -d- -f 2 )
+ mkdir -p $HOME/bin
+ echo $HOME/bin >> $GITHUB_PATH
+ wget -O $FILENAME https://github.com/bazelbuild/bazel/releases/download/$VERSION/${{ matrix.BAZEL }}
+ chmod a+x $FILENAME
+ if: ${{ matrix.BAZEL != 'bazel' }}
+ - name: Check compiler versions
+ if: matrix.CC
+ run: ${{ matrix.CC }} --version
+ - name: Check Bazel versions
+ run: ${{ matrix.BAZEL }} --version
+ - id: bazel-cache
+ name: Set up Bazel caching
+ uses: ./.github/actions/setup-bazel-cache
+ - name: Setup Python venv
+ if: ${{ runner.os != 'Windows' }}
+ run: rm -rf /tmp/venv && python3 -m venv /tmp/venv && source /tmp/venv/bin/activate && python3 --version
+ - name: Install dependencies
+ run: sudo apt update && sudo apt install -y ${{ matrix.install }}
+ if: matrix.install != ''
+ - name: Install numpy
+ run: pip3 install numpy
+ - name: Setup environment variables
+ if: matrix.CC
+ run: echo "CC=${{ matrix.CC }}" >> $GITHUB_ENV
+ - name: Run tests
+ run: cd ${{ github.workspace }} && ${{ matrix.BAZEL }} ${{ matrix.startup-flags }} ${{ matrix.BAZEL_CMD || 'test' }} --test_output=errors ${{ steps.bazel-cache.outputs.cache_args }} ${{ matrix.targets || '...' }} ${{ matrix.flags }}
+ - uses: actions/upload-artifact@v3
+ with:
+ name: logs
+ path: |
+ **/*.log
+
+ no-python:
+ runs-on: ubuntu-20-large
+
+ strategy:
+ fail-fast: false # Don't cancel all jobs if one fails.
+
+ name: "No System Python"
+
+ steps:
+ - uses: actions/checkout@v2
+ - name: Set up Cloud SDK
+ uses: google-github-actions/auth@v0
+ with:
+ credentials_json: ${{ secrets.GOOGLE_CREDENTIALS }}
+ export_environment_variables: true
+ if: ${{ github.event.pull_request.head.repo.full_name == 'protocolbuffers/upb' }}
+ - id: bazel-cache
+ name: Set up Bazel caching
+ uses: ./.github/actions/setup-bazel-cache
+ - name: Uninstall python
+ run: which python3 && sudo mv `which python3` /tmp && ! which python3
+ - name: Run tests
+ run: cd ${{ github.workspace }} && bazel test --test_output=errors ${{ steps.bazel-cache.outputs.cache_args }} //python/... -- -//python/dist:source_wheel
diff --git a/upb/.github/workflows/clang_format.yml b/upb/.github/workflows/clang_format.yml
new file mode 100644
index 0000000..5103500
--- /dev/null
+++ b/upb/.github/workflows/clang_format.yml
@@ -0,0 +1,22 @@
+name: Check ClangFormat
+
+on:
+ push:
+ branches:
+ - main
+ - '[0-9]+.x'
+ pull_request:
+ branches:
+ - main
+ - '[0-9]+.x'
+ workflow_dispatch:
+
+jobs:
+ check_clang_format:
+ runs-on: ubuntu-20-large
+ steps:
+ - uses: actions/checkout@v2
+ - name: Run ClangFormat
+ run: find . | grep -E '\.(c|h|cc)$' | grep -E -v '^./(third_party|cmake)' | xargs clang-format -i
+ - name: Check for differences
+ run: git diff --exit-code
diff --git a/upb/.github/workflows/generate_files.yml b/upb/.github/workflows/generate_files.yml
new file mode 100644
index 0000000..ce9d4da
--- /dev/null
+++ b/upb/.github/workflows/generate_files.yml
@@ -0,0 +1,24 @@
+name: Generate Files
+
+# After any push to the main branch, re-generate pregenerated files.
+on:
+ push:
+ branches:
+ - main
+ - '[0-9]+.x'
+
+jobs:
+ generate:
+ if: github.repository == 'protocolbuffers/upb'
+ runs-on: ubuntu-22-large
+
+ steps:
+ - uses: actions/checkout@v3
+ with:
+ # Note: this token has an expiration date, so if the workflow starts
+ # failing then you may need to generate a fresh token.
+ token: ${{ secrets.BOT_ACCESS_TOKEN }}
+ - name: Configure name and email address in Git
+ run: cd ${{ github.workspace }} && git config user.name "Protobuf Team Bot" && git config user.email "protobuf-team-bot@google.com"
+ - name: Commit and push update
+ run: cd ${{ github.workspace }} && ./cmake/push_auto_update.sh
diff --git a/upb/.github/workflows/mergeable.yml b/upb/.github/workflows/mergeable.yml
new file mode 100644
index 0000000..255bc1c
--- /dev/null
+++ b/upb/.github/workflows/mergeable.yml
@@ -0,0 +1,15 @@
+mergeable:
+ pull_requests:
+ label:
+ and:
+ - must_exclude:
+ regex: '^disposition/DO NOT MERGE'
+ message: 'Pull request marked not mergeable'
+ - must_include:
+ regex: 'mergeable:force-allow'
+ message: 'Pull requests should not be merged directly and should instead
+ be handled by Copybara.
+
+ To enable Github merges, add the `mergeable:force-allow` label and get a second
+ approval. This should almost never be used except for releases or as a break glass
+ measure after discussing with the team.'
\ No newline at end of file
diff --git a/upb/.github/workflows/python_tests.yml b/upb/.github/workflows/python_tests.yml
new file mode 100644
index 0000000..42d93a7
--- /dev/null
+++ b/upb/.github/workflows/python_tests.yml
@@ -0,0 +1,173 @@
+name: Python Tests
+
+on:
+ push:
+ branches:
+ - main
+ - '[0-9]+.x'
+ pull_request:
+ branches:
+ - main
+ - '[0-9]+.x'
+ workflow_dispatch:
+
+jobs:
+ build_wheels:
+ name: Build Wheels
+ runs-on: ubuntu-large
+ if: ${{ github.event.pull_request.head.repo.full_name == 'protocolbuffers/upb' }}
+ env:
+ # Bazel 5.4.0. Once we have moved to toolchains, we can update to Bazel 6.x.
+ DOCKER_IMAGE: us-docker.pkg.dev/protobuf-build/release-containers/linux/apple@sha256:bb1d14738449916d489c1cbb062508c4dca5bd265ea3e67a2628ae40912b9b00
+
+ steps:
+ - uses: actions/checkout@v2
+ - name: Set up Cloud SDK
+ uses: google-github-actions/auth@v0
+ with:
+ credentials_json: ${{ secrets.GOOGLE_CREDENTIALS }}
+ export_environment_variables: true
+ - name: Use gcloud CLI
+ run: gcloud info
+ - name: Configure Docker
+ run: gcloud auth configure-docker -q us-docker.pkg.dev
+ - name: Pull Docker Image
+ run: docker pull $DOCKER_IMAGE
+ - name: Check Bazel version
+ run: cd ${{ github.workspace }} && docker run -v$PWD:/workspace $DOCKER_IMAGE --version
+ - id: bazel-cache
+ name: Set up Bazel caching
+ uses: ./.github/actions/setup-bazel-cache
+ - name: Build Wheels
+ run: cd ${{ github.workspace }} && docker run -e GOOGLE_APPLICATION_CREDENTIALS=/workspace/$(basename $GOOGLE_APPLICATION_CREDENTIALS) -v$PWD:/workspace $DOCKER_IMAGE build $BAZEL_CACHE --crosstool_top=@com_google_protobuf//toolchain:clang_suite --@com_google_protobuf//toolchain:release=true --symlink_prefix=/ -c dbg python/dist ${{ steps.bazel-cache.outputs.cache_args }} python/dist:test_wheel python/dist:source_wheel
+ - name: Move Wheels
+ run: mkdir wheels && find _build/out \( -name 'protobuf*.whl' -o -name 'protobuf-*.tar.gz' \) -exec mv '{}' wheels ';'
+ - uses: actions/upload-artifact@v3
+ with:
+ name: python-wheels
+ path: wheels/
+ - uses: actions/upload-artifact@v3
+ with:
+ name: requirements
+ # Tests shouldn't have access to the whole upb repo, upload the one file we need
+ path: python/requirements.txt
+
+ test_wheels:
+ name: Test Wheels
+ needs: build_wheels
+ strategy:
+ fail-fast: false # Don't cancel all jobs if one fails.
+ matrix:
+ include:
+ # Linux and Mac use the limited API, so all Python versions will use a single wheel.
+ # As a result we can just test the oldest and newest supported Python versions and assume
+ # this gives us sufficient test coverage.
+ - { os: ubuntu-18-large, python-version: "3.7", architecture: x64, type: 'binary' }
+ - { os: macos-11, python-version: "3.7", architecture: x64, type: 'binary' }
+ - { os: ubuntu-20-large, python-version: "3.10", architecture: x64, type: 'binary' }
+ - { os: macos-12, python-version: "3.10", architecture: x64, type: 'binary' }
+ - { os: ubuntu-18-large, python-version: "3.7", architecture: x64, type: 'source' }
+ - { os: macos-11, python-version: "3.7", architecture: x64, type: 'source' }
+ - { os: ubuntu-20-large, python-version: "3.10", architecture: x64, type: 'source' }
+ - { os: macos-12, python-version: "3.10", architecture: x64, type: 'source' }
+
+ # Windows uses the full API up until Python 3.10, so each of these
+ # jobs tests a distinct binary wheel.
+ - { os: windows-2019-large, python-version: "3.7", architecture: x86, type: 'binary' }
+ - { os: windows-2019-large, python-version: "3.8", architecture: x86, type: 'binary' }
+ - { os: windows-2019-large, python-version: "3.9", architecture: x86, type: 'binary' }
+ - { os: windows-2019-large, python-version: "3.10", architecture: x86, type: 'binary' }
+ - { os: windows-2019-large, python-version: "3.7", architecture: x64, type: 'binary' }
+ - { os: windows-2019-large, python-version: "3.8", architecture: x64, type: 'binary' }
+ - { os: windows-2019-large, python-version: "3.9", architecture: x64, type: 'binary' }
+ - { os: windows-2019-large, python-version: "3.10", architecture: x64, type: 'binary' }
+ runs-on: ${{ matrix.os }}
+ defaults:
+ run:
+ shell: bash
+ steps:
+ - name: Download Wheels
+ uses: actions/download-artifact@v3
+ with:
+ name: python-wheels
+ path: wheels
+ - name: Download Requirements
+ uses: actions/download-artifact@v3
+ with:
+ name: requirements
+ path: requirements
+ - uses: actions/setup-python@v2
+ with:
+ python-version: ${{ matrix.python-version }}
+ architecture: ${{ matrix.architecture }}
+ - name: Setup Python venv
+ run: |
+ python -m pip install --upgrade pip
+ python -m venv env
+ # Windows uses 'Scripts' instead of 'bin'
+ source env/bin/activate || source env/Scripts/activate
+ echo "VIRTUAL ENV:" $VIRTUAL_ENV
+ - name: Install tzdata
+ run: pip install tzdata
+ # Only needed on Windows, Linux ships with tzdata.
+ if: ${{ contains(matrix.os, 'windows') }}
+ - name: Install requirements
+ run: pip install -r requirements/requirements.txt
+ - name: Install Protobuf Binary Wheel
+ run: pip install -vvv --no-index --find-links wheels protobuf
+ if: ${{ matrix.type == 'binary' }}
+ - name: Install Protobuf Source Wheel
+ run: |
+ cd wheels
+ tar -xzvf *.tar.gz
+ cd protobuf-*/
+ pip install .
+ if: ${{ matrix.type == 'source' }}
+ - name: Test that module is importable
+ run: python -v -c 'from google._upb import _message; assert "google._upb._message.MessageMeta" in str(_message.MessageMeta)'
+ if: ${{ !matrix.pure_python }}
+ - name: Install Protobuf Test Wheel
+ run: pip install -vvv --no-index --find-links wheels protobuftests
+ - name: Run the unit tests
+ run: |
+ TESTS=$(pip show -f protobuftests | grep pb_unit_tests.*py$ | sed 's,/,.,g' | sed 's,\\,.,g' | sed -E 's,.py$,,g')
+ for test in $TESTS; do
+ python -m unittest -v $test
+ done
+
+ test_pure_python_wheels:
+ name: Test Pure Python Wheels
+ needs: build_wheels
+ strategy:
+ fail-fast: false # Don't cancel all jobs if one fails.
+ matrix:
+ python-version: ["3.7", "3.10"]
+ runs-on: ubuntu-large
+
+ steps:
+ - name: Download Wheels
+ uses: actions/download-artifact@v3
+ with:
+ name: python-wheels
+ path: wheels
+ - name: Delete Binary Wheels
+ run: find wheels -type f | grep -v none-any | xargs rm
+ - uses: actions/setup-python@v2
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Setup Python venv
+ run: |
+ python -m pip install --upgrade pip
+ python -m venv env
+ source env/bin/activate
+ echo "VIRTUAL ENV:" $VIRTUAL_ENV
+ - name: Install numpy
+ run: pip install numpy
+ - name: Install Protobuf Wheels
+ run: pip install -vvv --no-index --find-links wheels protobuf protobuftests
+ - name: Run the unit tests
+ run: |
+ TESTS=$(pip show -f protobuftests | grep _test.py | sed 's,/,.,g' | sed -E 's,.py$,,g')
+ for test in $TESTS; do
+ python -m unittest -v $test
+ done
diff --git a/upb/.gitignore b/upb/.gitignore
new file mode 100644
index 0000000..79d6c53
--- /dev/null
+++ b/upb/.gitignore
@@ -0,0 +1,6 @@
+*.sw?
+obj/
+lib/
+bazel-*
+_build
+.vscode
diff --git a/upb/BUILD b/upb/BUILD
new file mode 100644
index 0000000..49aa2cc
--- /dev/null
+++ b/upb/BUILD
@@ -0,0 +1,744 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load("@rules_python//python:defs.bzl", "py_binary")
+load("//bazel:build_defs.bzl", "UPB_DEFAULT_COPTS")
+load(
+ "//bazel:upb_proto_library.bzl",
+ "upb_proto_library_copts",
+ "upb_proto_reflection_library",
+)
+load("@bazel_skylib//rules:common_settings.bzl", "bool_flag")
+load(
+ "//upbc:bootstrap_compiler.bzl",
+ "bootstrap_cc_library",
+ "bootstrap_upb_proto_library",
+)
+
+# begin:google_only
+# load("//tools/build_defs/kotlin/native:rules.bzl", "kt_native_interop_hint")
+# load("//tools/build_defs/license:license.bzl", "license")
+# end:google_only
+
+# begin:github_only
+load(
+ "//bazel:amalgamation.bzl",
+ "upb_amalgamation",
+)
+# end:github_only
+
+# begin:google_only
+# package(default_applicable_licenses = ["//:license"])
+#
+# license(
+# name = "license",
+# package_name = "upb",
+# )
+# end:google_only
+
+licenses(["notice"])
+
+exports_files(["LICENSE"])
+
+exports_files(
+ [
+ "BUILD",
+ "WORKSPACE",
+ ],
+ visibility = ["//cmake:__pkg__"],
+)
+
+config_setting(
+ name = "windows",
+ constraint_values = ["@platforms//os:windows"],
+ visibility = ["//visibility:public"],
+)
+
+bool_flag(
+ name = "fasttable_enabled",
+ build_setting_default = False,
+ visibility = ["//visibility:public"],
+)
+
+config_setting(
+ name = "fasttable_enabled_setting",
+ flag_values = {"//:fasttable_enabled": "true"},
+ visibility = ["//visibility:public"],
+)
+
+upb_proto_library_copts(
+ name = "upb_proto_library_copts__for_generated_code_only_do_not_use",
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+)
+
+# Please update copy.bara.sky target = ":friends" if
+# you make changes to this list.
+package_group(
+ name = "friends",
+ packages = ["//..."],
+)
+
+# This is a stub library to keep gRPC happy. Do not use it for any reason,
+# use the smaller targets below instead.
+cc_library(
+ name = "upb",
+ hdrs = [
+ "upb/upb.hpp",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":base",
+ ":mem",
+ ],
+)
+
+# Common support routines used by generated code. This library has no
+# implementation, but depends on :upb and exposes a few more hdrs.
+#
+# This is public only because we have no way of visibility-limiting it to
+# upb_proto_library() only. This interface is not stable and by using it you
+# give up any backward compatibility guarantees.
+cc_library(
+ name = "generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me",
+ hdrs = ["upb/generated_code_support.h"],
+ copts = UPB_DEFAULT_COPTS,
+ textual_hdrs = [
+ "//upb/port:inc",
+ ],
+ visibility = ["//visibility:public"],
+ deps = [
+ ":base",
+ ":collections",
+ ":collections_internal",
+ ":mem",
+ ":message",
+ ":message_accessors",
+ ":message_accessors_internal",
+ ":message_internal",
+ ":mini_descriptor",
+ ":mini_table",
+ ":wire",
+ ":wire_internal",
+ ],
+)
+
+# Common support code for C++ generated code.
+cc_library(
+ name = "generated_cpp_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me",
+ copts = UPB_DEFAULT_COPTS,
+ textual_hdrs = [
+ "//upb/port:inc",
+ ],
+ visibility = ["//visibility:public"],
+)
+
+cc_library(
+ name = "generated_reflection_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me",
+ hdrs = [
+ "upb/reflection/def.h",
+ "upb/reflection/internal/def_pool.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ textual_hdrs = [
+ "//upb/port:inc",
+ ],
+ visibility = ["//visibility:public"],
+ deps = [
+ ":mem",
+ ":mini_descriptor",
+ ":reflection_internal",
+ ],
+)
+
+bootstrap_upb_proto_library(
+ name = "descriptor_upb_proto",
+ base_dir = "upb/reflection/",
+ # TODO(b/289127200): Export 'net/proto2/proto/descriptor.upb.h' and remove "-layering_check".
+ features = ["-layering_check"],
+ google3_src_files = ["net/proto2/proto/descriptor.proto"],
+ google3_src_rules = ["//net/proto2/proto:descriptor_proto_source"],
+ oss_src_files = ["google/protobuf/descriptor.proto"],
+ oss_src_rules = ["@com_google_protobuf//:descriptor_proto_srcs"],
+ oss_strip_prefix = "third_party/protobuf/github/bootstrap/src",
+ proto_lib_deps = ["@com_google_protobuf//:descriptor_proto"],
+ visibility = ["//visibility:public"],
+)
+
+upb_proto_reflection_library(
+ name = "descriptor_upb_proto_reflection",
+ visibility = ["//visibility:public"],
+ deps = ["@com_google_protobuf//:descriptor_proto"],
+)
+
+# TODO(b/232091617): Once we can delete the deprecated forwarding headers
+# (= everything in upb/) we can move this build target down into reflection/
+bootstrap_cc_library(
+ name = "reflection",
+ hdrs = [
+ "upb/reflection/def.h",
+ "upb/reflection/def.hpp",
+ "upb/reflection/message.h",
+ "upb/reflection/message.hpp",
+ ],
+ bootstrap_deps = [":reflection_internal"],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":base",
+ ":collections",
+ ":mem",
+ ":port",
+ ],
+)
+
+bootstrap_cc_library(
+ name = "reflection_internal",
+ srcs = [
+ "upb/reflection/def_builder.c",
+ "upb/reflection/def_pool.c",
+ "upb/reflection/def_type.c",
+ "upb/reflection/desc_state.c",
+ "upb/reflection/enum_def.c",
+ "upb/reflection/enum_reserved_range.c",
+ "upb/reflection/enum_value_def.c",
+ "upb/reflection/extension_range.c",
+ "upb/reflection/field_def.c",
+ "upb/reflection/file_def.c",
+ "upb/reflection/message.c",
+ "upb/reflection/message_def.c",
+ "upb/reflection/message_reserved_range.c",
+ "upb/reflection/method_def.c",
+ "upb/reflection/oneof_def.c",
+ "upb/reflection/service_def.c",
+ ],
+ hdrs = [
+ "upb/reflection/common.h",
+ "upb/reflection/def.h",
+ "upb/reflection/def.hpp",
+ "upb/reflection/def_pool.h",
+ "upb/reflection/def_type.h",
+ "upb/reflection/enum_def.h",
+ "upb/reflection/enum_reserved_range.h",
+ "upb/reflection/enum_value_def.h",
+ "upb/reflection/extension_range.h",
+ "upb/reflection/field_def.h",
+ "upb/reflection/file_def.h",
+ "upb/reflection/internal/def_builder.h",
+ "upb/reflection/internal/def_pool.h",
+ "upb/reflection/internal/desc_state.h",
+ "upb/reflection/internal/enum_def.h",
+ "upb/reflection/internal/enum_reserved_range.h",
+ "upb/reflection/internal/enum_value_def.h",
+ "upb/reflection/internal/extension_range.h",
+ "upb/reflection/internal/field_def.h",
+ "upb/reflection/internal/file_def.h",
+ "upb/reflection/internal/message_def.h",
+ "upb/reflection/internal/message_reserved_range.h",
+ "upb/reflection/internal/method_def.h",
+ "upb/reflection/internal/oneof_def.h",
+ "upb/reflection/internal/service_def.h",
+ "upb/reflection/message.h",
+ "upb/reflection/message.hpp",
+ "upb/reflection/message_def.h",
+ "upb/reflection/message_reserved_range.h",
+ "upb/reflection/method_def.h",
+ "upb/reflection/oneof_def.h",
+ "upb/reflection/service_def.h",
+ ],
+ bootstrap_deps = [":descriptor_upb_proto"],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":base",
+ ":collections",
+ ":hash",
+ ":mem",
+ ":message",
+ ":message_accessors",
+ ":mini_descriptor",
+ ":mini_descriptor_internal",
+ ":mini_table",
+ ":port",
+ ],
+)
+
+# Aliases ######################################################################
+# TODO(b/295870230): Remove these.
+
+alias(
+ name = "base",
+ actual = "//upb/base",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "base_internal",
+ actual = "//upb/base:internal",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "collections",
+ actual = "//upb/collections",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "collections_internal",
+ actual = "//upb/collections:internal",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "collections_split64",
+ actual = "//upb/collections:split64",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "hash",
+ actual = "//upb/hash",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "json",
+ actual = "//upb/json",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "lex",
+ actual = "//upb/lex",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "mem",
+ actual = "//upb/mem",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "mem_internal",
+ actual = "//upb/mem:internal",
+ visibility = ["//:__subpackages__"],
+)
+
+alias(
+ name = "message",
+ actual = "//upb/message",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "message_accessors",
+ actual = "//upb/message:accessors",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "message_accessors_internal",
+ actual = "//upb/message:accessors_internal",
+ visibility = ["//:friends"],
+)
+
+alias(
+ name = "message_copy",
+ actual = "//upb/message:copy",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "message_internal",
+ actual = "//upb/message:internal",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "message_promote",
+ actual = "//upb/message:promote",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "message_rep_internal",
+ actual = "//upb/message:rep_internal",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "message_split64",
+ actual = "//upb/message:split64",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "message_tagged_ptr",
+ actual = "//upb/message:tagged_ptr",
+ visibility = ["//:friends"],
+)
+
+alias(
+ name = "message_types",
+ actual = "//upb/message:types",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "mini_descriptor",
+ actual = "//upb/mini_descriptor",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "mini_descriptor_internal",
+ actual = "//upb/mini_descriptor:internal",
+ visibility = ["//:__subpackages__"],
+)
+
+alias(
+ name = "mini_table",
+ actual = "//upb/mini_table",
+ visibility = ["//:friends"],
+)
+
+# begin:google_only
+# alias(
+# name = "mini_table_compat",
+# actual = "//upb/mini_table:compat",
+# compatible_with = ["//buildenv/target:non_prod"],
+# visibility = ["//:friends"],
+# )
+# end:google_only
+
+alias(
+ name = "mini_table_internal",
+ actual = "//upb/mini_table:internal",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "port",
+ actual = "//upb/port",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "text",
+ actual = "//upb/text",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "wire",
+ actual = "//upb/wire",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "wire_internal",
+ actual = "//upb/wire:internal",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "wire_reader",
+ actual = "//upb/wire:reader",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "wire_types",
+ actual = "//upb/wire:types",
+ visibility = ["//visibility:public"],
+)
+
+alias(
+ name = "eps_copy_input_stream",
+ actual = "//upb/wire:eps_copy_input_stream",
+ visibility = ["//visibility:public"],
+)
+
+# Tests ########################################################################
+
+cc_test(
+ name = "def_builder_test",
+ srcs = [
+ "upb/reflection/common.h",
+ "upb/reflection/def_builder_test.cc",
+ "upb/reflection/def_type.h",
+ "upb/reflection/internal/def_builder.h",
+ ],
+ deps = [
+ ":descriptor_upb_proto",
+ ":hash",
+ ":mem",
+ ":port",
+ ":reflection",
+ ":reflection_internal",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+# Internal C/C++ libraries #####################################################
+
+cc_binary(
+ name = "libupb.so",
+ srcs = ["upb/upb_so.c"],
+ copts = UPB_DEFAULT_COPTS + ["-DUPB_BUILD_API"],
+ linkshared = 1,
+ linkstatic = 1,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":collections",
+ ":collections_split64",
+ ":mem",
+ ":message",
+ ":message_accessors",
+ ":message_split64",
+ ":mini_descriptor",
+ ":mini_table",
+ ":port",
+ ],
+)
+
+# Amalgamation #################################################################
+
+# begin:github_only
+
+upb_amalgamation(
+ name = "gen_amalgamation",
+ outs = [
+ "upb.c",
+ "upb.h",
+ ],
+ libs = [
+ ":base",
+ ":base_internal",
+ ":collections_internal",
+ ":descriptor_upb_proto",
+ ":eps_copy_input_stream",
+ ":generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me",
+ ":hash",
+ ":lex",
+ ":mem",
+ ":mem_internal",
+ ":message",
+ ":message_accessors",
+ ":message_internal",
+ ":message_rep_internal",
+ ":message_tagged_ptr",
+ ":message_types",
+ ":mini_descriptor",
+ ":mini_descriptor_internal",
+ ":mini_table",
+ ":mini_table_internal",
+ ":port",
+ ":reflection",
+ ":reflection_internal",
+ ":wire",
+ ":wire_internal",
+ ":wire_reader",
+ ":wire_types",
+ ],
+ strip_import_prefix = ["src"],
+)
+
+cc_library(
+ name = "amalgamation",
+ srcs = ["upb.c"],
+ hdrs = ["upb.h"],
+ copts = UPB_DEFAULT_COPTS,
+ deps = ["@utf8_range"],
+)
+
+upb_amalgamation(
+ name = "gen_php_amalgamation",
+ outs = [
+ "php-upb.c",
+ "php-upb.h",
+ ],
+ libs = [
+ ":base",
+ ":base_internal",
+ ":collections_internal",
+ ":descriptor_upb_proto_reflection",
+ ":descriptor_upb_proto",
+ ":eps_copy_input_stream",
+ ":generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me",
+ ":hash",
+ ":json",
+ ":lex",
+ ":mem",
+ ":mem_internal",
+ ":message",
+ ":message_accessors",
+ ":message_internal",
+ ":message_rep_internal",
+ ":message_tagged_ptr",
+ ":message_types",
+ ":mini_descriptor",
+ ":mini_descriptor_internal",
+ ":mini_table",
+ ":mini_table_internal",
+ ":port",
+ ":reflection",
+ ":reflection_internal",
+ ":wire",
+ ":wire_internal",
+ ":wire_reader",
+ ":wire_types",
+ ],
+ prefix = "php-",
+ strip_import_prefix = ["src"],
+ visibility = ["@com_google_protobuf//php:__subpackages__"],
+)
+
+cc_library(
+ name = "php_amalgamation",
+ srcs = ["php-upb.c"],
+ hdrs = ["php-upb.h"],
+ copts = UPB_DEFAULT_COPTS,
+ deps = ["@utf8_range"],
+)
+
+upb_amalgamation(
+ name = "gen_ruby_amalgamation",
+ outs = [
+ "ruby-upb.c",
+ "ruby-upb.h",
+ ],
+ libs = [
+ ":base",
+ ":base_internal",
+ ":collections_internal",
+ ":descriptor_upb_proto",
+ ":eps_copy_input_stream",
+ ":generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me",
+ ":hash",
+ ":json",
+ ":lex",
+ ":mem",
+ ":mem_internal",
+ ":message",
+ ":message_accessors",
+ ":message_internal",
+ ":message_rep_internal",
+ ":message_tagged_ptr",
+ ":message_types",
+ ":mini_descriptor",
+ ":mini_descriptor_internal",
+ ":mini_table",
+ ":mini_table_internal",
+ ":port",
+ ":reflection",
+ ":reflection_internal",
+ ":wire",
+ ":wire_internal",
+ ":wire_reader",
+ ":wire_types",
+ ],
+ prefix = "ruby-",
+ strip_import_prefix = ["src"],
+ visibility = ["@com_google_protobuf//ruby:__subpackages__"],
+)
+
+cc_library(
+ name = "ruby_amalgamation",
+ srcs = ["ruby-upb.c"],
+ hdrs = ["ruby-upb.h"],
+ copts = UPB_DEFAULT_COPTS,
+ deps = ["@utf8_range"],
+)
+
+exports_files(
+ [
+ "third_party/lunit/console.lua",
+ "third_party/lunit/lunit.lua",
+ ],
+ visibility = ["//lua:__pkg__"],
+)
+
+filegroup(
+ name = "source_files",
+ srcs = glob(
+ [
+ "upb/**/*.c",
+ "upb/**/*.h",
+ "upb/**/*.hpp",
+ ],
+ exclude = [
+ "upb/**/conformance_upb.c",
+ "upb/reflection/stage0/**/*",
+ ],
+ ),
+ visibility = [
+ "//cmake:__pkg__",
+ "//python/dist:__pkg__",
+ ]
+)
+# end:github_only
+
+# begin:google_only
+#
+# py_binary(
+# name = "update_check_runs",
+# srcs = ["update_check_runs.py"],
+# main = "update_check_runs.py",
+# deps = [
+# "//third_party/py/absl:app",
+# "//third_party/py/absl/flags",
+# ],
+# )
+#
+# kt_native_interop_hint(
+# name = "upb_kotlin_native_hint",
+# compatible_with = ["//buildenv/target:non_prod"],
+# headers_to_exclude = glob([
+# "**/*.hpp",
+# ]),
+# kotlin_package = "upb",
+# no_string_conversion = ["_upb_MiniTable_Build"],
+# strict_enums = [
+# "upb_CType",
+# "upb_DecodeStatus",
+# "upb_EncodeStatus",
+# "upb_FieldType",
+# "upb_FindUnknown_Status",
+# "upb_GetExtension_Status",
+# "upb_GetExtensionAsBytes_Status",
+# "upb_Label",
+# "upb_MapInsertStatus",
+# "upb_UnknownToMessage_Status",
+# "upb_WireType",
+# ],
+# visibility = ["//:__subpackages__"],
+# )
+#
+# end:google_only
diff --git a/upb/CONTRIBUTING.md b/upb/CONTRIBUTING.md
new file mode 100644
index 0000000..df48bf6
--- /dev/null
+++ b/upb/CONTRIBUTING.md
@@ -0,0 +1,37 @@
+
+# How to Contribute
+
+We'd love to accept your patches and contributions to this project. There are
+just a few small guidelines you need to follow.
+
+## Get in touch
+
+If your idea will take you more than, say, 30 minutes to
+implement, please get in touch first via the issue tracker
+to touch base about your plan. That will give an
+opportunity for early feedback and help avoid wasting your
+time.
+
+## Contributor License Agreement
+
+Contributions to this project must be accompanied by a Contributor License
+Agreement. You (or your employer) retain the copyright to your contribution;
+this simply gives us permission to use and redistribute your contributions as
+part of the project. Head over to <https://cla.developers.google.com/> to see
+your current agreements on file or to sign a new one.
+
+You generally only need to submit a CLA once, so if you've already submitted one
+(even if it was for a different project), you probably don't need to do it
+again.
+
+## Code Reviews
+
+All submissions, including submissions by project members, require review. We
+use GitHub pull requests for this purpose. Consult
+[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
+information on using pull requests.
+
+## Community Guidelines
+
+This project follows [Google's Open Source Community
+Guidelines](https://opensource.google/conduct/).
diff --git a/upb/LICENSE b/upb/LICENSE
new file mode 100644
index 0000000..7f3bd16
--- /dev/null
+++ b/upb/LICENSE
@@ -0,0 +1,26 @@
+
+Copyright (c) 2009-2021, Google LLC
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Google LLC nor the names of any other
+ contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+EVENT SHALL GOOGLE LLC BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
diff --git a/upb/README.md b/upb/README.md
new file mode 100644
index 0000000..dcba8f6
--- /dev/null
+++ b/upb/README.md
@@ -0,0 +1,84 @@
+
+# μpb: small, fast C protos
+
+μpb (often written 'upb') is a small
+[protobuf](https://github.com/protocolbuffers/protobuf) implementation written
+in C.
+
+upb is the core runtime for protobuf languages extensions in
+[Ruby](https://github.com/protocolbuffers/protobuf/tree/master/ruby),
+[PHP](https://github.com/protocolbuffers/protobuf/tree/master/php), and
+[Python](https://github.com/protocolbuffers/upb/tree/main/python).
+
+While upb offers a C API, the C API & ABI **are not stable**. For this reason,
+upb is not generally offered as a C library for direct consumption, and there
+are no releases.
+
+## Features
+
+upb has comparable speed to protobuf C++, but is an order of magnitude smaller
+in code size.
+
+Like the main protobuf implementation in C++, it supports:
+
+- a generated API (in C)
+- reflection
+- binary & JSON wire formats
+- text format serialization
+- all standard features of protobufs (oneofs, maps, unknown fields, extensions,
+ etc.)
+- full conformance with the protobuf conformance tests
+
+upb also supports some features that C++ does not:
+
+- **optional reflection:** generated messages are agnostic to whether
+ reflection will be linked in or not.
+- **no global state:** no pre-main registration or other global state.
+- **fast reflection-based parsing:** messages loaded at runtime parse
+ just as fast as compiled-in messages.
+
+However there are a few features it does not support:
+
+- text format parsing
+- deep descriptor verification: upb's descriptor validation is not as exhaustive
+ as `protoc`.
+
+## Install
+
+For Ruby, use [RubyGems](https://rubygems.org/gems/google-protobuf):
+
+```
+$ gem install google-protobuf
+```
+
+For PHP, use [PECL](https://pecl.php.net/package/protobuf):
+
+```
+$ sudo pecl install protobuf
+```
+
+For Python, use [PyPI](https://pypi.org/project/protobuf/):
+
+```
+$ sudo pip install protobuf
+```
+
+Alternatively, you can build and install upb using
+[vcpkg](https://github.com/microsoft/vcpkg/) dependency manager:
+
+ git clone https://github.com/Microsoft/vcpkg.git
+ cd vcpkg
+ ./bootstrap-vcpkg.sh
+ ./vcpkg integrate install
+ ./vcpkg install upb
+
+The upb port in vcpkg is kept up to date by microsoft team members and community
+contributors.
+
+If the version is out of date, please
+[create an issue or pull request](https://github.com/Microsoft/vcpkg) on the
+vcpkg repository.
+
+## Contributing
+
+Please see [CONTRIBUTING.md](CONTRIBUTING.md).
diff --git a/upb/WORKSPACE b/upb/WORKSPACE
new file mode 100644
index 0000000..a9dd267
--- /dev/null
+++ b/upb/WORKSPACE
@@ -0,0 +1,86 @@
+workspace(name = "upb")
+
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+load("//bazel:workspace_deps.bzl", "upb_deps")
+
+upb_deps()
+
+load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps")
+protobuf_deps()
+
+load("@utf8_range//:workspace_deps.bzl", "utf8_range_deps")
+utf8_range_deps()
+
+http_archive(
+ name = "lua",
+ build_file = "//bazel:lua.BUILD",
+ sha256 = "b9e2e4aad6789b3b63a056d442f7b39f0ecfca3ae0f1fc0ae4e9614401b69f4b",
+ strip_prefix = "lua-5.2.4",
+ urls = [
+ "https://mirror.bazel.build/www.lua.org/ftp/lua-5.2.4.tar.gz",
+ "https://www.lua.org/ftp/lua-5.2.4.tar.gz",
+ ],
+)
+
+http_archive(
+ name = "com_github_google_benchmark",
+ urls = ["https://github.com/google/benchmark/archive/0baacde3618ca617da95375e0af13ce1baadea47.zip"],
+ strip_prefix = "benchmark-0baacde3618ca617da95375e0af13ce1baadea47",
+ sha256 = "62e2f2e6d8a744d67e4bbc212fcfd06647080de4253c97ad5c6749e09faf2cb0",
+)
+
+http_archive(
+ name = "com_google_googleapis",
+ urls = ["https://github.com/googleapis/googleapis/archive/refs/heads/master.zip"],
+ build_file = "//benchmarks:BUILD.googleapis",
+ strip_prefix = "googleapis-master",
+ patch_cmds = ["find google -type f -name BUILD.bazel -delete"],
+)
+
+http_archive(
+ name = "com_google_absl",
+ sha256 = "e7fdfe0bed87702a22c5b73b6b5fe08bedd25f17d617e52df6061b0f47d480b0",
+ strip_prefix = "abseil-cpp-e6044634dd7caec2d79a13aecc9e765023768757",
+ urls = [
+ "https://github.com/abseil/abseil-cpp/archive/e6044634dd7caec2d79a13aecc9e765023768757.tar.gz"
+ ],
+)
+
+http_archive(
+ name = "com_google_googletest",
+ sha256 = "730215d76eace9dd49bf74ce044e8daa065d175f1ac891cc1d6bb184ef94e565",
+ strip_prefix = "googletest-f53219cdcb7b084ef57414efea92ee5b71989558",
+ urls = [
+ "https://github.com/google/googletest/archive/f53219cdcb7b084ef57414efea92ee5b71989558.tar.gz" # 2023-03-16
+ ],
+)
+
+load("@com_google_googletest//:googletest_deps.bzl", "googletest_deps")
+
+googletest_deps()
+
+load("@rules_pkg//:deps.bzl", "rules_pkg_dependencies")
+
+rules_pkg_dependencies()
+
+load("//bazel:system_python.bzl", "system_python")
+system_python(
+ name = "system_python",
+ minimum_python_version = "3.7",
+)
+
+load("@system_python//:register.bzl", "register_system_python")
+register_system_python()
+
+load("@system_python//:pip.bzl", "pip_parse")
+
+pip_parse(
+ name="pip_deps",
+ requirements = "//python:requirements.txt",
+ requirements_overrides = {
+ "3.11": "//python:requirements_311.txt",
+ },
+)
+
+load("@pip_deps//:requirements.bzl", "install_deps")
+install_deps()
diff --git a/upb/bazel/BUILD b/upb/bazel/BUILD
new file mode 100644
index 0000000..4ef3626
--- /dev/null
+++ b/upb/bazel/BUILD
@@ -0,0 +1,63 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load("@rules_python//python:defs.bzl", "py_binary")
+load("@bazel_skylib//:bzl_library.bzl", "bzl_library")
+
+# begin:google_only
+# package(default_applicable_licenses = ["//:license"])
+# end:google_only
+
+licenses(["notice"])
+
+exports_files(
+ ["workspace_deps.bzl"],
+ visibility = ["//cmake:__pkg__"],
+)
+
+py_binary(
+ name = "amalgamate",
+ srcs = ["amalgamate.py"],
+ visibility = ["//:__pkg__"],
+)
+
+# py_proto_library() is private rule, only intended for internal use by upb.
+# Hopefully py_proto_library() will eventually be availble in rules_proto or
+# another upstream package.
+bzl_library(
+ name = "py_proto_library_bzl",
+ srcs = ["py_proto_library.bzl"],
+)
+
+bzl_library(
+ name = "upb_proto_library_bzl",
+ srcs = ["upb_proto_library.bzl"],
+ visibility = ["//visibility:public"],
+ deps = [
+ "@bazel_skylib//lib:paths",
+ "@bazel_tools//tools/cpp:toolchain_utils.bzl",
+ "@rules_proto//proto:defs",
+ ],
+)
diff --git a/upb/bazel/amalgamate.py b/upb/bazel/amalgamate.py
new file mode 100755
index 0000000..6f9f436
--- /dev/null
+++ b/upb/bazel/amalgamate.py
@@ -0,0 +1,134 @@
+#!/usr/bin/python
+#
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import sys
+import re
+import os
+
+INCLUDE_RE = re.compile('^#include "([^"]*)"$')
+
+def parse_include(line):
+ match = INCLUDE_RE.match(line)
+ return match.groups()[0] if match else None
+
+class Amalgamator:
+ def __init__(self, h_out, c_out):
+ self.include_paths = ["."]
+ self.included = set()
+ self.output_h = open(h_out, "w")
+ self.output_c = open(c_out, "w")
+ self.h_out = h_out.split("/")[-1]
+
+ def amalgamate(self, h_files, c_files):
+ self.h_files = set(h_files)
+ self.output_c.write("/* Amalgamated source file */\n")
+ self.output_c.write('#include "%s"\n' % (self.h_out))
+ if self.h_out == "ruby-upb.h":
+ self.output_h.write("// Ruby is still using proto3 enum semantics for proto2\n")
+ self.output_h.write("#define UPB_DISABLE_PROTO2_ENUM_CHECKING\n")
+
+ self.output_h.write("/* Amalgamated source file */\n")
+
+ port_def = self._find_include_file("upb/port/def.inc")
+ port_undef = self._find_include_file("upb/port/undef.inc")
+ self._process_file(port_def, self.output_h)
+ self._process_file(port_def, self.output_c)
+
+ for file in c_files:
+ self._process_file(file, self.output_c)
+
+ self._process_file(port_undef, self.output_h)
+ self._process_file(port_undef, self.output_c)
+
+ def _process_file(self, infile_name, outfile):
+ lines = open(infile_name).readlines()
+
+ has_copyright = lines[0].startswith(
+ "// Protocol Buffers - Google's data interchange format"
+ )
+ if has_copyright:
+ while not lines[0].startswith(
+ "// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH"
+ " DAMAGE"
+ ):
+ lines.pop(0)
+ lines.pop(0)
+
+ for line in lines:
+ if not self._process_include(line):
+ outfile.write(line)
+
+ def _find_include_file(self, name):
+ for h_file in self.h_files:
+ if h_file.endswith(name):
+ return h_file
+
+ def _process_include(self, line):
+ include = parse_include(line)
+ if not include:
+ return False
+ if not (include.startswith("upb") or include.startswith("google")):
+ return False
+ if include and (include.endswith("port/def.inc") or include.endswith("port/undef.inc")):
+ # Skip, we handle this separately
+ return True
+ if include.endswith("hpp"):
+ # Skip, we don't support the amalgamation from C++.
+ return True
+ elif include in self.included:
+ return True
+ else:
+ # Include this upb header inline.
+ h_file = self._find_include_file(include)
+ if h_file:
+ self.h_files.remove(h_file)
+ self.included.add(include)
+ self._process_file(h_file, self.output_h)
+ return True
+ raise RuntimeError("Couldn't find include: " + include + ", h_files=" + repr(self.h_files))
+
+# ---- main ----
+
+c_out = sys.argv[1]
+h_out = sys.argv[2]
+amalgamator = Amalgamator(h_out, c_out)
+c_files = []
+h_files = []
+
+for arg in sys.argv[3:]:
+ arg = arg.strip()
+ if arg.endswith(".h") or arg.endswith(".inc"):
+ h_files.append(arg)
+ else:
+ c_files.append(arg)
+
+amalgamator.amalgamate(h_files, c_files)
diff --git a/upb/bazel/amalgamation.bzl b/upb/bazel/amalgamation.bzl
new file mode 100644
index 0000000..cabbacc
--- /dev/null
+++ b/upb/bazel/amalgamation.bzl
@@ -0,0 +1,85 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Internal rules for building upb."""
+
+load(":upb_proto_library.bzl", "GeneratedSrcsInfo")
+
+# upb_amalgamation() rule, with file_list aspect.
+
+SrcList = provider(
+ fields = {
+ "srcs": "list of srcs",
+ },
+)
+
+def _file_list_aspect_impl(target, ctx):
+ if GeneratedSrcsInfo in target:
+ srcs = target[GeneratedSrcsInfo]
+ return [SrcList(srcs = srcs.srcs + srcs.hdrs)]
+
+ srcs = []
+ for src in ctx.rule.attr.srcs:
+ srcs += src.files.to_list()
+ for hdr in ctx.rule.attr.hdrs:
+ srcs += hdr.files.to_list()
+ for hdr in ctx.rule.attr.textual_hdrs:
+ srcs += hdr.files.to_list()
+ return [SrcList(srcs = srcs)]
+
+_file_list_aspect = aspect(
+ implementation = _file_list_aspect_impl,
+)
+
+def _upb_amalgamation(ctx):
+ inputs = []
+ for lib in ctx.attr.libs:
+ inputs += lib[SrcList].srcs
+ srcs = [src for src in inputs if not src.path.endswith("hpp")]
+ ctx.actions.run(
+ inputs = inputs,
+ outputs = ctx.outputs.outs,
+ arguments = [f.path for f in ctx.outputs.outs] + [f.path for f in srcs],
+ progress_message = "Making amalgamation",
+ executable = ctx.executable._amalgamator,
+ )
+ return []
+
+upb_amalgamation = rule(
+ attrs = {
+ "_amalgamator": attr.label(
+ executable = True,
+ cfg = "exec",
+ default = "//bazel:amalgamate",
+ ),
+ "prefix": attr.string(
+ default = "",
+ ),
+ "libs": attr.label_list(aspects = [_file_list_aspect]),
+ "outs": attr.output_list(),
+ "strip_import_prefix": attr.string_list(),
+ },
+ implementation = _upb_amalgamation,
+)
diff --git a/upb/bazel/build_defs.bzl b/upb/bazel/build_defs.bzl
new file mode 100644
index 0000000..e4212b3
--- /dev/null
+++ b/upb/bazel/build_defs.bzl
@@ -0,0 +1,101 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Internal rules for building upb."""
+
+_DEFAULT_CPPOPTS = []
+_DEFAULT_COPTS = []
+
+# begin:github_only
+_DEFAULT_CPPOPTS.extend([
+ "-Wextra",
+ # "-Wshorten-64-to-32", # not in GCC (and my Kokoro images doesn't have Clang)
+ "-Werror",
+ "-Wno-unused-parameter",
+ "-Wno-long-long",
+])
+_DEFAULT_COPTS.extend([
+ "-std=c99",
+ "-Wall",
+ "-Wstrict-prototypes",
+ # GCC (at least) emits spurious warnings for this that cannot be fixed
+ # without introducing redundant initialization (with runtime cost):
+ # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80635
+ #"-Wno-maybe-uninitialized",
+])
+# end:github_only
+
+UPB_DEFAULT_CPPOPTS = select({
+ "//:windows": [],
+ "//conditions:default": _DEFAULT_CPPOPTS,
+})
+
+UPB_DEFAULT_COPTS = select({
+ "//:windows": [],
+ "//:fasttable_enabled_setting": ["-std=gnu99", "-DUPB_ENABLE_FASTTABLE"],
+ "//conditions:default": _DEFAULT_COPTS,
+})
+
+runfiles_init = """\
+# --- begin runfiles.bash initialization v2 ---
+# Copy-pasted from the Bazel Bash runfiles library v2.
+set -uo pipefail; f=bazel_tools/tools/bash/runfiles/runfiles.bash
+source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \
+ source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \
+ source "$0.runfiles/$f" 2>/dev/null || \
+ source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
+ source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
+ { echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -e
+# --- end runfiles.bash initialization v2 ---
+"""
+
+def _get_real_short_path(file):
+ # For some reason, files from other archives have short paths that look like:
+ # ../com_google_protobuf/google/protobuf/descriptor.proto
+ short_path = file.short_path
+ if short_path.startswith("../"):
+ second_slash = short_path.index("/", 3)
+ short_path = short_path[second_slash + 1:]
+ return short_path
+
+def _get_real_root(file):
+ real_short_path = _get_real_short_path(file)
+ return file.path[:-len(real_short_path) - 1]
+
+def _get_real_roots(files):
+ roots = {}
+ for file in files:
+ real_root = _get_real_root(file)
+ if real_root:
+ roots[real_root] = True
+ return roots.keys()
+
+def make_shell_script(name, contents, out):
+ contents = contents.replace("$", "$$")
+ native.genrule(
+ name = "gen_" + name,
+ outs = [out],
+ cmd = "(cat <<'HEREDOC'\n%s\nHEREDOC\n) > $@" % contents,
+ )
diff --git a/upb/bazel/lua.BUILD b/upb/bazel/lua.BUILD
new file mode 100644
index 0000000..11755a4
--- /dev/null
+++ b/upb/bazel/lua.BUILD
@@ -0,0 +1,127 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package(
+ default_visibility = ["//visibility:public"],
+)
+
+cc_library(
+ name = "liblua_headers",
+ hdrs = [
+ "src/lauxlib.h",
+ "src/lua.h",
+ "src/lua.hpp",
+ "src/luaconf.h",
+ "src/lualib.h",
+ ],
+ defines = ["LUA_USE_LINUX"],
+ includes = ["src"],
+)
+
+cc_library(
+ name = "liblua",
+ srcs = [
+ "src/lapi.c",
+ "src/lapi.h",
+ "src/lauxlib.c",
+ "src/lauxlib.h",
+ "src/lbaselib.c",
+ "src/lbitlib.c",
+ "src/lcode.c",
+ "src/lcode.h",
+ "src/lcorolib.c",
+ "src/lctype.c",
+ "src/lctype.h",
+ "src/ldblib.c",
+ "src/ldebug.c",
+ "src/ldebug.h",
+ "src/ldo.c",
+ "src/ldo.h",
+ "src/ldump.c",
+ "src/lfunc.c",
+ "src/lfunc.h",
+ "src/lgc.c",
+ "src/lgc.h",
+ "src/linit.c",
+ "src/liolib.c",
+ "src/llex.c",
+ "src/llex.h",
+ "src/llimits.h",
+ "src/lmathlib.c",
+ "src/lmem.c",
+ "src/lmem.h",
+ "src/loadlib.c",
+ "src/lobject.c",
+ "src/lobject.h",
+ "src/lopcodes.c",
+ "src/lopcodes.h",
+ "src/loslib.c",
+ "src/lparser.c",
+ "src/lparser.h",
+ "src/lstate.c",
+ "src/lstate.h",
+ "src/lstring.c",
+ "src/lstring.h",
+ "src/lstrlib.c",
+ "src/ltable.c",
+ "src/ltable.h",
+ "src/ltablib.c",
+ "src/ltm.c",
+ "src/ltm.h",
+ "src/lundump.c",
+ "src/lundump.h",
+ "src/lvm.c",
+ "src/lvm.h",
+ "src/lzio.c",
+ "src/lzio.h",
+ ],
+ hdrs = [
+ "src/lauxlib.h",
+ "src/lua.h",
+ "src/lua.hpp",
+ "src/luaconf.h",
+ "src/lualib.h",
+ ],
+ defines = ["LUA_USE_LINUX"],
+ includes = ["src"],
+ linkopts = [
+ "-lm",
+ "-ldl",
+ ],
+)
+
+cc_binary(
+ name = "lua",
+ srcs = [
+ "src/lua.c",
+ ],
+ linkopts = [
+ "-lreadline",
+ "-rdynamic",
+ ],
+ deps = [
+ ":liblua",
+ ],
+)
diff --git a/upb/bazel/protobuf.patch b/upb/bazel/protobuf.patch
new file mode 100644
index 0000000..16e41c0
--- /dev/null
+++ b/upb/bazel/protobuf.patch
@@ -0,0 +1,40 @@
+--- python/google/protobuf/internal/test_util.py
++++ python/google/protobuf/internal/test_util.py
+@@ -39,6 +39,7 @@ __author__ = 'robinson@google.com (Will Robinson)'
+ import numbers
+ import operator
+ import os.path
++import pathlib
+
+ from google.protobuf import unittest_import_pb2
+ from google.protobuf import unittest_pb2
+@@ -617,17 +618,22 @@ def ExpectAllFieldsSet(test_case, message):
+ message.default_import_enum)
+
+
++def _SearchUp(path, filename):
++ path = pathlib.Path(path).resolve()
++ for parent in [path] + list(path.parents):
++ file_path = parent / ('google/protobuf/testdata/' + filename)
++ if file_path.exists():
++ # Found it. Load the golden file from the testdata directory.
++ return file_path.open('rb')
++ return None
++
+ def GoldenFile(filename):
+ """Finds the given golden file and returns a file object representing it."""
+
+ # Search up the directory tree looking for the C++ protobuf source code.
+- path = '.'
+- while os.path.exists(path):
+- if os.path.exists(os.path.join(path, 'src/google/protobuf')):
+- # Found it. Load the golden file from the testdata directory.
+- full_path = os.path.join(path, 'src/google/protobuf/testdata', filename)
+- return open(full_path, 'rb')
+- path = os.path.join(path, '..')
++ f = _SearchUp('.', filename) or _SearchUp(__file__, filename)
++ if f:
++ return f
+
+ # Search internally.
+ path = '.'
\ No newline at end of file
diff --git a/upb/bazel/py_proto_library.bzl b/upb/bazel/py_proto_library.bzl
new file mode 100644
index 0000000..1ce3a6e
--- /dev/null
+++ b/upb/bazel/py_proto_library.bzl
@@ -0,0 +1,157 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""An implementation of py_proto_library().
+
+We have to implement this ourselves because there is currently no reasonable
+py_proto_library() rule available for Bazel.
+
+Our py_proto_library() is similar to how a real py_proto_library() should work.
+But it hasn't been deeply tested or reviewed, and upb should not be in the
+business of vending py_proto_library(), so we keep it private to upb.
+"""
+
+load("@bazel_skylib//lib:paths.bzl", "paths")
+
+# begin:github_only
+load("@rules_proto//proto:defs.bzl", "ProtoInfo")
+# end:github_only
+
+# Generic support code #########################################################
+
+# begin:github_only
+_is_google3 = False
+# end:github_only
+
+# begin:google_only
+# _is_google3 = True
+# end:google_only
+
+def _get_real_short_path(file):
+ # For some reason, files from other archives have short paths that look like:
+ # ../com_google_protobuf/google/protobuf/descriptor.proto
+ short_path = file.short_path
+ if short_path.startswith("../"):
+ second_slash = short_path.index("/", 3)
+ short_path = short_path[second_slash + 1:]
+
+ # Sometimes it has another few prefixes like:
+ # _virtual_imports/any_proto/google/protobuf/any.proto
+ # benchmarks/_virtual_imports/100_msgs_proto/benchmarks/100_msgs.proto
+ # We want just google/protobuf/any.proto.
+ virtual_imports = "_virtual_imports/"
+ if virtual_imports in short_path:
+ short_path = short_path.split(virtual_imports)[1].split("/", 1)[1]
+ return short_path
+
+def _get_real_root(ctx, file):
+ real_short_path = _get_real_short_path(file)
+ root = file.path[:-len(real_short_path) - 1]
+
+ if not _is_google3 and ctx.rule.attr.strip_import_prefix:
+ root = paths.join(root, ctx.rule.attr.strip_import_prefix[1:])
+ return root
+
+def _generate_output_file(ctx, src, extension):
+ package = ctx.label.package
+ if not _is_google3:
+ strip_import_prefix = ctx.rule.attr.strip_import_prefix
+ if strip_import_prefix and strip_import_prefix != "/":
+ if not package.startswith(strip_import_prefix[1:]):
+ fail("%s does not begin with prefix %s" % (package, strip_import_prefix))
+ package = package[len(strip_import_prefix):]
+
+ real_short_path = _get_real_short_path(src)
+ real_short_path = paths.relativize(real_short_path, package)
+ output_filename = paths.replace_extension(real_short_path, extension)
+ ret = ctx.actions.declare_file(output_filename)
+ return ret
+
+# py_proto_library() ###########################################################
+
+def _py_proto_library_rule_impl(ctx):
+ # A real py_proto_library() should enforce this constraint.
+ # We don't bother for now, since it saves us some effort not to.
+ #
+ # if len(ctx.attr.deps) != 1:
+ # fail("only one deps dependency allowed.")
+
+ files = []
+ for dep in ctx.attr.deps:
+ files += dep[PyInfo].transitive_sources.to_list()
+ return [
+ DefaultInfo(files = depset(direct = files)),
+ ]
+
+def _py_proto_library_aspect_impl(target, ctx):
+ proto_info = target[ProtoInfo]
+ proto_sources = proto_info.direct_sources
+ srcs = [_generate_output_file(ctx, name, "_pb2.py") for name in proto_sources]
+ transitive_sets = proto_info.transitive_descriptor_sets.to_list()
+ ctx.actions.run(
+ inputs = depset(
+ direct = [proto_info.direct_descriptor_set],
+ transitive = [proto_info.transitive_descriptor_sets],
+ ),
+ outputs = srcs,
+ executable = ctx.executable._protoc,
+ arguments = [
+ "--python_out=" + _get_real_root(ctx, srcs[0]),
+ "--descriptor_set_in=" + ctx.configuration.host_path_separator.join([f.path for f in transitive_sets]),
+ ] +
+ [_get_real_short_path(file) for file in proto_sources],
+ progress_message = "Generating Python protos for :" + ctx.label.name,
+ )
+ outs_depset = depset(srcs)
+ return [
+ PyInfo(transitive_sources = outs_depset),
+ ]
+
+_py_proto_library_aspect = aspect(
+ attrs = {
+ "_protoc": attr.label(
+ executable = True,
+ cfg = "exec",
+ default = "@com_google_protobuf//:protoc",
+ ),
+ },
+ implementation = _py_proto_library_aspect_impl,
+ provides = [
+ PyInfo,
+ ],
+ attr_aspects = ["deps"],
+)
+
+py_proto_library = rule(
+ output_to_genfiles = True,
+ implementation = _py_proto_library_rule_impl,
+ attrs = {
+ "deps": attr.label_list(
+ aspects = [_py_proto_library_aspect],
+ allow_rules = ["proto_library"],
+ providers = [ProtoInfo],
+ ),
+ },
+)
diff --git a/upb/bazel/python_downloads.bzl b/upb/bazel/python_downloads.bzl
new file mode 100644
index 0000000..e237c93
--- /dev/null
+++ b/upb/bazel/python_downloads.bzl
@@ -0,0 +1,84 @@
+"""Helper methods to download different python versions"""
+
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+
+limited_api_build_file = """
+cc_library(
+ name = "python_headers",
+ hdrs = glob(["**/Include/**/*.h"]),
+ strip_include_prefix = "Python-{}/Include",
+ visibility = ["//visibility:public"],
+)
+"""
+
+def python_source_archive(name, sha256):
+ """Helper method to create a python_headers target that will work for linux and macos.
+
+ Args:
+ name: The name of the target, should be in the form python_{VERSION}
+ sha256: The sha256 of the python package for the specified version
+ """
+ version = name.split("-")[1]
+ http_archive(
+ name = name,
+ urls = [
+ "https://www.python.org/ftp/python/{0}/Python-{0}.tgz"
+ .format(version),
+ ],
+ sha256 = sha256,
+ build_file_content = limited_api_build_file.format(version),
+ patch_cmds = [
+ "echo '#define SIZEOF_WCHAR_T 4' > Python-{}/Include/pyconfig.h"
+ .format(version),
+ ],
+ )
+
+nuget_build_file = """
+cc_import(
+ name = "python_full_api",
+ hdrs = glob(["**/*.h"]),
+ shared_library = "python{0}.dll",
+ interface_library = "libs/python{0}.lib",
+ visibility = ["@upb//python:__pkg__"],
+)
+
+cc_import(
+ name = "python_limited_api",
+ hdrs = glob(["**/*.h"]),
+ shared_library = "python{1}.dll",
+ interface_library = "libs/python{1}.lib",
+ visibility = ["@upb//python:__pkg__"],
+)
+"""
+
+def python_nuget_package(name, sha256):
+ """Helper method to create full and limited api dependencies for windows using nuget
+
+ Args:
+ name: The name of the target, should be in the form nuget_python_{CPU}_{VERSION}
+ sha256: The sha256 of the nuget package for that version
+ """
+ cpu = name.split("_")[2]
+ version = name.split("_")[3]
+
+ full_api_lib_number = version.split(".")[0] + version.split(".")[1]
+ limited_api_lib_number = version.split(".")[0]
+
+ folder_name_dict = {
+ "i686": "pythonx86",
+ "x86-64": "python",
+ }
+
+ http_archive(
+ name = name,
+ urls = [
+ "https://www.nuget.org/api/v2/package/{}/{}"
+ .format(folder_name_dict[cpu], version),
+ ],
+ sha256 = sha256,
+ strip_prefix = "tools",
+ build_file_content =
+ nuget_build_file.format(full_api_lib_number, limited_api_lib_number),
+ type = "zip",
+ patch_cmds = ["cp -r include/* ."],
+ )
diff --git a/upb/bazel/system_python.bzl b/upb/bazel/system_python.bzl
new file mode 100644
index 0000000..04b53c2
--- /dev/null
+++ b/upb/bazel/system_python.bzl
@@ -0,0 +1,293 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Repository rule for using Python 3.x headers from the system."""
+
+# Mock out rules_python's pip.bzl for cases where no system python is found.
+_mock_pip = """
+def _pip_install_impl(repository_ctx):
+ repository_ctx.file("BUILD.bazel", '''
+py_library(
+ name = "noop",
+ visibility = ["//visibility:public"],
+)
+''')
+ repository_ctx.file("requirements.bzl", '''
+def install_deps(*args, **kwargs):
+ print("WARNING: could not install pip dependencies")
+
+def requirement(*args, **kwargs):
+ return "@{}//:noop"
+'''.format(repository_ctx.attr.name))
+pip_install = repository_rule(
+ implementation = _pip_install_impl,
+ attrs = {
+ "requirements": attr.string(),
+ "requirements_overrides": attr.string_dict(),
+ "python_interpreter_target": attr.string(),
+ },
+)
+pip_parse = pip_install
+"""
+
+# Alias rules_python's pip.bzl for cases where a system python is found.
+_alias_pip = """
+load("@rules_python//python:pip.bzl", _pip_install = "pip_install", _pip_parse = "pip_parse")
+
+def _get_requirements(requirements, requirements_overrides):
+ for version, override in requirements_overrides.items():
+ if version in "{python_version}":
+ requirements = override
+ break
+ return requirements
+
+def pip_install(requirements, requirements_overrides={{}}, **kwargs):
+ _pip_install(
+ python_interpreter_target = "@{repo}//:interpreter",
+ requirements = _get_requirements(requirements, requirements_overrides),
+ **kwargs,
+ )
+def pip_parse(requirements, requirements_overrides={{}}, **kwargs):
+ _pip_parse(
+ python_interpreter_target = "@{repo}//:interpreter",
+ requirements = _get_requirements(requirements, requirements_overrides),
+ **kwargs,
+ )
+"""
+
+_mock_fuzzing_py = """
+def fuzzing_py_install_deps():
+ print("WARNING: could not install fuzzing_py dependencies")
+"""
+
+# Alias rules_fuzzing's requirements.bzl for cases where a system python is found.
+_alias_fuzzing_py = """
+load("@fuzzing_py_deps//:requirements.bzl", _fuzzing_py_install_deps = "install_deps")
+
+def fuzzing_py_install_deps():
+ _fuzzing_py_install_deps()
+"""
+
+_build_file = """
+load("@bazel_skylib//lib:selects.bzl", "selects")
+load("@bazel_skylib//rules:common_settings.bzl", "string_flag")
+load("@bazel_tools//tools/python:toolchain.bzl", "py_runtime_pair")
+
+cc_library(
+ name = "python_headers",
+ hdrs = glob(["python/**/*.h"], allow_empty = True),
+ includes = ["python"],
+ visibility = ["//visibility:public"],
+)
+
+string_flag(
+ name = "internal_python_support",
+ build_setting_default = "{support}",
+ values = [
+ "None",
+ "Supported",
+ "Unsupported",
+ ]
+)
+
+config_setting(
+ name = "none",
+ flag_values = {{
+ ":internal_python_support": "None",
+ }},
+ visibility = ["//visibility:public"],
+)
+
+config_setting(
+ name = "supported",
+ flag_values = {{
+ ":internal_python_support": "Supported",
+ }},
+ visibility = ["//visibility:public"],
+)
+
+config_setting(
+ name = "unsupported",
+ flag_values = {{
+ ":internal_python_support": "Unsupported",
+ }},
+ visibility = ["//visibility:public"],
+)
+
+selects.config_setting_group(
+ name = "exists",
+ match_any = [":supported", ":unsupported"],
+ visibility = ["//visibility:public"],
+)
+
+sh_binary(
+ name = "interpreter",
+ srcs = ["interpreter"],
+ visibility = ["//visibility:public"],
+)
+
+py_runtime(
+ name = "py3_runtime",
+ interpreter_path = "{interpreter}",
+ python_version = "PY3",
+)
+
+py_runtime_pair(
+ name = "runtime_pair",
+ py3_runtime = ":py3_runtime",
+)
+
+toolchain(
+ name = "python_toolchain",
+ toolchain = ":runtime_pair",
+ toolchain_type = "@rules_python//python:toolchain_type",
+)
+"""
+
+_register = """
+def register_system_python():
+ native.register_toolchains("@{}//:python_toolchain")
+"""
+
+_mock_register = """
+def register_system_python():
+ pass
+"""
+
+def _get_python_version(repository_ctx):
+ py_program = "import sys; print(str(sys.version_info.major) + '.' + str(sys.version_info.minor) + '.' + str(sys.version_info.micro))"
+ result = repository_ctx.execute(["python3", "-c", py_program])
+ return (result.stdout).strip().split(".")
+
+def _get_python_path(repository_ctx):
+ py_program = "import sysconfig; print(sysconfig.get_config_var('%s'), end='')"
+ result = repository_ctx.execute(["python3", "-c", py_program % ("INCLUDEPY")])
+ if result.return_code != 0:
+ return None
+ return result.stdout
+
+def _populate_package(ctx, path, python3, python_version):
+ ctx.symlink(path, "python")
+ supported = True
+ for idx, v in enumerate(ctx.attr.minimum_python_version.split(".")):
+ if int(python_version[idx]) < int(v):
+ supported = False
+ break
+ if "win" in ctx.os.name:
+ # buildifier: disable=print
+ print("WARNING: python is not supported on Windows")
+ supported = False
+
+ build_file = _build_file.format(
+ interpreter = python3,
+ support = "Supported" if supported else "Unsupported",
+ )
+
+ ctx.file("interpreter", "#!/bin/sh\nexec {} \"$@\"".format(python3))
+ ctx.file("BUILD.bazel", build_file)
+ ctx.file("version.bzl", "SYSTEM_PYTHON_VERSION = '{}{}'".format(python_version[0], python_version[1]))
+ ctx.file("register.bzl", _register.format(ctx.attr.name))
+ if supported:
+ ctx.file("pip.bzl", _alias_pip.format(
+ python_version = ".".join(python_version),
+ repo = ctx.attr.name,
+ ))
+ ctx.file("fuzzing_py.bzl", _alias_fuzzing_py)
+ else:
+ # Dependencies are unlikely to be satisfiable for unsupported versions of python.
+ ctx.file("pip.bzl", _mock_pip)
+ ctx.file("fuzzing_py.bzl", _mock_fuzzing_py)
+
+def _populate_empty_package(ctx):
+ # Mock out all the entrypoints we need to run from WORKSPACE. Targets that
+ # actually need python should use `target_compatible_with` and the generated
+ # @system_python//:exists or @system_python//:supported constraints.
+ ctx.file(
+ "BUILD.bazel",
+ _build_file.format(
+ interpreter = "",
+ support = "None",
+ ),
+ )
+ ctx.file("version.bzl", "SYSTEM_PYTHON_VERSION = 'None'")
+ ctx.file("register.bzl", _mock_register)
+ ctx.file("pip.bzl", _mock_pip)
+ ctx.file("fuzzing_py.bzl", _mock_fuzzing_py)
+
+def _system_python_impl(repository_ctx):
+ path = _get_python_path(repository_ctx)
+ python3 = repository_ctx.which("python3")
+ python_version = _get_python_version(repository_ctx)
+
+ if path and python_version[0] == "3":
+ _populate_package(repository_ctx, path, python3, python_version)
+ else:
+ # buildifier: disable=print
+ print("WARNING: no system python available, builds against system python will fail")
+ _populate_empty_package(repository_ctx)
+
+# The system_python() repository rule exposes information from the version of python installed in the current system.
+#
+# In WORKSPACE:
+# system_python(
+# name = "system_python_repo",
+# minimum_python_version = "3.7",
+# )
+#
+# This repository exposes some repository rules for configuring python in Bazel. The python toolchain
+# *must* be registered in your WORKSPACE:
+# load("@system_python_repo//:register.bzl", "register_system_python")
+# register_system_python()
+#
+# Pip dependencies can optionally be specified using a wrapper around rules_python's repository rules:
+# load("@system_python//:pip.bzl", "pip_install")
+# pip_install(
+# name="pip_deps",
+# requirements = "@com_google_protobuf//python:requirements.txt",
+# )
+# An optional argument `requirements_overrides` takes a dictionary mapping python versions to alternate
+# requirements files. This works around the requirement for fully pinned dependencies in python_rules.
+#
+# Four config settings are exposed from this repository to help declare target compatibility in Bazel.
+# For example, `@system_python_repo//:exists` will be true if a system python version has been found.
+# The `none` setting will be true only if no python version was found, and `supported`/`unsupported`
+# correspond to whether or not the system version is compatible with `minimum_python_version`.
+#
+# This repository also exposes a header rule that you can depend on from BUILD files:
+# cc_library(
+# name = "foobar",
+# srcs = ["foobar.cc"],
+# deps = ["@system_python_repo//:python_headers"],
+# )
+#
+# The headers should correspond to the version of python obtained by running
+# the `python3` command on the system.
+system_python = repository_rule(
+ implementation = _system_python_impl,
+ local = True,
+ attrs = {
+ "minimum_python_version": attr.string(default = "3.7"),
+ },
+)
diff --git a/upb/bazel/upb_proto_library.bzl b/upb/bazel/upb_proto_library.bzl
new file mode 100644
index 0000000..941249d
--- /dev/null
+++ b/upb/bazel/upb_proto_library.bzl
@@ -0,0 +1,530 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Public rules for using upb protos:
+ - upb_proto_library()
+ - upb_proto_reflection_library()
+"""
+
+load("@bazel_skylib//lib:paths.bzl", "paths")
+
+# begin:google_only
+# load("@bazel_tools//tools/cpp:toolchain_utils.bzl", "find_cpp_toolchain", "use_cpp_toolchain")
+# end:google_only
+
+# begin:github_only
+# Compatibility code for Bazel 4.x. Remove this when we drop support for Bazel 4.x.
+load("@bazel_tools//tools/cpp:toolchain_utils.bzl", "find_cpp_toolchain")
+
+def use_cpp_toolchain():
+ return ["@bazel_tools//tools/cpp:toolchain_type"]
+# end:github_only
+
+# Generic support code #########################################################
+
+# begin:github_only
+_is_google3 = False
+# end:github_only
+
+# begin:google_only
+# _is_google3 = True
+# end:google_only
+
+def _get_real_short_path(file):
+ # For some reason, files from other archives have short paths that look like:
+ # ../com_google_protobuf/google/protobuf/descriptor.proto
+ short_path = file.short_path
+ if short_path.startswith("../"):
+ second_slash = short_path.index("/", 3)
+ short_path = short_path[second_slash + 1:]
+
+ # Sometimes it has another few prefixes like:
+ # _virtual_imports/any_proto/google/protobuf/any.proto
+ # benchmarks/_virtual_imports/100_msgs_proto/benchmarks/100_msgs.proto
+ # We want just google/protobuf/any.proto.
+ virtual_imports = "_virtual_imports/"
+ if virtual_imports in short_path:
+ short_path = short_path.split(virtual_imports)[1].split("/", 1)[1]
+ return short_path
+
+def _get_real_root(ctx, file):
+ real_short_path = _get_real_short_path(file)
+ root = file.path[:-len(real_short_path) - 1]
+
+ if not _is_google3 and ctx.rule.attr.strip_import_prefix:
+ root = paths.join(root, ctx.rule.attr.strip_import_prefix[1:])
+ return root
+
+def _generate_output_file(ctx, src, extension):
+ package = ctx.label.package
+ if not _is_google3:
+ strip_import_prefix = ctx.rule.attr.strip_import_prefix
+ if strip_import_prefix and strip_import_prefix != "/":
+ if not package.startswith(strip_import_prefix[1:]):
+ fail("%s does not begin with prefix %s" % (package, strip_import_prefix))
+ package = package[len(strip_import_prefix):]
+
+ real_short_path = _get_real_short_path(src)
+ real_short_path = paths.relativize(real_short_path, package)
+ output_filename = paths.replace_extension(real_short_path, extension)
+ ret = ctx.actions.declare_file(output_filename)
+ return ret
+
+def _generate_include_path(src, out, extension):
+ short_path = _get_real_short_path(src)
+ short_path = paths.replace_extension(short_path, extension)
+ if not out.path.endswith(short_path):
+ fail("%s does not end with %s" % (out.path, short_path))
+
+ return out.path[:-len(short_path)]
+
+def _filter_none(elems):
+ out = []
+ for elem in elems:
+ if elem:
+ out.append(elem)
+ return out
+
+def _cc_library_func(ctx, name, hdrs, srcs, copts, includes, dep_ccinfos):
+ """Like cc_library(), but callable from rules.
+
+ Args:
+ ctx: Rule context.
+ name: Unique name used to generate output files.
+ hdrs: Public headers that can be #included from other rules.
+ srcs: C/C++ source files.
+ copts: Additional options for cc compilation.
+ includes: Additional include paths.
+ dep_ccinfos: CcInfo providers of dependencies we should build/link against.
+
+ Returns:
+ CcInfo provider for this compilation.
+ """
+
+ compilation_contexts = [info.compilation_context for info in dep_ccinfos]
+ linking_contexts = [info.linking_context for info in dep_ccinfos]
+ toolchain = find_cpp_toolchain(ctx)
+ feature_configuration = cc_common.configure_features(
+ ctx = ctx,
+ cc_toolchain = toolchain,
+ requested_features = ctx.features,
+ unsupported_features = ctx.disabled_features,
+ )
+
+ (compilation_context, compilation_outputs) = cc_common.compile(
+ actions = ctx.actions,
+ feature_configuration = feature_configuration,
+ cc_toolchain = toolchain,
+ name = name,
+ srcs = srcs,
+ includes = includes,
+ public_hdrs = hdrs,
+ user_compile_flags = copts,
+ compilation_contexts = compilation_contexts,
+ )
+
+ # buildifier: disable=unused-variable
+ (linking_context, linking_outputs) = cc_common.create_linking_context_from_compilation_outputs(
+ actions = ctx.actions,
+ name = name,
+ feature_configuration = feature_configuration,
+ cc_toolchain = toolchain,
+ compilation_outputs = compilation_outputs,
+ linking_contexts = linking_contexts,
+ disallow_dynamic_library = cc_common.is_enabled(feature_configuration = feature_configuration, feature_name = "targets_windows"),
+ )
+
+ return CcInfo(
+ compilation_context = compilation_context,
+ linking_context = linking_context,
+ )
+
+# Dummy rule to expose select() copts to aspects ##############################
+
+UpbProtoLibraryCoptsInfo = provider(
+ "Provides copts for upb proto targets",
+ fields = {
+ "copts": "copts for upb_proto_library()",
+ },
+)
+
+def upb_proto_library_copts_impl(ctx):
+ return UpbProtoLibraryCoptsInfo(copts = ctx.attr.copts)
+
+upb_proto_library_copts = rule(
+ implementation = upb_proto_library_copts_impl,
+ attrs = {"copts": attr.string_list(default = [])},
+)
+
+# upb_proto_library / upb_proto_reflection_library shared code #################
+
+GeneratedSrcsInfo = provider(
+ "Provides generated headers and sources",
+ fields = {
+ "srcs": "list of srcs",
+ "hdrs": "list of hdrs",
+ "thunks": "Experimental, do not use. List of srcs defining C API. Incompatible with hdrs.",
+ "includes": "list of extra includes",
+ },
+)
+
+def _concat_lists(lists):
+ ret = []
+ for lst in lists:
+ ret = ret + lst
+ return ret
+
+def _merge_generated_srcs(srcs):
+ return GeneratedSrcsInfo(
+ srcs = _concat_lists([s.srcs for s in srcs]),
+ hdrs = _concat_lists([s.hdrs for s in srcs]),
+ thunks = _concat_lists([s.thunks for s in srcs]),
+ includes = _concat_lists([s.includes for s in srcs]),
+ )
+
+UpbWrappedCcInfo = provider("Provider for cc_info for protos", fields = ["cc_info", "cc_info_with_thunks"])
+
+def _merge_wrapped_cc_infos(infos, cc_infos):
+ return UpbWrappedCcInfo(
+ cc_info = cc_common.merge_cc_infos(
+ direct_cc_infos = cc_infos + [info.cc_info for info in infos],
+ ),
+ cc_info_with_thunks = cc_common.merge_cc_infos(
+ direct_cc_infos = [info.cc_info_with_thunks for info in infos],
+ ),
+ )
+
+_UpbDefsWrappedCcInfo = provider("Provider for cc_info for protos", fields = ["cc_info"])
+_UpbWrappedGeneratedSrcsInfo = provider("Provider for generated sources", fields = ["srcs"])
+_WrappedDefsGeneratedSrcsInfo = provider(
+ "Provider for generated reflective sources",
+ fields = ["srcs"],
+)
+
+def _generate_upb_protos(ctx, generator, proto_info, proto_sources):
+ if len(proto_sources) == 0:
+ return GeneratedSrcsInfo(srcs = [], hdrs = [], thunks = [], includes = [])
+
+ ext = "." + generator
+ tool = getattr(ctx.executable, "_gen_" + generator)
+ srcs = [_generate_output_file(ctx, name, ext + ".c") for name in proto_sources]
+ hdrs = [_generate_output_file(ctx, name, ext + ".h") for name in proto_sources]
+ thunks = []
+ if generator == "upb":
+ thunks = [_generate_output_file(ctx, name, ext + ".thunks.c") for name in proto_sources]
+ transitive_sets = proto_info.transitive_descriptor_sets.to_list()
+
+ args = ctx.actions.args()
+ args.use_param_file(param_file_arg = "@%s")
+ args.set_param_file_format("multiline")
+
+ args.add("--" + generator + "_out=" + _get_real_root(ctx, srcs[0]))
+ args.add("--plugin=protoc-gen-" + generator + "=" + tool.path)
+ args.add("--descriptor_set_in=" + ctx.configuration.host_path_separator.join([f.path for f in transitive_sets]))
+ args.add_all(proto_sources, map_each = _get_real_short_path)
+
+ ctx.actions.run(
+ inputs = depset(
+ direct = [proto_info.direct_descriptor_set],
+ transitive = [proto_info.transitive_descriptor_sets],
+ ),
+ tools = [tool],
+ outputs = srcs + hdrs,
+ executable = ctx.executable._protoc,
+ arguments = [args],
+ progress_message = "Generating upb protos for :" + ctx.label.name,
+ mnemonic = "GenUpbProtos",
+ )
+ if generator == "upb":
+ ctx.actions.run_shell(
+ inputs = hdrs,
+ outputs = thunks,
+ command = " && ".join([
+ "sed 's/UPB_INLINE //' {} > {}".format(hdr.path, thunk.path)
+ for (hdr, thunk) in zip(hdrs, thunks)
+ ]),
+ progress_message = "Generating thunks for upb protos API for: " + ctx.label.name,
+ mnemonic = "GenUpbProtosThunks",
+ )
+ return GeneratedSrcsInfo(
+ srcs = srcs,
+ hdrs = hdrs,
+ thunks = thunks,
+ includes = [_generate_include_path(proto_sources[0], hdrs[0], ext + ".h")],
+ )
+
+def _upb_proto_rule_impl(ctx):
+ if len(ctx.attr.deps) != 1:
+ fail("only one deps dependency allowed.")
+ dep = ctx.attr.deps[0]
+
+ if _WrappedDefsGeneratedSrcsInfo in dep:
+ srcs = dep[_WrappedDefsGeneratedSrcsInfo].srcs
+ elif _UpbWrappedGeneratedSrcsInfo in dep:
+ srcs = dep[_UpbWrappedGeneratedSrcsInfo].srcs
+ else:
+ fail("proto_library rule must generate _UpbWrappedGeneratedSrcsInfo or " +
+ "_WrappedDefsGeneratedSrcsInfo (aspect should have handled this).")
+
+ if _UpbDefsWrappedCcInfo in dep:
+ cc_info = dep[_UpbDefsWrappedCcInfo].cc_info
+ elif UpbWrappedCcInfo in dep:
+ cc_info = dep[UpbWrappedCcInfo].cc_info
+ else:
+ fail("proto_library rule must generate UpbWrappedCcInfo or " +
+ "_UpbDefsWrappedCcInfo (aspect should have handled this).")
+
+ lib = cc_info.linking_context.linker_inputs.to_list()[0].libraries[0]
+ files = _filter_none([
+ lib.static_library,
+ lib.pic_static_library,
+ lib.dynamic_library,
+ ])
+ return [
+ DefaultInfo(files = depset(files + srcs.hdrs + srcs.srcs)),
+ srcs,
+ cc_info,
+ ]
+
+def _generate_name(ctx, generator, thunks = False):
+ if thunks:
+ return ctx.rule.attr.name + "." + generator + ".thunks"
+ return ctx.rule.attr.name + "." + generator
+
+def _get_dep_cc_info(target, ctx, generator):
+ deps = ctx.rule.attr.deps + getattr(ctx.attr, "_" + generator)
+ dep_ccinfos = [dep[CcInfo] for dep in deps if CcInfo in dep]
+ dep_ccinfos += [dep[_UpbDefsWrappedCcInfo].cc_info for dep in deps if _UpbDefsWrappedCcInfo in dep]
+
+ dep_wrapped_infos = [dep[UpbWrappedCcInfo] for dep in deps if UpbWrappedCcInfo in dep]
+ if generator == "upbdefs":
+ if UpbWrappedCcInfo not in target:
+ fail("Target should have UpbWrappedCcInfo provider")
+ dep_wrapped_infos.append(target[UpbWrappedCcInfo])
+
+ return _merge_wrapped_cc_infos(dep_wrapped_infos, dep_ccinfos)
+
+def _compile_upb_protos(ctx, files, generator, dep_wrapped_ccinfo, cc_provider):
+ cc_info = _cc_library_func(
+ ctx = ctx,
+ name = _generate_name(ctx, generator),
+ hdrs = files.hdrs,
+ srcs = files.srcs,
+ includes = files.includes,
+ copts = ctx.attr._copts[UpbProtoLibraryCoptsInfo].copts,
+ dep_ccinfos = [dep_wrapped_ccinfo.cc_info],
+ )
+
+ if files.thunks:
+ cc_info_with_thunks = _cc_library_func(
+ ctx = ctx,
+ name = _generate_name(ctx, generator, files.thunks),
+ hdrs = [],
+ srcs = files.thunks,
+ includes = files.includes,
+ copts = ctx.attr._copts[UpbProtoLibraryCoptsInfo].copts,
+ dep_ccinfos = [dep_wrapped_ccinfo.cc_info, cc_info],
+ )
+ return cc_provider(
+ cc_info = cc_info,
+ cc_info_with_thunks = cc_info_with_thunks,
+ )
+ else:
+ return cc_provider(
+ cc_info = cc_info,
+ )
+
+def _get_hint_providers(ctx, generator):
+ if generator not in _GENERATORS:
+ fail("Please add new generator '{}' to _GENERATORS list".format(generator))
+
+ possible_owners = []
+ for generator in _GENERATORS:
+ possible_owners.append(ctx.label.relative(_generate_name(ctx, generator)))
+ possible_owners.append(ctx.label.relative(_generate_name(ctx, generator, thunks = True)))
+
+ if hasattr(cc_common, "CcSharedLibraryHintInfo"):
+ return [cc_common.CcSharedLibraryHintInfo(owners = possible_owners)]
+ elif hasattr(cc_common, "CcSharedLibraryHintInfo_6_X_constructor_do_not_use"):
+ # This branch can be deleted once 6.X is not supported by upb rules
+ return [cc_common.CcSharedLibraryHintInfo_6_X_constructor_do_not_use(owners = possible_owners)]
+
+ return []
+
+def _upb_proto_aspect_impl(target, ctx, generator, cc_provider, file_provider, provide_cc_shared_library_hints = True):
+ dep_wrapped_ccinfo = _get_dep_cc_info(target, ctx, generator)
+ if not getattr(ctx.rule.attr, "srcs", []):
+ # This target doesn't declare any sources, reexport all its deps instead.
+ # This is known as an "alias library":
+ # https://bazel.build/reference/be/protocol-buffer#proto_library.srcs
+ files = _merge_generated_srcs([dep[file_provider].srcs for dep in ctx.rule.attr.deps])
+ wrapped_cc_info = dep_wrapped_ccinfo
+ else:
+ proto_info = target[ProtoInfo]
+ files = _generate_upb_protos(
+ ctx,
+ generator,
+ proto_info,
+ proto_info.direct_sources,
+ )
+ wrapped_cc_info = _compile_upb_protos(
+ ctx,
+ files,
+ generator,
+ dep_wrapped_ccinfo,
+ cc_provider,
+ )
+
+ hints = _get_hint_providers(ctx, generator) if provide_cc_shared_library_hints else []
+
+ return hints + [
+ file_provider(srcs = files),
+ wrapped_cc_info,
+ ]
+
+_GENERATORS = ["upb", "upbdefs"]
+
+def upb_proto_library_aspect_impl(target, ctx):
+ return _upb_proto_aspect_impl(target, ctx, "upb", UpbWrappedCcInfo, _UpbWrappedGeneratedSrcsInfo)
+
+def _upb_proto_reflection_library_aspect_impl(target, ctx):
+ return _upb_proto_aspect_impl(target, ctx, "upbdefs", _UpbDefsWrappedCcInfo, _WrappedDefsGeneratedSrcsInfo, provide_cc_shared_library_hints = False)
+
+# upb_proto_library() ##########################################################
+
+def _get_upb_proto_library_aspect_provides():
+ provides = [
+ UpbWrappedCcInfo,
+ _UpbWrappedGeneratedSrcsInfo,
+ ]
+
+ if hasattr(cc_common, "CcSharedLibraryHintInfo"):
+ provides.append(cc_common.CcSharedLibraryHintInfo)
+ elif hasattr(cc_common, "CcSharedLibraryHintInfo_6_X_getter_do_not_use"):
+ # This branch can be deleted once 6.X is not supported by upb rules
+ provides.append(cc_common.CcSharedLibraryHintInfo_6_X_getter_do_not_use)
+
+ return provides
+
+upb_proto_library_aspect = aspect(
+ attrs = {
+ "_copts": attr.label(
+ default = "//:upb_proto_library_copts__for_generated_code_only_do_not_use",
+ ),
+ "_gen_upb": attr.label(
+ executable = True,
+ cfg = "exec",
+ default = "//upbc:protoc-gen-upb_stage1",
+ ),
+ "_protoc": attr.label(
+ executable = True,
+ cfg = "exec",
+ default = "@com_google_protobuf//:protoc",
+ ),
+ "_cc_toolchain": attr.label(
+ default = "@bazel_tools//tools/cpp:current_cc_toolchain",
+ ),
+ "_upb": attr.label_list(default = [
+ "//:generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me",
+ ]),
+ "_fasttable_enabled": attr.label(default = "//:fasttable_enabled"),
+ },
+ implementation = upb_proto_library_aspect_impl,
+ provides = _get_upb_proto_library_aspect_provides(),
+ attr_aspects = ["deps"],
+ fragments = ["cpp"],
+ toolchains = use_cpp_toolchain(),
+ incompatible_use_toolchain_transition = True,
+)
+
+upb_proto_library = rule(
+ output_to_genfiles = True,
+ implementation = _upb_proto_rule_impl,
+ attrs = {
+ "deps": attr.label_list(
+ aspects = [upb_proto_library_aspect],
+ allow_rules = ["proto_library"],
+ providers = [ProtoInfo],
+ ),
+ },
+ provides = [CcInfo],
+)
+
+# upb_proto_reflection_library() ###############################################
+
+_upb_proto_reflection_library_aspect = aspect(
+ attrs = {
+ "_copts": attr.label(
+ default = "//:upb_proto_library_copts__for_generated_code_only_do_not_use",
+ ),
+ "_gen_upbdefs": attr.label(
+ executable = True,
+ cfg = "exec",
+ default = "//upbc:protoc-gen-upbdefs",
+ ),
+ "_protoc": attr.label(
+ executable = True,
+ cfg = "exec",
+ default = "@com_google_protobuf//:protoc",
+ ),
+ "_cc_toolchain": attr.label(
+ default = "@bazel_tools//tools/cpp:current_cc_toolchain",
+ ),
+ "_upbdefs": attr.label_list(
+ default = [
+ "//:generated_reflection_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me",
+ ],
+ ),
+ },
+ implementation = _upb_proto_reflection_library_aspect_impl,
+ provides = [
+ _UpbDefsWrappedCcInfo,
+ _WrappedDefsGeneratedSrcsInfo,
+ ],
+ required_aspect_providers = [
+ UpbWrappedCcInfo,
+ _UpbWrappedGeneratedSrcsInfo,
+ ],
+ attr_aspects = ["deps"],
+ fragments = ["cpp"],
+ toolchains = use_cpp_toolchain(),
+ incompatible_use_toolchain_transition = True,
+)
+
+upb_proto_reflection_library = rule(
+ output_to_genfiles = True,
+ implementation = _upb_proto_rule_impl,
+ attrs = {
+ "deps": attr.label_list(
+ aspects = [
+ upb_proto_library_aspect,
+ _upb_proto_reflection_library_aspect,
+ ],
+ allow_rules = ["proto_library"],
+ providers = [ProtoInfo],
+ ),
+ },
+ provides = [CcInfo],
+)
diff --git a/upb/bazel/workspace_deps.bzl b/upb/bazel/workspace_deps.bzl
new file mode 100644
index 0000000..937ffcd
--- /dev/null
+++ b/upb/bazel/workspace_deps.bzl
@@ -0,0 +1,104 @@
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe")
+load("//bazel:python_downloads.bzl", "python_nuget_package", "python_source_archive")
+
+def _github_archive(repo, commit, **kwargs):
+ repo_name = repo.split("/")[-1]
+ http_archive(
+ urls = [repo + "/archive/" + commit + ".zip"],
+ strip_prefix = repo_name + "-" + commit,
+ **kwargs
+ )
+
+def upb_deps():
+ maybe(
+ _github_archive,
+ name = "com_google_absl",
+ repo = "https://github.com/abseil/abseil-cpp",
+ commit = "c2435f8342c2d0ed8101cb43adfd605fdc52dca2", # Abseil LTS 20230125.3
+ sha256 = "ea1d31db00eb37e607bfda17ffac09064670ddf05da067944c4766f517876390",
+ )
+
+ maybe(
+ _github_archive,
+ name = "com_google_protobuf",
+ repo = "https://github.com/protocolbuffers/protobuf",
+ commit = "22e845e279bd79ad013bff4b79660b8c8b72d935",
+ sha256 = "276215041e767973f274299783b5d7b7de1a3c55628b9890bd9eb064dfa5daaf",
+ patches = ["@upb//bazel:protobuf.patch"],
+ )
+
+ maybe(
+ _github_archive,
+ name = "utf8_range",
+ repo = "https://github.com/protocolbuffers/utf8_range",
+ commit = "de0b4a8ff9b5d4c98108bdfe723291a33c52c54f",
+ sha256 = "5da960e5e5d92394c809629a03af3c7709d2d3d0ca731dacb3a9fb4bf28f7702",
+ )
+
+ maybe(
+ http_archive,
+ name = "rules_pkg",
+ urls = [
+ "https://mirror.bazel.build/github.com/bazelbuild/rules_pkg/releases/download/0.7.0/rules_pkg-0.7.0.tar.gz",
+ "https://github.com/bazelbuild/rules_pkg/releases/download/0.7.0/rules_pkg-0.7.0.tar.gz",
+ ],
+ sha256 = "8a298e832762eda1830597d64fe7db58178aa84cd5926d76d5b744d6558941c2",
+ )
+
+ maybe(
+ _github_archive,
+ name = "rules_python",
+ repo = "https://github.com/bazelbuild/rules_python",
+ commit = "912a5051f51581784fd64094f6bdabf93f6d698f", # 0.14.0
+ sha256 = "a3e4b4ade7c4a52e757b16a16e94d0b2640333062180cba577d81fac087a501d",
+ )
+
+ maybe(
+ http_archive,
+ name = "bazel_skylib",
+ urls = [
+ "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.3.0/bazel-skylib-1.3.0.tar.gz",
+ "https://github.com/bazelbuild/bazel-skylib/releases/download/1.3.0/bazel-skylib-1.3.0.tar.gz",
+ ],
+ sha256 = "74d544d96f4a5bb630d465ca8bbcfe231e3594e5aae57e1edbf17a6eb3ca2506",
+ )
+
+ #Python Downloads
+
+ python_source_archive(
+ name = "python-3.7.0",
+ sha256 = "85bb9feb6863e04fb1700b018d9d42d1caac178559ffa453d7e6a436e259fd0d",
+ )
+ python_nuget_package(
+ name = "nuget_python_i686_3.7.0",
+ sha256 = "a8bb49fa1ca62ad55430fcafaca1b58015e22943e66b1a87d5e7cef2556c6a54",
+ )
+ python_nuget_package(
+ name = "nuget_python_x86-64_3.7.0",
+ sha256 = "66eb796a5bdb1e6787b8f655a1237a6b6964af2115b7627cf4f0032cf068b4b2",
+ )
+ python_nuget_package(
+ name = "nuget_python_i686_3.8.0",
+ sha256 = "87a6481f5eef30b42ac12c93f06f73bd0b8692f26313b76a6615d1641c4e7bca",
+ )
+ python_nuget_package(
+ name = "nuget_python_x86-64_3.8.0",
+ sha256 = "96c61321ce90dd053c8a04f305a5f6cc6d91350b862db34440e4a4f069b708a0",
+ )
+ python_nuget_package(
+ name = "nuget_python_i686_3.9.0",
+ sha256 = "229abecbe49dc08fe5709e0b31e70edfb3b88f23335ebfc2904c44f940fd59b6",
+ )
+ python_nuget_package(
+ name = "nuget_python_x86-64_3.9.0",
+ sha256 = "6af58a733e7dfbfcdd50d55788134393d6ffe7ab8270effbf724bdb786558832",
+ )
+ python_nuget_package(
+ name = "nuget_python_i686_3.10.0",
+ sha256 = "e115e102eb90ce160ab0ef7506b750a8d7ecc385bde0a496f02a54337a8bc333",
+ )
+ python_nuget_package(
+ name = "nuget_python_x86-64_3.10.0",
+ sha256 = "4474c83c25625d93e772e926f95f4cd398a0abbb52793625fa30f39af3d2cc00",
+ )
diff --git a/upb/benchmarks/BUILD b/upb/benchmarks/BUILD
new file mode 100644
index 0000000..9983ba8
--- /dev/null
+++ b/upb/benchmarks/BUILD
@@ -0,0 +1,262 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load("@rules_python//python:defs.bzl", "py_binary")
+
+# begin:google_only
+# load("@rules_cc//cc:defs.bzl", "cc_proto_library")
+# end:google_only
+
+load(
+ "//bazel:upb_proto_library.bzl",
+ "upb_proto_library",
+ "upb_proto_reflection_library",
+)
+load(
+ ":build_defs.bzl",
+ "cc_optimizefor_proto_library",
+ "expand_suffixes",
+ "proto_library",
+ "tmpl_cc_binary",
+)
+
+# begin:google_only
+# package(default_applicable_licenses = ["//:license"])
+# end:google_only
+
+licenses(["notice"])
+
+proto_library(
+ name = "descriptor_proto",
+ srcs = ["descriptor.proto"],
+)
+
+upb_proto_library(
+ name = "benchmark_descriptor_upb_proto",
+ deps = [":descriptor_proto"],
+)
+
+upb_proto_reflection_library(
+ name = "benchmark_descriptor_upb_proto_reflection",
+ deps = [":descriptor_proto"],
+)
+
+upb_proto_reflection_library(
+ name = "ads_upb_proto_reflection",
+ deps = ["@com_google_googleapis//:ads_proto"],
+)
+
+cc_proto_library(
+ name = "benchmark_descriptor_cc_proto",
+ deps = [":descriptor_proto"],
+)
+
+proto_library(
+ name = "benchmark_descriptor_sv_proto",
+ srcs = ["descriptor_sv.proto"],
+)
+
+cc_proto_library(
+ name = "benchmark_descriptor_sv_cc_proto",
+ deps = [":benchmark_descriptor_sv_proto"],
+)
+
+cc_test(
+ name = "benchmark",
+ testonly = 1,
+ srcs = ["benchmark.cc"],
+ deps = [
+ ":ads_upb_proto_reflection",
+ ":benchmark_descriptor_cc_proto",
+ ":benchmark_descriptor_sv_cc_proto",
+ ":benchmark_descriptor_upb_proto",
+ ":benchmark_descriptor_upb_proto_reflection",
+ "//:base",
+ "//:base_internal",
+ "//:descriptor_upb_proto",
+ "//:mem",
+ "//:reflection",
+ "@com_github_google_benchmark//:benchmark_main",
+ "@com_google_absl//absl/container:flat_hash_set",
+ "@com_google_protobuf//:protobuf",
+ ],
+)
+
+# Size benchmarks.
+
+SIZE_BENCHMARKS = {
+ "empty": "Empty",
+ "descriptor": "FileDescriptorSet",
+ "100_msgs": "Message100",
+ "200_msgs": "Message200",
+ "100_fields": "Message",
+ "200_fields": "Message",
+}
+
+py_binary(
+ name = "gen_synthetic_protos",
+ srcs = ["gen_synthetic_protos.py"],
+ python_version = "PY3",
+)
+
+py_binary(
+ name = "gen_upb_binary_c",
+ srcs = ["gen_upb_binary_c.py"],
+ python_version = "PY3",
+)
+
+py_binary(
+ name = "gen_protobuf_binary_cc",
+ srcs = ["gen_protobuf_binary_cc.py"],
+ python_version = "PY3",
+)
+
+genrule(
+ name = "do_gen_synthetic_protos",
+ outs = [
+ "100_msgs.proto",
+ "200_msgs.proto",
+ "100_fields.proto",
+ "200_fields.proto",
+ ],
+ cmd = "$(execpath :gen_synthetic_protos) $(RULEDIR)",
+ tools = [":gen_synthetic_protos"],
+)
+
+proto_library(
+ name = "100_msgs_proto",
+ srcs = ["100_msgs.proto"],
+)
+
+proto_library(
+ name = "200_msgs_proto",
+ srcs = ["200_msgs.proto"],
+)
+
+proto_library(
+ name = "100_fields_proto",
+ srcs = ["100_fields.proto"],
+)
+
+proto_library(
+ name = "200_fields_proto",
+ srcs = ["200_fields.proto"],
+)
+
+proto_library(
+ name = "empty_proto",
+ srcs = ["empty.proto"],
+)
+
+[(
+ upb_proto_library(
+ name = k + "_upb_proto",
+ deps = [":" + k + "_proto"],
+ ),
+ cc_proto_library(
+ name = k + "_cc_proto",
+ deps = [":" + k + "_proto"],
+ ),
+ tmpl_cc_binary(
+ name = k + "_upb_binary",
+ testonly = 1,
+ args = [
+ package_name() + "/" + k + ".upb.h",
+ "upb_benchmark_" + v,
+ ],
+ gen = ":gen_upb_binary_c",
+ deps = [
+ ":" + k + "_upb_proto",
+ ],
+ ),
+ tmpl_cc_binary(
+ name = k + "_protobuf_binary",
+ testonly = 1,
+ args = [
+ package_name() + "/" + k + ".pb.h",
+ "upb_benchmark::" + v,
+ ],
+ gen = ":gen_protobuf_binary_cc",
+ deps = [
+ ":" + k + "_cc_proto",
+ ],
+ ),
+ cc_optimizefor_proto_library(
+ name = k + "_cc_lite_proto",
+ srcs = [k + ".proto"],
+ outs = [k + "_lite.proto"],
+ optimize_for = "LITE_RUNTIME",
+ ),
+ tmpl_cc_binary(
+ name = k + "_lite_protobuf_binary",
+ testonly = 1,
+ args = [
+ package_name() + "/" + k + "_lite.pb.h",
+ "upb_benchmark::" + v,
+ ],
+ gen = ":gen_protobuf_binary_cc",
+ deps = [
+ ":" + k + "_cc_lite_proto",
+ ],
+ ),
+ cc_optimizefor_proto_library(
+ name = k + "_cc_codesize_proto",
+ srcs = [k + ".proto"],
+ outs = [k + "_codesize.proto"],
+ optimize_for = "CODE_SIZE",
+ ),
+ tmpl_cc_binary(
+ name = k + "_codesize_protobuf_binary",
+ testonly = 1,
+ args = [
+ package_name() + "/" + k + "_codesize.pb.h",
+ "upb_benchmark::" + v,
+ ],
+ gen = ":gen_protobuf_binary_cc",
+ deps = [
+ ":" + k + "_cc_codesize_proto",
+ ],
+ ),
+) for k, v in SIZE_BENCHMARKS.items()]
+
+genrule(
+ name = "size_data",
+ testonly = 1,
+ srcs = expand_suffixes(
+ SIZE_BENCHMARKS.keys(),
+ suffixes = [
+ "_upb_binary",
+ "_protobuf_binary",
+ "_lite_protobuf_binary",
+ "_codesize_protobuf_binary",
+ ],
+ ),
+ outs = ["size_data.txt"],
+ # We want --format=GNU which counts rodata with data, not text.
+ cmd = "size $$($$OSTYPE == 'linux-gnu' ? '--format=GNU -d' : '') $(SRCS) > $@",
+ # "size" sometimes isn't available remotely.
+ local = 1,
+ tags = ["no-remote-exec"],
+)
diff --git a/upb/benchmarks/BUILD.googleapis b/upb/benchmarks/BUILD.googleapis
new file mode 100644
index 0000000..4469678
--- /dev/null
+++ b/upb/benchmarks/BUILD.googleapis
@@ -0,0 +1,59 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load(
+ "@rules_proto//proto:defs.bzl",
+ "proto_library",
+)
+
+proto_library(
+ name = "ads_proto",
+ srcs = glob([
+ "google/ads/googleads/v13/**/*.proto",
+ "google/api/**/*.proto",
+ "google/rpc/**/*.proto",
+ "google/longrunning/**/*.proto",
+ "google/logging/**/*.proto",
+ ]),
+ #srcs = ["google/ads/googleads/v5/services/google_ads_service.proto"],
+ visibility = ["//visibility:public"],
+ deps = [
+ "@com_google_protobuf//:any_proto",
+ "@com_google_protobuf//:api_proto",
+ "@com_google_protobuf//:descriptor_proto",
+ "@com_google_protobuf//:duration_proto",
+ "@com_google_protobuf//:empty_proto",
+ "@com_google_protobuf//:field_mask_proto",
+ "@com_google_protobuf//:struct_proto",
+ "@com_google_protobuf//:timestamp_proto",
+ "@com_google_protobuf//:type_proto",
+ "@com_google_protobuf//:wrappers_proto",
+ ],
+)
diff --git a/upb/benchmarks/benchmark.cc b/upb/benchmarks/benchmark.cc
new file mode 100644
index 0000000..a2e2cc2
--- /dev/null
+++ b/upb/benchmarks/benchmark.cc
@@ -0,0 +1,390 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <benchmark/benchmark.h>
+
+#include <string.h>
+
+#include <vector>
+
+#include "google/ads/googleads/v13/services/google_ads_service.upbdefs.h"
+#include "google/protobuf/descriptor.pb.h"
+#include "absl/container/flat_hash_set.h"
+#include "google/protobuf/dynamic_message.h"
+#include "benchmarks/descriptor.pb.h"
+#include "benchmarks/descriptor.upb.h"
+#include "benchmarks/descriptor.upbdefs.h"
+#include "benchmarks/descriptor_sv.pb.h"
+#include "upb/base/internal/log2.h"
+#include "upb/mem/arena.h"
+#include "upb/reflection/def.hpp"
+
+upb_StringView descriptor = benchmarks_descriptor_proto_upbdefinit.descriptor;
+namespace protobuf = ::google::protobuf;
+
+// A buffer big enough to parse descriptor.proto without going to heap.
+// We use 64-bit ints here to force alignment.
+int64_t buf[8191];
+
+void CollectFileDescriptors(
+ const _upb_DefPool_Init* file,
+ std::vector<upb_StringView>& serialized_files,
+ absl::flat_hash_set<const _upb_DefPool_Init*>& seen) {
+ if (!seen.insert(file).second) return;
+ for (_upb_DefPool_Init** deps = file->deps; *deps; deps++) {
+ CollectFileDescriptors(*deps, serialized_files, seen);
+ }
+ serialized_files.push_back(file->descriptor);
+}
+
+static void BM_ArenaOneAlloc(benchmark::State& state) {
+ for (auto _ : state) {
+ upb_Arena* arena = upb_Arena_New();
+ upb_Arena_Malloc(arena, 1);
+ upb_Arena_Free(arena);
+ }
+}
+BENCHMARK(BM_ArenaOneAlloc);
+
+static void BM_ArenaInitialBlockOneAlloc(benchmark::State& state) {
+ for (auto _ : state) {
+ upb_Arena* arena = upb_Arena_Init(buf, sizeof(buf), nullptr);
+ upb_Arena_Malloc(arena, 1);
+ upb_Arena_Free(arena);
+ }
+}
+BENCHMARK(BM_ArenaInitialBlockOneAlloc);
+
+static void BM_ArenaFuseUnbalanced(benchmark::State& state) {
+ std::vector<upb_Arena*> arenas(state.range(0));
+ size_t n = 0;
+ for (auto _ : state) {
+ for (auto& arena : arenas) {
+ arena = upb_Arena_New();
+ }
+ for (auto& arena : arenas) {
+ upb_Arena_Fuse(arenas[0], arena);
+ }
+ for (auto& arena : arenas) {
+ upb_Arena_Free(arena);
+ }
+ n += arenas.size();
+ }
+ state.SetItemsProcessed(n);
+}
+BENCHMARK(BM_ArenaFuseUnbalanced)->Range(2, 128);
+
+static void BM_ArenaFuseBalanced(benchmark::State& state) {
+ std::vector<upb_Arena*> arenas(state.range(0));
+ size_t n = 0;
+
+ for (auto _ : state) {
+ for (auto& arena : arenas) {
+ arena = upb_Arena_New();
+ }
+
+ // Perform a series of fuses that keeps the halves balanced.
+ size_t max = upb_Log2Ceiling(arenas.size());
+ for (size_t n = 0; n <= max; n++) {
+ size_t step = 1 << n;
+ for (size_t i = 0; i + step < arenas.size(); i += (step * 2)) {
+ upb_Arena_Fuse(arenas[i], arenas[i + step]);
+ }
+ }
+
+ for (auto& arena : arenas) {
+ upb_Arena_Free(arena);
+ }
+ n += arenas.size();
+ }
+ state.SetItemsProcessed(n);
+}
+BENCHMARK(BM_ArenaFuseBalanced)->Range(2, 128);
+
+enum LoadDescriptorMode {
+ NoLayout,
+ WithLayout,
+};
+
+// This function is mostly copied from upb/def.c, but it is modified to avoid
+// passing in the pre-generated mini-tables, in order to force upb to compute
+// them dynamically. Generally you would never want to do this, but we want to
+// simulate the cost we would pay if we were loading these types purely from
+// descriptors, with no mini-tales available.
+bool LoadDefInit_BuildLayout(upb_DefPool* s, const _upb_DefPool_Init* init,
+ size_t* bytes) {
+ _upb_DefPool_Init** deps = init->deps;
+ google_protobuf_FileDescriptorProto* file;
+ upb_Arena* arena;
+ upb_Status status;
+
+ upb_Status_Clear(&status);
+
+ if (upb_DefPool_FindFileByName(s, init->filename)) {
+ return true;
+ }
+
+ arena = upb_Arena_New();
+
+ for (; *deps; deps++) {
+ if (!LoadDefInit_BuildLayout(s, *deps, bytes)) goto err;
+ }
+
+ file = google_protobuf_FileDescriptorProto_parse_ex(
+ init->descriptor.data, init->descriptor.size, nullptr,
+ kUpb_DecodeOption_AliasString, arena);
+ *bytes += init->descriptor.size;
+
+ if (!file) {
+ upb_Status_SetErrorFormat(
+ &status,
+ "Failed to parse compiled-in descriptor for file '%s'. This should "
+ "never happen.",
+ init->filename);
+ goto err;
+ }
+
+ // KEY DIFFERENCE: Here we pass in only the descriptor, and not the
+ // pre-generated minitables.
+ if (!upb_DefPool_AddFile(s, file, &status)) {
+ goto err;
+ }
+
+ upb_Arena_Free(arena);
+ return true;
+
+err:
+ fprintf(stderr,
+ "Error loading compiled-in descriptor for file '%s' (this should "
+ "never happen): %s\n",
+ init->filename, upb_Status_ErrorMessage(&status));
+ exit(1);
+}
+
+template <LoadDescriptorMode Mode>
+static void BM_LoadAdsDescriptor_Upb(benchmark::State& state) {
+ size_t bytes_per_iter = 0;
+ for (auto _ : state) {
+ upb::DefPool defpool;
+ if (Mode == NoLayout) {
+ google_ads_googleads_v13_services_SearchGoogleAdsRequest_getmsgdef(
+ defpool.ptr());
+ bytes_per_iter = _upb_DefPool_BytesLoaded(defpool.ptr());
+ } else {
+ bytes_per_iter = 0;
+ LoadDefInit_BuildLayout(
+ defpool.ptr(),
+ &google_ads_googleads_v13_services_google_ads_service_proto_upbdefinit,
+ &bytes_per_iter);
+ }
+ }
+ state.SetBytesProcessed(state.iterations() * bytes_per_iter);
+}
+BENCHMARK_TEMPLATE(BM_LoadAdsDescriptor_Upb, NoLayout);
+BENCHMARK_TEMPLATE(BM_LoadAdsDescriptor_Upb, WithLayout);
+
+template <LoadDescriptorMode Mode>
+static void BM_LoadAdsDescriptor_Proto2(benchmark::State& state) {
+ extern _upb_DefPool_Init
+ google_ads_googleads_v13_services_google_ads_service_proto_upbdefinit;
+ std::vector<upb_StringView> serialized_files;
+ absl::flat_hash_set<const _upb_DefPool_Init*> seen_files;
+ CollectFileDescriptors(
+ &google_ads_googleads_v13_services_google_ads_service_proto_upbdefinit,
+ serialized_files, seen_files);
+ size_t bytes_per_iter = 0;
+ for (auto _ : state) {
+ bytes_per_iter = 0;
+ protobuf::Arena arena;
+ protobuf::DescriptorPool pool;
+ for (auto file : serialized_files) {
+ absl::string_view input(file.data, file.size);
+ auto proto =
+ protobuf::Arena::CreateMessage<protobuf::FileDescriptorProto>(&arena);
+ bool ok = proto->ParseFrom<protobuf::MessageLite::kMergePartial>(input) &&
+ pool.BuildFile(*proto) != nullptr;
+ if (!ok) {
+ printf("Failed to add file.\n");
+ exit(1);
+ }
+ bytes_per_iter += input.size();
+ }
+
+ if (Mode == WithLayout) {
+ protobuf::DynamicMessageFactory factory;
+ const protobuf::Descriptor* d = pool.FindMessageTypeByName(
+ "google.ads.googleads.v13.services.SearchGoogleAdsResponse");
+ if (!d) {
+ printf("Failed to find descriptor.\n");
+ exit(1);
+ }
+ factory.GetPrototype(d);
+ }
+ }
+ state.SetBytesProcessed(state.iterations() * bytes_per_iter);
+}
+BENCHMARK_TEMPLATE(BM_LoadAdsDescriptor_Proto2, NoLayout);
+BENCHMARK_TEMPLATE(BM_LoadAdsDescriptor_Proto2, WithLayout);
+
+enum CopyStrings {
+ Copy,
+ Alias,
+};
+
+enum ArenaMode {
+ NoArena,
+ UseArena,
+ InitBlock,
+};
+
+template <ArenaMode AMode, CopyStrings Copy>
+static void BM_Parse_Upb_FileDesc(benchmark::State& state) {
+ for (auto _ : state) {
+ upb_Arena* arena;
+ if (AMode == InitBlock) {
+ arena = upb_Arena_Init(buf, sizeof(buf), nullptr);
+ } else {
+ arena = upb_Arena_New();
+ }
+ upb_benchmark_FileDescriptorProto* set =
+ upb_benchmark_FileDescriptorProto_parse_ex(
+ descriptor.data, descriptor.size, nullptr,
+ Copy == Alias ? kUpb_DecodeOption_AliasString : 0, arena);
+ if (!set) {
+ printf("Failed to parse.\n");
+ exit(1);
+ }
+ upb_Arena_Free(arena);
+ }
+ state.SetBytesProcessed(state.iterations() * descriptor.size);
+}
+BENCHMARK_TEMPLATE(BM_Parse_Upb_FileDesc, UseArena, Copy);
+BENCHMARK_TEMPLATE(BM_Parse_Upb_FileDesc, UseArena, Alias);
+BENCHMARK_TEMPLATE(BM_Parse_Upb_FileDesc, InitBlock, Copy);
+BENCHMARK_TEMPLATE(BM_Parse_Upb_FileDesc, InitBlock, Alias);
+
+template <ArenaMode AMode, class P>
+struct Proto2Factory;
+
+template <class P>
+struct Proto2Factory<NoArena, P> {
+ public:
+ P* GetProto() { return &proto; }
+
+ private:
+ P proto;
+};
+
+template <class P>
+struct Proto2Factory<UseArena, P> {
+ public:
+ P* GetProto() { return protobuf::Arena::CreateMessage<P>(&arena); }
+
+ private:
+ protobuf::Arena arena;
+};
+
+template <class P>
+struct Proto2Factory<InitBlock, P> {
+ public:
+ Proto2Factory() : arena(GetOptions()) {}
+ P* GetProto() { return protobuf::Arena::CreateMessage<P>(&arena); }
+
+ private:
+ protobuf::ArenaOptions GetOptions() {
+ protobuf::ArenaOptions opts;
+ opts.initial_block = (char*)buf;
+ opts.initial_block_size = sizeof(buf);
+ return opts;
+ }
+
+ protobuf::Arena arena;
+};
+
+using FileDesc = ::upb_benchmark::FileDescriptorProto;
+using FileDescSV = ::upb_benchmark::sv::FileDescriptorProto;
+
+template <class P, ArenaMode AMode, CopyStrings kCopy>
+void BM_Parse_Proto2(benchmark::State& state) {
+ constexpr protobuf::MessageLite::ParseFlags kParseFlags =
+ kCopy == Copy
+ ? protobuf::MessageLite::ParseFlags::kMergePartial
+ : protobuf::MessageLite::ParseFlags::kMergePartialWithAliasing;
+ for (auto _ : state) {
+ Proto2Factory<AMode, P> proto_factory;
+ auto proto = proto_factory.GetProto();
+ absl::string_view input(descriptor.data, descriptor.size);
+ bool ok = proto->template ParseFrom<kParseFlags>(input);
+ if (!ok) {
+ printf("Failed to parse.\n");
+ exit(1);
+ }
+ }
+ state.SetBytesProcessed(state.iterations() * descriptor.size);
+}
+BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, NoArena, Copy);
+BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, UseArena, Copy);
+BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, InitBlock, Copy);
+BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, InitBlock, Alias);
+
+static void BM_SerializeDescriptor_Proto2(benchmark::State& state) {
+ upb_benchmark::FileDescriptorProto proto;
+ proto.ParseFromArray(descriptor.data, descriptor.size);
+ for (auto _ : state) {
+ proto.SerializePartialToArray(buf, sizeof(buf));
+ }
+ state.SetBytesProcessed(state.iterations() * descriptor.size);
+}
+BENCHMARK(BM_SerializeDescriptor_Proto2);
+
+static void BM_SerializeDescriptor_Upb(benchmark::State& state) {
+ int64_t total = 0;
+ upb_Arena* arena = upb_Arena_New();
+ upb_benchmark_FileDescriptorProto* set =
+ upb_benchmark_FileDescriptorProto_parse(descriptor.data, descriptor.size,
+ arena);
+ if (!set) {
+ printf("Failed to parse.\n");
+ exit(1);
+ }
+ for (auto _ : state) {
+ upb_Arena* enc_arena = upb_Arena_Init(buf, sizeof(buf), nullptr);
+ size_t size;
+ char* data =
+ upb_benchmark_FileDescriptorProto_serialize(set, enc_arena, &size);
+ if (!data) {
+ printf("Failed to serialize.\n");
+ exit(1);
+ }
+ total += size;
+ }
+ state.SetBytesProcessed(total);
+}
+BENCHMARK(BM_SerializeDescriptor_Upb);
diff --git a/upb/benchmarks/build_defs.bzl b/upb/benchmarks/build_defs.bzl
new file mode 100644
index 0000000..168dab6
--- /dev/null
+++ b/upb/benchmarks/build_defs.bzl
@@ -0,0 +1,88 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# begin:google_only
+# load("@rules_cc//cc:defs.bzl", _cc_proto_library = "cc_proto_library")
+#
+# _is_google3 = True
+# end:google_only
+
+# begin:github_only
+_cc_proto_library = native.cc_proto_library
+_is_google3 = False
+# end:github_only
+
+def proto_library(**kwargs):
+ if _is_google3:
+ kwargs["cc_api_version"] = 2
+ native.proto_library(
+ **kwargs
+ )
+
+def tmpl_cc_binary(name, gen, args, replacements = [], **kwargs):
+ srcs = [name + ".cc"]
+ native.genrule(
+ name = name + "_gen_srcs",
+ tools = [gen],
+ outs = srcs,
+ cmd = "$(location " + gen + ") " + " ".join(args) + " > $@",
+ )
+
+ if _is_google3:
+ kwargs["malloc"] = "//base:system_malloc"
+ kwargs["features"] = ["-static_linking_mode"]
+ native.cc_binary(
+ name = name,
+ srcs = srcs,
+ **kwargs
+ )
+
+def cc_optimizefor_proto_library(name, srcs, outs, optimize_for):
+ if len(srcs) != 1:
+ fail("Currently srcs must have exactly 1 element")
+
+ native.genrule(
+ name = name + "_gen_proto",
+ srcs = srcs,
+ outs = outs,
+ cmd = "cp $< $@ && chmod a+w $@ && echo 'option optimize_for = " + optimize_for + ";' >> $@",
+ )
+
+ proto_library(
+ name = name + "_proto",
+ srcs = outs,
+ )
+
+ _cc_proto_library(
+ name = name,
+ deps = [":" + name + "_proto"],
+ )
+
+def expand_suffixes(vals, suffixes):
+ ret = []
+ for val in vals:
+ for suffix in suffixes:
+ ret.append(val + suffix)
+ return ret
diff --git a/upb/benchmarks/compare.py b/upb/benchmarks/compare.py
new file mode 100755
index 0000000..027d994
--- /dev/null
+++ b/upb/benchmarks/compare.py
@@ -0,0 +1,123 @@
+#!/usr/bin/python3
+#
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Benchmarks the current working directory against a given baseline.
+
+This script benchmarks both size and speed. Sample output:
+"""
+
+import contextlib
+import json
+import os
+import re
+import subprocess
+import sys
+import tempfile
+
+@contextlib.contextmanager
+def GitWorktree(commit):
+ tmpdir = tempfile.mkdtemp()
+ subprocess.run(['git', 'worktree', 'add', '-q', '-d', tmpdir, commit], check=True)
+ cwd = os.getcwd()
+ os.chdir(tmpdir)
+ try:
+ yield tmpdir
+ finally:
+ os.chdir(cwd)
+ subprocess.run(['git', 'worktree', 'remove', tmpdir], check=True)
+
+def Run(cmd):
+ subprocess.check_call(cmd, shell=True)
+
+def Benchmark(outbase, bench_cpu=True, runs=12, fasttable=False):
+ tmpfile = "/tmp/bench-output.json"
+ Run("rm -rf {}".format(tmpfile))
+ #Run("CC=clang bazel test ...")
+ if fasttable:
+ extra_args = " --//:fasttable_enabled=true"
+ else:
+ extra_args = ""
+
+ if bench_cpu:
+ Run("CC=clang bazel build -c opt --copt=-march=native benchmarks:benchmark" + extra_args)
+ Run("./bazel-bin/benchmarks/benchmark --benchmark_out_format=json --benchmark_out={} --benchmark_repetitions={} --benchmark_min_time=0.05 --benchmark_enable_random_interleaving=true".format(tmpfile, runs))
+ with open(tmpfile) as f:
+ bench_json = json.load(f)
+
+ # Translate into the format expected by benchstat.
+ txt_filename = outbase + ".txt"
+ with open(txt_filename, "w") as f:
+ for run in bench_json["benchmarks"]:
+ if run["run_type"] == "aggregate":
+ continue
+ name = run["name"]
+ name = name.replace(" ", "")
+ name = re.sub(r'^BM_', 'Benchmark', name)
+ values = (name, run["iterations"], run["cpu_time"])
+ print("{} {} {} ns/op".format(*values), file=f)
+ Run("sort {} -o {} ".format(txt_filename, txt_filename))
+
+ Run("CC=clang bazel build -c opt --copt=-g --copt=-march=native :conformance_upb"
+ + extra_args)
+ Run("cp -f bazel-bin/conformance_upb {}.bin".format(outbase))
+
+
+baseline = "main"
+bench_cpu = True
+fasttable = False
+
+if len(sys.argv) > 1:
+ baseline = sys.argv[1]
+
+ # Quickly verify that the baseline exists.
+ with GitWorktree(baseline):
+ pass
+
+# Benchmark our current directory first, since it's more likely to be broken.
+Benchmark("/tmp/new", bench_cpu, fasttable=fasttable)
+
+# Benchmark the baseline.
+with GitWorktree(baseline):
+ Benchmark("/tmp/old", bench_cpu, fasttable=fasttable)
+
+print()
+print()
+
+if bench_cpu:
+ Run("~/go/bin/benchstat /tmp/old.txt /tmp/new.txt")
+
+print()
+print()
+
+Run("objcopy --strip-debug /tmp/old.bin /tmp/old.bin.stripped")
+Run("objcopy --strip-debug /tmp/new.bin /tmp/new.bin.stripped")
+Run("~/code/bloaty/bloaty /tmp/new.bin.stripped -- /tmp/old.bin.stripped --debug-file=/tmp/old.bin --debug-file=/tmp/new.bin -d compileunits,symbols")
diff --git a/upb/benchmarks/descriptor.proto b/upb/benchmarks/descriptor.proto
new file mode 100644
index 0000000..a912dbf
--- /dev/null
+++ b/upb/benchmarks/descriptor.proto
@@ -0,0 +1,888 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// The messages in this file describe the definitions found in .proto files.
+// A valid .proto file can be translated directly to a FileDescriptorProto
+// without any other information (e.g. without reading its imports).
+
+syntax = "proto2";
+
+package upb_benchmark;
+
+option go_package = "google.golang.org/protobuf/types/descriptorpb";
+option java_package = "com.google.protobuf";
+option java_outer_classname = "DescriptorProtos";
+option csharp_namespace = "Google.Protobuf.Reflection";
+option objc_class_prefix = "UPBB";
+option cc_enable_arenas = true;
+
+// The protocol compiler can output a FileDescriptorSet containing the .proto
+// files it parses.
+message FileDescriptorSet {
+ repeated FileDescriptorProto file = 1;
+}
+
+// Describes a complete .proto file.
+message FileDescriptorProto {
+ optional string name = 1; // file name, relative to root of source tree
+ optional string package = 2; // e.g. "foo", "foo.bar", etc.
+
+ // Names of files imported by this file.
+ repeated string dependency = 3;
+ // Indexes of the public imported files in the dependency list above.
+ repeated int32 public_dependency = 10;
+ // Indexes of the weak imported files in the dependency list.
+ // For Google-internal migration only. Do not use.
+ repeated int32 weak_dependency = 11;
+
+ // All top-level definitions in this file.
+ repeated DescriptorProto message_type = 4;
+ repeated EnumDescriptorProto enum_type = 5;
+ repeated ServiceDescriptorProto service = 6;
+ repeated FieldDescriptorProto extension = 7;
+
+ optional FileOptions options = 8;
+
+ // This field contains optional information about the original source code.
+ // You may safely remove this entire field without harming runtime
+ // functionality of the descriptors -- the information is needed only by
+ // development tools.
+ optional SourceCodeInfo source_code_info = 9;
+
+ // The syntax of the proto file.
+ // The supported values are "proto2" and "proto3".
+ optional string syntax = 12;
+}
+
+// Describes a message type.
+message DescriptorProto {
+ optional string name = 1;
+
+ repeated FieldDescriptorProto field = 2;
+ repeated FieldDescriptorProto extension = 6;
+
+ repeated DescriptorProto nested_type = 3;
+ repeated EnumDescriptorProto enum_type = 4;
+
+ message ExtensionRange {
+ optional int32 start = 1; // Inclusive.
+ optional int32 end = 2; // Exclusive.
+
+ optional ExtensionRangeOptions options = 3;
+ }
+ repeated ExtensionRange extension_range = 5;
+
+ repeated OneofDescriptorProto oneof_decl = 8;
+
+ optional MessageOptions options = 7;
+
+ // Range of reserved tag numbers. Reserved tag numbers may not be used by
+ // fields or extension ranges in the same message. Reserved ranges may
+ // not overlap.
+ message ReservedRange {
+ optional int32 start = 1; // Inclusive.
+ optional int32 end = 2; // Exclusive.
+ }
+ repeated ReservedRange reserved_range = 9;
+ // Reserved field names, which may not be used by fields in the same message.
+ // A given name may only be reserved once.
+ repeated string reserved_name = 10;
+}
+
+message ExtensionRangeOptions {
+ // The parser stores options it doesn't recognize here. See above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message. See above.
+ extensions 1000 to max;
+}
+
+// Describes a field within a message.
+message FieldDescriptorProto {
+ enum Type {
+ // 0 is reserved for errors.
+ // Order is weird for historical reasons.
+ TYPE_DOUBLE = 1;
+ TYPE_FLOAT = 2;
+ // Not ZigZag encoded. Negative numbers take 10 bytes. Use TYPE_SINT64 if
+ // negative values are likely.
+ TYPE_INT64 = 3;
+ TYPE_UINT64 = 4;
+ // Not ZigZag encoded. Negative numbers take 10 bytes. Use TYPE_SINT32 if
+ // negative values are likely.
+ TYPE_INT32 = 5;
+ TYPE_FIXED64 = 6;
+ TYPE_FIXED32 = 7;
+ TYPE_BOOL = 8;
+ TYPE_STRING = 9;
+ // Tag-delimited aggregate.
+ // Group type is deprecated and not supported in proto3. However, Proto3
+ // implementations should still be able to parse the group wire format and
+ // treat group fields as unknown fields.
+ TYPE_GROUP = 10;
+ TYPE_MESSAGE = 11; // Length-delimited aggregate.
+
+ // New in version 2.
+ TYPE_BYTES = 12;
+ TYPE_UINT32 = 13;
+ TYPE_ENUM = 14;
+ TYPE_SFIXED32 = 15;
+ TYPE_SFIXED64 = 16;
+ TYPE_SINT32 = 17; // Uses ZigZag encoding.
+ TYPE_SINT64 = 18; // Uses ZigZag encoding.
+ }
+
+ enum Label {
+ // 0 is reserved for errors
+ LABEL_OPTIONAL = 1;
+ LABEL_REQUIRED = 2;
+ LABEL_REPEATED = 3;
+ }
+
+ optional string name = 1;
+ optional int32 number = 3;
+ optional Label label = 4;
+
+ // If type_name is set, this need not be set. If both this and type_name
+ // are set, this must be one of TYPE_ENUM, TYPE_MESSAGE or TYPE_GROUP.
+ optional Type type = 5;
+
+ // For message and enum types, this is the name of the type. If the name
+ // starts with a '.', it is fully-qualified. Otherwise, C++-like scoping
+ // rules are used to find the type (i.e. first the nested types within this
+ // message are searched, then within the parent, on up to the root
+ // namespace).
+ optional string type_name = 6;
+
+ // For extensions, this is the name of the type being extended. It is
+ // resolved in the same manner as type_name.
+ optional string extendee = 2;
+
+ // For numeric types, contains the original text representation of the value.
+ // For booleans, "true" or "false".
+ // For strings, contains the default text contents (not escaped in any way).
+ // For bytes, contains the C escaped value. All bytes >= 128 are escaped.
+ // TODO(kenton): Base-64 encode?
+ optional string default_value = 7;
+
+ // If set, gives the index of a oneof in the containing type's oneof_decl
+ // list. This field is a member of that oneof.
+ optional int32 oneof_index = 9;
+
+ // JSON name of this field. The value is set by protocol compiler. If the
+ // user has set a "json_name" option on this field, that option's value
+ // will be used. Otherwise, it's deduced from the field's name by converting
+ // it to camelCase.
+ optional string json_name = 10;
+
+ optional FieldOptions options = 8;
+
+ // If true, this is a proto3 "optional". When a proto3 field is optional, it
+ // tracks presence regardless of field type.
+ //
+ // When proto3_optional is true, this field must be belong to a oneof to
+ // signal to old proto3 clients that presence is tracked for this field. This
+ // oneof is known as a "synthetic" oneof, and this field must be its sole
+ // member (each proto3 optional field gets its own synthetic oneof). Synthetic
+ // oneofs exist in the descriptor only, and do not generate any API. Synthetic
+ // oneofs must be ordered after all "real" oneofs.
+ //
+ // For message fields, proto3_optional doesn't create any semantic change,
+ // since non-repeated message fields always track presence. However it still
+ // indicates the semantic detail of whether the user wrote "optional" or not.
+ // This can be useful for round-tripping the .proto file. For consistency we
+ // give message fields a synthetic oneof also, even though it is not required
+ // to track presence. This is especially important because the parser can't
+ // tell if a field is a message or an enum, so it must always create a
+ // synthetic oneof.
+ //
+ // Proto2 optional fields do not set this flag, because they already indicate
+ // optional with `LABEL_OPTIONAL`.
+ optional bool proto3_optional = 17;
+}
+
+// Describes a oneof.
+message OneofDescriptorProto {
+ optional string name = 1;
+ optional OneofOptions options = 2;
+}
+
+// Describes an enum type.
+message EnumDescriptorProto {
+ optional string name = 1;
+
+ repeated EnumValueDescriptorProto value = 2;
+
+ optional EnumOptions options = 3;
+
+ // Range of reserved numeric values. Reserved values may not be used by
+ // entries in the same enum. Reserved ranges may not overlap.
+ //
+ // Note that this is distinct from DescriptorProto.ReservedRange in that it
+ // is inclusive such that it can appropriately represent the entire int32
+ // domain.
+ message EnumReservedRange {
+ optional int32 start = 1; // Inclusive.
+ optional int32 end = 2; // Inclusive.
+ }
+
+ // Range of reserved numeric values. Reserved numeric values may not be used
+ // by enum values in the same enum declaration. Reserved ranges may not
+ // overlap.
+ repeated EnumReservedRange reserved_range = 4;
+
+ // Reserved enum value names, which may not be reused. A given name may only
+ // be reserved once.
+ repeated string reserved_name = 5;
+}
+
+// Describes a value within an enum.
+message EnumValueDescriptorProto {
+ optional string name = 1;
+ optional int32 number = 2;
+
+ optional EnumValueOptions options = 3;
+}
+
+// Describes a service.
+message ServiceDescriptorProto {
+ optional string name = 1;
+ repeated MethodDescriptorProto method = 2;
+
+ optional ServiceOptions options = 3;
+}
+
+// Describes a method of a service.
+message MethodDescriptorProto {
+ optional string name = 1;
+
+ // Input and output type names. These are resolved in the same way as
+ // FieldDescriptorProto.type_name, but must refer to a message type.
+ optional string input_type = 2;
+ optional string output_type = 3;
+
+ optional MethodOptions options = 4;
+
+ // Identifies if client streams multiple client messages
+ optional bool client_streaming = 5 [default = false];
+ // Identifies if server streams multiple server messages
+ optional bool server_streaming = 6 [default = false];
+}
+
+// ===================================================================
+// Options
+
+// Each of the definitions above may have "options" attached. These are
+// just annotations which may cause code to be generated slightly differently
+// or may contain hints for code that manipulates protocol messages.
+//
+// Clients may define custom options as extensions of the *Options messages.
+// These extensions may not yet be known at parsing time, so the parser cannot
+// store the values in them. Instead it stores them in a field in the *Options
+// message called uninterpreted_option. This field must have the same name
+// across all *Options messages. We then use this field to populate the
+// extensions when we build a descriptor, at which point all protos have been
+// parsed and so all extensions are known.
+//
+// Extension numbers for custom options may be chosen as follows:
+// * For options which will only be used within a single application or
+// organization, or for experimental options, use field numbers 50000
+// through 99999. It is up to you to ensure that you do not use the
+// same number for multiple options.
+// * For options which will be published and used publicly by multiple
+// independent entities, e-mail protobuf-global-extension-registry@google.com
+// to reserve extension numbers. Simply provide your project name (e.g.
+// Objective-C plugin) and your project website (if available) -- there's no
+// need to explain how you intend to use them. Usually you only need one
+// extension number. You can declare multiple options with only one extension
+// number by putting them in a sub-message. See the Custom Options section of
+// the docs for examples:
+// https://developers.google.com/protocol-buffers/docs/proto#options
+// If this turns out to be popular, a web service will be set up
+// to automatically assign option numbers.
+
+message FileOptions {
+ // Sets the Java package where classes generated from this .proto will be
+ // placed. By default, the proto package is used, but this is often
+ // inappropriate because proto packages do not normally start with backwards
+ // domain names.
+ optional string java_package = 1;
+
+ // If set, all the classes from the .proto file are wrapped in a single
+ // outer class with the given name. This applies to both Proto1
+ // (equivalent to the old "--one_java_file" option) and Proto2 (where
+ // a .proto always translates to a single class, but you may want to
+ // explicitly choose the class name).
+ optional string java_outer_classname = 8;
+
+ // If set true, then the Java code generator will generate a separate .java
+ // file for each top-level message, enum, and service defined in the .proto
+ // file. Thus, these types will *not* be nested inside the outer class
+ // named by java_outer_classname. However, the outer class will still be
+ // generated to contain the file's getDescriptor() method as well as any
+ // top-level extensions defined in the file.
+ optional bool java_multiple_files = 10 [default = false];
+
+ // This option does nothing.
+ optional bool java_generate_equals_and_hash = 20 [deprecated = true];
+
+ // If set true, then the Java2 code generator will generate code that
+ // throws an exception whenever an attempt is made to assign a non-UTF-8
+ // byte sequence to a string field.
+ // Message reflection will do the same.
+ // However, an extension field still accepts non-UTF-8 byte sequences.
+ // This option has no effect on when used with the lite runtime.
+ optional bool java_string_check_utf8 = 27 [default = false];
+
+ // Generated classes can be optimized for speed or code size.
+ enum OptimizeMode {
+ SPEED = 1; // Generate complete code for parsing, serialization,
+ // etc.
+ CODE_SIZE = 2; // Use ReflectionOps to implement these methods.
+ LITE_RUNTIME = 3; // Generate code using MessageLite and the lite runtime.
+ }
+ optional OptimizeMode optimize_for = 9 [default = SPEED];
+
+ // Sets the Go package where structs generated from this .proto will be
+ // placed. If omitted, the Go package will be derived from the following:
+ // - The basename of the package import path, if provided.
+ // - Otherwise, the package statement in the .proto file, if present.
+ // - Otherwise, the basename of the .proto file, without extension.
+ optional string go_package = 11;
+
+ // Should generic services be generated in each language? "Generic" services
+ // are not specific to any particular RPC system. They are generated by the
+ // main code generators in each language (without additional plugins).
+ // Generic services were the only kind of service generation supported by
+ // early versions of google.protobuf.
+ //
+ // Generic services are now considered deprecated in favor of using plugins
+ // that generate code specific to your particular RPC system. Therefore,
+ // these default to false. Old code which depends on generic services should
+ // explicitly set them to true.
+ optional bool cc_generic_services = 16 [default = false];
+ optional bool java_generic_services = 17 [default = false];
+ optional bool py_generic_services = 18 [default = false];
+ optional bool php_generic_services = 42 [default = false];
+
+ // Is this file deprecated?
+ // Depending on the target platform, this can emit Deprecated annotations
+ // for everything in the file, or it will be completely ignored; in the very
+ // least, this is a formalization for deprecating files.
+ optional bool deprecated = 23 [default = false];
+
+ // Enables the use of arenas for the proto messages in this file. This applies
+ // only to generated classes for C++.
+ optional bool cc_enable_arenas = 31 [default = true];
+
+ // Sets the objective c class prefix which is prepended to all objective c
+ // generated classes from this .proto. There is no default.
+ optional string objc_class_prefix = 36;
+
+ // Namespace for generated classes; defaults to the package.
+ optional string csharp_namespace = 37;
+
+ // By default Swift generators will take the proto package and CamelCase it
+ // replacing '.' with underscore and use that to prefix the types/symbols
+ // defined. When this options is provided, they will use this value instead
+ // to prefix the types/symbols defined.
+ optional string swift_prefix = 39;
+
+ // Sets the php class prefix which is prepended to all php generated classes
+ // from this .proto. Default is empty.
+ optional string php_class_prefix = 40;
+
+ // Use this option to change the namespace of php generated classes. Default
+ // is empty. When this option is empty, the package name will be used for
+ // determining the namespace.
+ optional string php_namespace = 41;
+
+ // Use this option to change the namespace of php generated metadata classes.
+ // Default is empty. When this option is empty, the proto file name will be
+ // used for determining the namespace.
+ optional string php_metadata_namespace = 44;
+
+ // Use this option to change the package of ruby generated classes. Default
+ // is empty. When this option is not set, the package name will be used for
+ // determining the ruby package.
+ optional string ruby_package = 45;
+
+ // The parser stores options it doesn't recognize here.
+ // See the documentation for the "Options" section above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message.
+ // See the documentation for the "Options" section above.
+ extensions 1000 to max;
+
+ reserved 38;
+}
+
+message MessageOptions {
+ // Set true to use the old proto1 MessageSet wire format for extensions.
+ // This is provided for backwards-compatibility with the MessageSet wire
+ // format. You should not use this for any other reason: It's less
+ // efficient, has fewer features, and is more complicated.
+ //
+ // The message must be defined exactly as follows:
+ // message Foo {
+ // option message_set_wire_format = true;
+ // extensions 4 to max;
+ // }
+ // Note that the message cannot have any defined fields; MessageSets only
+ // have extensions.
+ //
+ // All extensions of your type must be singular messages; e.g. they cannot
+ // be int32s, enums, or repeated messages.
+ //
+ // Because this is an option, the above two restrictions are not enforced by
+ // the protocol compiler.
+ optional bool message_set_wire_format = 1 [default = false];
+
+ // Disables the generation of the standard "descriptor()" accessor, which can
+ // conflict with a field of the same name. This is meant to make migration
+ // from proto1 easier; new code should avoid fields named "descriptor".
+ optional bool no_standard_descriptor_accessor = 2 [default = false];
+
+ // Is this message deprecated?
+ // Depending on the target platform, this can emit Deprecated annotations
+ // for the message, or it will be completely ignored; in the very least,
+ // this is a formalization for deprecating messages.
+ optional bool deprecated = 3 [default = false];
+
+ // Whether the message is an automatically generated map entry type for the
+ // maps field.
+ //
+ // For maps fields:
+ // map<KeyType, ValueType> map_field = 1;
+ // The parsed descriptor looks like:
+ // message MapFieldEntry {
+ // option map_entry = true;
+ // optional KeyType key = 1;
+ // optional ValueType value = 2;
+ // }
+ // repeated MapFieldEntry map_field = 1;
+ //
+ // Implementations may choose not to generate the map_entry=true message, but
+ // use a native map in the target language to hold the keys and values.
+ // The reflection APIs in such implementations still need to work as
+ // if the field is a repeated message field.
+ //
+ // NOTE: Do not set the option in .proto files. Always use the maps syntax
+ // instead. The option should only be implicitly set by the proto compiler
+ // parser.
+ optional bool map_entry = 7;
+
+ reserved 8; // javalite_serializable
+ reserved 9; // javanano_as_lite
+
+ // The parser stores options it doesn't recognize here. See above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message. See above.
+ extensions 1000 to max;
+}
+
+message FieldOptions {
+ // The ctype option instructs the C++ code generator to use a different
+ // representation of the field than it normally would. See the specific
+ // options below. This option is not yet implemented in the open source
+ // release -- sorry, we'll try to include it in a future version!
+ optional CType ctype = 1 [default = STRING];
+ enum CType {
+ // Default mode.
+ STRING = 0;
+
+ CORD = 1;
+
+ STRING_PIECE = 2;
+ }
+ // The packed option can be enabled for repeated primitive fields to enable
+ // a more efficient representation on the wire. Rather than repeatedly
+ // writing the tag and type for each element, the entire array is encoded as
+ // a single length-delimited blob. In proto3, only explicit setting it to
+ // false will avoid using packed encoding.
+ optional bool packed = 2;
+
+ // The jstype option determines the JavaScript type used for values of the
+ // field. The option is permitted only for 64 bit integral and fixed types
+ // (int64, uint64, sint64, fixed64, sfixed64). A field with jstype JS_STRING
+ // is represented as JavaScript string, which avoids loss of precision that
+ // can happen when a large value is converted to a floating point JavaScript.
+ // Specifying JS_NUMBER for the jstype causes the generated JavaScript code to
+ // use the JavaScript "number" type. The behavior of the default option
+ // JS_NORMAL is implementation dependent.
+ //
+ // This option is an enum to permit additional types to be added, e.g.
+ // goog.math.Integer.
+ optional JSType jstype = 6 [default = JS_NORMAL];
+ enum JSType {
+ // Use the default type.
+ JS_NORMAL = 0;
+
+ // Use JavaScript strings.
+ JS_STRING = 1;
+
+ // Use JavaScript numbers.
+ JS_NUMBER = 2;
+ }
+
+ // Should this field be parsed lazily? Lazy applies only to message-type
+ // fields. It means that when the outer message is initially parsed, the
+ // inner message's contents will not be parsed but instead stored in encoded
+ // form. The inner message will actually be parsed when it is first accessed.
+ //
+ // This is only a hint. Implementations are free to choose whether to use
+ // eager or lazy parsing regardless of the value of this option. However,
+ // setting this option true suggests that the protocol author believes that
+ // using lazy parsing on this field is worth the additional bookkeeping
+ // overhead typically needed to implement it.
+ //
+ // This option does not affect the public interface of any generated code;
+ // all method signatures remain the same. Furthermore, thread-safety of the
+ // interface is not affected by this option; const methods remain safe to
+ // call from multiple threads concurrently, while non-const methods continue
+ // to require exclusive access.
+ //
+ //
+ // Note that implementations may choose not to check required fields within
+ // a lazy sub-message. That is, calling IsInitialized() on the outer message
+ // may return true even if the inner message has missing required fields.
+ // This is necessary because otherwise the inner message would have to be
+ // parsed in order to perform the check, defeating the purpose of lazy
+ // parsing. An implementation which chooses not to check required fields
+ // must be consistent about it. That is, for any particular sub-message, the
+ // implementation must either *always* check its required fields, or *never*
+ // check its required fields, regardless of whether or not the message has
+ // been parsed.
+ optional bool lazy = 5 [default = false];
+
+ // Is this field deprecated?
+ // Depending on the target platform, this can emit Deprecated annotations
+ // for accessors, or it will be completely ignored; in the very least, this
+ // is a formalization for deprecating fields.
+ optional bool deprecated = 3 [default = false];
+
+ // For Google-internal migration only. Do not use.
+ optional bool weak = 10 [default = false];
+
+ // The parser stores options it doesn't recognize here. See above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message. See above.
+ extensions 1000 to max;
+
+ reserved 4; // removed jtype
+}
+
+message OneofOptions {
+ // The parser stores options it doesn't recognize here. See above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message. See above.
+ extensions 1000 to max;
+}
+
+message EnumOptions {
+ // Set this option to true to allow mapping different tag names to the same
+ // value.
+ optional bool allow_alias = 2;
+
+ // Is this enum deprecated?
+ // Depending on the target platform, this can emit Deprecated annotations
+ // for the enum, or it will be completely ignored; in the very least, this
+ // is a formalization for deprecating enums.
+ optional bool deprecated = 3 [default = false];
+
+ reserved 5; // javanano_as_lite
+
+ // The parser stores options it doesn't recognize here. See above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message. See above.
+ extensions 1000 to max;
+}
+
+message EnumValueOptions {
+ // Is this enum value deprecated?
+ // Depending on the target platform, this can emit Deprecated annotations
+ // for the enum value, or it will be completely ignored; in the very least,
+ // this is a formalization for deprecating enum values.
+ optional bool deprecated = 1 [default = false];
+
+ // The parser stores options it doesn't recognize here. See above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message. See above.
+ extensions 1000 to max;
+}
+
+message ServiceOptions {
+ // Note: Field numbers 1 through 32 are reserved for Google's internal RPC
+ // framework. We apologize for hoarding these numbers to ourselves, but
+ // we were already using them long before we decided to release Protocol
+ // Buffers.
+
+ // Is this service deprecated?
+ // Depending on the target platform, this can emit Deprecated annotations
+ // for the service, or it will be completely ignored; in the very least,
+ // this is a formalization for deprecating services.
+ optional bool deprecated = 33 [default = false];
+
+ // The parser stores options it doesn't recognize here. See above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message. See above.
+ extensions 1000 to max;
+}
+
+message MethodOptions {
+ // Note: Field numbers 1 through 32 are reserved for Google's internal RPC
+ // framework. We apologize for hoarding these numbers to ourselves, but
+ // we were already using them long before we decided to release Protocol
+ // Buffers.
+
+ // Is this method deprecated?
+ // Depending on the target platform, this can emit Deprecated annotations
+ // for the method, or it will be completely ignored; in the very least,
+ // this is a formalization for deprecating methods.
+ optional bool deprecated = 33 [default = false];
+
+ // Is this method side-effect-free (or safe in HTTP parlance), or idempotent,
+ // or neither? HTTP based RPC implementation may choose GET verb for safe
+ // methods, and PUT verb for idempotent methods instead of the default POST.
+ enum IdempotencyLevel {
+ IDEMPOTENCY_UNKNOWN = 0;
+ NO_SIDE_EFFECTS = 1; // implies idempotent
+ IDEMPOTENT = 2; // idempotent, but may have side effects
+ }
+ optional IdempotencyLevel idempotency_level = 34
+ [default = IDEMPOTENCY_UNKNOWN];
+
+ // The parser stores options it doesn't recognize here. See above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message. See above.
+ extensions 1000 to max;
+}
+
+// A message representing a option the parser does not recognize. This only
+// appears in options protos created by the compiler::Parser class.
+// DescriptorPool resolves these when building Descriptor objects. Therefore,
+// options protos in descriptor objects (e.g. returned by Descriptor::options(),
+// or produced by Descriptor::CopyTo()) will never have UninterpretedOptions
+// in them.
+message UninterpretedOption {
+ // The name of the uninterpreted option. Each string represents a segment in
+ // a dot-separated name. is_extension is true iff a segment represents an
+ // extension (denoted with parentheses in options specs in .proto files).
+ // E.g.,{ ["foo", false], ["bar.baz", true], ["qux", false] } represents
+ // "foo.(bar.baz).qux".
+ message NamePart {
+ optional string name_part = 1;
+ optional bool is_extension = 2;
+ }
+ repeated NamePart name = 2;
+
+ // The value of the uninterpreted option, in whatever type the tokenizer
+ // identified it as during parsing. Exactly one of these should be set.
+ optional string identifier_value = 3;
+ optional uint64 positive_int_value = 4;
+ optional int64 negative_int_value = 5;
+ optional double double_value = 6;
+ optional bytes string_value = 7;
+ optional string aggregate_value = 8;
+}
+
+// ===================================================================
+// Optional source code info
+
+// Encapsulates information about the original source file from which a
+// FileDescriptorProto was generated.
+message SourceCodeInfo {
+ // A Location identifies a piece of source code in a .proto file which
+ // corresponds to a particular definition. This information is intended
+ // to be useful to IDEs, code indexers, documentation generators, and similar
+ // tools.
+ //
+ // For example, say we have a file like:
+ // message Foo {
+ // optional string foo = 1;
+ // }
+ // Let's look at just the field definition:
+ // optional string foo = 1;
+ // ^ ^^ ^^ ^ ^^^
+ // a bc de f ghi
+ // We have the following locations:
+ // span path represents
+ // [a,i) [ 4, 0, 2, 0 ] The whole field definition.
+ // [a,b) [ 4, 0, 2, 0, 4 ] The label (optional).
+ // [c,d) [ 4, 0, 2, 0, 5 ] The type (string).
+ // [e,f) [ 4, 0, 2, 0, 1 ] The name (foo).
+ // [g,h) [ 4, 0, 2, 0, 3 ] The number (1).
+ //
+ // Notes:
+ // - A location may refer to a repeated field itself (i.e. not to any
+ // particular index within it). This is used whenever a set of elements are
+ // logically enclosed in a single code segment. For example, an entire
+ // extend block (possibly containing multiple extension definitions) will
+ // have an outer location whose path refers to the "extensions" repeated
+ // field without an index.
+ // - Multiple locations may have the same path. This happens when a single
+ // logical declaration is spread out across multiple places. The most
+ // obvious example is the "extend" block again -- there may be multiple
+ // extend blocks in the same scope, each of which will have the same path.
+ // - A location's span is not always a subset of its parent's span. For
+ // example, the "extendee" of an extension declaration appears at the
+ // beginning of the "extend" block and is shared by all extensions within
+ // the block.
+ // - Just because a location's span is a subset of some other location's span
+ // does not mean that it is a descendant. For example, a "group" defines
+ // both a type and a field in a single declaration. Thus, the locations
+ // corresponding to the type and field and their components will overlap.
+ // - Code which tries to interpret locations should probably be designed to
+ // ignore those that it doesn't understand, as more types of locations could
+ // be recorded in the future.
+ repeated Location location = 1;
+ message Location {
+ // Identifies which part of the FileDescriptorProto was defined at this
+ // location.
+ //
+ // Each element is a field number or an index. They form a path from
+ // the root FileDescriptorProto to the place where the definition. For
+ // example, this path:
+ // [ 4, 3, 2, 7, 1 ]
+ // refers to:
+ // file.message_type(3) // 4, 3
+ // .field(7) // 2, 7
+ // .name() // 1
+ // This is because FileDescriptorProto.message_type has field number 4:
+ // repeated DescriptorProto message_type = 4;
+ // and DescriptorProto.field has field number 2:
+ // repeated FieldDescriptorProto field = 2;
+ // and FieldDescriptorProto.name has field number 1:
+ // optional string name = 1;
+ //
+ // Thus, the above path gives the location of a field name. If we removed
+ // the last element:
+ // [ 4, 3, 2, 7 ]
+ // this path refers to the whole field declaration (from the beginning
+ // of the label to the terminating semicolon).
+ repeated int32 path = 1 [packed = true];
+
+ // Always has exactly three or four elements: start line, start column,
+ // end line (optional, otherwise assumed same as start line), end column.
+ // These are packed into a single field for efficiency. Note that line
+ // and column numbers are zero-based -- typically you will want to add
+ // 1 to each before displaying to a user.
+ repeated int32 span = 2 [packed = true];
+
+ // If this SourceCodeInfo represents a complete declaration, these are any
+ // comments appearing before and after the declaration which appear to be
+ // attached to the declaration.
+ //
+ // A series of line comments appearing on consecutive lines, with no other
+ // tokens appearing on those lines, will be treated as a single comment.
+ //
+ // leading_detached_comments will keep paragraphs of comments that appear
+ // before (but not connected to) the current element. Each paragraph,
+ // separated by empty lines, will be one comment element in the repeated
+ // field.
+ //
+ // Only the comment content is provided; comment markers (e.g. //) are
+ // stripped out. For block comments, leading whitespace and an asterisk
+ // will be stripped from the beginning of each line other than the first.
+ // Newlines are included in the output.
+ //
+ // Examples:
+ //
+ // optional int32 foo = 1; // Comment attached to foo.
+ // // Comment attached to bar.
+ // optional int32 bar = 2;
+ //
+ // optional string baz = 3;
+ // // Comment attached to baz.
+ // // Another line attached to baz.
+ //
+ // // Comment attached to qux.
+ // //
+ // // Another line attached to qux.
+ // optional double qux = 4;
+ //
+ // // Detached comment for corge. This is not leading or trailing comments
+ // // to qux or corge because there are blank lines separating it from
+ // // both.
+ //
+ // // Detached comment for corge paragraph 2.
+ //
+ // optional string corge = 5;
+ // /* Block comment attached
+ // * to corge. Leading asterisks
+ // * will be removed. */
+ // /* Block comment attached to
+ // * grault. */
+ // optional int32 grault = 6;
+ //
+ // // ignored detached comments.
+ optional string leading_comments = 3;
+ optional string trailing_comments = 4;
+ repeated string leading_detached_comments = 6;
+ }
+}
+
+// Describes the relationship between generated code and its original source
+// file. A GeneratedCodeInfo message is associated with only one generated
+// source file, but may contain references to different source .proto files.
+message GeneratedCodeInfo {
+ // An Annotation connects some span of text in generated code to an element
+ // of its generating .proto file.
+ repeated Annotation annotation = 1;
+ message Annotation {
+ // Identifies the element in the original source .proto file. This field
+ // is formatted the same as SourceCodeInfo.Location.path.
+ repeated int32 path = 1 [packed = true];
+
+ // Identifies the filesystem path to the original source .proto.
+ optional string source_file = 2;
+
+ // Identifies the starting offset in bytes in the generated code
+ // that relates to the identified object.
+ optional int32 begin = 3;
+
+ // Identifies the ending offset in bytes in the generated code that
+ // relates to the identified offset. The end offset should be one past
+ // the last relevant byte (so the length of the text = end - begin).
+ optional int32 end = 4;
+ }
+}
diff --git a/upb/benchmarks/descriptor_sv.proto b/upb/benchmarks/descriptor_sv.proto
new file mode 100644
index 0000000..8ca0888
--- /dev/null
+++ b/upb/benchmarks/descriptor_sv.proto
@@ -0,0 +1,890 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// The messages in this file describe the definitions found in .proto files.
+// A valid .proto file can be translated directly to a FileDescriptorProto
+// without any other information (e.g. without reading its imports).
+
+syntax = "proto2";
+
+package upb_benchmark.sv;
+
+option go_package = "google.golang.org/protobuf/types/descriptorpb";
+option java_package = "com.google.protobuf";
+option java_outer_classname = "DescriptorProtos";
+option csharp_namespace = "Google.Protobuf.Reflection";
+option objc_class_prefix = "GPB";
+option cc_enable_arenas = true;
+
+// The protocol compiler can output a FileDescriptorSet containing the .proto
+// files it parses.
+message FileDescriptorSet {
+ repeated FileDescriptorProto file = 1;
+}
+
+// Describes a complete .proto file.
+message FileDescriptorProto {
+ optional string name = 1
+ [ctype = STRING_PIECE]; // file name, relative to root of source tree
+ optional string package = 2
+ [ctype = STRING_PIECE]; // e.g. "foo", "foo.bar", etc.
+
+ // Names of files imported by this file.
+ repeated string dependency = 3 [ctype = STRING_PIECE];
+ // Indexes of the public imported files in the dependency list above.
+ repeated int32 public_dependency = 10;
+ // Indexes of the weak imported files in the dependency list.
+ // For Google-internal migration only. Do not use.
+ repeated int32 weak_dependency = 11;
+
+ // All top-level definitions in this file.
+ repeated DescriptorProto message_type = 4;
+ repeated EnumDescriptorProto enum_type = 5;
+ repeated ServiceDescriptorProto service = 6;
+ repeated FieldDescriptorProto extension = 7;
+
+ optional FileOptions options = 8;
+
+ // This field contains optional information about the original source code.
+ // You may safely remove this entire field without harming runtime
+ // functionality of the descriptors -- the information is needed only by
+ // development tools.
+ optional SourceCodeInfo source_code_info = 9;
+
+ // The syntax of the proto file.
+ // The supported values are "proto2" and "proto3".
+ optional string syntax = 12 [ctype = STRING_PIECE];
+}
+
+// Describes a message type.
+message DescriptorProto {
+ optional string name = 1 [ctype = STRING_PIECE];
+
+ repeated FieldDescriptorProto field = 2;
+ repeated FieldDescriptorProto extension = 6;
+
+ repeated DescriptorProto nested_type = 3;
+ repeated EnumDescriptorProto enum_type = 4;
+
+ message ExtensionRange {
+ optional int32 start = 1; // Inclusive.
+ optional int32 end = 2; // Exclusive.
+
+ optional ExtensionRangeOptions options = 3;
+ }
+ repeated ExtensionRange extension_range = 5;
+
+ repeated OneofDescriptorProto oneof_decl = 8;
+
+ optional MessageOptions options = 7;
+
+ // Range of reserved tag numbers. Reserved tag numbers may not be used by
+ // fields or extension ranges in the same message. Reserved ranges may
+ // not overlap.
+ message ReservedRange {
+ optional int32 start = 1; // Inclusive.
+ optional int32 end = 2; // Exclusive.
+ }
+ repeated ReservedRange reserved_range = 9;
+ // Reserved field names, which may not be used by fields in the same message.
+ // A given name may only be reserved once.
+ repeated string reserved_name = 10 [ctype = STRING_PIECE];
+}
+
+message ExtensionRangeOptions {
+ // The parser stores options it doesn't recognize here. See above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message. See above.
+ extensions 1000 to max;
+}
+
+// Describes a field within a message.
+message FieldDescriptorProto {
+ enum Type {
+ // 0 is reserved for errors.
+ // Order is weird for historical reasons.
+ TYPE_DOUBLE = 1;
+ TYPE_FLOAT = 2;
+ // Not ZigZag encoded. Negative numbers take 10 bytes. Use TYPE_SINT64 if
+ // negative values are likely.
+ TYPE_INT64 = 3;
+ TYPE_UINT64 = 4;
+ // Not ZigZag encoded. Negative numbers take 10 bytes. Use TYPE_SINT32 if
+ // negative values are likely.
+ TYPE_INT32 = 5;
+ TYPE_FIXED64 = 6;
+ TYPE_FIXED32 = 7;
+ TYPE_BOOL = 8;
+ TYPE_STRING = 9;
+ // Tag-delimited aggregate.
+ // Group type is deprecated and not supported in proto3. However, Proto3
+ // implementations should still be able to parse the group wire format and
+ // treat group fields as unknown fields.
+ TYPE_GROUP = 10;
+ TYPE_MESSAGE = 11; // Length-delimited aggregate.
+
+ // New in version 2.
+ TYPE_BYTES = 12;
+ TYPE_UINT32 = 13;
+ TYPE_ENUM = 14;
+ TYPE_SFIXED32 = 15;
+ TYPE_SFIXED64 = 16;
+ TYPE_SINT32 = 17; // Uses ZigZag encoding.
+ TYPE_SINT64 = 18; // Uses ZigZag encoding.
+ }
+
+ enum Label {
+ // 0 is reserved for errors
+ LABEL_OPTIONAL = 1;
+ LABEL_REQUIRED = 2;
+ LABEL_REPEATED = 3;
+ }
+
+ optional string name = 1 [ctype = STRING_PIECE];
+ optional int32 number = 3;
+ optional Label label = 4;
+
+ // If type_name is set, this need not be set. If both this and type_name
+ // are set, this must be one of TYPE_ENUM, TYPE_MESSAGE or TYPE_GROUP.
+ optional Type type = 5;
+
+ // For message and enum types, this is the name of the type. If the name
+ // starts with a '.', it is fully-qualified. Otherwise, C++-like scoping
+ // rules are used to find the type (i.e. first the nested types within this
+ // message are searched, then within the parent, on up to the root
+ // namespace).
+ optional string type_name = 6 [ctype = STRING_PIECE];
+
+ // For extensions, this is the name of the type being extended. It is
+ // resolved in the same manner as type_name.
+ optional string extendee = 2 [ctype = STRING_PIECE];
+
+ // For numeric types, contains the original text representation of the value.
+ // For booleans, "true" or "false".
+ // For strings, contains the default text contents (not escaped in any way).
+ // For bytes, contains the C escaped value. All bytes >= 128 are escaped.
+ // TODO(kenton): Base-64 encode?
+ optional string default_value = 7 [ctype = STRING_PIECE];
+
+ // If set, gives the index of a oneof in the containing type's oneof_decl
+ // list. This field is a member of that oneof.
+ optional int32 oneof_index = 9;
+
+ // JSON name of this field. The value is set by protocol compiler. If the
+ // user has set a "json_name" option on this field, that option's value
+ // will be used. Otherwise, it's deduced from the field's name by converting
+ // it to camelCase.
+ optional string json_name = 10 [ctype = STRING_PIECE];
+
+ optional FieldOptions options = 8;
+
+ // If true, this is a proto3 "optional". When a proto3 field is optional, it
+ // tracks presence regardless of field type.
+ //
+ // When proto3_optional is true, this field must be belong to a oneof to
+ // signal to old proto3 clients that presence is tracked for this field. This
+ // oneof is known as a "synthetic" oneof, and this field must be its sole
+ // member (each proto3 optional field gets its own synthetic oneof). Synthetic
+ // oneofs exist in the descriptor only, and do not generate any API. Synthetic
+ // oneofs must be ordered after all "real" oneofs.
+ //
+ // For message fields, proto3_optional doesn't create any semantic change,
+ // since non-repeated message fields always track presence. However it still
+ // indicates the semantic detail of whether the user wrote "optional" or not.
+ // This can be useful for round-tripping the .proto file. For consistency we
+ // give message fields a synthetic oneof also, even though it is not required
+ // to track presence. This is especially important because the parser can't
+ // tell if a field is a message or an enum, so it must always create a
+ // synthetic oneof.
+ //
+ // Proto2 optional fields do not set this flag, because they already indicate
+ // optional with `LABEL_OPTIONAL`.
+ optional bool proto3_optional = 17;
+}
+
+// Describes a oneof.
+message OneofDescriptorProto {
+ optional string name = 1 [ctype = STRING_PIECE];
+ optional OneofOptions options = 2;
+}
+
+// Describes an enum type.
+message EnumDescriptorProto {
+ optional string name = 1 [ctype = STRING_PIECE];
+
+ repeated EnumValueDescriptorProto value = 2;
+
+ optional EnumOptions options = 3;
+
+ // Range of reserved numeric values. Reserved values may not be used by
+ // entries in the same enum. Reserved ranges may not overlap.
+ //
+ // Note that this is distinct from DescriptorProto.ReservedRange in that it
+ // is inclusive such that it can appropriately represent the entire int32
+ // domain.
+ message EnumReservedRange {
+ optional int32 start = 1; // Inclusive.
+ optional int32 end = 2; // Inclusive.
+ }
+
+ // Range of reserved numeric values. Reserved numeric values may not be used
+ // by enum values in the same enum declaration. Reserved ranges may not
+ // overlap.
+ repeated EnumReservedRange reserved_range = 4;
+
+ // Reserved enum value names, which may not be reused. A given name may only
+ // be reserved once.
+ repeated string reserved_name = 5 [ctype = STRING_PIECE];
+}
+
+// Describes a value within an enum.
+message EnumValueDescriptorProto {
+ optional string name = 1 [ctype = STRING_PIECE];
+ optional int32 number = 2;
+
+ optional EnumValueOptions options = 3;
+}
+
+// Describes a service.
+message ServiceDescriptorProto {
+ optional string name = 1 [ctype = STRING_PIECE];
+ repeated MethodDescriptorProto method = 2;
+
+ optional ServiceOptions options = 3;
+}
+
+// Describes a method of a service.
+message MethodDescriptorProto {
+ optional string name = 1 [ctype = STRING_PIECE];
+
+ // Input and output type names. These are resolved in the same way as
+ // FieldDescriptorProto.type_name, but must refer to a message type.
+ optional string input_type = 2 [ctype = STRING_PIECE];
+ optional string output_type = 3 [ctype = STRING_PIECE];
+
+ optional MethodOptions options = 4;
+
+ // Identifies if client streams multiple client messages
+ optional bool client_streaming = 5 [default = false];
+ // Identifies if server streams multiple server messages
+ optional bool server_streaming = 6 [default = false];
+}
+
+// ===================================================================
+// Options
+
+// Each of the definitions above may have "options" attached. These are
+// just annotations which may cause code to be generated slightly differently
+// or may contain hints for code that manipulates protocol messages.
+//
+// Clients may define custom options as extensions of the *Options messages.
+// These extensions may not yet be known at parsing time, so the parser cannot
+// store the values in them. Instead it stores them in a field in the *Options
+// message called uninterpreted_option. This field must have the same name
+// across all *Options messages. We then use this field to populate the
+// extensions when we build a descriptor, at which point all protos have been
+// parsed and so all extensions are known.
+//
+// Extension numbers for custom options may be chosen as follows:
+// * For options which will only be used within a single application or
+// organization, or for experimental options, use field numbers 50000
+// through 99999. It is up to you to ensure that you do not use the
+// same number for multiple options.
+// * For options which will be published and used publicly by multiple
+// independent entities, e-mail protobuf-global-extension-registry@google.com
+// to reserve extension numbers. Simply provide your project name (e.g.
+// Objective-C plugin) and your project website (if available) -- there's no
+// need to explain how you intend to use them. Usually you only need one
+// extension number. You can declare multiple options with only one extension
+// number by putting them in a sub-message. See the Custom Options section of
+// the docs for examples:
+// https://developers.google.com/protocol-buffers/docs/proto#options
+// If this turns out to be popular, a web service will be set up
+// to automatically assign option numbers.
+
+message FileOptions {
+ // Sets the Java package where classes generated from this .proto will be
+ // placed. By default, the proto package is used, but this is often
+ // inappropriate because proto packages do not normally start with backwards
+ // domain names.
+ optional string java_package = 1 [ctype = STRING_PIECE];
+
+ // If set, all the classes from the .proto file are wrapped in a single
+ // outer class with the given name. This applies to both Proto1
+ // (equivalent to the old "--one_java_file" option) and Proto2 (where
+ // a .proto always translates to a single class, but you may want to
+ // explicitly choose the class name).
+ optional string java_outer_classname = 8 [ctype = STRING_PIECE];
+
+ // If set true, then the Java code generator will generate a separate .java
+ // file for each top-level message, enum, and service defined in the .proto
+ // file. Thus, these types will *not* be nested inside the outer class
+ // named by java_outer_classname. However, the outer class will still be
+ // generated to contain the file's getDescriptor() method as well as any
+ // top-level extensions defined in the file.
+ optional bool java_multiple_files = 10 [default = false];
+
+ // This option does nothing.
+ optional bool java_generate_equals_and_hash = 20 [deprecated = true];
+
+ // If set true, then the Java2 code generator will generate code that
+ // throws an exception whenever an attempt is made to assign a non-UTF-8
+ // byte sequence to a string field.
+ // Message reflection will do the same.
+ // However, an extension field still accepts non-UTF-8 byte sequences.
+ // This option has no effect on when used with the lite runtime.
+ optional bool java_string_check_utf8 = 27 [default = false];
+
+ // Generated classes can be optimized for speed or code size.
+ enum OptimizeMode {
+ SPEED = 1; // Generate complete code for parsing, serialization,
+ // etc.
+ CODE_SIZE = 2; // Use ReflectionOps to implement these methods.
+ LITE_RUNTIME = 3; // Generate code using MessageLite and the lite runtime.
+ }
+ optional OptimizeMode optimize_for = 9 [default = SPEED];
+
+ // Sets the Go package where structs generated from this .proto will be
+ // placed. If omitted, the Go package will be derived from the following:
+ // - The basename of the package import path, if provided.
+ // - Otherwise, the package statement in the .proto file, if present.
+ // - Otherwise, the basename of the .proto file, without extension.
+ optional string go_package = 11 [ctype = STRING_PIECE];
+
+ // Should generic services be generated in each language? "Generic" services
+ // are not specific to any particular RPC system. They are generated by the
+ // main code generators in each language (without additional plugins).
+ // Generic services were the only kind of service generation supported by
+ // early versions of google.protobuf.
+ //
+ // Generic services are now considered deprecated in favor of using plugins
+ // that generate code specific to your particular RPC system. Therefore,
+ // these default to false. Old code which depends on generic services should
+ // explicitly set them to true.
+ optional bool cc_generic_services = 16 [default = false];
+ optional bool java_generic_services = 17 [default = false];
+ optional bool py_generic_services = 18 [default = false];
+ optional bool php_generic_services = 42 [default = false];
+
+ // Is this file deprecated?
+ // Depending on the target platform, this can emit Deprecated annotations
+ // for everything in the file, or it will be completely ignored; in the very
+ // least, this is a formalization for deprecating files.
+ optional bool deprecated = 23 [default = false];
+
+ // Enables the use of arenas for the proto messages in this file. This applies
+ // only to generated classes for C++.
+ optional bool cc_enable_arenas = 31 [default = true];
+
+ // Sets the objective c class prefix which is prepended to all objective c
+ // generated classes from this .proto. There is no default.
+ optional string objc_class_prefix = 36 [ctype = STRING_PIECE];
+
+ // Namespace for generated classes; defaults to the package.
+ optional string csharp_namespace = 37 [ctype = STRING_PIECE];
+
+ // By default Swift generators will take the proto package and CamelCase it
+ // replacing '.' with underscore and use that to prefix the types/symbols
+ // defined. When this options is provided, they will use this value instead
+ // to prefix the types/symbols defined.
+ optional string swift_prefix = 39 [ctype = STRING_PIECE];
+
+ // Sets the php class prefix which is prepended to all php generated classes
+ // from this .proto. Default is empty.
+ optional string php_class_prefix = 40 [ctype = STRING_PIECE];
+
+ // Use this option to change the namespace of php generated classes. Default
+ // is empty. When this option is empty, the package name will be used for
+ // determining the namespace.
+ optional string php_namespace = 41 [ctype = STRING_PIECE];
+
+ // Use this option to change the namespace of php generated metadata classes.
+ // Default is empty. When this option is empty, the proto file name will be
+ // used for determining the namespace.
+ optional string php_metadata_namespace = 44 [ctype = STRING_PIECE];
+
+ // Use this option to change the package of ruby generated classes. Default
+ // is empty. When this option is not set, the package name will be used for
+ // determining the ruby package.
+ optional string ruby_package = 45 [ctype = STRING_PIECE];
+
+ // The parser stores options it doesn't recognize here.
+ // See the documentation for the "Options" section above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message.
+ // See the documentation for the "Options" section above.
+ extensions 1000 to max;
+
+ reserved 38;
+}
+
+message MessageOptions {
+ // Set true to use the old proto1 MessageSet wire format for extensions.
+ // This is provided for backwards-compatibility with the MessageSet wire
+ // format. You should not use this for any other reason: It's less
+ // efficient, has fewer features, and is more complicated.
+ //
+ // The message must be defined exactly as follows:
+ // message Foo {
+ // option message_set_wire_format = true;
+ // extensions 4 to max;
+ // }
+ // Note that the message cannot have any defined fields; MessageSets only
+ // have extensions.
+ //
+ // All extensions of your type must be singular messages; e.g. they cannot
+ // be int32s, enums, or repeated messages.
+ //
+ // Because this is an option, the above two restrictions are not enforced by
+ // the protocol compiler.
+ optional bool message_set_wire_format = 1 [default = false];
+
+ // Disables the generation of the standard "descriptor()" accessor, which can
+ // conflict with a field of the same name. This is meant to make migration
+ // from proto1 easier; new code should avoid fields named "descriptor".
+ optional bool no_standard_descriptor_accessor = 2 [default = false];
+
+ // Is this message deprecated?
+ // Depending on the target platform, this can emit Deprecated annotations
+ // for the message, or it will be completely ignored; in the very least,
+ // this is a formalization for deprecating messages.
+ optional bool deprecated = 3 [default = false];
+
+ // Whether the message is an automatically generated map entry type for the
+ // maps field.
+ //
+ // For maps fields:
+ // map<KeyType, ValueType> map_field = 1;
+ // The parsed descriptor looks like:
+ // message MapFieldEntry {
+ // option map_entry = true;
+ // optional KeyType key = 1;
+ // optional ValueType value = 2;
+ // }
+ // repeated MapFieldEntry map_field = 1;
+ //
+ // Implementations may choose not to generate the map_entry=true message, but
+ // use a native map in the target language to hold the keys and values.
+ // The reflection APIs in such implementations still need to work as
+ // if the field is a repeated message field.
+ //
+ // NOTE: Do not set the option in .proto files. Always use the maps syntax
+ // instead. The option should only be implicitly set by the proto compiler
+ // parser.
+ optional bool map_entry = 7;
+
+ reserved 8; // javalite_serializable
+ reserved 9; // javanano_as_lite
+
+ // The parser stores options it doesn't recognize here. See above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message. See above.
+ extensions 1000 to max;
+}
+
+message FieldOptions {
+ // The ctype option instructs the C++ code generator to use a different
+ // representation of the field than it normally would. See the specific
+ // options below. This option is not yet implemented in the open source
+ // release -- sorry, we'll try to include it in a future version!
+ optional CType ctype = 1 [default = STRING];
+ enum CType {
+ // Default mode.
+ STRING = 0;
+
+ CORD = 1;
+
+ STRING_PIECE = 2;
+ }
+ // The packed option can be enabled for repeated primitive fields to enable
+ // a more efficient representation on the wire. Rather than repeatedly
+ // writing the tag and type for each element, the entire array is encoded as
+ // a single length-delimited blob. In proto3, only explicit setting it to
+ // false will avoid using packed encoding.
+ optional bool packed = 2;
+
+ // The jstype option determines the JavaScript type used for values of the
+ // field. The option is permitted only for 64 bit integral and fixed types
+ // (int64, uint64, sint64, fixed64, sfixed64). A field with jstype JS_STRING
+ // is represented as JavaScript string, which avoids loss of precision that
+ // can happen when a large value is converted to a floating point JavaScript.
+ // Specifying JS_NUMBER for the jstype causes the generated JavaScript code to
+ // use the JavaScript "number" type. The behavior of the default option
+ // JS_NORMAL is implementation dependent.
+ //
+ // This option is an enum to permit additional types to be added, e.g.
+ // goog.math.Integer.
+ optional JSType jstype = 6 [default = JS_NORMAL];
+ enum JSType {
+ // Use the default type.
+ JS_NORMAL = 0;
+
+ // Use JavaScript strings.
+ JS_STRING = 1;
+
+ // Use JavaScript numbers.
+ JS_NUMBER = 2;
+ }
+
+ // Should this field be parsed lazily? Lazy applies only to message-type
+ // fields. It means that when the outer message is initially parsed, the
+ // inner message's contents will not be parsed but instead stored in encoded
+ // form. The inner message will actually be parsed when it is first accessed.
+ //
+ // This is only a hint. Implementations are free to choose whether to use
+ // eager or lazy parsing regardless of the value of this option. However,
+ // setting this option true suggests that the protocol author believes that
+ // using lazy parsing on this field is worth the additional bookkeeping
+ // overhead typically needed to implement it.
+ //
+ // This option does not affect the public interface of any generated code;
+ // all method signatures remain the same. Furthermore, thread-safety of the
+ // interface is not affected by this option; const methods remain safe to
+ // call from multiple threads concurrently, while non-const methods continue
+ // to require exclusive access.
+ //
+ //
+ // Note that implementations may choose not to check required fields within
+ // a lazy sub-message. That is, calling IsInitialized() on the outer message
+ // may return true even if the inner message has missing required fields.
+ // This is necessary because otherwise the inner message would have to be
+ // parsed in order to perform the check, defeating the purpose of lazy
+ // parsing. An implementation which chooses not to check required fields
+ // must be consistent about it. That is, for any particular sub-message, the
+ // implementation must either *always* check its required fields, or *never*
+ // check its required fields, regardless of whether or not the message has
+ // been parsed.
+ optional bool lazy = 5 [default = false];
+
+ // Is this field deprecated?
+ // Depending on the target platform, this can emit Deprecated annotations
+ // for accessors, or it will be completely ignored; in the very least, this
+ // is a formalization for deprecating fields.
+ optional bool deprecated = 3 [default = false];
+
+ // For Google-internal migration only. Do not use.
+ optional bool weak = 10 [default = false];
+
+ // The parser stores options it doesn't recognize here. See above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message. See above.
+ extensions 1000 to max;
+
+ reserved 4; // removed jtype
+}
+
+message OneofOptions {
+ // The parser stores options it doesn't recognize here. See above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message. See above.
+ extensions 1000 to max;
+}
+
+message EnumOptions {
+ // Set this option to true to allow mapping different tag names to the same
+ // value.
+ optional bool allow_alias = 2;
+
+ // Is this enum deprecated?
+ // Depending on the target platform, this can emit Deprecated annotations
+ // for the enum, or it will be completely ignored; in the very least, this
+ // is a formalization for deprecating enums.
+ optional bool deprecated = 3 [default = false];
+
+ reserved 5; // javanano_as_lite
+
+ // The parser stores options it doesn't recognize here. See above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message. See above.
+ extensions 1000 to max;
+}
+
+message EnumValueOptions {
+ // Is this enum value deprecated?
+ // Depending on the target platform, this can emit Deprecated annotations
+ // for the enum value, or it will be completely ignored; in the very least,
+ // this is a formalization for deprecating enum values.
+ optional bool deprecated = 1 [default = false];
+
+ // The parser stores options it doesn't recognize here. See above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message. See above.
+ extensions 1000 to max;
+}
+
+message ServiceOptions {
+ // Note: Field numbers 1 through 32 are reserved for Google's internal RPC
+ // framework. We apologize for hoarding these numbers to ourselves, but
+ // we were already using them long before we decided to release Protocol
+ // Buffers.
+
+ // Is this service deprecated?
+ // Depending on the target platform, this can emit Deprecated annotations
+ // for the service, or it will be completely ignored; in the very least,
+ // this is a formalization for deprecating services.
+ optional bool deprecated = 33 [default = false];
+
+ // The parser stores options it doesn't recognize here. See above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message. See above.
+ extensions 1000 to max;
+}
+
+message MethodOptions {
+ // Note: Field numbers 1 through 32 are reserved for Google's internal RPC
+ // framework. We apologize for hoarding these numbers to ourselves, but
+ // we were already using them long before we decided to release Protocol
+ // Buffers.
+
+ // Is this method deprecated?
+ // Depending on the target platform, this can emit Deprecated annotations
+ // for the method, or it will be completely ignored; in the very least,
+ // this is a formalization for deprecating methods.
+ optional bool deprecated = 33 [default = false];
+
+ // Is this method side-effect-free (or safe in HTTP parlance), or idempotent,
+ // or neither? HTTP based RPC implementation may choose GET verb for safe
+ // methods, and PUT verb for idempotent methods instead of the default POST.
+ enum IdempotencyLevel {
+ IDEMPOTENCY_UNKNOWN = 0;
+ NO_SIDE_EFFECTS = 1; // implies idempotent
+ IDEMPOTENT = 2; // idempotent, but may have side effects
+ }
+ optional IdempotencyLevel idempotency_level = 34
+ [default = IDEMPOTENCY_UNKNOWN];
+
+ // The parser stores options it doesn't recognize here. See above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message. See above.
+ extensions 1000 to max;
+}
+
+// A message representing a option the parser does not recognize. This only
+// appears in options protos created by the compiler::Parser class.
+// DescriptorPool resolves these when building Descriptor objects. Therefore,
+// options protos in descriptor objects (e.g. returned by Descriptor::options(),
+// or produced by Descriptor::CopyTo()) will never have UninterpretedOptions
+// in them.
+message UninterpretedOption {
+ // The name of the uninterpreted option. Each string represents a segment in
+ // a dot-separated name. is_extension is true iff a segment represents an
+ // extension (denoted with parentheses in options specs in .proto files).
+ // E.g.,{ ["foo", false], ["bar.baz", true], ["qux", false] } represents
+ // "foo.(bar.baz).qux".
+ message NamePart {
+ optional string name_part = 1 [ctype = STRING_PIECE];
+ optional bool is_extension = 2;
+ }
+ repeated NamePart name = 2;
+
+ // The value of the uninterpreted option, in whatever type the tokenizer
+ // identified it as during parsing. Exactly one of these should be set.
+ optional string identifier_value = 3 [ctype = STRING_PIECE];
+ optional uint64 positive_int_value = 4;
+ optional int64 negative_int_value = 5;
+ optional double double_value = 6;
+ optional bytes string_value = 7;
+ optional string aggregate_value = 8 [ctype = STRING_PIECE];
+}
+
+// ===================================================================
+// Optional source code info
+
+// Encapsulates information about the original source file from which a
+// FileDescriptorProto was generated.
+message SourceCodeInfo {
+ // A Location identifies a piece of source code in a .proto file which
+ // corresponds to a particular definition. This information is intended
+ // to be useful to IDEs, code indexers, documentation generators, and similar
+ // tools.
+ //
+ // For example, say we have a file like:
+ // message Foo {
+ // optional string foo = 1 [ctype = STRING_PIECE];
+ // }
+ // Let's look at just the field definition:
+ // optional string foo = 1 [ctype = STRING_PIECE];
+ // ^ ^^ ^^ ^ ^^^
+ // a bc de f ghi
+ // We have the following locations:
+ // span path represents
+ // [a,i) [ 4, 0, 2, 0 ] The whole field definition.
+ // [a,b) [ 4, 0, 2, 0, 4 ] The label (optional).
+ // [c,d) [ 4, 0, 2, 0, 5 ] The type (string).
+ // [e,f) [ 4, 0, 2, 0, 1 ] The name (foo).
+ // [g,h) [ 4, 0, 2, 0, 3 ] The number (1).
+ //
+ // Notes:
+ // - A location may refer to a repeated field itself (i.e. not to any
+ // particular index within it). This is used whenever a set of elements are
+ // logically enclosed in a single code segment. For example, an entire
+ // extend block (possibly containing multiple extension definitions) will
+ // have an outer location whose path refers to the "extensions" repeated
+ // field without an index.
+ // - Multiple locations may have the same path. This happens when a single
+ // logical declaration is spread out across multiple places. The most
+ // obvious example is the "extend" block again -- there may be multiple
+ // extend blocks in the same scope, each of which will have the same path.
+ // - A location's span is not always a subset of its parent's span. For
+ // example, the "extendee" of an extension declaration appears at the
+ // beginning of the "extend" block and is shared by all extensions within
+ // the block.
+ // - Just because a location's span is a subset of some other location's span
+ // does not mean that it is a descendant. For example, a "group" defines
+ // both a type and a field in a single declaration. Thus, the locations
+ // corresponding to the type and field and their components will overlap.
+ // - Code which tries to interpret locations should probably be designed to
+ // ignore those that it doesn't understand, as more types of locations could
+ // be recorded in the future.
+ repeated Location location = 1;
+ message Location {
+ // Identifies which part of the FileDescriptorProto was defined at this
+ // location.
+ //
+ // Each element is a field number or an index. They form a path from
+ // the root FileDescriptorProto to the place where the definition. For
+ // example, this path:
+ // [ 4, 3, 2, 7, 1 ]
+ // refers to:
+ // file.message_type(3) // 4, 3
+ // .field(7) // 2, 7
+ // .name() // 1
+ // This is because FileDescriptorProto.message_type has field number 4:
+ // repeated DescriptorProto message_type = 4;
+ // and DescriptorProto.field has field number 2:
+ // repeated FieldDescriptorProto field = 2;
+ // and FieldDescriptorProto.name has field number 1:
+ // optional string name = 1 [ctype = STRING_PIECE];
+ //
+ // Thus, the above path gives the location of a field name. If we removed
+ // the last element:
+ // [ 4, 3, 2, 7 ]
+ // this path refers to the whole field declaration (from the beginning
+ // of the label to the terminating semicolon).
+ repeated int32 path = 1 [packed = true];
+
+ // Always has exactly three or four elements: start line, start column,
+ // end line (optional, otherwise assumed same as start line), end column.
+ // These are packed into a single field for efficiency. Note that line
+ // and column numbers are zero-based -- typically you will want to add
+ // 1 to each before displaying to a user.
+ repeated int32 span = 2 [packed = true];
+
+ // If this SourceCodeInfo represents a complete declaration, these are any
+ // comments appearing before and after the declaration which appear to be
+ // attached to the declaration.
+ //
+ // A series of line comments appearing on consecutive lines, with no other
+ // tokens appearing on those lines, will be treated as a single comment.
+ //
+ // leading_detached_comments will keep paragraphs of comments that appear
+ // before (but not connected to) the current element. Each paragraph,
+ // separated by empty lines, will be one comment element in the repeated
+ // field.
+ //
+ // Only the comment content is provided; comment markers (e.g. //) are
+ // stripped out. For block comments, leading whitespace and an asterisk
+ // will be stripped from the beginning of each line other than the first.
+ // Newlines are included in the output.
+ //
+ // Examples:
+ //
+ // optional int32 foo = 1; // Comment attached to foo.
+ // // Comment attached to bar.
+ // optional int32 bar = 2;
+ //
+ // optional string baz = 3 [ctype = STRING_PIECE];
+ // // Comment attached to baz.
+ // // Another line attached to baz.
+ //
+ // // Comment attached to qux.
+ // //
+ // // Another line attached to qux.
+ // optional double qux = 4;
+ //
+ // // Detached comment for corge. This is not leading or trailing comments
+ // // to qux or corge because there are blank lines separating it from
+ // // both.
+ //
+ // // Detached comment for corge paragraph 2.
+ //
+ // optional string corge = 5 [ctype = STRING_PIECE];
+ // /* Block comment attached
+ // * to corge. Leading asterisks
+ // * will be removed. */
+ // /* Block comment attached to
+ // * grault. */
+ // optional int32 grault = 6;
+ //
+ // // ignored detached comments.
+ optional string leading_comments = 3 [ctype = STRING_PIECE];
+ optional string trailing_comments = 4 [ctype = STRING_PIECE];
+ repeated string leading_detached_comments = 6 [ctype = STRING_PIECE];
+ }
+}
+
+// Describes the relationship between generated code and its original source
+// file. A GeneratedCodeInfo message is associated with only one generated
+// source file, but may contain references to different source .proto files.
+message GeneratedCodeInfo {
+ // An Annotation connects some span of text in generated code to an element
+ // of its generating .proto file.
+ repeated Annotation annotation = 1;
+ message Annotation {
+ // Identifies the element in the original source .proto file. This field
+ // is formatted the same as SourceCodeInfo.Location.path.
+ repeated int32 path = 1 [packed = true];
+
+ // Identifies the filesystem path to the original source .proto.
+ optional string source_file = 2 [ctype = STRING_PIECE];
+
+ // Identifies the starting offset in bytes in the generated code
+ // that relates to the identified object.
+ optional int32 begin = 3;
+
+ // Identifies the ending offset in bytes in the generated code that
+ // relates to the identified offset. The end offset should be one past
+ // the last relevant byte (so the length of the text = end - begin).
+ optional int32 end = 4;
+ }
+}
diff --git a/upb/benchmarks/empty.proto b/upb/benchmarks/empty.proto
new file mode 100644
index 0000000..3c32ccc
--- /dev/null
+++ b/upb/benchmarks/empty.proto
@@ -0,0 +1,35 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto3";
+
+package upb_benchmark;
+
+message Empty {}
diff --git a/upb/benchmarks/gen_protobuf_binary_cc.py b/upb/benchmarks/gen_protobuf_binary_cc.py
new file mode 100644
index 0000000..ece5b23
--- /dev/null
+++ b/upb/benchmarks/gen_protobuf_binary_cc.py
@@ -0,0 +1,69 @@
+#!/usr/bin/python3
+#
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import sys
+import re
+
+include = sys.argv[1]
+msg_basename = sys.argv[2]
+count = 1
+
+m = re.search(r'(.*\D)(\d+)$', sys.argv[2])
+if m:
+ msg_basename = m.group(1)
+ count = int(m.group(2))
+
+print('''
+#include "{include}"
+
+char buf[1];
+
+int main() {{
+'''.format(include=include))
+
+def RefMessage(name):
+ print('''
+ {{
+ {name} proto;
+ proto.ParseFromArray(buf, 0);
+ proto.SerializePartialToArray(&buf[0], 0);
+ }}
+ '''.format(name=name))
+
+RefMessage(msg_basename)
+
+for i in range(2, count + 1):
+ RefMessage(msg_basename + str(i))
+
+print('''
+ return 0;
+}''')
diff --git a/upb/benchmarks/gen_synthetic_protos.py b/upb/benchmarks/gen_synthetic_protos.py
new file mode 100644
index 0000000..3b4e37e
--- /dev/null
+++ b/upb/benchmarks/gen_synthetic_protos.py
@@ -0,0 +1,123 @@
+#!/usr/bin/python3
+#
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import sys
+import random
+
+base = sys.argv[1]
+
+field_freqs = [
+ (('bool', 'optional'), 8.321),
+ (('bool', 'repeated'), 0.033),
+ (('bytes', 'optional'), 0.809),
+ (('bytes', 'repeated'), 0.065),
+ (('double', 'optional'), 2.845),
+ (('double', 'repeated'), 0.143),
+ (('fixed32', 'optional'), 0.084),
+ (('fixed32', 'repeated'), 0.012),
+ (('fixed64', 'optional'), 0.204),
+ (('fixed64', 'repeated'), 0.027),
+ (('float', 'optional'), 2.355),
+ (('float', 'repeated'), 0.132),
+ (('int32', 'optional'), 6.717),
+ (('int32', 'repeated'), 0.366),
+ (('int64', 'optional'), 9.678),
+ (('int64', 'repeated'), 0.425),
+ (('sfixed32', 'optional'), 0.018),
+ (('sfixed32', 'repeated'), 0.005),
+ (('sfixed64', 'optional'), 0.022),
+ (('sfixed64', 'repeated'), 0.005),
+ (('sint32', 'optional'), 0.026),
+ (('sint32', 'repeated'), 0.009),
+ (('sint64', 'optional'), 0.018),
+ (('sint64', 'repeated'), 0.006),
+ (('string', 'optional'), 25.461),
+ (('string', 'repeated'), 2.606),
+ (('Enum', 'optional'), 6.16),
+ (('Enum', 'repeated'), 0.576),
+ (('Message', 'optional'), 22.472),
+ (('Message', 'repeated'), 7.766),
+ (('uint32', 'optional'), 1.289),
+ (('uint32', 'repeated'), 0.051),
+ (('uint64', 'optional'), 1.044),
+ (('uint64', 'repeated'), 0.079),
+]
+
+population = [item[0] for item in field_freqs]
+weights = [item[1] for item in field_freqs]
+
+def choices(k):
+ if sys.version_info >= (3, 6):
+ return random.choices(population=population, weights=weights, k=k)
+ else:
+ print("WARNING: old Python version, field types are not properly weighted!")
+ return [random.choice(population) for _ in range(k)]
+
+with open(base + "/100_msgs.proto", "w") as f:
+ f.write('syntax = "proto3";\n')
+ f.write('package upb_benchmark;\n')
+ f.write('message Message {}\n')
+ for i in range(2, 101):
+ f.write('message Message{i} {{}}\n'.format(i=i))
+
+with open(base + "/200_msgs.proto", "w") as f:
+ f.write('syntax = "proto3";\n')
+ f.write('package upb_benchmark;\n')
+ f.write('message Message {}\n')
+ for i in range(2, 501):
+ f.write('message Message{i} {{}}\n'.format(i=i))
+
+with open(base + "/100_fields.proto", "w") as f:
+ f.write('syntax = "proto2";\n')
+ f.write('package upb_benchmark;\n')
+ f.write('enum Enum { ZERO = 0; }\n')
+ f.write('message Message {\n')
+ i = 1
+ random.seed(a=0, version=2)
+ for field in choices(100):
+ field_type, label = field
+ f.write(' {label} {field_type} field{i} = {i};\n'.format(i=i, label=label, field_type=field_type))
+ i += 1
+ f.write('}\n')
+
+with open(base + "/200_fields.proto", "w") as f:
+ f.write('syntax = "proto2";\n')
+ f.write('package upb_benchmark;\n')
+ f.write('enum Enum { ZERO = 0; }\n')
+ f.write('message Message {\n')
+ i = 1
+ random.seed(a=0, version=2)
+ for field in choices(200):
+ field_type, label = field
+ f.write(' {label} {field_type} field{i} = {i};\n'.format(i=i, label=label,field_type=field_type))
+ i += 1
+ f.write('}\n')
diff --git a/upb/benchmarks/gen_upb_binary_c.py b/upb/benchmarks/gen_upb_binary_c.py
new file mode 100644
index 0000000..a68a779
--- /dev/null
+++ b/upb/benchmarks/gen_upb_binary_c.py
@@ -0,0 +1,70 @@
+#!/usr/bin/python3
+#
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import sys
+import re
+
+include = sys.argv[1]
+msg_basename = sys.argv[2]
+count = 1
+
+m = re.search(r'(.*\D)(\d+)$', sys.argv[2])
+if m:
+ msg_basename = m.group(1)
+ count = int(m.group(2))
+
+print('''
+#include "{include}"
+
+char buf[1];
+
+int main() {{
+ upb_Arena *arena = upb_Arena_New();
+ size_t size;
+'''.format(include=include))
+
+def RefMessage(name):
+ print('''
+ {{
+ {name} *proto = {name}_parse(buf, 1, arena);
+ {name}_serialize(proto, arena, &size);
+ }}
+ '''.format(name=name))
+
+RefMessage(msg_basename)
+
+for i in range(2, count + 1):
+ RefMessage(msg_basename + str(i))
+
+print('''
+ return 0;
+}''')
diff --git a/upb/cmake/BUILD.bazel b/upb/cmake/BUILD.bazel
new file mode 100644
index 0000000..aec4615
--- /dev/null
+++ b/upb/cmake/BUILD.bazel
@@ -0,0 +1,125 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load(
+ ":build_defs.bzl",
+ "staleness_test",
+)
+load(
+ "//bazel:build_defs.bzl",
+ "make_shell_script",
+)
+
+licenses(["notice"])
+
+exports_files(["staleness_test.py"])
+
+py_library(
+ name = "staleness_test_lib",
+ testonly = 1,
+ srcs = ["staleness_test_lib.py"],
+ # This is public only for use by the staleness_test() macro. Please do not
+ # depend on this target directly.
+ visibility = ["//visibility:public"],
+)
+
+py_binary(
+ name = "make_cmakelists",
+ srcs = ["make_cmakelists.py"],
+)
+
+genrule(
+ name = "gen_cmakelists",
+ srcs = [
+ "//:BUILD",
+ "//:WORKSPACE",
+ "//bazel:workspace_deps.bzl",
+ ],
+ outs = ["generated-in/CMakeLists.txt"],
+ cmd = "$(location :make_cmakelists) " +
+ "$(location //bazel:workspace_deps.bzl) " +
+ "$(location //:WORKSPACE) " +
+ "$(location //:BUILD) $@",
+ tools = [":make_cmakelists"],
+)
+
+genrule(
+ name = "copy_protos",
+ srcs = ["//:descriptor_upb_proto"],
+ outs = [
+ "generated-in/google/protobuf/descriptor.upb.c",
+ "generated-in/google/protobuf/descriptor.upb.h",
+ ],
+ cmd = "cp $(SRCS) $(@D)/generated-in/google/protobuf",
+)
+
+staleness_test(
+ name = "test_generated_files",
+ outs = [
+ "CMakeLists.txt",
+ "google/protobuf/descriptor.upb.c",
+ "google/protobuf/descriptor.upb.h",
+ ],
+ generated_pattern = "generated-in/%s",
+ tags = ["manual"],
+)
+
+# Test the CMake build #########################################################
+
+make_shell_script(
+ name = "gen_run_cmake_build",
+ out = "run_cmake_build.sh",
+ contents = "set -ex\n" +
+ "cd $(dirname $1) && cp -r . .. && cd ../..\n" +
+ "mkdir build && cd build && cmake ../cmake && make -j8 && make test",
+)
+
+sh_test(
+ name = "cmake_build",
+ srcs = ["run_cmake_build.sh"],
+ args = ["$(location :gen_cmakelists)"],
+ data = [
+ ":copy_protos",
+ ":gen_cmakelists",
+ "//:source_files",
+ "//upb/base:source_files",
+ "//upb/collections:source_files",
+ "//upb/hash:source_files",
+ "//upb/lex:source_files",
+ "//upb/mem:source_files",
+ "//upb/message:source_files",
+ "//upb/mini_descriptor:source_files",
+ "//upb/mini_table:source_files",
+ "//upb/port:source_files",
+ "//upb/text:source_files",
+ "//upb/wire:source_files",
+ "@utf8_range//:utf8_range_srcs",
+ ],
+ target_compatible_with = select({
+ "@platforms//os:windows": ["@platforms//:incompatible"],
+ "//conditions:default": [],
+ }),
+ deps = ["@bazel_tools//tools/bash/runfiles"],
+)
diff --git a/upb/cmake/README.md b/upb/cmake/README.md
new file mode 100644
index 0000000..211a054
--- /dev/null
+++ b/upb/cmake/README.md
@@ -0,0 +1,23 @@
+
+# upb CMake build (EXPERIMENTAL)
+
+upb's CMake support is experimental. The core library builds successfully
+under CMake, and this is verified by the Bazel tests in this directory.
+However there is no support for building the upb compiler or for generating
+.upb.c/upb.h files. This means upb's CMake support is incomplete at best,
+unless your application is intended to be purely reflective.
+
+If you find this CMake setup useful in its current state, please consider
+filing an issue so we know. If you have suggestions for how it could be
+more useful (and particularly if you can contribute some code for it)
+please feel free to file an issue for that too. Do keep in mind that upb
+does not currently provide any ABI stability, so we want to avoid providing
+a shared library.
+
+The CMakeLists.txt is generated from the Bazel BUILD files using the Python
+scripts in this directory. We want to avoid having two separate sources of
+truth that both need to be updated when a file is added or removed.
+
+This directory also contains some generated files that would be created
+on the fly during a Bazel build. These are automaticaly kept in sync by
+the Bazel test `//cmake:test_generated_files`.
diff --git a/upb/cmake/build_defs.bzl b/upb/cmake/build_defs.bzl
new file mode 100644
index 0000000..157c3cb
--- /dev/null
+++ b/upb/cmake/build_defs.bzl
@@ -0,0 +1,77 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Bazel support functions related to CMake support."""
+
+def staleness_test(name, outs, generated_pattern, target_files = None, tags = [], **kwargs):
+ """Tests that checked-in file(s) match the contents of generated file(s).
+
+ The resulting test will verify that all output files exist and have the
+ correct contents. If the test fails, it can be invoked with --fix to
+ bring the checked-in files up to date.
+
+ Args:
+ name: Name of the rule.
+ outs: the checked-in files that are copied from generated files.
+ generated_pattern: the pattern for transforming each "out" file into a
+ generated file. For example, if generated_pattern="generated/%s" then
+ a file foo.txt will look for generated file generated/foo.txt.
+ target_files: A glob representing all of the files that should be
+ covered by this rule. Files in this glob but not generated will
+ be deleted. (Not currently implemented in OSS).
+ **kwargs: Additional keyword arguments to pass through to py_test().
+ """
+
+ script_name = name + ".py"
+ script_src = Label("//cmake:staleness_test.py")
+
+ # Filter out non-existing rules so Blaze doesn't error out before we even
+ # run the test.
+ existing_outs = native.glob(include = outs)
+
+ # The file list contains a few extra bits of information at the end.
+ # These get unpacked by the Config class in staleness_test_lib.py.
+ file_list = outs + [generated_pattern, native.package_name() or ".", name]
+
+ native.genrule(
+ name = name + "_makescript",
+ outs = [script_name],
+ srcs = [script_src],
+ testonly = 1,
+ cmd = "cp $< $@; " +
+ "sed -i.bak -e 's|INSERT_FILE_LIST_HERE|" + "\\\n ".join(file_list) + "|' $@",
+ )
+
+ native.py_test(
+ name = name,
+ srcs = [script_name],
+ data = existing_outs + [generated_pattern % file for file in outs],
+ python_version = "PY3",
+ deps = [
+ Label("//cmake:staleness_test_lib"),
+ ],
+ tags = ["staleness_test"] + tags,
+ **kwargs
+ )
diff --git a/upb/cmake/make_cmakelists.py b/upb/cmake/make_cmakelists.py
new file mode 100755
index 0000000..b38dcca
--- /dev/null
+++ b/upb/cmake/make_cmakelists.py
@@ -0,0 +1,420 @@
+#!/usr/bin/python
+#
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""A tool to convert {WORKSPACE, BUILD} -> CMakeLists.txt.
+
+This tool is very upb-specific at the moment, and should not be seen as a
+generic Bazel -> CMake converter.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import sys
+import textwrap
+import os
+
+def StripFirstChar(deps):
+ return [dep[1:] for dep in deps]
+
+def IsSourceFile(name):
+ return name.endswith(".c") or name.endswith(".cc")
+
+
+ADD_LIBRARY_FORMAT = """
+add_library(%(name)s %(type)s
+ %(sources)s
+)
+target_include_directories(%(name)s %(keyword)s
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/..>
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../cmake>
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_BINRARY_DIR}>
+)
+"""
+
+
+class BuildFileFunctions(object):
+ def __init__(self, converter):
+ self.converter = converter
+
+ def _add_deps(self, kwargs, keyword=""):
+ if "deps" not in kwargs:
+ return
+ self.converter.toplevel += "target_link_libraries(%s%s\n %s)\n" % (
+ kwargs["name"],
+ keyword,
+ "\n ".join(StripFirstChar(kwargs["deps"]))
+ )
+
+ def load(self, *args):
+ pass
+
+ def cc_library(self, **kwargs):
+ if kwargs["name"].endswith("amalgamation"):
+ return
+ if kwargs["name"] == "upbc_generator":
+ return
+ if kwargs["name"] == "lupb":
+ return
+ if "testonly" in kwargs:
+ return
+ files = kwargs.get("srcs", []) + kwargs.get("hdrs", [])
+ found_files = []
+ pregenerated_files = [
+ "CMakeLists.txt", "descriptor.upb.h", "descriptor.upb.c"
+ ]
+ for file in files:
+ if os.path.basename(file) in pregenerated_files:
+ found_files.append("../cmake/" + file)
+ else:
+ found_files.append("../" + file)
+
+ if list(filter(IsSourceFile, files)):
+ # Has sources, make this a normal library.
+ self.converter.toplevel += ADD_LIBRARY_FORMAT % {
+ "name": kwargs["name"],
+ "type": "",
+ "keyword": "PUBLIC",
+ "sources": "\n ".join(found_files),
+ }
+ self._add_deps(kwargs)
+ else:
+ # Header-only library, have to do a couple things differently.
+ # For some info, see:
+ # http://mariobadr.com/creating-a-header-only-library-with-cmake.html
+ self.converter.toplevel += ADD_LIBRARY_FORMAT % {
+ "name": kwargs["name"],
+ "type": "INTERFACE",
+ "keyword": "INTERFACE",
+ "sources": "",
+ }
+ self._add_deps(kwargs, " INTERFACE")
+
+ def cc_binary(self, **kwargs):
+ pass
+
+ def cc_test(self, **kwargs):
+ # Disable this until we properly support upb_proto_library().
+ # self.converter.toplevel += "add_executable(%s\n %s)\n" % (
+ # kwargs["name"],
+ # "\n ".join(kwargs["srcs"])
+ # )
+ # self.converter.toplevel += "add_test(NAME %s COMMAND %s)\n" % (
+ # kwargs["name"],
+ # kwargs["name"],
+ # )
+
+ # if "data" in kwargs:
+ # for data_dep in kwargs["data"]:
+ # self.converter.toplevel += textwrap.dedent("""\
+ # add_custom_command(
+ # TARGET %s POST_BUILD
+ # COMMAND ${CMAKE_COMMAND} -E copy
+ # ${CMAKE_SOURCE_DIR}/%s
+ # ${CMAKE_CURRENT_BINARY_DIR}/%s)\n""" % (
+ # kwargs["name"], data_dep, data_dep
+ # ))
+
+ # self._add_deps(kwargs)
+ pass
+
+ def cc_fuzz_test(self, **kwargs):
+ pass
+
+ def pkg_files(self, **kwargs):
+ pass
+
+ def py_library(self, **kwargs):
+ pass
+
+ def py_binary(self, **kwargs):
+ pass
+
+ def lua_proto_library(self, **kwargs):
+ pass
+
+ def sh_test(self, **kwargs):
+ pass
+
+ def make_shell_script(self, **kwargs):
+ pass
+
+ def exports_files(self, files, **kwargs):
+ pass
+
+ def proto_library(self, **kwargs):
+ pass
+
+ def cc_proto_library(self, **kwargs):
+ pass
+
+ def staleness_test(self, **kwargs):
+ pass
+
+ def upb_amalgamation(self, **kwargs):
+ pass
+
+ def upb_proto_library(self, **kwargs):
+ pass
+
+ def upb_proto_library_copts(self, **kwargs):
+ pass
+
+ def upb_proto_reflection_library(self, **kwargs):
+ pass
+
+ def upb_proto_srcs(self, **kwargs):
+ pass
+
+ def genrule(self, **kwargs):
+ pass
+
+ def config_setting(self, **kwargs):
+ pass
+
+ def upb_fasttable_enabled(self, **kwargs):
+ pass
+
+ def select(self, arg_dict):
+ return []
+
+ def glob(self, *args, **kwargs):
+ return []
+
+ def licenses(self, *args):
+ pass
+
+ def filegroup(self, **kwargs):
+ pass
+
+ def map_dep(self, arg):
+ return arg
+
+ def package_group(self, **kwargs):
+ pass
+
+ def bool_flag(self, **kwargs):
+ pass
+
+ def bootstrap_upb_proto_library(self, **kwargs):
+ pass
+
+ def bootstrap_cc_library(self, **kwargs):
+ pass
+
+ def alias(self, **kwargs):
+ pass
+
+
+class WorkspaceFileFunctions(object):
+ def __init__(self, converter):
+ self.converter = converter
+
+ def load(self, *args, **kwargs):
+ pass
+
+ def workspace(self, **kwargs):
+ self.converter.prelude += "project(%s)\n" % (kwargs["name"])
+ self.converter.prelude += "set(CMAKE_C_STANDARD 99)\n"
+
+ def maybe(self, rule, **kwargs):
+ if kwargs["name"] == "utf8_range":
+ self.converter.utf8_range_commit = kwargs["commit"]
+ pass
+
+ def http_archive(self, **kwargs):
+ pass
+
+ def git_repository(self, **kwargs):
+ pass
+
+ def new_git_repository(self, **kwargs):
+ pass
+
+ def bazel_version_repository(self, **kwargs):
+ pass
+
+ def protobuf_deps(self):
+ pass
+
+ def utf8_range_deps(self):
+ pass
+
+ def pip_parse(self, **kwargs):
+ pass
+
+ def rules_fuzzing_dependencies(self):
+ pass
+
+ def rules_fuzzing_init(self):
+ pass
+
+ def rules_pkg_dependencies(self):
+ pass
+
+ def system_python(self, **kwargs):
+ pass
+
+ def register_system_python(self, **kwargs):
+ pass
+
+ def register_toolchains(self, toolchain):
+ pass
+
+ def python_source_archive(self, **kwargs):
+ pass
+
+ def python_nuget_package(self, **kwargs):
+ pass
+
+ def install_deps(self):
+ pass
+
+ def fuzzing_py_install_deps(self):
+ pass
+
+ def googletest_deps(self):
+ pass
+
+
+class Converter(object):
+ def __init__(self):
+ self.prelude = ""
+ self.toplevel = ""
+ self.if_lua = ""
+ self.utf8_range_commit = ""
+
+ def convert(self):
+ return self.template % {
+ "prelude": converter.prelude,
+ "toplevel": converter.toplevel,
+ "utf8_range_commit": converter.utf8_range_commit,
+ }
+
+ template = textwrap.dedent("""\
+ # This file was generated from BUILD using tools/make_cmakelists.py.
+
+ cmake_minimum_required(VERSION 3.10...3.24)
+
+ %(prelude)s
+
+ # Prevent CMake from setting -rdynamic on Linux (!!).
+ SET(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "")
+ SET(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "")
+
+ # Set default build type.
+ if(NOT CMAKE_BUILD_TYPE)
+ message(STATUS "Setting build type to 'RelWithDebInfo' as none was specified.")
+ set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
+ "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel."
+ FORCE)
+ endif()
+
+ # When using Ninja, compiler output won't be colorized without this.
+ include(CheckCXXCompilerFlag)
+ CHECK_CXX_COMPILER_FLAG(-fdiagnostics-color=always SUPPORTS_COLOR_ALWAYS)
+ if(SUPPORTS_COLOR_ALWAYS)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always")
+ endif()
+
+ # Implement ASAN/UBSAN options
+ if(UPB_ENABLE_ASAN)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address")
+ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address")
+ set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fsanitize=address")
+ endif()
+
+ if(UPB_ENABLE_UBSAN)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address")
+ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address")
+ set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fsanitize=address")
+ endif()
+
+ if(NOT TARGET utf8_range)
+ if(EXISTS ../external/utf8_range)
+ # utf8_range is already installed
+ include_directories(../external/utf8_range)
+ elseif(EXISTS ../../utf8_range)
+ include_directories(../../utf8_range)
+ else()
+ include(FetchContent)
+ FetchContent_Declare(
+ utf8_range
+ GIT_REPOSITORY "https://github.com/protocolbuffers/utf8_range.git"
+ GIT_TAG "%(utf8_range_commit)s"
+ )
+ FetchContent_GetProperties(utf8_range)
+ if(NOT utf8_range_POPULATED)
+ FetchContent_Populate(utf8_range)
+ include_directories(${utf8_range_SOURCE_DIR})
+ endif()
+ endif()
+ endif()
+
+ if(APPLE)
+ set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -undefined dynamic_lookup -flat_namespace")
+ elseif(UNIX)
+ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--build-id")
+ endif()
+
+ enable_testing()
+
+ %(toplevel)s
+
+ """)
+
+data = {}
+converter = Converter()
+
+def GetDict(obj):
+ ret = {}
+ ret["UPB_DEFAULT_COPTS"] = [] # HACK
+ ret["UPB_DEFAULT_CPPOPTS"] = [] # HACK
+ for k in dir(obj):
+ if not k.startswith("_"):
+ ret[k] = getattr(obj, k);
+ return ret
+
+globs = GetDict(converter)
+
+workspace_dict = GetDict(WorkspaceFileFunctions(converter))
+# We take all file paths as command-line arguments to ensure that we can find
+# each file regardless of how exactly Bazel was invoked.
+exec(open(sys.argv[1]).read(), workspace_dict) # workspace_deps.bzl
+exec(open(sys.argv[2]).read(), workspace_dict) # WORKSPACE
+exec(open(sys.argv[3]).read(), GetDict(BuildFileFunctions(converter))) # BUILD
+
+with open(sys.argv[4], "w") as f:
+ f.write(converter.convert())
diff --git a/upb/cmake/push_auto_update.sh b/upb/cmake/push_auto_update.sh
new file mode 100755
index 0000000..6b7dba0
--- /dev/null
+++ b/upb/cmake/push_auto_update.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This script updates checked-in generated files (currently CMakeLists.txt,
+# descriptor.upb.h, and descriptor.upb.c), commits the resulting change, and
+# pushes it. This does not do anything useful when run manually, but should be
+# run by our GitHub action instead.
+
+set -ex
+
+# Exit early if the previous commit was made by the bot. This reduces the risk
+# of a bug causing an infinite loop of auto-generated commits.
+if (git log -1 --pretty=format:'%an' | grep -q "Protobuf Team Bot"); then
+ echo "Previous commit was authored by bot"
+ exit 0
+fi
+
+cd $(dirname -- "$0")/..
+bazel test //cmake:test_generated_files || bazel-bin/cmake/test_generated_files --fix
+
+# Try to determine the most recent pull request number.
+title=$(git log -1 --pretty='%s')
+pr_from_merge=$(echo "$title" | sed -n 's/^Merge pull request #\([0-9]\+\).*/\1/p')
+pr_from_squash=$(echo "$title" | sed -n 's/^.*(#\([0-9]\+\))$/\1/p')
+
+pr_number=""
+if [ ! -z "$pr_from_merge" ]; then
+ pr_number="$pr_from_merge"
+elif [ ! -z "$pr_from_squash" ]; then
+ pr_number="$pr_from_squash"
+fi
+
+if [ ! -z "$pr_number" ]; then
+ commit_message="Auto-generate CMake file lists after PR #$pr_number"
+else
+ # If we are unable to determine the pull request number, we fall back on this
+ # default commit message. Typically this should not occur, but could happen
+ # if a pull request was merged via a rebase.
+ commit_message="Auto-generate CMake file lists"
+fi
+
+git add -A
+git diff --staged --quiet || git commit -am "$commit_message"
+git push
diff --git a/upb/cmake/staleness_test.py b/upb/cmake/staleness_test.py
new file mode 100644
index 0000000..c2971c2
--- /dev/null
+++ b/upb/cmake/staleness_test.py
@@ -0,0 +1,60 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""The py_test() script for staleness_test() rules.
+
+Note that this file is preprocessed! The INSERT_<...> text below is replaced
+with the actual list of files before we actually run the script.
+"""
+
+from __future__ import absolute_import
+
+from cmake import staleness_test_lib
+import unittest
+import sys
+
+file_list = """
+ INSERT_FILE_LIST_HERE
+""".split()
+
+config = staleness_test_lib.Config(file_list)
+
+
+class TestFilesMatch(unittest.TestCase):
+
+ def testFilesMatch(self):
+ errors = staleness_test_lib.CheckFilesMatch(config)
+ self.assertFalse(errors, errors)
+
+
+if len(sys.argv) > 1 and sys.argv[1] == "--fix":
+ staleness_test_lib.FixFiles(config)
+else:
+ unittest.main()
diff --git a/upb/cmake/staleness_test_lib.py b/upb/cmake/staleness_test_lib.py
new file mode 100644
index 0000000..d9b78a6
--- /dev/null
+++ b/upb/cmake/staleness_test_lib.py
@@ -0,0 +1,194 @@
+#!/usr/bin/python
+#
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Shared code for validating staleness_test() rules.
+
+This code is used by test scripts generated from staleness_test() rules.
+"""
+
+from __future__ import absolute_import
+from __future__ import print_function
+
+import difflib
+import sys
+import os
+from shutil import copyfile
+
+
+class _FilePair(object):
+ """Represents a single (target, generated) file pair."""
+
+ def __init__(self, target, generated):
+ self.target = target
+ self.generated = generated
+
+
+class Config(object):
+ """Represents the configuration for a single staleness test target."""
+
+ def __init__(self, file_list):
+ # Duplicate to avoid modifying our arguments.
+ file_list = list(file_list)
+
+ # The file list contains a few other bits of information at the end.
+ # This is packed by the code in build_defs.bzl.
+ self.target_name = file_list.pop()
+ self.package_name = file_list.pop()
+ self.pattern = file_list.pop()
+
+ self.file_list = file_list
+
+
+def _GetFilePairs(config):
+ """Generates the list of file pairs.
+
+ Args:
+ config: a Config object representing this target's config.
+
+ Returns:
+ A list of _FilePair objects.
+ """
+
+ ret = []
+
+ has_bazel_genfiles = os.path.exists("bazel-bin")
+
+ for filename in config.file_list:
+ target = os.path.join(config.package_name, filename)
+ generated = os.path.join(config.package_name, config.pattern % filename)
+ if has_bazel_genfiles:
+ generated = os.path.join("bazel-bin", generated)
+
+ # Generated files should always exist. Blaze should guarantee this before
+ # we are run.
+ if not os.path.isfile(generated):
+ print("Generated file '%s' does not exist." % generated)
+ print("Please run this command to generate it:")
+ print(" bazel build %s:%s" % (config.package_name, config.target_name))
+ sys.exit(1)
+ ret.append(_FilePair(target, generated))
+
+ return ret
+
+
+def _GetMissingAndStaleFiles(file_pairs):
+ """Generates lists of missing and stale files.
+
+ Args:
+ file_pairs: a list of _FilePair objects.
+
+ Returns:
+ missing_files: a list of _FilePair objects representing missing files.
+ These target files do not exist at all.
+ stale_files: a list of _FilePair objects representing stale files.
+ These target files exist but have stale contents.
+ """
+
+ missing_files = []
+ stale_files = []
+
+ for pair in file_pairs:
+ if not os.path.isfile(pair.target):
+ missing_files.append(pair)
+ continue
+
+ with open(pair.generated) as g, open(pair.target) as t:
+ if g.read() != t.read():
+ stale_files.append(pair)
+
+ return missing_files, stale_files
+
+
+def _CopyFiles(file_pairs):
+ """Copies all generated files to the corresponding target file.
+
+ The target files must be writable already.
+
+ Args:
+ file_pairs: a list of _FilePair objects that we want to copy.
+ """
+
+ for pair in file_pairs:
+ target_dir = os.path.dirname(pair.target)
+ if not os.path.isdir(target_dir):
+ os.makedirs(target_dir)
+ copyfile(pair.generated, pair.target)
+
+
+def FixFiles(config):
+ """Implements the --fix option: overwrites missing or out-of-date files.
+
+ Args:
+ config: the Config object for this test.
+ """
+
+ file_pairs = _GetFilePairs(config)
+ missing_files, stale_files = _GetMissingAndStaleFiles(file_pairs)
+
+ _CopyFiles(stale_files + missing_files)
+
+
+def CheckFilesMatch(config):
+ """Checks whether each target file matches the corresponding generated file.
+
+ Args:
+ config: the Config object for this test.
+
+ Returns:
+ None if everything matches, otherwise a string error message.
+ """
+
+ diff_errors = []
+
+ file_pairs = _GetFilePairs(config)
+ missing_files, stale_files = _GetMissingAndStaleFiles(file_pairs)
+
+ for pair in missing_files:
+ diff_errors.append("File %s does not exist" % pair.target)
+ continue
+
+ for pair in stale_files:
+ with open(pair.generated) as g, open(pair.target) as t:
+ diff = ''.join(difflib.unified_diff(g.read().splitlines(keepends=True),
+ t.read().splitlines(keepends=True)))
+ diff_errors.append("File %s is out of date:\n%s" % (pair.target, diff))
+
+ if diff_errors:
+ error_msg = "Files out of date!\n\n"
+ error_msg += "To fix run THIS command:\n"
+ error_msg += " bazel-bin/%s/%s --fix\n\n" % (config.package_name,
+ config.target_name)
+ error_msg += "Errors:\n"
+ error_msg += " " + "\n ".join(diff_errors)
+ return error_msg
+ else:
+ return None
diff --git a/upb/docs/design.md b/upb/docs/design.md
new file mode 100644
index 0000000..a46364a
--- /dev/null
+++ b/upb/docs/design.md
@@ -0,0 +1,429 @@
+# upb Design
+
+[TOC]
+
+upb is a protobuf kernel written in C. It is a fast and conformant
+implementation of protobuf, with a low-level C API that is designed to be
+wrapped in other languages.
+
+upb is not designed to be used by applications directly. The C API is very
+low-level, unsafe, and changes frequently. It is important that upb is able to
+make breaking API changes as necessary, to avoid taking on technical debt that
+would compromise upb's goals of small code size and fast performance.
+
+## Design goals
+
+Goals:
+
+- Full protobuf conformance
+- Small code size
+- Fast performance (without compromising code size)
+- Easy to wrap in language runtimes
+- Easy to adapt to different memory management schemes (refcounting, GC, etc)
+
+Non-Goals:
+
+- Stable API
+- Safe API
+- Ergonomic API for applications
+
+Parameters:
+
+- C99
+- 32 or 64-bit CPU (assumes 4 or 8 byte pointers)
+- Uses pointer tagging, but avoids other implementation-defined behavior
+- Aims to never invoke undefined behavior (tests with ASAN, UBSAN, etc)
+- No global state, fully re-entrant
+
+## Arenas
+
+All memory management in upb uses arenas, using the type `upb_Arena`. Arenas are
+an alternative to `malloc()` and `free()` that significantly reduces the costs
+of memory allocation.
+
+Arenas obtain blocks of memory using some underlying allocator (likely
+`malloc()` and `free()`), and satisfy allocations using a simple bump allocator
+that walks through each block in linear order. Allocations cannot be freed
+individually: it is only possible to free the arena as a whole, which frees all
+of the underlying blocks.
+
+Here is an example of using the `upb_Arena` type:
+
+```c
+ upb_Arena* arena = upb_Arena_New();
+
+ // Perform some allocations.
+ int* x = upb_Arena_Malloc(arena, sizeof(*x));
+ int* y = upb_Arena_Malloc(arena, sizeof(*y));
+
+ // We cannot free `x` and `y` separately, we can only free the arena
+ // as a whole.
+ upb_Arena_Free(arena);
+```
+
+upb uses arenas for all memory management, and this fact is reflected in the API
+for all upb data structures. All upb functions that allocate take a `upb_Arena*`
+parameter and perform allocations using that arena rather than calling
+`malloc()` or `free()`.
+
+```c
+// upb API to create a message.
+UPB_API upb_Message* upb_Message_New(const upb_MiniTable* mini_table,
+ upb_Arena* arena);
+
+void MakeMessage(const upb_MiniTable* mini_table) {
+ upb_Arena* arena = upb_Arena_New();
+
+ // This message is allocated on our arena.
+ upb_Message* msg = upb_Message_New(mini_table, arena);
+
+ // We can free the arena whenever we want, but we cannot free the
+ // message separately from the arena.
+ upb_Arena_Free(arena);
+
+ // msg is now deleted.
+}
+```
+
+Arenas are a key part of upb's performance story. Parsing a large protobuf
+payload usually involves rapidly creating a series of messages, arrays (repeated
+fields), and maps. It is crucial for parsing performance that these allocations
+are as fast as possible. Equally important, freeing the tree of messages should
+be as fast as possible, and arenas can reduce this cost from `O(n)` to `O(lg
+n)`.
+
+### Avoiding Dangling Pointers
+
+Objects allocated on an arena will frequently contain pointers to other
+arena-allocated objects. For example, a `upb_Message` will have pointers to
+sub-messages that are also arena-allocated.
+
+Unlike unique ownership schemes (such as `unique_ptr<>`), arenas cannot provide
+automatic safety from dangling pointers. Instead, upb provides tools to help
+bridge between higher-level memory management schemes (GC, refcounting, RAII,
+borrow checkers) and arenas.
+
+If there is only one arena, dangling pointers within the arena are impossible,
+because all objects are freed at the same time. This is the simplest case. The
+user must still be careful not to keep dangling pointers that point at arena
+memory after it has been freed, but dangling pointers *between* the arena
+objects will be impossible.
+
+But what if there are multiple arenas? If we have a pointer from one arena to
+another, how do we ensure that this will not become a dangling pointer?
+
+To help with the multiple arena case, upb provides a primitive called "fuse".
+
+```c
+// Fuses the lifetimes of `a` and `b`. None of the blocks from `a` or `b`
+// will be freed until both arenas are freed.
+UPB_API bool upb_Arena_Fuse(upb_Arena* a, upb_Arena* b);
+```
+
+When two arenas are fused together, their lifetimes are irreversibly joined,
+such that none of the arena blocks in either arena will be freed until *both*
+arenas are freed with `upb_Arena_Free()`. This means that dangling pointers
+between the two arenas will no longer be possible.
+
+Fuse is useful when joining two messages from separate arenas (making one a
+sub-message of the other). Fuse is a relatively cheap operation, on the order of
+150ns, and is very nearly `O(1)` in the number of arenas being fused (the true
+complexity is the inverse Ackermann function, which grows extremely slowly).
+
+Each arena does consume some memory, so repeatedly creating and fusing an
+additional arena is not free, but the CPU cost of fusing two arenas together is
+modest.
+
+### Initial Block and Custom Allocators
+
+`upb_Arena` normally uses `malloc()` and `free()` to allocate and return its
+underlying blocks. But this default strategy can be customized to support the
+needs of a particular language.
+
+The lowest-level function for creating a `upb_Arena` is:
+
+```c
+// Creates an arena from the given initial block (if any -- n may be 0).
+// Additional blocks will be allocated from |alloc|. If |alloc| is NULL,
+// this is a fixed-size arena and cannot grow.
+UPB_API upb_Arena* upb_Arena_Init(void* mem, size_t n, upb_alloc* alloc);
+```
+
+The buffer `[mem, n]` will be used as an "initial block", which is used to
+satisfy allocations before calling any underlying allocation function. Note that
+the `upb_Arena` itself will be allocated from the initial block if possible, so
+the amount of memory available for allocation from the arena will be less than
+`n`.
+
+The `alloc` parameter specifies a custom memory allocation function which will
+be used once the initial block is exhausted. The user can pass `NULL` as the
+allocation function, in which case the initial block is the only memory
+available in the arena. This can allow upb to be used even in situations where
+there is no heap.
+
+It follows that `upb_Arena_Malloc()` is a fallible operation, and all allocating
+operations like `upb_Message_New()` should be checked for failure if there is
+any possibility that a fixed size arena is in use.
+
+## Schemas
+
+Nearly all operations in upb require that you have a schema. A protobuf schema
+is a data structure that contains all of the message, field, enum, etc.
+definitions that are specified in a `.proto` file. To create, parse, serialize,
+or access a message you must have a schema. For this reason, loading a schema is
+generally the first thing you must do when you use upb. [^0]
+
+[^0]: This requirement comes from the protobuf wire format itself, which is a
+ deep insight about the nature of protobuf (or at least the existing wire
+ format). Unlike JSON, protobuf cannot be parsed or manipulated in a
+ schema-less way. This is because the binary wire format does not
+ distinguish between strings and sub-messages, so a generic parser that is
+ oblivious to the schema is not possible. If a future version of the wire
+ format did distinguish between these, it could be possible to have a
+ schema-agnostic data representation, parser, and serializer.
+
+upb has two main data structures that represent a protobuf schema:
+
+* **MiniTables** are a compact, stripped down version of the schema that
+ contains only the information necessary for parsing and serializing the
+ binary wire format.
+* **Reflection** contains basically all of the data from a `.proto` file,
+ including the original names of all messages/fields/etc., and all options.
+
+The table below summarizes the main differences between these two:
+
+| | MiniTables | Reflection |
+| ------------------- | ------------------------- | -------------------------- |
+| Contains | Field numbers and types | All data in `.proto` file, |
+: : only : including names of :
+: : : everything :
+| Used to parse | binary format | JSON / TextFormat |
+| Wire representation | MiniDescriptor | Descriptor |
+| Type names | `upb_MiniTable`, | `upb_MessageDef`, |
+: : `upb_MiniTableField`, ... : `upb_FieldDef`, ... :
+| Registry | `upb_ExtensionRegistry` | `upb_DefPool` |
+: : (for extensions) : :
+
+MiniTables are useful if you only need the binary wire format, because they are
+much lighter weight than full reflection.
+
+Reflection is useful if you need to parse JSON or TextFormat, or you need access
+to options that were specified in the `proto` file. Note that reflection also
+includes MiniTables, so if you have reflection, you also have MiniTables
+available.
+
+### MiniTables
+
+MiniTables are represented by a set of data structures with names like
+`upb_MiniTable` (representing a message), `upb_MiniTableField`,
+`upb_MiniTableFile`, etc. Whenever you see one of these types in a function
+signature, you know that this particular operation requires a MiniTable. For
+example:
+
+```
+// Parses the wire format data in the given buffer `[buf, size]` and writes it
+// to the message `msg`, which has the type `mt`.
+UPB_API upb_DecodeStatus upb_Decode(const char* buf, size_t size,
+ upb_Message* msg, const upb_MiniTable* mt,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena);
+```
+
+The subset of upb that requires only MiniTables can be thought of as "upb lite,"
+because both the code size and the runtime memory overhead will be less than
+"upb full" (the parts that use reflection).
+
+#### Loading
+
+There are three main ways of loading a MiniTable:
+
+1. **From C generated code:** The upb code generator can emit `.upb.c` files that
+ contain the MiniTables as global constant variables. When the main program
+ links against these, the MiniTable will be placed into `.rodata` (or
+ `.data.rel.ro`) in the binary. The MiniTable can then be obtained from a
+ generated function. In Blaze/Bazel these files can be generated and linked
+ using the `upb_proto_library()` rule.
+2. **From MiniDescriptors:** The user can build MiniDescriptors into MiniTables
+ at runtime. MiniDescriptors are a compact upb-specific wire format designed
+ specially for this purpose. The user can call `upb_MiniTable_Build()` at
+ runtime to convert MiniDescriptors to MiniTables.
+3. **From reflection:** If you have already built reflection data structures
+ for your type, then you can obtain the `upb_MiniTable` corresponding to a
+ `upb_MessageDef` using `upb_MessageDef_MiniTable()`.
+
+For languages that are already using reflection, (3) is an obvious choice.
+
+For languages that are avoiding reflection, here is a general guideline for
+choosing between (1) and (2): if the language being wrapped participates in the
+standard binary linking model on a given platform (in particular, if it is
+generally linked using `ld`), then it is better to use (1), which is also known
+as "static loading".
+
+Static loading of MiniTables has the benefit of requiring no runtime
+initialization[^2], leading to faster startup. Static loading of MiniTables also
+facilitates cross-language sharing of proto messages, because sharing generally
+requires that both languages are using exactly the same MiniTables.
+
+The main downside of static loading is that it requires the user to generate one
+`.upb.c` file per `.proto` and link against the transitive closure of `.upb.c`
+files. Blaze and Bazel make this reasonably easy, but for other build systems it
+can be more of a challenge.
+
+[^2]: aside from possible pointer relocations performed by the ELF/Mach-O loader
+ if the library or binary is position-independent
+
+Loading from MiniDescriptors, as in option (2), has the advantage that it does
+not require per-message linking of C code. For many language toolchains,
+generating and linking some custom C code for every protobuf file or message
+type would be a burdensome requirement. MiniDescriptors are a convenient way of
+loading MiniTables without needing to cross the FFI boundary outside the core
+runtime.
+
+A common pattern when using dynamic loading is to embed strings containing
+MiniDescriptors directly into generated code. For example, the generated code in
+Dart for a message with only primitive fields currently looks something like:
+
+```dart
+ const desc = r'$(+),*-#$%&! /10';
+ _accessor = $pb.instance.registry.newMessageAccessor(desc);
+```
+
+The implementation of `newMessageAccesor()` is mainly just a wrapper around
+`upb_MiniTable_Build()`, which builds a MiniTable from a MiniDescriptor. In the
+code generator, the MiniDescriptor can be obtained from the
+`upb_MessageDef_MiniDescriptorEncode()` API; users should never need to encode a
+MiniDescriptor manually.
+
+#### Linking
+
+When building MiniTables dynamically, it is the user's responsibility to link
+each message to the to the appropriate sub-messages and or enums. Each message
+must have its message and closed enum fields linked using
+`upb_MiniTable_SetSubMessage()` and `upb_MiniTable_SetSubEnum()`, respectively.
+
+A higher-level function that links all fields at the same time is also
+available, as `upb_MiniTable_Link()`. This function pairs well with
+`upb_MiniTable_GetSubList()` which can be used in a code generator to get a list
+of all the messages and enums which must be passed to `upb_MiniTable_Link()`.
+
+A common pattern is to embed the `link()` calls directly into the generated
+code. For example, here is an example from Dart of building a MiniTable that
+contains sub-messages and enums:
+
+```dart
+ const desc = r'$3334';
+ _accessor = $pb.instance.registry.newMessageAccessor(desc);
+ _accessor!.link(
+ [
+ M2.$_accessor,
+ M3.$_accessor,
+ M4.$_accessor,
+ ],
+ [
+ E.$_accessor,
+ ],
+ );
+```
+
+In this case, `upb_MiniTable_GetSubList()` was used in the code generator to
+discover the 3 sub-message fields and 1 sub-enum field that require linking. At
+runtime, these lists of MiniTables are passed to the `link()` function, which
+will internally call `upb_MiniTable_Link()`.
+
+Note that in some cases, the application may choose to delay or even skip the
+registration of sub-message types, as part of a tree shaking strategy.
+
+When using static MiniTables, a manual link step is not necessary, as linking is
+performed automatically by `ld`.
+
+#### Enums
+
+MiniTables primarily carry data about messages, fields, and extensions. However
+for closed enums, we must also have a `upb_MiniTableEnum` structure that stores
+the set of all numbers that are defined in the enum. This is because closed
+enums have the unfortunate behavior of putting unknown enum values into the
+unknown field set.
+
+Over time, closed enums will hopefully be phased out via editions, and the
+relevance and overhead of `upb_MiniTableEnum` will shrink and eventually
+disappear.
+
+### Reflection
+
+Reflection uses types like `upb_MessageDef` and `upb_FieldDef` to represent the
+full contents of a `.proto` file at runtime. These types are upb's direct
+equivalents of `google::protobuf::Descriptor`, `google::protobuf::FieldDescriptor`, etc. [^1]
+
+[^1]: upb consistently uses `Def` where C++ would use `Descriptor` in type
+ names. This introduces divergence with C++; the rationale was to conserve
+ horizontal line length, as `Def` is less than 1/3 the length of
+ `Descriptor`. This is more relevant in C, where the type name is repeated
+ in every function, eg. `upb_FieldDef_Name()` vs.
+ `upb_FieldDescriptor_Name()`.
+
+Whenever you see one of these types in a function signature, you know that the
+given operation requires reflection. For example:
+
+```c
+// Parses JSON format into a message object, using reflection.
+UPB_API bool upb_JsonDecode(const char* buf, size_t size, upb_Message* msg,
+ const upb_MessageDef* m, const upb_DefPool* symtab,
+ int options, upb_Arena* arena, upb_Status* status);
+```
+
+The part of upb that requires reflection can be thought of as "upb full." These
+parts of the library cannot be used if a given application has only loaded
+MiniTables. There is no way to convert a MiniTable into reflection.
+
+The `upb_DefPool` type is the top-level container that builds and owns some set
+of defs. This type is a close analogue of `google::protobuf::DescriptorPool` in C++. The
+user must always ensure that the `upb_DefPool` outlives any def objects that it
+owns.
+
+#### Loading
+
+As with MiniTable loading, we have multiple options for how to load full
+reflection:
+
+1. **From C generated code**: The upb code generator can create `foo.upbdefs.c`
+ files that embed the descriptors and exports generated C functions for
+ adding them to a user-provided `upb_DefPool`.
+2. **From descriptors**: The user can make manual calls to
+ `upb_DefPool_AddFile()`, using descriptors obtained at runtime. Defs for
+ individual messages can then be obtained using
+ `upb_DefPool_FindMessageByName()`.
+
+Unlike MiniTables, loading from generated code requires runtime initialization,
+as reflection data structures like `upb_MessageDef` are not capable of being
+emitted directly into `.rodata` like `upb_MiniTable` is. Instead, the generated
+code embeds serialized descriptor protos into `.rodata` which are then built
+into heap objects at runtime.
+
+From this you might conclude that option (1) is nothing but a convenience
+wrapper around option (2), but that is not quite correct either. Option (1)
+*does* link against the static `.upb.c` structures for the MiniTables, whereas
+option (2) will build the MiniTables from scratch on the heap. So option (1)
+will use marginally less CPU and RAM when the descriptors are loaded into a
+`upb_DefPool`. More importantly, the resulting descriptors will be capable of
+reflecting over any messages built from the generated `.upb.c` MiniTables,
+whereas descriptors built using option (2) will have distinct MiniTables that
+cannot reflect over messages that use the generated MiniTables.
+
+A common pattern for dynamic languages like PHP, Ruby, or Python, is to use
+option (2) with descriptors that are embedded into the generated code. For
+example, the generated code in Python currently looks something like:
+
+```python
+from google.protobuf import descriptor_pool as _descriptor_pool
+from google.protobuf.internal import builder as _builder
+
+_desc = b'\n\x1aprotoc_explorer/main.proto\x12\x03pkg'
+
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(_desc)
+_globals = globals()
+_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
+_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'google3.protoc_explorer.main_pb2', _globals)
+```
+
+The `AddSerializedFile()` API above is mainly just a thin wrapper around
+`upb_DefPool_AddFile()`.
diff --git a/upb/docs/render.py b/upb/docs/render.py
new file mode 100755
index 0000000..0f3bdc2
--- /dev/null
+++ b/upb/docs/render.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import subprocess
+import sys
+import shutil
+import os
+
+if len(sys.argv) < 2:
+ print("Must pass a filename argument")
+ sys.exit(1)
+
+in_filename = sys.argv[1]
+out_filename = in_filename.replace(".in.md", ".md")
+out_dir = in_filename.replace(".in.md", "")
+
+if in_filename == out_filename:
+ print("File must end in .in.md")
+ sys.exit(1)
+
+if os.path.isdir(out_dir):
+ shutil.rmtree(out_dir)
+
+os.mkdir(out_dir)
+file_num = 1
+
+with open(out_filename, "wb") as out_file, open(in_filename, "rb") as in_file:
+ for line in in_file:
+ if line.startswith(b"```dot"):
+ dot_lines = []
+ while True:
+ dot_line = next(in_file)
+ if dot_line == b"```\n":
+ break
+ dot_lines.append(dot_line)
+ dot_input = b"".join(dot_lines)
+ svg_filename = out_dir + "/" + str(file_num) + ".svg"
+ svg = subprocess.check_output(['dot', '-Tsvg', '-o', svg_filename], input=dot_input)
+ out_file.write(b"<div align=center>\n")
+ out_file.write(b"<img src='%s'/>\n" % (svg_filename.encode('utf-8')))
+ out_file.write(b"</div>\n")
+ file_num += 1
+ else:
+ out_file.write(line)
diff --git a/upb/docs/style-guide.md b/upb/docs/style-guide.md
new file mode 100644
index 0000000..e02571b
--- /dev/null
+++ b/upb/docs/style-guide.md
@@ -0,0 +1,65 @@
+# C style guide
+
+<!--*
+# Document freshness: For more information, see go/fresh-source.
+freshness: { owner: 'haberman' reviewed: '2022-05-08' }
+*-->
+
+Since upb is written in pure C, we supplement the
+[Google C++ style guide](https://google.github.io/styleguide/cppguide.html) with
+some C-specific guidance.
+
+Everything written here is intended to follow the spirit of the C++ style guide.
+
+upb is currently inconsistent about following these conventions. It is intended
+that all code will be updated to match these guidelines. The priority is
+converting public interfaces as these are more difficult to change later.
+
+## Naming
+
+### Functions and Types
+
+C does not have namespaces. Anywhere you would normally use a namespace
+separator (`::`) in C++, we use an underscore (`_`) in C:
+
+```c++
+// C equivalent for upb::Arena::New()
+upb_Arena* upb_Arena_New();
+```
+
+Since we rely on `_` to be our namespace separator, we never use it to merely
+separate words in function or type names:
+
+```c++
+// BAD: this would be interpreted as upb::FieldDef::has::default().
+bool upb_FieldDef_has_default(const upb_FieldDef* f);
+
+// GOOD: this is equivalent to upb::FieldDef::HasDefault().
+bool upb_FieldDef_HasDefault(const upb_FieldDef* f);
+```
+
+For multi-word namespaces, we use `PascalCase`:
+
+```c++
+// `PyUpb` is the namespace.
+PyObject* PyUpb_CMessage_GetAttr(PyObject* _self, PyObject* attr);
+```
+
+### Private Functions and Members
+
+Since we do not have `private` in C++, we use a leading underscore convention
+to mark internal functions and variables that should only be accessed from
+upb:
+
+```c++
+// Internal-only function.
+int64_t _upb_Int64_FromLL();
+
+// Internal-only members. Underscore prefixes are only necessary when the
+// structure is defined in a header file.
+typedef struct {
+ const int32_t* _values; // List of values <0 or >63
+ uint64_t _mask; // Bits are set for acceptable value 0 <= x < 64
+ int _value_count;
+} upb_MiniTableEnum;
+```
diff --git a/upb/docs/vs-cpp-protos.md b/upb/docs/vs-cpp-protos.md
new file mode 100644
index 0000000..fe2a864
--- /dev/null
+++ b/upb/docs/vs-cpp-protos.md
@@ -0,0 +1,261 @@
+<!--*
+# Document freshness: For more information, see go/fresh-source.
+freshness: { owner: 'haberman' reviewed: '2023-02-24' }
+*-->
+
+# upb vs. C++ Protobuf Design
+
+[upb](https://github.com/protocolbuffers/upb) is a small C protobuf library.
+While some of the design follows in the footsteps of the C++ Protobuf Library,
+upb departs from C++'s design in several key ways. This document compares
+and contrasts the two libraries on several design points.
+
+## Design Goals
+
+Before we begin, it is worth calling out that upb and C++ have different design
+goals, and this motivates some of the differences we will see.
+
+C++ protobuf is a user-level library: it is designed to be used directly by C++
+applications. These applications will expect a full-featured C++ API surface
+that uses C++ idioms. The C++ library is also willing to add features to
+increase server performance, even if these features would add size or complexity
+to the library. Because C++ protobuf is a user-level library, API stability is
+of utmost importance: breaking API changes are rare and carefully managed when
+they do occur. The focus on C++ also means that ABI compatibility with C is not
+a priority.
+
+upb, on the other hand, is designed primarily to be wrapped by other languages.
+It is a C protobuf kernel that forms the basis on which a user-level protobuf
+library can be built. This means we prefer to keep the API surface as small and
+orthogonal as possible. While upb supports all protobuf features required for
+full conformance, upb prioritizes simplicity and small code size, and avoids
+adding features like lazy fields that can accelerate some use cases but at great
+cost in terms of complexity. As upb is not aimed directly at users, there is
+much more freedom to make API-breaking changes when necessary, which helps the
+core to stay small and simple. We want to be compatible with all FFI
+interfaces, so C ABI compatibility is a must.
+
+Despite these differences, C++ protos and upb offer [roughly the same core set
+of features](https://github.com/protocolbuffers/upb#features).
+
+## Arenas
+
+upb and C++ protos both offer arena allocation, but there are some key
+differences.
+
+### C++
+
+As a matter of history, when C++ protos were open-sourced in 2008, they did not
+support arenas. Originally there was only unique ownership, whereby each
+message uniquely owns all child messages and will free them when the parent is
+freed.
+
+Arena allocation was added as a feature in 2014 as a way of dramatically
+reducing allocation and (especially) deallocation costs. But the library was
+not at liberty to remove the unique ownership model, because it would break far
+too many users. As a result, C++ has supported a **hybrid allocation model**
+ever since, allowing users to allocate messages either directly from the
+stack/heap or from an arena. The library attempts to ensure that there are
+no dangling pointers by performing automatic copies in some cases (for example
+`a->set_allocated_b(b)`, where `a` and `b` are on different arenas).
+
+C++'s arena object itself `google::protobuf::Arena` is **thread-safe** by
+design, which allows users to allocate from multiple threads simultaneously
+without external synchronization. The user can supply an initial block of
+memory to the arena, and can choose some parameters to control the arena block
+size. The user can also supply block alloc/dealloc functions, but the alloc
+function is expected to always return some memory. The C++ library in general
+does not attempt to handle out of memory conditions.
+
+### upb
+
+upb uses **arena allocation exclusively**. All messages must be allocated from
+an arena, and can only be freed by freeing the arena. It is entirely the user's
+responsibility to ensure that there are no dangling pointers: when a user sets a
+message field, this will always trivially overwrite the pointer and will never
+perform an implicit copy.
+
+upb's `upb::Arena` is **thread-compatible**, which means it cannot be used
+concurrently without synchronization. The arena can be seeded with an initial
+block of memory, but it does not explicitly support any parameters for choosing
+block size. It supports a custom alloc/dealloc function, and this function is
+allowed to return `NULL` if no dynamic memory is available. This allows upb
+arenas to have a max/fixed size, and makes it possible in theory to write code
+that is tolerant to out-of-memory errors.
+
+upb's arena also supports a novel operation known as **fuse**, which joins two
+arenas together into a single lifetime. Though both arenas must still be freed
+separately, none of the memory will actually be freed until *both* arenas have
+been freed. This is useful for avoiding dangling pointers when reparenting a
+message with one that may be on a different arena.
+
+### Comparison
+
+**hybrid allocation vs. arena-only**
+
+* The C++ hybrid allocation model introduces a great deal of complexity and
+ unpredictability into the library. upb benefits from having a much simpler
+ and more predictable design.
+* Some of the complexity in C++'s hybrid model arises from the fact that arenas
+ were added after the fact. Designing for a hybrid model from the outset
+ would likely yield a simpler result.
+* Unique ownership does support some usage patterns that arenas cannot directly
+ accommodate. For example, you can reparent a message and the child will precisely
+ follow the lifetime of its new parent. An arena would require you to either
+ perform a deep copy or extend the lifetime.
+
+**thread-compatible vs. thread-safe arena**
+
+* A thread-safe arena (as in C++) is safer and easier to use. A thread-compatible
+ arena requires that the user prove that the arena cannot be used concurrently.
+* [Thread Sanitizer](https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual)
+ is far more accessible than it was in 2014 (when C++ introduced a thread-safe
+ arena). We now have more tools at our disposal to ensure that we do not trigger
+ data races in a thread-compatible arena like upb.
+* Thread-compatible arenas are more performant.
+* Thread-compatible arenas have a far simpler implementation. The C++ thread-safe
+ arena relies on thread-local variables, which introduce complications on some
+ platforms. It also requires far more subtle reasoning for correctness and
+ performance.
+
+**fuse vs. no fuse**
+
+* The `upb_Arena_Fuse()` operation is a key part of how upb supports reparenting
+ of messages when the parent may be on a different arena. Without this, upb has
+ no way of supporting `foo.bar = bar` in dynamic languages without performing a
+ deep copy.
+* A downside of `upb_Arena_Fuse()` is that passing an arena to a function can allow
+ that function to extend the lifetime of the arena in potentially
+ unpredictable ways. This can be prevented if necessary, as fuse can fail, eg. if
+ one arena has an initial block. But this adds some complexity by requiring callers
+ to handle the case where fuse fails.
+
+## Code Generation vs. Tables
+
+The C++ protobuf library has always been built around code generation, while upb
+generates only tables. In other words, `foo.pb.cc` files contain functions,
+whereas `foo.upb.c` files emit only data structures.
+
+### C++
+
+C++ generated code emits a large number of functions into `foo.pb.cc` files.
+An incomplete list:
+
+* `FooMsg::FooMsg()` (constructor): initializes all fields to their default value.
+* `FooMsg::~FooMsg()` (destructor): frees any present child messages.
+* `FooMsg::Clear()`: clears all fields back to their default/empty value.
+* `FooMsg::_InternalParse()`: generated code for parsing a message.
+* `FooMsg::_InternalSerialize()`: generated code for serializing a message.
+* `FooMsg::ByteSizeLong()`: calculates serialized size, as a first pass before serializing.
+* `FooMsg::MergeFrom()`: copies/appends present fields from another message.
+* `FooMsg::IsInitialized()`: checks whether required fields are set.
+
+This code lives in the `.text` section and contains function calls to the generated
+classes for child messages.
+
+### upb
+
+upb does not generate any code into `foo.upb.c` files, only data structures. upb uses a
+compact data table known as a *mini table* to represent the schema and all fields.
+
+upb uses mini tables to perform all of the operations that would traditionally be done
+with generated code. Revisiting the list from the previous section:
+
+* `FooMsg::FooMsg()` (constructor): upb instead initializes all messages with `memset(msg, 0, size)`.
+ Non-zero defaults are injected in the accessors.
+* `FooMsg::~FooMsg()` (destructor): upb messages are freed by freeing the arena.
+* `FooMsg::Clear()`: can be performed with `memset(msg, 0, size)`.
+* `FooMsg::_InternalParse()`: upb's parser uses mini tables as data, instead of generating code.
+* `FooMsg::_InternalSerialize()`: upb's serializer also uses mini-tables instead of generated code.
+* `FooMsg::ByteSizeLong()`: upb performs serialization in reverse so that an initial pass is not required.
+* `FooMsg::MergeFrom()`: upb supports this via serialize+parse from the other message.
+* `FooMsg::IsInitialized()`: upb's encoder and decoder have special flags to check for required fields.
+ A util library `upb/util/required_fields.h` handles the corner cases.
+
+### Comparison
+
+If we compare compiled code size, upb is far smaller. Here is a comparison of the code
+size of a trivial binary that does nothing but a parse and serialize of `descriptor.proto`.
+This means we are seeing both the overhead of the core library itself as well as the
+generated code (or table) for `descriptor.proto`. (For extra clarity we should break this
+down by generated code vs core library in the future).
+
+
+| Library | `.text` | `.data` | `.bss` |
+|------------ |---------|---------|--------|
+| upb | 26Ki | 0.6Ki | 0.01Ki |
+| C++ (lite) | 187Ki | 2.8Ki | 1.25Ki |
+| C++ (code size) | 904Ki | 6.1Ki | 1.88Ki |
+| C++ (full) | 983Ki | 6.1Ki | 1.88Ki |
+
+"C++ (code size)" refers to protos compiled with `optimize_for = CODE_SIZE`, a mode
+in which generated code contains reflection only, in an attempt to make the
+generated code size smaller (however it requires the full runtime instead
+of the lite runtime).
+
+## Bifurcated vs. Optional Reflection
+
+upb and C++ protos both offer reflection without making it mandatory. However
+the models for enabling/disabling reflection are very different.
+
+### C++
+
+C++ messages offer full reflection by default. Messages in C++ generally
+derive from `Message`, and the base class provides a member function
+`Reflection* Message::GetReflection()` which returns the reflection object.
+
+It follows that any message deriving from `Message` will always have reflection
+linked into the binary, whether or not the reflection object is ever used.
+Because `GetReflection()` is a function on the base class, it is not possible
+to statically determine if a given message's reflection is used:
+
+```c++
+Reflection* GetReflection(const Message& message) {
+ // Can refer to any message in the whole binary.
+ return message.GetReflection();
+}
+```
+
+The C++ library does provide a way of omitting reflection: `MessageLite`. We can
+cause a message to be lite in two different ways:
+
+* `optimize_for = LITE_RUNTIME` in a `.proto` file will cause all messages in that
+ file to be lite.
+* `lite` as a codegen param: this will force all messages to lite, even if the
+ `.proto` file does not have `optimize_for = LITE_RUNTIME`.
+
+A lite message will derive from `MessageLite` instead of `Message`. Since
+`MessageLite` has no `GetReflection()` function, this means no reflection is
+available, so we can avoid taking the code size hit.
+
+### upb
+
+upb does not have the `Message` vs. `MessageLite` bifurcation. There is only one
+kind of message type `upb_Message`, which means there is no need to configure in
+a `.proto` file which messages will need reflection and which will not.
+Every message has the *option* to link in reflection from a separate `foo.upbdefs.o`
+file, without needing to change the message itself in any way.
+
+upb does not provide the equivalent of `Message::GetReflection()`: there is no
+facility for retrieving the reflection of a message whose type is not known statically.
+It would be possible to layer such a facility on top of the upb core, though this
+would probably require some kind of code generation.
+
+### Comparison
+
+* Most messages in C++ will not bother to declare themselves as "lite". This means
+ that many C++ messages will link in reflection even when it is never used, bloating
+ binaries unnecessarily.
+* `optimize_for = LITE_RUNTIME` is difficult to use in practice, because it prevents
+ any non-lite protos from `import`ing that file.
+* Forcing all protos to lite via a codegen parameter (for example, when building for
+ mobile) is more practical than `optimize_for = LITE_RUNTIME`. But this will break
+ the compile for any code that tries to upcast to `Message`, or tries to use a
+ non-lite method.
+* The one major advantage of the C++ model is that it can support `msg.DebugString()`
+ on a type-erased proto. For upb you have to explicitly pass the `upb_MessageDef*`
+ separately if you want to perform an operation like printing a proto to text format.
+
+## Explicit Registration vs. Globals
+
+TODO
diff --git a/upb/docs/wrapping-upb.md b/upb/docs/wrapping-upb.md
new file mode 100644
index 0000000..bc1bd63
--- /dev/null
+++ b/upb/docs/wrapping-upb.md
@@ -0,0 +1,444 @@
+
+<!---
+This document contains embedded graphviz diagrams inside ```dot blocks.
+
+To convert it to rendered form using render.py:
+ $ ./render.py wrapping-upb.in.md
+
+You can also live-preview this document with all diagrams using Markdown Preview Enhanced
+in Visual Studio Code:
+ https://marketplace.visualstudio.com/items?itemName=shd101wyy.markdown-preview-enhanced
+--->
+
+# Building a protobuf library on upb
+
+This is a guide for creating a new protobuf implementation based on upb. It
+starts from the beginning and walks you through the process, highlighting
+some important design choices you will need to make.
+
+## Overview
+
+A protobuf implementation consists of two main pieces:
+
+1. a code generator, run at compile time, to turn `.proto` files into source
+ files in your language (we will call this "zlang", assuming an extension of ".z").
+2. a runtime component, which implements the wire format and provides the data
+ structures for representing protobuf data and metadata.
+
+<br/>
+
+```dot {align="center"}
+digraph {
+ rankdir=LR;
+ newrank=true;
+ node [style="rounded,filled" shape=box]
+ "foo.proto" -> protoc;
+ "foo.proto" [shape=folder];
+ protoc [fillcolor=lightgrey];
+ protoc -> "protoc-gen-zlang";
+ "protoc-gen-zlang" -> "foo.z";
+ "protoc-gen-zlang" [fillcolor=palegreen3];
+ "foo.z" [shape=folder];
+ labelloc="b";
+ label="Compile Time";
+}
+```
+
+<br/>
+
+```dot {align="center"}
+digraph {
+ newrank=true;
+ node [style="rounded,filled" shape=box fillcolor=lightgrey]
+ "foo.z" -> "zlang/upb glue (FFI)";
+ "zlang/upb glue (FFI)" -> "upb (C)";
+ "zlang/upb glue (FFI)" [fillcolor=palegreen3];
+ labelloc="b";
+ label="Runtime";
+}
+```
+
+The parts in green are what you will need to implement.
+
+Note that your code generator (`protoc-gen-zlang`) does *not* need to generate
+any C code (eg. `foo.c`). While upb itself is written in C, upb's parsers and
+serializers are fully table-driven, which means there is never any need or even
+benefit to generating C code for each proto. upb is capable of full-speed
+parsing even when schema data is loaded at runtime from strings embedded into
+`foo.z`. This is a key benefit of upb compared with C++ protos, which have
+traditionally relied on generated parsers in `foo.pb.cc` files to achieve full
+parsing speed, and suffered a ~10x speed penalty in the parser when the schema
+data was loaded at runtime.
+
+## Prerequisites
+
+There are a few things that the language runtime must provide in order to wrap
+upb.
+
+1. **FFI**: To wrap upb, your language must be able to call into a C API
+ through a Foreign Function Interface (FFI). Most languages support FFI in
+ some form, either through "native extensions" (in which you write some C
+ code to implement new methods in your language) or through a direct FFI (in
+ which you can call into regular C functions directly from your language
+ using a special library).
+2. **Finalizers, Destructors, or Cleaners**: The runtime must provide
+ finalizers or destructors of some sort. There must be a way of triggering a
+ call to a C function when the language garbage collects or otherwise
+ destroys an object. We don't care much whether it is a finalizer, a
+ destructor, or a cleaner, as long as it gets called eventually when the
+ object is destroyed. upb allocates memory in C space, and a finalizer is our
+ only way of making sure that memory is freed and does not leak.
+3. **HashMap with weak values**: (optional) This is not a strong requirement,
+ but it is sometimes helpful to have a global hashmap with weak values to act
+ as a `upb_msg* -> wrapper` object cache. We want the values to be weak (not
+ the keys). There is some question about whether we want to continue to use
+ this pattern going forward.
+
+## Reflection vs. MiniTables
+
+The first key design decision you will need to make is whether your generated
+code will access message data via reflection or minitables. Generally more
+dynamic languages will want to use reflection and more static languages will
+want to use minitables.
+
+### Reflection
+
+Reflection-based data access makes the most sense in highly dynamic language
+interpreters, where method dispatch is generally resolved via strings and hash
+table lookups.
+
+In such languages, you can often implement a special method like `__getattr__`
+(Python) or `method_missing` (Ruby) that receives the method name as a string.
+Using upb's reflection, you can look up a field name using the method name,
+thereby using a hash table belonging to upb instead of one provided by the
+language.
+
+```python
+class FooMessage:
+ # Written in Python for illustration, but in practice we will want to
+ # implement this in C for speed.
+ def __getattr__(self, name):
+ field = FooMessage.descriptor.fields_by_name[name]
+ return field.get_value(self)
+```
+
+Using this design, we only need to attach a single `__getattr__` method to each
+message class, instead of defining a getter/setter for each field. In this way
+we can avoid duplicating hash tables between upb and the language interpreter,
+reducing memory usage.
+
+Reflection-based access requires loading full reflection at runtime. Your
+generated code will need to embed serialized descriptors (ie. a serialized
+message of `descriptor.proto`), which has some amount of size overhead and
+exposes all message/field names to the binary. It also forces a hash table
+lookup in the critical path of field access. If method calls in your language
+already have this overhead, then this is no added burden, but for statically
+dispatched languages it would cause extra overhead.
+
+If we take this path to its logical conclusion, all class creation can be
+performed fully dynamically, using only a binary descriptor as input. The
+"generated code" becomes little more than an embedded descriptor plus a
+library call to load it. Python has recently gone down this path. Generated
+code now looks something like this:
+
+```python
+# main_pb2.py
+from google3.net.proto2.python.internal import builder as _builder
+from google3.net.proto2.python.public import descriptor_pool as _descriptor_pool
+
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile("<...>")
+_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
+_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'google3.main_pb2', globals())
+```
+
+This is all the runtime needs to create all of the classes for messages defined
+in that serialized descriptor. This code has no pretense of readability, but
+a separate `.pyi` stub file provides a fully expanded and readable list of all
+methods a user can expect to be available:
+
+```python
+# main_pb2.pyi
+from google3.net.proto2.python.public import descriptor as _descriptor
+from google3.net.proto2.python.public import message as _message
+from typing import ClassVar as _ClassVar, Optional as _Optional
+
+DESCRIPTOR: _descriptor.FileDescriptor
+
+class MyMessage(_message.Message):
+ __slots__ = ["my_field"]
+ MY_FIELD_FIELD_NUMBER: _ClassVar[int]
+ my_field: str
+ def __init__(self, my_field: _Optional[str] = ...) -> None: ...
+```
+
+To use reflection-based access:
+
+1. Load and access descriptor data using the interfaces in upb/reflection/def.h.
+2. Access message data using the interfaces in upb/reflection/message.h.
+
+### MiniTables
+
+MiniTables are a "lite" schema representation that are much smaller than
+reflection. MiniTables omit names, options, and almost everything else from the
+`.proto` file, retaining only enough information to parse and serialize binary
+format.
+
+MiniTables can be loaded into upb through *MiniDescriptors*. MiniDescriptors are
+a byte-oriented format that can be embedded into your generated code and passed
+to upb to construct MiniTables. MiniDescriptors only use printable characters,
+and therefore do not require escaping when embedding them into generated code
+strings. Overall the size savings of MiniDescriptors are ~60x compared with
+regular descriptors.
+
+MiniTables and MiniDescriptors are a natural choice for compiled languages that
+resolve method calls at compile time. For languages that are sometimes compiled
+and sometimes interpreted, there might not be an obvious choice. When a method
+call is statically bound, we want to remove as much overhead as possible,
+especially from accessors. In the extreme case, we can use unsafe APIs to read
+raw memory at a known offset:
+
+```java
+// Example of a maximally-optimized generated accessor.
+class FooMessage {
+ public long getBarField() {
+ // Using Unsafe should give us performance that is comparable to a
+ // native member access.
+ //
+ // The constant "24" is obtained from upb at compile time.
+ sun.misc.Unsafe.getLong(this.ptr, 24);
+ }
+}
+```
+
+This design is very low-level, and tightly couples the generated code to one
+specific version of the schema and compiler. A slower but safer version would
+look up a field by field number:
+
+```java
+// Example of a more loosely-coupled accessor.
+class FooMessage {
+ public long getBarField() {
+ // The constant "2" is the field number. Internally this will look
+ // up the number "2" in the MiniTable and use that to read the value
+ // from the message.
+ upb.glue.getLong(this.ptr, 2);
+ }
+}
+```
+
+One downside of MiniTables is that they cannot support parsing or serializing
+to JSON or TextFormat, because they do not know the field names. It should be
+possible to generate reflection data "on the side", into separate generated
+code files, so that reflection is only pulled in if it is being used. However
+APIs to do this do not exist yet.
+
+To use MiniTable-based access:
+
+1. Load and access MiniDescriptors data using the interfaces in upb/mini_descriptor/decode.h.
+2. Access message data using the interfaces in upb/message/accessors.h.
+
+## Memory Management
+
+One of the core design challenges when wrapping upb is memory management. Every
+language runtime will have some memory management system, whether it is
+garbage collection, reference counting, manual memory management, or some hybrid
+of these. upb is written in C and uses arenas for memory management, but upb is
+designed to integrate with a wide variety of memory management schemes, and it
+provides a number of tools for making this integration as smooth as possible.
+
+### Arenas
+
+upb defines data structures in C to represent messages, arrays (repeated
+fields), and maps. A protobuf message is a hierarchical tree of these objects.
+For example, a relatively simple protobuf tree might look something like this:
+
+```dot {align="center"}
+digraph G {
+ rankdir=LR;
+ newrank=true;
+ node [style="rounded,filled" shape=box colorscheme=accent8 fillcolor=1, ordering=out]
+ upb_msg -> upb_msg2;
+ upb_msg -> upb_array;
+ upb_msg [label="upb Message" fillcolor=1]
+ upb_msg2 [label="upb Message"];
+ upb_array [label="upb Array"]
+}
+```
+
+All upb objects are allocated from an arena. An arena lets you allocate objects
+individually, but you cannot free individual objects; you can only free the arena
+as a whole. When the arena is freed, all of the individual objects allocated
+from that arena are freed together.
+
+```dot {align="center"}
+digraph G {
+ rankdir=LR;
+ newrank=true;
+ subgraph cluster_0 {
+ label = "upb Arena"
+ graph[style="rounded,filled" fillcolor=gray]
+ node [style="rounded,filled" shape=box colorscheme=accent8 fillcolor=1, ordering=out]
+ upb_msg -> upb_array;
+ upb_msg -> upb_msg2;
+ upb_msg [label="upb Message" fillcolor=1]
+ upb_msg2 [label="upb Message"];
+ upb_array [label="upb Array"];
+ }
+}
+```
+
+In simple cases, the entire tree of objects will all live in a single arena.
+This has the nice property that there cannot be any dangling pointers between
+objects, since all objects are freed at the same time.
+
+However upb allows you to create links between any two objects, whether or
+not they are in the same arena. The library does not know or care what arenas
+the objects are in when you create links between them.
+
+```dot {align="center"}
+digraph G {
+ rankdir=LR;
+ newrank=true;
+ subgraph cluster_0 {
+ label = "upb Arena 1"
+ graph[style="rounded,filled" fillcolor=gray]
+ node [style="rounded,filled" shape=box colorscheme=accent8 fillcolor=1, ordering=out]
+ upb_msg -> upb_array;
+ upb_msg -> upb_msg2;
+ upb_msg [label="upb Message 1" fillcolor=1]
+ upb_msg2 [label="upb Message 2"];
+ upb_array [label="upb Array"];
+ }
+ subgraph cluster_1 {
+ label = "upb Arena 2"
+ graph[style="rounded,filled" fillcolor=gray]
+ node [style="rounded,filled" shape=box colorscheme=accent8 fillcolor=1]
+ upb_msg3;
+ }
+ upb_msg2 -> upb_msg3;
+ upb_msg3 [label="upb Message 3"];
+}
+```
+
+When objects are on separate arenas, it is the user's responsibility to ensure
+that there are no dangling pointers. In the example above, this means Arena 2
+must outlive Message 1 and Message 2.
+
+### Integrating GC with upb
+
+In languages with automatic memory management, the goal is to handle all of the
+arenas behind the scenes, so that the user does not have to manage them manually
+or even know that they exist.
+
+We can achieve this goal if we set up the object graph in a particular way. The
+general strategy is to create wrapper objects around all of the C objects,
+including the arena. Our key goal is to make sure the arena wrapper is not
+GC'd until all of the C objects in that arena have become unreachable.
+
+For this example, we will assume we are wrapping upb in Python:
+
+```dot {align="center"}
+digraph G {
+ rankdir=LR;
+ newrank=true;
+ compound=true;
+
+ subgraph cluster_1 {
+ label = "upb Arena"
+ graph[style="rounded,filled" fillcolor=gray]
+ node [style="rounded,filled" shape=box colorscheme=accent8 fillcolor=1, ordering=out]
+ upb_msg -> upb_array [style=dashed];
+ upb_msg -> upb_msg2 [style=dashed];
+ upb_msg [label="upb Message" fillcolor=1]
+ upb_msg2 [label="upb Message"];
+ upb_array [label="upb Array"]
+ dummy [style=invis]
+ }
+ subgraph cluster_python {
+ node [style="rounded,filled" shape=box colorscheme=accent8 fillcolor=2]
+ peripheries=0
+ py_upb_msg [label="Python Message"];
+ py_upb_msg2 [label="Python Message"];
+ py_upb_arena [label="Python Arena"];
+ }
+ py_upb_msg -> upb_msg [style=dashed];
+ py_upb_msg2->upb_msg2 [style=dashed];
+ py_upb_msg2 -> py_upb_arena [color=springgreen4];
+ py_upb_msg -> py_upb_arena [color=springgreen4];
+ py_upb_arena -> dummy [lhead=cluster_1, color=red];
+ {
+ rank=same;
+ upb_msg;
+ py_upb_msg;
+ }
+ {
+ rank=same;
+ upb_array;
+ upb_msg2;
+ py_upb_msg2;
+ }
+ { rank=same;
+ dummy;
+ py_upb_arena;
+ }
+ dummy->upb_array [style=invis];
+ dummy->upb_msg2 [style=invis];
+
+ subgraph cluster_01 {
+ node [shape=plaintext]
+ peripheries=0
+ key [label=<<table border="0" cellpadding="2" cellspacing="0" cellborder="0">
+ <tr><td align="right" port="i1">raw ptr</td></tr>
+ <tr><td align="right" port="i2">unique ptr</td></tr>
+ <tr><td align="right" port="i3">shared (GC) ptr</td></tr>
+ </table>>]
+ key2 [label=<<table border="0" cellpadding="2" cellspacing="0" cellborder="0">
+ <tr><td port="i1"> </td></tr>
+ <tr><td port="i2"> </td></tr>
+ <tr><td port="i3"> </td></tr>
+ </table>>]
+ key:i1:e -> key2:i1:w [style=dashed]
+ key:i2:e -> key2:i2:w [color=red]
+ key:i3:e -> key2:i3:w [color=springgreen4]
+ }
+ key2:i1:w -> upb_msg [style=invis];
+ {
+ rank=same;
+ key;
+ upb_msg;
+ }
+}
+```
+
+In this example we have three different kinds of pointers:
+
+* **raw ptr**: This is a pointer that carries no ownership.
+* **unique ptr**: This is a pointer has *unique ownership* of the target. The owner
+ will free the target in its destructor (or finalizer, or cleaner). There can
+ only be a single unique pointer to a given object.
+* **shared (GC) ptr**: This is a pointer that has *shared ownership* of the
+ target. Many objects can point to the target, and the target will be deleted
+ only when all such references are gone. In a runtime with automatic memory
+ management (GC), this is a reference that participates in GC. In Python such
+ references use reference counting, but in other VMs they may use mark and
+ sweep or some other form of GC instead.
+
+The Python Message wrappers have only raw pointers to the underlying message,
+but they contain a shared pointer to the arena that will ensure that the raw
+pointer remains valid. Only when all message wrapper objects are destroyed
+will the Python Arena become unreachable, and the upb arena ultimately freed.
+
+### Links between arenas with "Fuse"
+
+The design given above works well for objects that live in a single arena. But
+what if a user wants to create a link between two objects in different arenas?
+
+TODO
+
+## UTF-8 vs. UTF-16
+
+TODO
+
+## Object Cache
+
+TODO
diff --git a/upb/lua/BUILD.bazel b/upb/lua/BUILD.bazel
new file mode 100644
index 0000000..e8e200d
--- /dev/null
+++ b/upb/lua/BUILD.bazel
@@ -0,0 +1,130 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load(
+ "//bazel:build_defs.bzl",
+ "UPB_DEFAULT_COPTS",
+ "UPB_DEFAULT_CPPOPTS",
+)
+load(
+ "//lua:lua_proto_library.bzl",
+ "lua_proto_library",
+)
+
+licenses(["notice"])
+
+cc_library(
+ name = "lupb",
+ srcs = [
+ "def.c",
+ "msg.c",
+ "upb.c",
+ ],
+ hdrs = [
+ "upb.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ "//:json",
+ "//:message",
+ "//:reflection",
+ "//:text",
+ "@lua//:liblua",
+ ],
+)
+
+cc_binary(
+ name = "protoc-gen-lua",
+ srcs = ["upbc.cc"],
+ copts = UPB_DEFAULT_CPPOPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ "@com_google_absl//absl/strings",
+ "@com_google_protobuf//src/google/protobuf/compiler:code_generator",
+ ],
+)
+
+exports_files(["upb.lua"])
+
+cc_test(
+ name = "test_lua",
+ srcs = ["main.c"],
+ args = ["$(location :test_upb.lua)"],
+ copts = UPB_DEFAULT_COPTS,
+ data = [
+ "test_upb.lua",
+ ":descriptor_proto_lua",
+ ":empty_proto_lua",
+ ":test_messages_proto2_proto_lua",
+ ":test_messages_proto3_proto_lua",
+ ":test_proto_lua",
+ "//:third_party/lunit/console.lua",
+ "//:third_party/lunit/lunit.lua",
+ "//lua:upb.lua",
+ "@com_google_protobuf//:descriptor_proto",
+ "@com_google_protobuf//conformance:conformance_proto",
+ ],
+ linkstatic = 1,
+ deps = [
+ "//lua:lupb",
+ "@lua//:liblua",
+ ],
+)
+
+proto_library(
+ name = "test_proto",
+ testonly = 1,
+ srcs = ["test.proto"],
+ deps = ["@com_google_protobuf//:timestamp_proto"],
+)
+
+lua_proto_library(
+ name = "test_proto_lua",
+ testonly = 1,
+ deps = [":test_proto"],
+)
+
+lua_proto_library(
+ name = "descriptor_proto_lua",
+ deps = ["@com_google_protobuf//:descriptor_proto"],
+)
+
+lua_proto_library(
+ name = "empty_proto_lua",
+ deps = ["@com_google_protobuf//:empty_proto"],
+)
+
+lua_proto_library(
+ name = "test_messages_proto3_proto_lua",
+ testonly = 1,
+ deps = ["@com_google_protobuf//src/google/protobuf:test_messages_proto3_proto"],
+)
+
+lua_proto_library(
+ name = "test_messages_proto2_proto_lua",
+ testonly = 1,
+ deps = ["@com_google_protobuf//src/google/protobuf:test_messages_proto2_proto"],
+)
diff --git a/upb/lua/README.md b/upb/lua/README.md
new file mode 100644
index 0000000..9374f26
--- /dev/null
+++ b/upb/lua/README.md
@@ -0,0 +1,8 @@
+# upb Lua bindings
+
+These are some bare-bones upb bindings for Lua.
+
+These bindings exist primarily for experimentation and testing.
+They are incomplete and are not really intended for use in any application.
+This is by no means a complete or supported protobuf library, and in fact
+we don't even claim it to be functional.
diff --git a/upb/lua/def.c b/upb/lua/def.c
new file mode 100644
index 0000000..9affe52
--- /dev/null
+++ b/upb/lua/def.c
@@ -0,0 +1,943 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/reflection/def.h"
+
+#include <float.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "lauxlib.h"
+#include "lua/upb.h"
+#include "upb/reflection/message.h"
+
+#define LUPB_ENUMDEF "lupb.enumdef"
+#define LUPB_ENUMVALDEF "lupb.enumvaldef"
+#define LUPB_FIELDDEF "lupb.fielddef"
+#define LUPB_FILEDEF "lupb.filedef"
+#define LUPB_MSGDEF "lupb.msgdef"
+#define LUPB_ONEOFDEF "lupb.oneof"
+#define LUPB_SYMTAB "lupb.defpool"
+#define LUPB_OBJCACHE "lupb.objcache"
+
+static void lupb_DefPool_pushwrapper(lua_State* L, int narg, const void* def,
+ const char* type);
+
+/* lupb_wrapper ***************************************************************/
+
+/* Wrappers around upb def objects. The userval contains a reference to the
+ * defpool. */
+
+#define LUPB_SYMTAB_INDEX 1
+
+typedef struct {
+ const void* def; /* upb_MessageDef, upb_EnumDef, upb_OneofDef, etc. */
+} lupb_wrapper;
+
+static const void* lupb_wrapper_check(lua_State* L, int narg,
+ const char* type) {
+ lupb_wrapper* w = luaL_checkudata(L, narg, type);
+ return w->def;
+}
+
+static void lupb_wrapper_pushdefpool(lua_State* L, int narg) {
+ lua_getiuservalue(L, narg, LUPB_SYMTAB_INDEX);
+}
+
+/* lupb_wrapper_pushwrapper()
+ *
+ * For a given def wrapper at index |narg|, pushes a wrapper for the given |def|
+ * and the given |type|. The new wrapper will be part of the same defpool. */
+static void lupb_wrapper_pushwrapper(lua_State* L, int narg, const void* def,
+ const char* type) {
+ lupb_wrapper_pushdefpool(L, narg);
+ lupb_DefPool_pushwrapper(L, -1, def, type);
+ lua_replace(L, -2); /* Remove defpool from stack. */
+}
+
+/* lupb_MessageDef_pushsubmsgdef()
+ *
+ * Pops the msgdef wrapper at the top of the stack and replaces it with a msgdef
+ * wrapper for field |f| of this msgdef (submsg may not be direct, for example
+ * it may be the submessage of the map value).
+ */
+void lupb_MessageDef_pushsubmsgdef(lua_State* L, const upb_FieldDef* f) {
+ const upb_MessageDef* m = upb_FieldDef_MessageSubDef(f);
+ assert(m);
+ lupb_wrapper_pushwrapper(L, -1, m, LUPB_MSGDEF);
+ lua_replace(L, -2); /* Replace msgdef with submsgdef. */
+}
+
+/* lupb_FieldDef **************************************************************/
+
+const upb_FieldDef* lupb_FieldDef_check(lua_State* L, int narg) {
+ return lupb_wrapper_check(L, narg, LUPB_FIELDDEF);
+}
+
+static int lupb_FieldDef_ContainingOneof(lua_State* L) {
+ const upb_FieldDef* f = lupb_FieldDef_check(L, 1);
+ const upb_OneofDef* o = upb_FieldDef_ContainingOneof(f);
+ lupb_wrapper_pushwrapper(L, 1, o, LUPB_ONEOFDEF);
+ return 1;
+}
+
+static int lupb_FieldDef_ContainingType(lua_State* L) {
+ const upb_FieldDef* f = lupb_FieldDef_check(L, 1);
+ const upb_MessageDef* m = upb_FieldDef_ContainingType(f);
+ lupb_wrapper_pushwrapper(L, 1, m, LUPB_MSGDEF);
+ return 1;
+}
+
+static int lupb_FieldDef_Default(lua_State* L) {
+ const upb_FieldDef* f = lupb_FieldDef_check(L, 1);
+ upb_CType type = upb_FieldDef_CType(f);
+ if (type == kUpb_CType_Message) {
+ return luaL_error(L, "Message fields do not have explicit defaults.");
+ }
+ lupb_pushmsgval(L, 0, type, upb_FieldDef_Default(f));
+ return 1;
+}
+
+static int lupb_FieldDef_Type(lua_State* L) {
+ const upb_FieldDef* f = lupb_FieldDef_check(L, 1);
+ lua_pushnumber(L, upb_FieldDef_Type(f));
+ return 1;
+}
+
+static int lupb_FieldDef_HasSubDef(lua_State* L) {
+ const upb_FieldDef* f = lupb_FieldDef_check(L, 1);
+ lua_pushboolean(L, upb_FieldDef_HasSubDef(f));
+ return 1;
+}
+
+static int lupb_FieldDef_Index(lua_State* L) {
+ const upb_FieldDef* f = lupb_FieldDef_check(L, 1);
+ lua_pushinteger(L, upb_FieldDef_Index(f));
+ return 1;
+}
+
+static int lupb_FieldDef_IsExtension(lua_State* L) {
+ const upb_FieldDef* f = lupb_FieldDef_check(L, 1);
+ lua_pushboolean(L, upb_FieldDef_IsExtension(f));
+ return 1;
+}
+
+static int lupb_FieldDef_Label(lua_State* L) {
+ const upb_FieldDef* f = lupb_FieldDef_check(L, 1);
+ lua_pushinteger(L, upb_FieldDef_Label(f));
+ return 1;
+}
+
+static int lupb_FieldDef_Name(lua_State* L) {
+ const upb_FieldDef* f = lupb_FieldDef_check(L, 1);
+ lua_pushstring(L, upb_FieldDef_Name(f));
+ return 1;
+}
+
+static int lupb_FieldDef_Number(lua_State* L) {
+ const upb_FieldDef* f = lupb_FieldDef_check(L, 1);
+ int32_t num = upb_FieldDef_Number(f);
+ if (num) {
+ lua_pushinteger(L, num);
+ } else {
+ lua_pushnil(L);
+ }
+ return 1;
+}
+
+static int lupb_FieldDef_IsPacked(lua_State* L) {
+ const upb_FieldDef* f = lupb_FieldDef_check(L, 1);
+ lua_pushboolean(L, upb_FieldDef_IsPacked(f));
+ return 1;
+}
+
+static int lupb_FieldDef_MessageSubDef(lua_State* L) {
+ const upb_FieldDef* f = lupb_FieldDef_check(L, 1);
+ const upb_MessageDef* m = upb_FieldDef_MessageSubDef(f);
+ lupb_wrapper_pushwrapper(L, 1, m, LUPB_MSGDEF);
+ return 1;
+}
+
+static int lupb_FieldDef_EnumSubDef(lua_State* L) {
+ const upb_FieldDef* f = lupb_FieldDef_check(L, 1);
+ const upb_EnumDef* e = upb_FieldDef_EnumSubDef(f);
+ lupb_wrapper_pushwrapper(L, 1, e, LUPB_ENUMDEF);
+ return 1;
+}
+
+static int lupb_FieldDef_CType(lua_State* L) {
+ const upb_FieldDef* f = lupb_FieldDef_check(L, 1);
+ lua_pushinteger(L, upb_FieldDef_CType(f));
+ return 1;
+}
+
+static const struct luaL_Reg lupb_FieldDef_m[] = {
+ {"containing_oneof", lupb_FieldDef_ContainingOneof},
+ {"containing_type", lupb_FieldDef_ContainingType},
+ {"default", lupb_FieldDef_Default},
+ {"descriptor_type", lupb_FieldDef_Type},
+ {"has_subdef", lupb_FieldDef_HasSubDef},
+ {"index", lupb_FieldDef_Index},
+ {"is_extension", lupb_FieldDef_IsExtension},
+ {"label", lupb_FieldDef_Label},
+ {"name", lupb_FieldDef_Name},
+ {"number", lupb_FieldDef_Number},
+ {"packed", lupb_FieldDef_IsPacked},
+ {"msgsubdef", lupb_FieldDef_MessageSubDef},
+ {"enumsubdef", lupb_FieldDef_EnumSubDef},
+ {"type", lupb_FieldDef_CType},
+ {NULL, NULL}};
+
+/* lupb_OneofDef **************************************************************/
+
+const upb_OneofDef* lupb_OneofDef_check(lua_State* L, int narg) {
+ return lupb_wrapper_check(L, narg, LUPB_ONEOFDEF);
+}
+
+static int lupb_OneofDef_ContainingType(lua_State* L) {
+ const upb_OneofDef* o = lupb_OneofDef_check(L, 1);
+ const upb_MessageDef* m = upb_OneofDef_ContainingType(o);
+ lupb_wrapper_pushwrapper(L, 1, m, LUPB_MSGDEF);
+ return 1;
+}
+
+static int lupb_OneofDef_Field(lua_State* L) {
+ const upb_OneofDef* o = lupb_OneofDef_check(L, 1);
+ int32_t idx = lupb_checkint32(L, 2);
+ int count = upb_OneofDef_FieldCount(o);
+
+ if (idx < 0 || idx >= count) {
+ const char* msg =
+ lua_pushfstring(L, "index %d exceeds field count %d", idx, count);
+ return luaL_argerror(L, 2, msg);
+ }
+
+ lupb_wrapper_pushwrapper(L, 1, upb_OneofDef_Field(o, idx), LUPB_FIELDDEF);
+ return 1;
+}
+
+static int lupb_oneofiter_next(lua_State* L) {
+ const upb_OneofDef* o = lupb_OneofDef_check(L, lua_upvalueindex(1));
+ int* index = lua_touserdata(L, lua_upvalueindex(2));
+ const upb_FieldDef* f;
+ if (*index == upb_OneofDef_FieldCount(o)) return 0;
+ f = upb_OneofDef_Field(o, (*index)++);
+ lupb_wrapper_pushwrapper(L, lua_upvalueindex(1), f, LUPB_FIELDDEF);
+ return 1;
+}
+
+static int lupb_OneofDef_Fields(lua_State* L) {
+ int* index = lua_newuserdata(L, sizeof(int));
+ lupb_OneofDef_check(L, 1);
+ *index = 0;
+
+ /* Closure upvalues are: oneofdef, index. */
+ lua_pushcclosure(L, &lupb_oneofiter_next, 2);
+ return 1;
+}
+
+static int lupb_OneofDef_len(lua_State* L) {
+ const upb_OneofDef* o = lupb_OneofDef_check(L, 1);
+ lua_pushinteger(L, upb_OneofDef_FieldCount(o));
+ return 1;
+}
+
+/* lupb_OneofDef_lookupfield()
+ *
+ * Handles:
+ * oneof.lookup_field(field_number)
+ * oneof.lookup_field(field_name)
+ */
+static int lupb_OneofDef_lookupfield(lua_State* L) {
+ const upb_OneofDef* o = lupb_OneofDef_check(L, 1);
+ const upb_FieldDef* f;
+
+ switch (lua_type(L, 2)) {
+ case LUA_TNUMBER:
+ f = upb_OneofDef_LookupNumber(o, lua_tointeger(L, 2));
+ break;
+ case LUA_TSTRING:
+ f = upb_OneofDef_LookupName(o, lua_tostring(L, 2));
+ break;
+ default: {
+ const char* msg = lua_pushfstring(L, "number or string expected, got %s",
+ luaL_typename(L, 2));
+ return luaL_argerror(L, 2, msg);
+ }
+ }
+
+ lupb_wrapper_pushwrapper(L, 1, f, LUPB_FIELDDEF);
+ return 1;
+}
+
+static int lupb_OneofDef_Name(lua_State* L) {
+ const upb_OneofDef* o = lupb_OneofDef_check(L, 1);
+ lua_pushstring(L, upb_OneofDef_Name(o));
+ return 1;
+}
+
+static const struct luaL_Reg lupb_OneofDef_m[] = {
+ {"containing_type", lupb_OneofDef_ContainingType},
+ {"field", lupb_OneofDef_Field},
+ {"fields", lupb_OneofDef_Fields},
+ {"lookup_field", lupb_OneofDef_lookupfield},
+ {"name", lupb_OneofDef_Name},
+ {NULL, NULL}};
+
+static const struct luaL_Reg lupb_OneofDef_mm[] = {{"__len", lupb_OneofDef_len},
+ {NULL, NULL}};
+
+/* lupb_MessageDef
+ * ****************************************************************/
+
+typedef struct {
+ const upb_MessageDef* md;
+} lupb_MessageDef;
+
+const upb_MessageDef* lupb_MessageDef_check(lua_State* L, int narg) {
+ return lupb_wrapper_check(L, narg, LUPB_MSGDEF);
+}
+
+static int lupb_MessageDef_FieldCount(lua_State* L) {
+ const upb_MessageDef* m = lupb_MessageDef_check(L, 1);
+ lua_pushinteger(L, upb_MessageDef_FieldCount(m));
+ return 1;
+}
+
+static int lupb_MessageDef_OneofCount(lua_State* L) {
+ const upb_MessageDef* m = lupb_MessageDef_check(L, 1);
+ lua_pushinteger(L, upb_MessageDef_OneofCount(m));
+ return 1;
+}
+
+static bool lupb_MessageDef_pushnested(lua_State* L, int msgdef, int name) {
+ const upb_MessageDef* m = lupb_MessageDef_check(L, msgdef);
+ lupb_wrapper_pushdefpool(L, msgdef);
+ upb_DefPool* defpool = lupb_DefPool_check(L, -1);
+ lua_pop(L, 1);
+
+ /* Construct full package.Message.SubMessage name. */
+ lua_pushstring(L, upb_MessageDef_FullName(m));
+ lua_pushstring(L, ".");
+ lua_pushvalue(L, name);
+ lua_concat(L, 3);
+ const char* nested_name = lua_tostring(L, -1);
+
+ /* Try lookup. */
+ const upb_MessageDef* nested =
+ upb_DefPool_FindMessageByName(defpool, nested_name);
+ if (!nested) return false;
+ lupb_wrapper_pushwrapper(L, msgdef, nested, LUPB_MSGDEF);
+ return true;
+}
+
+/* lupb_MessageDef_Field()
+ *
+ * Handles:
+ * msg.field(field_number) -> fielddef
+ * msg.field(field_name) -> fielddef
+ */
+static int lupb_MessageDef_Field(lua_State* L) {
+ const upb_MessageDef* m = lupb_MessageDef_check(L, 1);
+ const upb_FieldDef* f;
+
+ switch (lua_type(L, 2)) {
+ case LUA_TNUMBER:
+ f = upb_MessageDef_FindFieldByNumber(m, lua_tointeger(L, 2));
+ break;
+ case LUA_TSTRING:
+ f = upb_MessageDef_FindFieldByName(m, lua_tostring(L, 2));
+ break;
+ default: {
+ const char* msg = lua_pushfstring(L, "number or string expected, got %s",
+ luaL_typename(L, 2));
+ return luaL_argerror(L, 2, msg);
+ }
+ }
+
+ lupb_wrapper_pushwrapper(L, 1, f, LUPB_FIELDDEF);
+ return 1;
+}
+
+/* lupb_MessageDef_FindByNameWithSize()
+ *
+ * Handles:
+ * msg.lookup_name(name) -> fielddef or oneofdef
+ */
+static int lupb_MessageDef_FindByNameWithSize(lua_State* L) {
+ const upb_MessageDef* m = lupb_MessageDef_check(L, 1);
+ const upb_FieldDef* f;
+ const upb_OneofDef* o;
+
+ if (!upb_MessageDef_FindByName(m, lua_tostring(L, 2), &f, &o)) {
+ lua_pushnil(L);
+ } else if (o) {
+ lupb_wrapper_pushwrapper(L, 1, o, LUPB_ONEOFDEF);
+ } else {
+ lupb_wrapper_pushwrapper(L, 1, f, LUPB_FIELDDEF);
+ }
+
+ return 1;
+}
+
+/* lupb_MessageDef_Name()
+ *
+ * Handles:
+ * msg.name() -> string
+ */
+static int lupb_MessageDef_Name(lua_State* L) {
+ const upb_MessageDef* m = lupb_MessageDef_check(L, 1);
+ lua_pushstring(L, upb_MessageDef_Name(m));
+ return 1;
+}
+
+static int lupb_msgfielditer_next(lua_State* L) {
+ const upb_MessageDef* m = lupb_MessageDef_check(L, lua_upvalueindex(1));
+ int* index = lua_touserdata(L, lua_upvalueindex(2));
+ const upb_FieldDef* f;
+ if (*index == upb_MessageDef_FieldCount(m)) return 0;
+ f = upb_MessageDef_Field(m, (*index)++);
+ lupb_wrapper_pushwrapper(L, lua_upvalueindex(1), f, LUPB_FIELDDEF);
+ return 1;
+}
+
+static int lupb_MessageDef_Fields(lua_State* L) {
+ int* index = lua_newuserdata(L, sizeof(int));
+ lupb_MessageDef_check(L, 1);
+ *index = 0;
+
+ /* Closure upvalues are: msgdef, index. */
+ lua_pushcclosure(L, &lupb_msgfielditer_next, 2);
+ return 1;
+}
+
+static int lupb_MessageDef_File(lua_State* L) {
+ const upb_MessageDef* m = lupb_MessageDef_check(L, 1);
+ const upb_FileDef* file = upb_MessageDef_File(m);
+ lupb_wrapper_pushwrapper(L, 1, file, LUPB_FILEDEF);
+ return 1;
+}
+
+static int lupb_MessageDef_FullName(lua_State* L) {
+ const upb_MessageDef* m = lupb_MessageDef_check(L, 1);
+ lua_pushstring(L, upb_MessageDef_FullName(m));
+ return 1;
+}
+
+static int lupb_MessageDef_index(lua_State* L) {
+ if (!lupb_MessageDef_pushnested(L, 1, 2)) {
+ luaL_error(L, "No such nested message");
+ }
+ return 1;
+}
+
+static int lupb_msgoneofiter_next(lua_State* L) {
+ const upb_MessageDef* m = lupb_MessageDef_check(L, lua_upvalueindex(1));
+ int* index = lua_touserdata(L, lua_upvalueindex(2));
+ const upb_OneofDef* o;
+ if (*index == upb_MessageDef_OneofCount(m)) return 0;
+ o = upb_MessageDef_Oneof(m, (*index)++);
+ lupb_wrapper_pushwrapper(L, lua_upvalueindex(1), o, LUPB_ONEOFDEF);
+ return 1;
+}
+
+static int lupb_MessageDef_Oneofs(lua_State* L) {
+ int* index = lua_newuserdata(L, sizeof(int));
+ lupb_MessageDef_check(L, 1);
+ *index = 0;
+
+ /* Closure upvalues are: msgdef, index. */
+ lua_pushcclosure(L, &lupb_msgoneofiter_next, 2);
+ return 1;
+}
+
+static int lupb_MessageDef_IsMapEntry(lua_State* L) {
+ const upb_MessageDef* m = lupb_MessageDef_check(L, 1);
+ lua_pushboolean(L, upb_MessageDef_IsMapEntry(m));
+ return 1;
+}
+
+static int lupb_MessageDef_Syntax(lua_State* L) {
+ const upb_MessageDef* m = lupb_MessageDef_check(L, 1);
+ lua_pushinteger(L, upb_MessageDef_Syntax(m));
+ return 1;
+}
+
+static int lupb_MessageDef_tostring(lua_State* L) {
+ const upb_MessageDef* m = lupb_MessageDef_check(L, 1);
+ lua_pushfstring(L, "<upb.MessageDef name=%s, field_count=%d>",
+ upb_MessageDef_FullName(m),
+ (int)upb_MessageDef_FieldCount(m));
+ return 1;
+}
+
+static const struct luaL_Reg lupb_MessageDef_mm[] = {
+ {"__call", lupb_MessageDef_call},
+ {"__index", lupb_MessageDef_index},
+ {"__len", lupb_MessageDef_FieldCount},
+ {"__tostring", lupb_MessageDef_tostring},
+ {NULL, NULL}};
+
+static const struct luaL_Reg lupb_MessageDef_m[] = {
+ {"field", lupb_MessageDef_Field},
+ {"fields", lupb_MessageDef_Fields},
+ {"field_count", lupb_MessageDef_FieldCount},
+ {"file", lupb_MessageDef_File},
+ {"full_name", lupb_MessageDef_FullName},
+ {"lookup_name", lupb_MessageDef_FindByNameWithSize},
+ {"name", lupb_MessageDef_Name},
+ {"oneof_count", lupb_MessageDef_OneofCount},
+ {"oneofs", lupb_MessageDef_Oneofs},
+ {"syntax", lupb_MessageDef_Syntax},
+ {"_map_entry", lupb_MessageDef_IsMapEntry},
+ {NULL, NULL}};
+
+/* lupb_EnumDef ***************************************************************/
+
+const upb_EnumDef* lupb_EnumDef_check(lua_State* L, int narg) {
+ return lupb_wrapper_check(L, narg, LUPB_ENUMDEF);
+}
+
+static int lupb_EnumDef_len(lua_State* L) {
+ const upb_EnumDef* e = lupb_EnumDef_check(L, 1);
+ lua_pushinteger(L, upb_EnumDef_ValueCount(e));
+ return 1;
+}
+
+static int lupb_EnumDef_File(lua_State* L) {
+ const upb_EnumDef* e = lupb_EnumDef_check(L, 1);
+ const upb_FileDef* file = upb_EnumDef_File(e);
+ lupb_wrapper_pushwrapper(L, 1, file, LUPB_FILEDEF);
+ return 1;
+}
+
+/* lupb_EnumDef_Value()
+ *
+ * Handles:
+ * enum.value(number) -> enumval
+ * enum.value(name) -> enumval
+ */
+static int lupb_EnumDef_Value(lua_State* L) {
+ const upb_EnumDef* e = lupb_EnumDef_check(L, 1);
+ const upb_EnumValueDef* ev;
+
+ switch (lua_type(L, 2)) {
+ case LUA_TNUMBER:
+ ev = upb_EnumDef_FindValueByNumber(e, lupb_checkint32(L, 2));
+ break;
+ case LUA_TSTRING:
+ ev = upb_EnumDef_FindValueByName(e, lua_tostring(L, 2));
+ break;
+ default: {
+ const char* msg = lua_pushfstring(L, "number or string expected, got %s",
+ luaL_typename(L, 2));
+ return luaL_argerror(L, 2, msg);
+ }
+ }
+
+ lupb_wrapper_pushwrapper(L, 1, ev, LUPB_ENUMVALDEF);
+ return 1;
+}
+
+static const struct luaL_Reg lupb_EnumDef_mm[] = {{"__len", lupb_EnumDef_len},
+ {NULL, NULL}};
+
+static const struct luaL_Reg lupb_EnumDef_m[] = {
+ {"file", lupb_EnumDef_File}, {"value", lupb_EnumDef_Value}, {NULL, NULL}};
+
+/* lupb_EnumValueDef
+ * ************************************************************/
+
+const upb_EnumValueDef* lupb_enumvaldef_check(lua_State* L, int narg) {
+ return lupb_wrapper_check(L, narg, LUPB_ENUMVALDEF);
+}
+
+static int lupb_EnumValueDef_Enum(lua_State* L) {
+ const upb_EnumValueDef* ev = lupb_enumvaldef_check(L, 1);
+ const upb_EnumDef* e = upb_EnumValueDef_Enum(ev);
+ lupb_wrapper_pushwrapper(L, 1, e, LUPB_ENUMDEF);
+ return 1;
+}
+
+static int lupb_EnumValueDef_FullName(lua_State* L) {
+ const upb_EnumValueDef* ev = lupb_enumvaldef_check(L, 1);
+ lua_pushstring(L, upb_EnumValueDef_FullName(ev));
+ return 1;
+}
+
+static int lupb_EnumValueDef_Name(lua_State* L) {
+ const upb_EnumValueDef* ev = lupb_enumvaldef_check(L, 1);
+ lua_pushstring(L, upb_EnumValueDef_Name(ev));
+ return 1;
+}
+
+static int lupb_EnumValueDef_Number(lua_State* L) {
+ const upb_EnumValueDef* ev = lupb_enumvaldef_check(L, 1);
+ lupb_pushint32(L, upb_EnumValueDef_Number(ev));
+ return 1;
+}
+
+static const struct luaL_Reg lupb_enumvaldef_m[] = {
+ {"enum", lupb_EnumValueDef_Enum},
+ {"full_name", lupb_EnumValueDef_FullName},
+ {"name", lupb_EnumValueDef_Name},
+ {"number", lupb_EnumValueDef_Number},
+ {NULL, NULL}};
+
+/* lupb_FileDef ***************************************************************/
+
+const upb_FileDef* lupb_FileDef_check(lua_State* L, int narg) {
+ return lupb_wrapper_check(L, narg, LUPB_FILEDEF);
+}
+
+static int lupb_FileDef_Dependency(lua_State* L) {
+ const upb_FileDef* f = lupb_FileDef_check(L, 1);
+ int index = luaL_checkint(L, 2);
+ const upb_FileDef* dep = upb_FileDef_Dependency(f, index);
+ lupb_wrapper_pushwrapper(L, 1, dep, LUPB_FILEDEF);
+ return 1;
+}
+
+static int lupb_FileDef_DependencyCount(lua_State* L) {
+ const upb_FileDef* f = lupb_FileDef_check(L, 1);
+ lua_pushnumber(L, upb_FileDef_DependencyCount(f));
+ return 1;
+}
+
+static int lupb_FileDef_enum(lua_State* L) {
+ const upb_FileDef* f = lupb_FileDef_check(L, 1);
+ int index = luaL_checkint(L, 2);
+ const upb_EnumDef* e = upb_FileDef_TopLevelEnum(f, index);
+ lupb_wrapper_pushwrapper(L, 1, e, LUPB_ENUMDEF);
+ return 1;
+}
+
+static int lupb_FileDef_enumcount(lua_State* L) {
+ const upb_FileDef* f = lupb_FileDef_check(L, 1);
+ lua_pushnumber(L, upb_FileDef_TopLevelEnumCount(f));
+ return 1;
+}
+
+static int lupb_FileDef_msg(lua_State* L) {
+ const upb_FileDef* f = lupb_FileDef_check(L, 1);
+ int index = luaL_checkint(L, 2);
+ const upb_MessageDef* m = upb_FileDef_TopLevelMessage(f, index);
+ lupb_wrapper_pushwrapper(L, 1, m, LUPB_MSGDEF);
+ return 1;
+}
+
+static int lupb_FileDef_msgcount(lua_State* L) {
+ const upb_FileDef* f = lupb_FileDef_check(L, 1);
+ lua_pushnumber(L, upb_FileDef_TopLevelMessageCount(f));
+ return 1;
+}
+
+static int lupb_FileDef_Name(lua_State* L) {
+ const upb_FileDef* f = lupb_FileDef_check(L, 1);
+ lua_pushstring(L, upb_FileDef_Name(f));
+ return 1;
+}
+
+static int lupb_FileDef_Package(lua_State* L) {
+ const upb_FileDef* f = lupb_FileDef_check(L, 1);
+ lua_pushstring(L, upb_FileDef_Package(f));
+ return 1;
+}
+
+static int lupb_FileDef_Pool(lua_State* L) {
+ const upb_FileDef* f = lupb_FileDef_check(L, 1);
+ const upb_DefPool* defpool = upb_FileDef_Pool(f);
+ lupb_wrapper_pushwrapper(L, 1, defpool, LUPB_SYMTAB);
+ return 1;
+}
+
+static int lupb_FileDef_Syntax(lua_State* L) {
+ const upb_FileDef* f = lupb_FileDef_check(L, 1);
+ lua_pushnumber(L, upb_FileDef_Syntax(f));
+ return 1;
+}
+
+static const struct luaL_Reg lupb_FileDef_m[] = {
+ {"dep", lupb_FileDef_Dependency},
+ {"depcount", lupb_FileDef_DependencyCount},
+ {"enum", lupb_FileDef_enum},
+ {"enumcount", lupb_FileDef_enumcount},
+ {"msg", lupb_FileDef_msg},
+ {"msgcount", lupb_FileDef_msgcount},
+ {"name", lupb_FileDef_Name},
+ {"package", lupb_FileDef_Package},
+ {"defpool", lupb_FileDef_Pool},
+ {"syntax", lupb_FileDef_Syntax},
+ {NULL, NULL}};
+
+/* lupb_DefPool
+ * ****************************************************************/
+
+/* The defpool owns all defs. Thus GC-rooting the defpool ensures that all
+ * underlying defs stay alive.
+ *
+ * The defpool's userval is a cache of def* -> object. */
+
+#define LUPB_CACHE_INDEX 1
+
+typedef struct {
+ upb_DefPool* defpool;
+} lupb_DefPool;
+
+upb_DefPool* lupb_DefPool_check(lua_State* L, int narg) {
+ lupb_DefPool* ldefpool = luaL_checkudata(L, narg, LUPB_SYMTAB);
+ if (!ldefpool->defpool) {
+ luaL_error(L, "called into dead object");
+ }
+ return ldefpool->defpool;
+}
+
+void lupb_DefPool_pushwrapper(lua_State* L, int narg, const void* def,
+ const char* type) {
+ narg = lua_absindex(L, narg);
+ assert(luaL_testudata(L, narg, LUPB_SYMTAB));
+
+ if (def == NULL) {
+ lua_pushnil(L);
+ return;
+ }
+
+ lua_getiuservalue(L, narg, LUPB_CACHE_INDEX); /* Get cache. */
+
+ /* Index by "def" pointer. */
+ lua_rawgetp(L, -1, def);
+
+ /* Stack is now: cache, cached value. */
+ if (lua_isnil(L, -1)) {
+ /* Create new wrapper. */
+ lupb_wrapper* w = lupb_newuserdata(L, sizeof(*w), 1, type);
+ w->def = def;
+ lua_replace(L, -2); /* Replace nil */
+
+ /* Set defpool as userval. */
+ lua_pushvalue(L, narg);
+ lua_setiuservalue(L, -2, LUPB_SYMTAB_INDEX);
+
+ /* Add wrapper to the the cache. */
+ lua_pushvalue(L, -1);
+ lua_rawsetp(L, -3, def);
+ }
+
+ lua_replace(L, -2); /* Remove cache, leaving only the wrapper. */
+}
+
+/* upb_DefPool_New()
+ *
+ * Handles:
+ * upb.DefPool() -> <new instance>
+ */
+static int lupb_DefPool_New(lua_State* L) {
+ lupb_DefPool* ldefpool =
+ lupb_newuserdata(L, sizeof(*ldefpool), 1, LUPB_SYMTAB);
+ ldefpool->defpool = upb_DefPool_New();
+
+ /* Create our object cache. */
+ lua_newtable(L);
+
+ /* Cache metatable: specifies that values are weak. */
+ lua_createtable(L, 0, 1);
+ lua_pushstring(L, "v");
+ lua_setfield(L, -2, "__mode");
+ lua_setmetatable(L, -2);
+
+ /* Put the defpool itself in the cache metatable. */
+ lua_pushvalue(L, -2);
+ lua_rawsetp(L, -2, ldefpool->defpool);
+
+ /* Set the cache as our userval. */
+ lua_setiuservalue(L, -2, LUPB_CACHE_INDEX);
+
+ return 1;
+}
+
+static int lupb_DefPool_gc(lua_State* L) {
+ lupb_DefPool* ldefpool = luaL_checkudata(L, 1, LUPB_SYMTAB);
+ upb_DefPool_Free(ldefpool->defpool);
+ ldefpool->defpool = NULL;
+ return 0;
+}
+
+static int lupb_DefPool_AddFile(lua_State* L) {
+ size_t len;
+ upb_DefPool* s = lupb_DefPool_check(L, 1);
+ const char* str = luaL_checklstring(L, 2, &len);
+ upb_Arena* arena = lupb_Arena_pushnew(L);
+ const google_protobuf_FileDescriptorProto* file;
+ const upb_FileDef* file_def;
+ upb_Status status;
+
+ upb_Status_Clear(&status);
+ file = google_protobuf_FileDescriptorProto_parse(str, len, arena);
+
+ if (!file) {
+ luaL_argerror(L, 2, "failed to parse descriptor");
+ }
+
+ file_def = upb_DefPool_AddFile(s, file, &status);
+ lupb_checkstatus(L, &status);
+
+ lupb_DefPool_pushwrapper(L, 1, file_def, LUPB_FILEDEF);
+
+ return 1;
+}
+
+static int lupb_DefPool_addset(lua_State* L) {
+ size_t i, n, len;
+ const google_protobuf_FileDescriptorProto* const* files;
+ google_protobuf_FileDescriptorSet* set;
+ upb_DefPool* s = lupb_DefPool_check(L, 1);
+ const char* str = luaL_checklstring(L, 2, &len);
+ upb_Arena* arena = lupb_Arena_pushnew(L);
+ upb_Status status;
+
+ upb_Status_Clear(&status);
+ set = google_protobuf_FileDescriptorSet_parse(str, len, arena);
+
+ if (!set) {
+ luaL_argerror(L, 2, "failed to parse descriptor");
+ }
+
+ files = google_protobuf_FileDescriptorSet_file(set, &n);
+ for (i = 0; i < n; i++) {
+ upb_DefPool_AddFile(s, files[i], &status);
+ lupb_checkstatus(L, &status);
+ }
+
+ return 0;
+}
+
+static int lupb_DefPool_FindMessageByName(lua_State* L) {
+ const upb_DefPool* s = lupb_DefPool_check(L, 1);
+ const upb_MessageDef* m =
+ upb_DefPool_FindMessageByName(s, luaL_checkstring(L, 2));
+ lupb_DefPool_pushwrapper(L, 1, m, LUPB_MSGDEF);
+ return 1;
+}
+
+static int lupb_DefPool_FindEnumByName(lua_State* L) {
+ const upb_DefPool* s = lupb_DefPool_check(L, 1);
+ const upb_EnumDef* e = upb_DefPool_FindEnumByName(s, luaL_checkstring(L, 2));
+ lupb_DefPool_pushwrapper(L, 1, e, LUPB_ENUMDEF);
+ return 1;
+}
+
+static int lupb_DefPool_FindEnumByNameval(lua_State* L) {
+ const upb_DefPool* s = lupb_DefPool_check(L, 1);
+ const upb_EnumValueDef* e =
+ upb_DefPool_FindEnumByNameval(s, luaL_checkstring(L, 2));
+ lupb_DefPool_pushwrapper(L, 1, e, LUPB_ENUMVALDEF);
+ return 1;
+}
+
+static int lupb_DefPool_tostring(lua_State* L) {
+ lua_pushfstring(L, "<upb.DefPool>");
+ return 1;
+}
+
+static const struct luaL_Reg lupb_DefPool_m[] = {
+ {"add_file", lupb_DefPool_AddFile},
+ {"add_set", lupb_DefPool_addset},
+ {"lookup_msg", lupb_DefPool_FindMessageByName},
+ {"lookup_enum", lupb_DefPool_FindEnumByName},
+ {"lookup_enumval", lupb_DefPool_FindEnumByNameval},
+ {NULL, NULL}};
+
+static const struct luaL_Reg lupb_DefPool_mm[] = {
+ {"__gc", lupb_DefPool_gc},
+ {"__tostring", lupb_DefPool_tostring},
+ {NULL, NULL}};
+
+/* lupb toplevel **************************************************************/
+
+static void lupb_setfieldi(lua_State* L, const char* field, int i) {
+ lua_pushinteger(L, i);
+ lua_setfield(L, -2, field);
+}
+
+static const struct luaL_Reg lupbdef_toplevel_m[] = {
+ {"DefPool", lupb_DefPool_New}, {NULL, NULL}};
+
+void lupb_def_registertypes(lua_State* L) {
+ lupb_setfuncs(L, lupbdef_toplevel_m);
+
+ /* Register types. */
+ lupb_register_type(L, LUPB_ENUMDEF, lupb_EnumDef_m, lupb_EnumDef_mm);
+ lupb_register_type(L, LUPB_ENUMVALDEF, lupb_enumvaldef_m, NULL);
+ lupb_register_type(L, LUPB_FIELDDEF, lupb_FieldDef_m, NULL);
+ lupb_register_type(L, LUPB_FILEDEF, lupb_FileDef_m, NULL);
+ lupb_register_type(L, LUPB_MSGDEF, lupb_MessageDef_m, lupb_MessageDef_mm);
+ lupb_register_type(L, LUPB_ONEOFDEF, lupb_OneofDef_m, lupb_OneofDef_mm);
+ lupb_register_type(L, LUPB_SYMTAB, lupb_DefPool_m, lupb_DefPool_mm);
+
+ /* Register constants. */
+ lupb_setfieldi(L, "LABEL_OPTIONAL", kUpb_Label_Optional);
+ lupb_setfieldi(L, "LABEL_REQUIRED", kUpb_Label_Required);
+ lupb_setfieldi(L, "LABEL_REPEATED", kUpb_Label_Repeated);
+
+ lupb_setfieldi(L, "TYPE_DOUBLE", kUpb_CType_Double);
+ lupb_setfieldi(L, "TYPE_FLOAT", kUpb_CType_Float);
+ lupb_setfieldi(L, "TYPE_INT64", kUpb_CType_Int64);
+ lupb_setfieldi(L, "TYPE_UINT64", kUpb_CType_UInt64);
+ lupb_setfieldi(L, "TYPE_INT32", kUpb_CType_Int32);
+ lupb_setfieldi(L, "TYPE_BOOL", kUpb_CType_Bool);
+ lupb_setfieldi(L, "TYPE_STRING", kUpb_CType_String);
+ lupb_setfieldi(L, "TYPE_MESSAGE", kUpb_CType_Message);
+ lupb_setfieldi(L, "TYPE_BYTES", kUpb_CType_Bytes);
+ lupb_setfieldi(L, "TYPE_UINT32", kUpb_CType_UInt32);
+ lupb_setfieldi(L, "TYPE_ENUM", kUpb_CType_Enum);
+
+ lupb_setfieldi(L, "DESCRIPTOR_TYPE_DOUBLE", kUpb_FieldType_Double);
+ lupb_setfieldi(L, "DESCRIPTOR_TYPE_FLOAT", kUpb_FieldType_Float);
+ lupb_setfieldi(L, "DESCRIPTOR_TYPE_INT64", kUpb_FieldType_Int64);
+ lupb_setfieldi(L, "DESCRIPTOR_TYPE_UINT64", kUpb_FieldType_UInt64);
+ lupb_setfieldi(L, "DESCRIPTOR_TYPE_INT32", kUpb_FieldType_Int32);
+ lupb_setfieldi(L, "DESCRIPTOR_TYPE_FIXED64", kUpb_FieldType_Fixed64);
+ lupb_setfieldi(L, "DESCRIPTOR_TYPE_FIXED32", kUpb_FieldType_Fixed32);
+ lupb_setfieldi(L, "DESCRIPTOR_TYPE_BOOL", kUpb_FieldType_Bool);
+ lupb_setfieldi(L, "DESCRIPTOR_TYPE_STRING", kUpb_FieldType_String);
+ lupb_setfieldi(L, "DESCRIPTOR_TYPE_GROUP", kUpb_FieldType_Group);
+ lupb_setfieldi(L, "DESCRIPTOR_TYPE_MESSAGE", kUpb_FieldType_Message);
+ lupb_setfieldi(L, "DESCRIPTOR_TYPE_BYTES", kUpb_FieldType_Bytes);
+ lupb_setfieldi(L, "DESCRIPTOR_TYPE_UINT32", kUpb_FieldType_UInt32);
+ lupb_setfieldi(L, "DESCRIPTOR_TYPE_ENUM", kUpb_FieldType_Enum);
+ lupb_setfieldi(L, "DESCRIPTOR_TYPE_SFIXED32", kUpb_FieldType_SFixed32);
+ lupb_setfieldi(L, "DESCRIPTOR_TYPE_SFIXED64", kUpb_FieldType_SFixed64);
+ lupb_setfieldi(L, "DESCRIPTOR_TYPE_SINT32", kUpb_FieldType_SInt32);
+ lupb_setfieldi(L, "DESCRIPTOR_TYPE_SINT64", kUpb_FieldType_SInt64);
+
+ lupb_setfieldi(L, "SYNTAX_PROTO2", kUpb_Syntax_Proto2);
+ lupb_setfieldi(L, "SYNTAX_PROTO3", kUpb_Syntax_Proto3);
+}
diff --git a/upb/lua/lua_proto_library.bzl b/upb/lua/lua_proto_library.bzl
new file mode 100644
index 0000000..cc1486a
--- /dev/null
+++ b/upb/lua/lua_proto_library.bzl
@@ -0,0 +1,154 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""lua_proto_library(): a rule for building Lua protos."""
+
+load("@bazel_skylib//lib:paths.bzl", "paths")
+
+# Generic support code #########################################################
+
+# begin:github_only
+_is_google3 = False
+# end:github_only
+
+# begin:google_only
+# _is_google3 = True
+# end:google_only
+
+def _get_real_short_path(file):
+ # For some reason, files from other archives have short paths that look like:
+ # ../com_google_protobuf/google/protobuf/descriptor.proto
+ short_path = file.short_path
+ if short_path.startswith("../"):
+ second_slash = short_path.index("/", 3)
+ short_path = short_path[second_slash + 1:]
+
+ # Sometimes it has another few prefixes like:
+ # _virtual_imports/any_proto/google/protobuf/any.proto
+ # benchmarks/_virtual_imports/100_msgs_proto/benchmarks/100_msgs.proto
+ # We want just google/protobuf/any.proto.
+ virtual_imports = "_virtual_imports/"
+ if virtual_imports in short_path:
+ short_path = short_path.split(virtual_imports)[1].split("/", 1)[1]
+ return short_path
+
+def _get_real_root(ctx, file):
+ real_short_path = _get_real_short_path(file)
+ root = file.path[:-len(real_short_path) - 1]
+ if not _is_google3 and ctx.rule.attr.strip_import_prefix:
+ root = paths.join(root, ctx.rule.attr.strip_import_prefix[1:])
+ return root
+
+def _generate_output_file(ctx, src, extension):
+ package = ctx.label.package
+ if not _is_google3 and ctx.rule.attr.strip_import_prefix and ctx.rule.attr.strip_import_prefix != "/":
+ package = package[len(ctx.rule.attr.strip_import_prefix):]
+ real_short_path = _get_real_short_path(src)
+ real_short_path = paths.relativize(real_short_path, package)
+ output_filename = paths.replace_extension(real_short_path, extension)
+ ret = ctx.actions.declare_file(output_filename)
+ return ret
+
+# upb_proto_library / upb_proto_reflection_library shared code #################
+
+_LuaFilesInfo = provider(
+ "A set of lua files generated from .proto files",
+ fields = ["files"],
+)
+
+def _compile_upb_protos(ctx, proto_info, proto_sources):
+ files = [_generate_output_file(ctx, name, "_pb.lua") for name in proto_sources]
+ transitive_sets = proto_info.transitive_descriptor_sets.to_list()
+ ctx.actions.run(
+ inputs = depset(
+ direct = [proto_info.direct_descriptor_set],
+ transitive = [proto_info.transitive_descriptor_sets],
+ ),
+ tools = [ctx.executable._upbc],
+ outputs = files,
+ executable = ctx.executable._protoc,
+ arguments = [
+ "--lua_out=" + _get_real_root(ctx, files[0]),
+ "--plugin=protoc-gen-lua=" + ctx.executable._upbc.path,
+ "--descriptor_set_in=" + ctx.configuration.host_path_separator.join([f.path for f in transitive_sets]),
+ ] +
+ [_get_real_short_path(file) for file in proto_sources],
+ progress_message = "Generating Lua protos for :" + ctx.label.name,
+ )
+ return files
+
+def _lua_proto_rule_impl(ctx):
+ if len(ctx.attr.deps) != 1:
+ fail("only one deps dependency allowed.")
+ dep = ctx.attr.deps[0]
+ if _LuaFilesInfo not in dep:
+ fail("proto_library rule must generate _LuaFilesInfo (aspect should have handled this).")
+ files = dep[_LuaFilesInfo].files
+ return [
+ DefaultInfo(
+ files = files,
+ data_runfiles = ctx.runfiles(files = files.to_list()),
+ ),
+ ]
+
+def _lua_proto_library_aspect_impl(target, ctx):
+ proto_info = target[ProtoInfo]
+ files = _compile_upb_protos(ctx, proto_info, proto_info.direct_sources)
+ deps = ctx.rule.attr.deps
+ transitive = [dep[_LuaFilesInfo].files for dep in deps if _LuaFilesInfo in dep]
+ return [_LuaFilesInfo(files = depset(direct = files, transitive = transitive))]
+
+# lua_proto_library() ##########################################################
+
+_lua_proto_library_aspect = aspect(
+ attrs = {
+ "_upbc": attr.label(
+ executable = True,
+ cfg = "exec",
+ default = "//lua:protoc-gen-lua",
+ ),
+ "_protoc": attr.label(
+ executable = True,
+ cfg = "exec",
+ default = "@com_google_protobuf//:protoc",
+ ),
+ },
+ implementation = _lua_proto_library_aspect_impl,
+ provides = [_LuaFilesInfo],
+ attr_aspects = ["deps"],
+ fragments = ["cpp"],
+)
+
+lua_proto_library = rule(
+ output_to_genfiles = True,
+ implementation = _lua_proto_rule_impl,
+ attrs = {
+ "deps": attr.label_list(
+ aspects = [_lua_proto_library_aspect],
+ allow_rules = ["proto_library"],
+ providers = [ProtoInfo],
+ ),
+ },
+)
diff --git a/upb/lua/main.c b/upb/lua/main.c
new file mode 100644
index 0000000..6d2b56d
--- /dev/null
+++ b/upb/lua/main.c
@@ -0,0 +1,96 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <lauxlib.h>
+#include <lua.h>
+#include <lualib.h>
+#include <signal.h>
+
+#include "lua/upb.h"
+
+lua_State* L;
+
+static void interrupt(lua_State* L, lua_Debug* ar) {
+ (void)ar;
+ lua_sethook(L, NULL, 0, 0);
+ luaL_error(L, "SIGINT");
+}
+
+static void sighandler(int i) {
+ fprintf(stderr, "Signal!\n");
+ signal(i, SIG_DFL);
+ lua_sethook(L, interrupt, LUA_MASKCALL | LUA_MASKRET | LUA_MASKCOUNT, 1);
+}
+
+const char* init =
+ "package.preload['lupb'] = ... "
+ "package.path = '"
+ "./?.lua;"
+ "./third_party/lunit/?.lua;"
+ "external/com_google_protobuf/?.lua;"
+ "external/com_google_protobuf/src/?.lua;"
+ "bazel-bin/?.lua;"
+ "bazel-bin/external/com_google_protobuf/src/?.lua;"
+ "bazel-bin/external/com_google_protobuf/?.lua;"
+ "lua/?.lua;"
+ // These additional paths handle the case where this test is invoked from
+ // the protobuf repo's Bazel workspace.
+ "external/upb/?.lua;"
+ "external/upb/third_party/lunit/?.lua;"
+ "src/?.lua;"
+ "bazel-bin/external/upb/?.lua;"
+ "external/upb/lua/?.lua"
+ "'";
+
+int main(int argc, char** argv) {
+ if (argc < 2) {
+ fprintf(stderr, "missing argument with path to .lua file\n");
+ return 1;
+ }
+
+ int ret = 0;
+ L = luaL_newstate();
+ luaL_openlibs(L);
+ lua_pushcfunction(L, luaopen_lupb);
+ ret = luaL_loadstring(L, init);
+ lua_pushcfunction(L, luaopen_lupb);
+
+ signal(SIGINT, sighandler);
+ ret = ret || lua_pcall(L, 1, LUA_MULTRET, 0) || luaL_dofile(L, argv[1]);
+ signal(SIGINT, SIG_DFL);
+
+ if (ret) {
+ fprintf(stderr, "error testing Lua: %s\n", lua_tostring(L, -1));
+ ret = 1;
+ }
+
+ lua_close(L);
+ return ret;
+}
diff --git a/upb/lua/msg.c b/upb/lua/msg.c
new file mode 100644
index 0000000..bdec7a2
--- /dev/null
+++ b/upb/lua/msg.c
@@ -0,0 +1,1118 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+/*
+ * lupb_Message -- Message/Array/Map objects in Lua/C that wrap upb
+ */
+
+#include <float.h>
+#include <math.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "lauxlib.h"
+#include "lua/upb.h"
+#include "upb/collections/map.h"
+#include "upb/json/decode.h"
+#include "upb/json/encode.h"
+#include "upb/message/message.h"
+#include "upb/port/def.inc"
+#include "upb/reflection/message.h"
+#include "upb/text/encode.h"
+
+/*
+ * Message/Map/Array objects. These objects form a directed graph: a message
+ * can contain submessages, arrays, and maps, which can then point to other
+ * messages. This graph can technically be cyclic, though this is an error and
+ * a cyclic graph cannot be serialized. So it's better to think of this as a
+ * tree of objects.
+ *
+ * The actual data exists at the upb level (upb_Message, upb_Map, upb_Array),
+ * independently of Lua. The upb objects contain all the canonical data and
+ * edges between objects. Lua wrapper objects expose the upb objects to Lua,
+ * but ultimately they are just wrappers. They pass through all reads and
+ * writes to the underlying upb objects.
+ *
+ * Each upb object lives in a upb arena. We have a Lua object to wrap the upb
+ * arena, but arenas are never exposed to the user. The Lua arena object just
+ * serves to own the upb arena and free it at the proper time, once the Lua GC
+ * has determined that there are no more references to anything that lives in
+ * that arena. All wrapper objects strongly reference the arena to which they
+ * belong.
+ *
+ * A global object cache stores a mapping of C pointer (upb_Message*,
+ * upb_Array*, upb_Map*) to a corresponding Lua wrapper. These references are
+ * weak so that the wrappers can be collected if they are no longer needed. A
+ * new wrapper object can always be recreated later.
+ *
+ * +-----+
+ * lupb_Arena |cache|-weak-+
+ * | ^ +-----+ |
+ * | | V
+ * Lua level | +------------lupb_Message
+ * ----------------|-----------------|------------------------------------------
+ * upb level | |
+ * | +----V----------------------------------+
+ * +->upb_Arena | upb_Message ...(empty arena storage) |
+ * +---------------------------------------+
+ *
+ * If the user creates a reference between two objects that have different
+ * arenas, we need to fuse the two arenas together, so that the blocks will
+ * outlive both arenas.
+ *
+ * +-------------------------->(fused)<----------------+
+ * | |
+ * V +-----+ V
+ * lupb_Arena +-weak-|cache|-weak-+ lupb_Arena
+ * | ^ | +-----+ | ^ |
+ * | | V V | |
+ * Lua level | +------------lupb_Message lupb_Message--+ |
+ * ----------------|-----------------|----------------------|-----------|------
+ * upb level | | | |
+ * | +----V--------+ +----V--------+ V
+ * +->upb_Arena | upb_Message | | upb_Message | upb_Arena
+ * +------|------+ +--^----------+
+ * +------------------+
+ * Key invariants:
+ * 1. every wrapper references the arena that contains it.
+ * 2. every fused arena includes all arenas that own upb objects reachable
+ * from that arena. In other words, when a wrapper references an arena,
+ * this is sufficient to ensure that any upb object reachable from that
+ * wrapper will stay alive.
+ *
+ * Additionally, every message object contains a strong reference to the
+ * corresponding Descriptor object. Likewise, array/map objects reference a
+ * Descriptor object if they are typed to store message values.
+ */
+
+#define LUPB_ARENA "lupb.arena"
+#define LUPB_ARRAY "lupb.array"
+#define LUPB_MAP "lupb.map"
+#define LUPB_MSG "lupb.msg"
+
+#define LUPB_ARENA_INDEX 1
+#define LUPB_MSGDEF_INDEX 2 /* For msg, and map/array that store msg */
+
+static void lupb_Message_Newmsgwrapper(lua_State* L, int narg,
+ upb_MessageValue val);
+static upb_Message* lupb_msg_check(lua_State* L, int narg);
+
+static upb_CType lupb_checkfieldtype(lua_State* L, int narg) {
+ uint32_t n = lupb_checkuint32(L, narg);
+ bool ok = n >= kUpb_CType_Bool && n <= kUpb_CType_Bytes;
+ luaL_argcheck(L, ok, narg, "invalid field type");
+ return n;
+}
+
+char cache_key;
+
+/* lupb_cacheinit()
+ *
+ * Creates the global cache used by lupb_cacheget() and lupb_cacheset().
+ */
+static void lupb_cacheinit(lua_State* L) {
+ /* Create our object cache. */
+ lua_newtable(L);
+
+ /* Cache metatable gives the cache weak values */
+ lua_createtable(L, 0, 1);
+ lua_pushstring(L, "v");
+ lua_setfield(L, -2, "__mode");
+ lua_setmetatable(L, -2);
+
+ /* Set cache in the registry. */
+ lua_rawsetp(L, LUA_REGISTRYINDEX, &cache_key);
+}
+
+/* lupb_cacheget()
+ *
+ * Pushes cache[key] and returns true if this key is present in the cache.
+ * Otherwise returns false and leaves nothing on the stack.
+ */
+static bool lupb_cacheget(lua_State* L, const void* key) {
+ if (key == NULL) {
+ lua_pushnil(L);
+ return true;
+ }
+
+ lua_rawgetp(L, LUA_REGISTRYINDEX, &cache_key);
+ lua_rawgetp(L, -1, key);
+ if (lua_isnil(L, -1)) {
+ lua_pop(L, 2); /* Pop table, nil. */
+ return false;
+ } else {
+ lua_replace(L, -2); /* Replace cache table. */
+ return true;
+ }
+}
+
+/* lupb_cacheset()
+ *
+ * Sets cache[key] = val, where "val" is the value at the top of the stack.
+ * Does not pop the value.
+ */
+static void lupb_cacheset(lua_State* L, const void* key) {
+ lua_rawgetp(L, LUA_REGISTRYINDEX, &cache_key);
+ lua_pushvalue(L, -2);
+ lua_rawsetp(L, -2, key);
+ lua_pop(L, 1); /* Pop table. */
+}
+
+/* lupb_Arena *****************************************************************/
+
+/* lupb_Arena only exists to wrap a upb_Arena. It is never exposed to users; it
+ * is an internal memory management detail. Other wrapper objects refer to this
+ * object from their userdata to keep the arena-owned data alive.
+ */
+
+typedef struct {
+ upb_Arena* arena;
+} lupb_Arena;
+
+static upb_Arena* lupb_Arena_check(lua_State* L, int narg) {
+ lupb_Arena* a = luaL_checkudata(L, narg, LUPB_ARENA);
+ return a->arena;
+}
+
+upb_Arena* lupb_Arena_pushnew(lua_State* L) {
+ lupb_Arena* a = lupb_newuserdata(L, sizeof(lupb_Arena), 1, LUPB_ARENA);
+ a->arena = upb_Arena_New();
+ return a->arena;
+}
+
+/**
+ * lupb_Arena_Fuse()
+ *
+ * Merges |from| into |to| so that there is a single arena group that contains
+ * both, and both arenas will point at this new table. */
+static void lupb_Arena_Fuse(lua_State* L, int to, int from) {
+ upb_Arena* to_arena = lupb_Arena_check(L, to);
+ upb_Arena* from_arena = lupb_Arena_check(L, from);
+ upb_Arena_Fuse(to_arena, from_arena);
+}
+
+static void lupb_Arena_Fuseobjs(lua_State* L, int to, int from) {
+ lua_getiuservalue(L, to, LUPB_ARENA_INDEX);
+ lua_getiuservalue(L, from, LUPB_ARENA_INDEX);
+ lupb_Arena_Fuse(L, lua_absindex(L, -2), lua_absindex(L, -1));
+ lua_pop(L, 2);
+}
+
+static int lupb_Arena_gc(lua_State* L) {
+ upb_Arena* a = lupb_Arena_check(L, 1);
+ upb_Arena_Free(a);
+ return 0;
+}
+
+static const struct luaL_Reg lupb_Arena_mm[] = {{"__gc", lupb_Arena_gc},
+ {NULL, NULL}};
+
+/* lupb_Arenaget()
+ *
+ * Returns the arena from the given message, array, or map object.
+ */
+static upb_Arena* lupb_Arenaget(lua_State* L, int narg) {
+ upb_Arena* arena;
+ lua_getiuservalue(L, narg, LUPB_ARENA_INDEX);
+ arena = lupb_Arena_check(L, -1);
+ lua_pop(L, 1);
+ return arena;
+}
+
+/* upb <-> Lua type conversion ************************************************/
+
+/* Whether string data should be copied into the containing arena. We can
+ * avoid a copy if the string data is only needed temporarily (like for a map
+ * lookup).
+ */
+typedef enum {
+ LUPB_COPY, /* Copy string data into the arena. */
+ LUPB_REF /* Reference the Lua copy of the string data. */
+} lupb_copy_t;
+
+/**
+ * lupb_tomsgval()
+ *
+ * Converts the given Lua value |narg| to a upb_MessageValue.
+ */
+static upb_MessageValue lupb_tomsgval(lua_State* L, upb_CType type, int narg,
+ int container, lupb_copy_t copy) {
+ upb_MessageValue ret;
+ switch (type) {
+ case kUpb_CType_Int32:
+ case kUpb_CType_Enum:
+ ret.int32_val = lupb_checkint32(L, narg);
+ break;
+ case kUpb_CType_Int64:
+ ret.int64_val = lupb_checkint64(L, narg);
+ break;
+ case kUpb_CType_UInt32:
+ ret.uint32_val = lupb_checkuint32(L, narg);
+ break;
+ case kUpb_CType_UInt64:
+ ret.uint64_val = lupb_checkuint64(L, narg);
+ break;
+ case kUpb_CType_Double:
+ ret.double_val = lupb_checkdouble(L, narg);
+ break;
+ case kUpb_CType_Float:
+ ret.float_val = lupb_checkfloat(L, narg);
+ break;
+ case kUpb_CType_Bool:
+ ret.bool_val = lupb_checkbool(L, narg);
+ break;
+ case kUpb_CType_String:
+ case kUpb_CType_Bytes: {
+ size_t len;
+ const char* ptr = lupb_checkstring(L, narg, &len);
+ switch (copy) {
+ case LUPB_COPY: {
+ upb_Arena* arena = lupb_Arenaget(L, container);
+ char* data = upb_Arena_Malloc(arena, len);
+ memcpy(data, ptr, len);
+ ret.str_val = upb_StringView_FromDataAndSize(data, len);
+ break;
+ }
+ case LUPB_REF:
+ ret.str_val = upb_StringView_FromDataAndSize(ptr, len);
+ break;
+ }
+ break;
+ }
+ case kUpb_CType_Message:
+ ret.msg_val = lupb_msg_check(L, narg);
+ /* Typecheck message. */
+ lua_getiuservalue(L, container, LUPB_MSGDEF_INDEX);
+ lua_getiuservalue(L, narg, LUPB_MSGDEF_INDEX);
+ luaL_argcheck(L, lua_rawequal(L, -1, -2), narg, "message type mismatch");
+ lua_pop(L, 2);
+ break;
+ }
+ return ret;
+}
+
+void lupb_pushmsgval(lua_State* L, int container, upb_CType type,
+ upb_MessageValue val) {
+ switch (type) {
+ case kUpb_CType_Int32:
+ case kUpb_CType_Enum:
+ lupb_pushint32(L, val.int32_val);
+ return;
+ case kUpb_CType_Int64:
+ lupb_pushint64(L, val.int64_val);
+ return;
+ case kUpb_CType_UInt32:
+ lupb_pushuint32(L, val.uint32_val);
+ return;
+ case kUpb_CType_UInt64:
+ lupb_pushuint64(L, val.uint64_val);
+ return;
+ case kUpb_CType_Double:
+ lua_pushnumber(L, val.double_val);
+ return;
+ case kUpb_CType_Float:
+ lua_pushnumber(L, val.float_val);
+ return;
+ case kUpb_CType_Bool:
+ lua_pushboolean(L, val.bool_val);
+ return;
+ case kUpb_CType_String:
+ case kUpb_CType_Bytes:
+ lua_pushlstring(L, val.str_val.data, val.str_val.size);
+ return;
+ case kUpb_CType_Message:
+ assert(container);
+ if (!lupb_cacheget(L, val.msg_val)) {
+ lupb_Message_Newmsgwrapper(L, container, val);
+ }
+ return;
+ }
+ LUPB_UNREACHABLE();
+}
+
+/* lupb_array *****************************************************************/
+
+typedef struct {
+ upb_Array* arr;
+ upb_CType type;
+} lupb_array;
+
+static lupb_array* lupb_array_check(lua_State* L, int narg) {
+ return luaL_checkudata(L, narg, LUPB_ARRAY);
+}
+
+/**
+ * lupb_array_checkindex()
+ *
+ * Checks the array index at Lua stack index |narg| to verify that it is an
+ * integer between 1 and |max|, inclusively. Also corrects it to be zero-based
+ * for C.
+ */
+static int lupb_array_checkindex(lua_State* L, int narg, uint32_t max) {
+ uint32_t n = lupb_checkuint32(L, narg);
+ luaL_argcheck(L, n != 0 && n <= max, narg, "invalid array index");
+ return n - 1; /* Lua uses 1-based indexing. */
+}
+
+/* lupb_array Public API */
+
+/* lupb_Array_New():
+ *
+ * Handles:
+ * Array(upb.TYPE_INT32)
+ * Array(message_type)
+ */
+static int lupb_Array_New(lua_State* L) {
+ int arg_count = lua_gettop(L);
+ lupb_array* larray;
+ upb_Arena* arena;
+
+ if (lua_type(L, 1) == LUA_TNUMBER) {
+ upb_CType type = lupb_checkfieldtype(L, 1);
+ larray = lupb_newuserdata(L, sizeof(*larray), 1, LUPB_ARRAY);
+ larray->type = type;
+ } else {
+ lupb_MessageDef_check(L, 1);
+ larray = lupb_newuserdata(L, sizeof(*larray), 2, LUPB_ARRAY);
+ larray->type = kUpb_CType_Message;
+ lua_pushvalue(L, 1);
+ lua_setiuservalue(L, -2, LUPB_MSGDEF_INDEX);
+ }
+
+ arena = lupb_Arena_pushnew(L);
+ lua_setiuservalue(L, -2, LUPB_ARENA_INDEX);
+
+ larray->arr = upb_Array_New(arena, larray->type);
+ lupb_cacheset(L, larray->arr);
+
+ if (arg_count > 1) {
+ /* Set initial fields from table. */
+ int msg = arg_count + 1;
+ lua_pushnil(L);
+ while (lua_next(L, 2) != 0) {
+ lua_pushvalue(L, -2); /* now stack is key, val, key */
+ lua_insert(L, -3); /* now stack is key, key, val */
+ lua_settable(L, msg);
+ }
+ }
+
+ return 1;
+}
+
+/* lupb_Array_Newindex():
+ *
+ * Handles:
+ * array[idx] = val
+ *
+ * idx can be within the array or one past the end to extend.
+ */
+static int lupb_Array_Newindex(lua_State* L) {
+ lupb_array* larray = lupb_array_check(L, 1);
+ size_t size = upb_Array_Size(larray->arr);
+ uint32_t n = lupb_array_checkindex(L, 2, size + 1);
+ upb_MessageValue msgval = lupb_tomsgval(L, larray->type, 3, 1, LUPB_COPY);
+
+ if (n == size) {
+ upb_Array_Append(larray->arr, msgval, lupb_Arenaget(L, 1));
+ } else {
+ upb_Array_Set(larray->arr, n, msgval);
+ }
+
+ if (larray->type == kUpb_CType_Message) {
+ lupb_Arena_Fuseobjs(L, 1, 3);
+ }
+
+ return 0; /* 1 for chained assignments? */
+}
+
+/* lupb_array_index():
+ *
+ * Handles:
+ * array[idx] -> val
+ *
+ * idx must be within the array.
+ */
+static int lupb_array_index(lua_State* L) {
+ lupb_array* larray = lupb_array_check(L, 1);
+ size_t size = upb_Array_Size(larray->arr);
+ uint32_t n = lupb_array_checkindex(L, 2, size);
+ upb_MessageValue val = upb_Array_Get(larray->arr, n);
+
+ lupb_pushmsgval(L, 1, larray->type, val);
+
+ return 1;
+}
+
+/* lupb_array_len():
+ *
+ * Handles:
+ * #array -> len
+ */
+static int lupb_array_len(lua_State* L) {
+ lupb_array* larray = lupb_array_check(L, 1);
+ lua_pushnumber(L, upb_Array_Size(larray->arr));
+ return 1;
+}
+
+static const struct luaL_Reg lupb_array_mm[] = {
+ {"__index", lupb_array_index},
+ {"__len", lupb_array_len},
+ {"__newindex", lupb_Array_Newindex},
+ {NULL, NULL}};
+
+/* lupb_map *******************************************************************/
+
+typedef struct {
+ upb_Map* map;
+ upb_CType key_type;
+ upb_CType value_type;
+} lupb_map;
+
+#define MAP_MSGDEF_INDEX 1
+
+static lupb_map* lupb_map_check(lua_State* L, int narg) {
+ return luaL_checkudata(L, narg, LUPB_MAP);
+}
+
+/* lupb_map Public API */
+
+/**
+ * lupb_Map_New
+ *
+ * Handles:
+ * new_map = upb.Map(key_type, value_type)
+ * new_map = upb.Map(key_type, value_msgdef)
+ */
+static int lupb_Map_New(lua_State* L) {
+ upb_Arena* arena;
+ lupb_map* lmap;
+
+ if (lua_type(L, 2) == LUA_TNUMBER) {
+ lmap = lupb_newuserdata(L, sizeof(*lmap), 1, LUPB_MAP);
+ lmap->value_type = lupb_checkfieldtype(L, 2);
+ } else {
+ lupb_MessageDef_check(L, 2);
+ lmap = lupb_newuserdata(L, sizeof(*lmap), 2, LUPB_MAP);
+ lmap->value_type = kUpb_CType_Message;
+ lua_pushvalue(L, 2);
+ lua_setiuservalue(L, -2, MAP_MSGDEF_INDEX);
+ }
+
+ arena = lupb_Arena_pushnew(L);
+ lua_setiuservalue(L, -2, LUPB_ARENA_INDEX);
+
+ lmap->key_type = lupb_checkfieldtype(L, 1);
+ lmap->map = upb_Map_New(arena, lmap->key_type, lmap->value_type);
+ lupb_cacheset(L, lmap->map);
+
+ return 1;
+}
+
+/**
+ * lupb_map_index
+ *
+ * Handles:
+ * map[key]
+ */
+static int lupb_map_index(lua_State* L) {
+ lupb_map* lmap = lupb_map_check(L, 1);
+ upb_MessageValue key = lupb_tomsgval(L, lmap->key_type, 2, 1, LUPB_REF);
+ upb_MessageValue val;
+
+ if (upb_Map_Get(lmap->map, key, &val)) {
+ lupb_pushmsgval(L, 1, lmap->value_type, val);
+ } else {
+ lua_pushnil(L);
+ }
+
+ return 1;
+}
+
+/**
+ * lupb_map_len
+ *
+ * Handles:
+ * map_len = #map
+ */
+static int lupb_map_len(lua_State* L) {
+ lupb_map* lmap = lupb_map_check(L, 1);
+ lua_pushnumber(L, upb_Map_Size(lmap->map));
+ return 1;
+}
+
+/**
+ * lupb_Map_Newindex
+ *
+ * Handles:
+ * map[key] = val
+ * map[key] = nil # to remove from map
+ */
+static int lupb_Map_Newindex(lua_State* L) {
+ lupb_map* lmap = lupb_map_check(L, 1);
+ upb_Map* map = lmap->map;
+ upb_MessageValue key = lupb_tomsgval(L, lmap->key_type, 2, 1, LUPB_REF);
+
+ if (lua_isnil(L, 3)) {
+ upb_Map_Delete(map, key, NULL);
+ } else {
+ upb_MessageValue val = lupb_tomsgval(L, lmap->value_type, 3, 1, LUPB_COPY);
+ upb_Map_Set(map, key, val, lupb_Arenaget(L, 1));
+ if (lmap->value_type == kUpb_CType_Message) {
+ lupb_Arena_Fuseobjs(L, 1, 3);
+ }
+ }
+
+ return 0;
+}
+
+static int lupb_MapIterator_Next(lua_State* L) {
+ int map = lua_upvalueindex(2);
+ size_t* iter = lua_touserdata(L, lua_upvalueindex(1));
+ lupb_map* lmap = lupb_map_check(L, map);
+
+ upb_MessageValue key, val;
+ if (upb_Map_Next(lmap->map, &key, &val, iter)) {
+ lupb_pushmsgval(L, map, lmap->key_type, key);
+ lupb_pushmsgval(L, map, lmap->value_type, val);
+ return 2;
+ } else {
+ return 0;
+ }
+}
+
+/**
+ * lupb_map_pairs()
+ *
+ * Handles:
+ * pairs(map)
+ */
+static int lupb_map_pairs(lua_State* L) {
+ size_t* iter = lua_newuserdata(L, sizeof(*iter));
+ lupb_map_check(L, 1);
+
+ *iter = kUpb_Map_Begin;
+ lua_pushvalue(L, 1);
+
+ /* Upvalues are [iter, lupb_map]. */
+ lua_pushcclosure(L, &lupb_MapIterator_Next, 2);
+
+ return 1;
+}
+
+/* upb_mapiter ]]] */
+
+static const struct luaL_Reg lupb_map_mm[] = {{"__index", lupb_map_index},
+ {"__len", lupb_map_len},
+ {"__newindex", lupb_Map_Newindex},
+ {"__pairs", lupb_map_pairs},
+ {NULL, NULL}};
+
+/* lupb_Message
+ * *******************************************************************/
+
+typedef struct {
+ upb_Message* msg;
+} lupb_Message;
+
+/* lupb_Message helpers */
+
+static upb_Message* lupb_msg_check(lua_State* L, int narg) {
+ lupb_Message* msg = luaL_checkudata(L, narg, LUPB_MSG);
+ return msg->msg;
+}
+
+static const upb_MessageDef* lupb_Message_Getmsgdef(lua_State* L, int msg) {
+ lua_getiuservalue(L, msg, LUPB_MSGDEF_INDEX);
+ const upb_MessageDef* m = lupb_MessageDef_check(L, -1);
+ lua_pop(L, 1);
+ return m;
+}
+
+static const upb_FieldDef* lupb_msg_tofield(lua_State* L, int msg, int field) {
+ size_t len;
+ const char* fieldname = luaL_checklstring(L, field, &len);
+ const upb_MessageDef* m = lupb_Message_Getmsgdef(L, msg);
+ return upb_MessageDef_FindFieldByNameWithSize(m, fieldname, len);
+}
+
+static const upb_FieldDef* lupb_msg_checkfield(lua_State* L, int msg,
+ int field) {
+ const upb_FieldDef* f = lupb_msg_tofield(L, msg, field);
+ if (f == NULL) {
+ luaL_error(L, "no such field '%s'", lua_tostring(L, field));
+ }
+ return f;
+}
+
+upb_Message* lupb_msg_pushnew(lua_State* L, int narg) {
+ const upb_MessageDef* m = lupb_MessageDef_check(L, narg);
+ lupb_Message* lmsg = lupb_newuserdata(L, sizeof(lupb_Message), 2, LUPB_MSG);
+ upb_Arena* arena = lupb_Arena_pushnew(L);
+
+ lua_setiuservalue(L, -2, LUPB_ARENA_INDEX);
+ lua_pushvalue(L, 1);
+ lua_setiuservalue(L, -2, LUPB_MSGDEF_INDEX);
+
+ lmsg->msg = upb_Message_New(upb_MessageDef_MiniTable(m), arena);
+ lupb_cacheset(L, lmsg->msg);
+ return lmsg->msg;
+}
+
+/**
+ * lupb_Message_Newmsgwrapper()
+ *
+ * Creates a new wrapper for a message, copying the arena and msgdef references
+ * from |narg| (which should be an array or map).
+ */
+static void lupb_Message_Newmsgwrapper(lua_State* L, int narg,
+ upb_MessageValue val) {
+ lupb_Message* lmsg = lupb_newuserdata(L, sizeof(*lmsg), 2, LUPB_MSG);
+ lmsg->msg = (upb_Message*)val.msg_val; /* XXX: cast isn't great. */
+ lupb_cacheset(L, lmsg->msg);
+
+ /* Copy both arena and msgdef into the wrapper. */
+ lua_getiuservalue(L, narg, LUPB_ARENA_INDEX);
+ lua_setiuservalue(L, -2, LUPB_ARENA_INDEX);
+ lua_getiuservalue(L, narg, LUPB_MSGDEF_INDEX);
+ lua_setiuservalue(L, -2, LUPB_MSGDEF_INDEX);
+}
+
+/**
+ * lupb_Message_Newud()
+ *
+ * Creates the Lua userdata for a new wrapper object, adding a reference to
+ * the msgdef if necessary.
+ */
+static void* lupb_Message_Newud(lua_State* L, int narg, size_t size,
+ const char* type, const upb_FieldDef* f) {
+ if (upb_FieldDef_CType(f) == kUpb_CType_Message) {
+ /* Wrapper needs a reference to the msgdef. */
+ void* ud = lupb_newuserdata(L, size, 2, type);
+ lua_getiuservalue(L, narg, LUPB_MSGDEF_INDEX);
+ lupb_MessageDef_pushsubmsgdef(L, f);
+ lua_setiuservalue(L, -2, LUPB_MSGDEF_INDEX);
+ return ud;
+ } else {
+ return lupb_newuserdata(L, size, 1, type);
+ }
+}
+
+/**
+ * lupb_Message_Newwrapper()
+ *
+ * Creates a new Lua wrapper object to wrap the given array, map, or message.
+ */
+static void lupb_Message_Newwrapper(lua_State* L, int narg,
+ const upb_FieldDef* f,
+ upb_MutableMessageValue val) {
+ if (upb_FieldDef_IsMap(f)) {
+ const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f);
+ const upb_FieldDef* key_f =
+ upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_KeyFieldNumber);
+ const upb_FieldDef* val_f =
+ upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_ValueFieldNumber);
+ lupb_map* lmap =
+ lupb_Message_Newud(L, narg, sizeof(*lmap), LUPB_MAP, val_f);
+ lmap->key_type = upb_FieldDef_CType(key_f);
+ lmap->value_type = upb_FieldDef_CType(val_f);
+ lmap->map = val.map;
+ } else if (upb_FieldDef_IsRepeated(f)) {
+ lupb_array* larr =
+ lupb_Message_Newud(L, narg, sizeof(*larr), LUPB_ARRAY, f);
+ larr->type = upb_FieldDef_CType(f);
+ larr->arr = val.array;
+ } else {
+ lupb_Message* lmsg =
+ lupb_Message_Newud(L, narg, sizeof(*lmsg), LUPB_MSG, f);
+ lmsg->msg = val.msg;
+ }
+
+ /* Copy arena ref to new wrapper. This may be a different arena than the
+ * underlying data was originally constructed from, but if so both arenas
+ * must be in the same group. */
+ lua_getiuservalue(L, narg, LUPB_ARENA_INDEX);
+ lua_setiuservalue(L, -2, LUPB_ARENA_INDEX);
+
+ lupb_cacheset(L, val.msg);
+}
+
+/**
+ * lupb_msg_typechecksubmsg()
+ *
+ * Typechecks the given array, map, or msg against this upb_FieldDef.
+ */
+static void lupb_msg_typechecksubmsg(lua_State* L, int narg, int msgarg,
+ const upb_FieldDef* f) {
+ /* Typecheck this map's msgdef against this message field. */
+ lua_getiuservalue(L, narg, LUPB_MSGDEF_INDEX);
+ lua_getiuservalue(L, msgarg, LUPB_MSGDEF_INDEX);
+ lupb_MessageDef_pushsubmsgdef(L, f);
+ luaL_argcheck(L, lua_rawequal(L, -1, -2), narg, "message type mismatch");
+ lua_pop(L, 2);
+}
+
+/* lupb_Message Public API */
+
+/**
+ * lupb_MessageDef_call
+ *
+ * Handles:
+ * new_msg = MessageClass()
+ * new_msg = MessageClass{foo = "bar", baz = 3, quux = {foo = 3}}
+ */
+int lupb_MessageDef_call(lua_State* L) {
+ int arg_count = lua_gettop(L);
+ lupb_msg_pushnew(L, 1);
+
+ if (arg_count > 1) {
+ /* Set initial fields from table. */
+ int msg = arg_count + 1;
+ lua_pushnil(L);
+ while (lua_next(L, 2) != 0) {
+ lua_pushvalue(L, -2); /* now stack is key, val, key */
+ lua_insert(L, -3); /* now stack is key, key, val */
+ lua_settable(L, msg);
+ }
+ }
+
+ return 1;
+}
+
+/**
+ * lupb_msg_index
+ *
+ * Handles:
+ * msg.foo
+ * msg["foo"]
+ * msg[field_descriptor] # (for extensions) (TODO)
+ */
+static int lupb_msg_index(lua_State* L) {
+ upb_Message* msg = lupb_msg_check(L, 1);
+ const upb_FieldDef* f = lupb_msg_checkfield(L, 1, 2);
+
+ if (upb_FieldDef_IsRepeated(f) || upb_FieldDef_IsSubMessage(f)) {
+ /* Wrapped type; get or create wrapper. */
+ upb_Arena* arena = upb_FieldDef_IsRepeated(f) ? lupb_Arenaget(L, 1) : NULL;
+ upb_MutableMessageValue val = upb_Message_Mutable(msg, f, arena);
+ if (!lupb_cacheget(L, val.msg)) {
+ lupb_Message_Newwrapper(L, 1, f, val);
+ }
+ } else {
+ /* Value type, just push value and return .*/
+ upb_MessageValue val = upb_Message_GetFieldByDef(msg, f);
+ lupb_pushmsgval(L, 0, upb_FieldDef_CType(f), val);
+ }
+
+ return 1;
+}
+
+/**
+ * lupb_Message_Newindex()
+ *
+ * Handles:
+ * msg.foo = bar
+ * msg["foo"] = bar
+ * msg[field_descriptor] = bar # (for extensions) (TODO)
+ */
+static int lupb_Message_Newindex(lua_State* L) {
+ upb_Message* msg = lupb_msg_check(L, 1);
+ const upb_FieldDef* f = lupb_msg_checkfield(L, 1, 2);
+ upb_MessageValue msgval;
+ bool merge_arenas = true;
+
+ if (upb_FieldDef_IsMap(f)) {
+ lupb_map* lmap = lupb_map_check(L, 3);
+ const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f);
+ const upb_FieldDef* key_f =
+ upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_KeyFieldNumber);
+ const upb_FieldDef* val_f =
+ upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_ValueFieldNumber);
+ upb_CType key_type = upb_FieldDef_CType(key_f);
+ upb_CType value_type = upb_FieldDef_CType(val_f);
+ luaL_argcheck(L, lmap->key_type == key_type, 3, "key type mismatch");
+ luaL_argcheck(L, lmap->value_type == value_type, 3, "value type mismatch");
+ if (value_type == kUpb_CType_Message) {
+ lupb_msg_typechecksubmsg(L, 3, 1, val_f);
+ }
+ msgval.map_val = lmap->map;
+ } else if (upb_FieldDef_IsRepeated(f)) {
+ lupb_array* larr = lupb_array_check(L, 3);
+ upb_CType type = upb_FieldDef_CType(f);
+ luaL_argcheck(L, larr->type == type, 3, "array type mismatch");
+ if (type == kUpb_CType_Message) {
+ lupb_msg_typechecksubmsg(L, 3, 1, f);
+ }
+ msgval.array_val = larr->arr;
+ } else if (upb_FieldDef_IsSubMessage(f)) {
+ upb_Message* msg = lupb_msg_check(L, 3);
+ lupb_msg_typechecksubmsg(L, 3, 1, f);
+ msgval.msg_val = msg;
+ } else {
+ msgval = lupb_tomsgval(L, upb_FieldDef_CType(f), 3, 1, LUPB_COPY);
+ merge_arenas = false;
+ }
+
+ if (merge_arenas) {
+ lupb_Arena_Fuseobjs(L, 1, 3);
+ }
+
+ upb_Message_SetFieldByDef(msg, f, msgval, lupb_Arenaget(L, 1));
+
+ /* Return the new value for chained assignments. */
+ lua_pushvalue(L, 3);
+ return 1;
+}
+
+/**
+ * lupb_msg_tostring()
+ *
+ * Handles:
+ * tostring(msg)
+ * print(msg)
+ * etc.
+ */
+static int lupb_msg_tostring(lua_State* L) {
+ upb_Message* msg = lupb_msg_check(L, 1);
+ const upb_MessageDef* m;
+ char buf[1024];
+ size_t size;
+
+ lua_getiuservalue(L, 1, LUPB_MSGDEF_INDEX);
+ m = lupb_MessageDef_check(L, -1);
+
+ size = upb_TextEncode(msg, m, NULL, 0, buf, sizeof(buf));
+
+ if (size < sizeof(buf)) {
+ lua_pushlstring(L, buf, size);
+ } else {
+ char* ptr = malloc(size + 1);
+ upb_TextEncode(msg, m, NULL, 0, ptr, size + 1);
+ lua_pushlstring(L, ptr, size);
+ free(ptr);
+ }
+
+ return 1;
+}
+
+static const struct luaL_Reg lupb_msg_mm[] = {
+ {"__index", lupb_msg_index},
+ {"__newindex", lupb_Message_Newindex},
+ {"__tostring", lupb_msg_tostring},
+ {NULL, NULL}};
+
+/* lupb_Message toplevel
+ * **********************************************************/
+
+static int lupb_getoptions(lua_State* L, int narg) {
+ int options = 0;
+ if (lua_gettop(L) >= narg) {
+ size_t len = lua_rawlen(L, narg);
+ for (size_t i = 1; i <= len; i++) {
+ lua_rawgeti(L, narg, i);
+ options |= lupb_checkuint32(L, -1);
+ lua_pop(L, 1);
+ }
+ }
+ return options;
+}
+
+/**
+ * lupb_decode()
+ *
+ * Handles:
+ * msg = upb.decode(MessageClass, bin_string)
+ */
+static int lupb_decode(lua_State* L) {
+ size_t len;
+ const upb_MessageDef* m = lupb_MessageDef_check(L, 1);
+ const char* pb = lua_tolstring(L, 2, &len);
+ const upb_MiniTable* layout = upb_MessageDef_MiniTable(m);
+ upb_Message* msg = lupb_msg_pushnew(L, 1);
+ upb_Arena* arena = lupb_Arenaget(L, -1);
+ char* buf;
+
+ /* Copy input data to arena, message will reference it. */
+ buf = upb_Arena_Malloc(arena, len);
+ memcpy(buf, pb, len);
+
+ upb_DecodeStatus status = upb_Decode(buf, len, msg, layout, NULL,
+ kUpb_DecodeOption_AliasString, arena);
+
+ if (status != kUpb_DecodeStatus_Ok) {
+ lua_pushstring(L, "Error decoding protobuf.");
+ return lua_error(L);
+ }
+
+ return 1;
+}
+
+/**
+ * lupb_Encode()
+ *
+ * Handles:
+ * bin_string = upb.encode(msg)
+ */
+static int lupb_Encode(lua_State* L) {
+ const upb_Message* msg = lupb_msg_check(L, 1);
+ const upb_MessageDef* m = lupb_Message_Getmsgdef(L, 1);
+ const upb_MiniTable* layout = upb_MessageDef_MiniTable(m);
+ int options = lupb_getoptions(L, 2);
+ upb_Arena* arena = lupb_Arena_pushnew(L);
+ char* buf;
+ size_t size;
+ upb_EncodeStatus status =
+ upb_Encode(msg, (const void*)layout, options, arena, &buf, &size);
+ if (status != kUpb_EncodeStatus_Ok) {
+ lua_pushstring(L, "Error encoding protobuf.");
+ return lua_error(L);
+ }
+
+ lua_pushlstring(L, buf, size);
+
+ return 1;
+}
+
+/**
+ * lupb_jsondecode()
+ *
+ * Handles:
+ * text_string = upb.json_decode(MessageClass, json_str,
+ * {upb.JSONDEC_IGNOREUNKNOWN})
+ */
+static int lupb_jsondecode(lua_State* L) {
+ size_t len;
+ const upb_MessageDef* m = lupb_MessageDef_check(L, 1);
+ const char* json = lua_tolstring(L, 2, &len);
+ int options = lupb_getoptions(L, 3);
+ upb_Message* msg;
+ upb_Arena* arena;
+ upb_Status status;
+
+ msg = lupb_msg_pushnew(L, 1);
+ arena = lupb_Arenaget(L, -1);
+ upb_Status_Clear(&status);
+ upb_JsonDecode(json, len, msg, m, NULL, options, arena, &status);
+ lupb_checkstatus(L, &status);
+
+ return 1;
+}
+
+/**
+ * lupb_jsonencode()
+ *
+ * Handles:
+ * text_string = upb.json_encode(msg, {upb.JSONENC_EMITDEFAULTS})
+ */
+static int lupb_jsonencode(lua_State* L) {
+ upb_Message* msg = lupb_msg_check(L, 1);
+ const upb_MessageDef* m = lupb_Message_Getmsgdef(L, 1);
+ int options = lupb_getoptions(L, 2);
+ char buf[1024];
+ size_t size;
+ upb_Status status;
+
+ upb_Status_Clear(&status);
+ size = upb_JsonEncode(msg, m, NULL, options, buf, sizeof(buf), &status);
+ lupb_checkstatus(L, &status);
+
+ if (size < sizeof(buf)) {
+ lua_pushlstring(L, buf, size);
+ } else {
+ char* ptr = malloc(size + 1);
+ upb_JsonEncode(msg, m, NULL, options, ptr, size + 1, &status);
+ lupb_checkstatus(L, &status);
+ lua_pushlstring(L, ptr, size);
+ free(ptr);
+ }
+
+ return 1;
+}
+
+/**
+ * lupb_textencode()
+ *
+ * Handles:
+ * text_string = upb.text_encode(msg, {upb.TXTENC_SINGLELINE})
+ */
+static int lupb_textencode(lua_State* L) {
+ upb_Message* msg = lupb_msg_check(L, 1);
+ const upb_MessageDef* m = lupb_Message_Getmsgdef(L, 1);
+ int options = lupb_getoptions(L, 2);
+ char buf[1024];
+ size_t size;
+
+ size = upb_TextEncode(msg, m, NULL, options, buf, sizeof(buf));
+
+ if (size < sizeof(buf)) {
+ lua_pushlstring(L, buf, size);
+ } else {
+ char* ptr = malloc(size + 1);
+ upb_TextEncode(msg, m, NULL, options, ptr, size + 1);
+ lua_pushlstring(L, ptr, size);
+ free(ptr);
+ }
+
+ return 1;
+}
+
+static void lupb_setfieldi(lua_State* L, const char* field, int i) {
+ lua_pushinteger(L, i);
+ lua_setfield(L, -2, field);
+}
+
+static const struct luaL_Reg lupb_msg_toplevel_m[] = {
+ {"Array", lupb_Array_New}, {"Map", lupb_Map_New},
+ {"decode", lupb_decode}, {"encode", lupb_Encode},
+ {"json_decode", lupb_jsondecode}, {"json_encode", lupb_jsonencode},
+ {"text_encode", lupb_textencode}, {NULL, NULL}};
+
+void lupb_msg_registertypes(lua_State* L) {
+ lupb_setfuncs(L, lupb_msg_toplevel_m);
+
+ lupb_register_type(L, LUPB_ARENA, NULL, lupb_Arena_mm);
+ lupb_register_type(L, LUPB_ARRAY, NULL, lupb_array_mm);
+ lupb_register_type(L, LUPB_MAP, NULL, lupb_map_mm);
+ lupb_register_type(L, LUPB_MSG, NULL, lupb_msg_mm);
+
+ lupb_setfieldi(L, "TXTENC_SINGLELINE", UPB_TXTENC_SINGLELINE);
+ lupb_setfieldi(L, "TXTENC_SKIPUNKNOWN", UPB_TXTENC_SKIPUNKNOWN);
+ lupb_setfieldi(L, "TXTENC_NOSORT", UPB_TXTENC_NOSORT);
+
+ lupb_setfieldi(L, "ENCODE_DETERMINISTIC", kUpb_EncodeOption_Deterministic);
+ lupb_setfieldi(L, "ENCODE_SKIPUNKNOWN", kUpb_EncodeOption_SkipUnknown);
+
+ lupb_setfieldi(L, "JSONENC_EMITDEFAULTS", upb_JsonEncode_EmitDefaults);
+ lupb_setfieldi(L, "JSONENC_PROTONAMES", upb_JsonEncode_UseProtoNames);
+
+ lupb_setfieldi(L, "JSONDEC_IGNOREUNKNOWN", upb_JsonDecode_IgnoreUnknown);
+
+ lupb_cacheinit(L);
+}
diff --git a/upb/lua/test.proto b/upb/lua/test.proto
new file mode 100644
index 0000000..92bcd1c
--- /dev/null
+++ b/upb/lua/test.proto
@@ -0,0 +1,98 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto2";
+
+import "google/protobuf/timestamp.proto";
+
+package upb_lua_test;
+
+message MapTest {
+ map<string, double> map_string_double = 1;
+}
+
+message PackedTest {
+ repeated bool bool_packed = 1 [packed = true];
+ repeated int32 i32_packed = 2 [packed = true];
+ repeated int64 i64_packed = 3 [packed = true];
+ repeated fixed32 f32_packed = 4 [packed = true];
+ repeated fixed64 f64_packed = 5 [packed = true];
+}
+
+message UnpackedTest {
+ repeated bool bool_packed = 1 [packed = false];
+ repeated int32 i32_packed = 2 [packed = false];
+ repeated int64 i64_packed = 3 [packed = false];
+ repeated fixed32 f32_packed = 4 [packed = false];
+ repeated fixed64 f64_packed = 5 [packed = false];
+}
+
+message TestLargeFieldNumber {
+ optional int32 i32 = 456214797;
+}
+
+message TestTimestamp {
+ optional google.protobuf.Timestamp ts = 1;
+}
+
+message HelloRequest {
+ optional uint32 id = 1;
+ optional uint32 random_name_a0 = 2;
+ optional uint32 random_name_a1 = 3;
+ optional uint32 random_name_a2 = 4;
+ optional uint32 random_name_a3 = 5;
+ optional uint32 random_name_a4 = 6;
+ optional uint32 random_name_a5 = 7;
+ optional uint32 random_name_a6 = 8;
+ optional uint32 random_name_a7 = 9;
+ optional uint32 random_name_a8 = 10;
+ optional uint32 random_name_a9 = 11;
+ optional uint32 random_name_b0 = 12;
+ optional uint32 random_name_b1 = 13;
+ optional uint32 random_name_b2 = 14;
+ optional uint32 random_name_b3 = 15;
+ optional uint32 random_name_b4 = 16;
+ optional uint32 random_name_b5 = 17;
+ optional uint32 random_name_b6 = 18;
+ optional uint32 random_name_b7 = 19;
+ optional uint32 random_name_b8 = 20;
+ optional uint32 random_name_b9 = 21;
+ optional uint32 random_name_c0 = 22;
+ optional uint32 random_name_c1 = 23;
+ optional uint32 random_name_c2 = 24;
+ optional uint32 random_name_c3 = 25;
+ optional uint32 random_name_c4 = 26;
+ optional uint32 random_name_c5 = 27;
+ optional uint32 random_name_c6 = 28;
+ optional uint32 random_name_c7 = 29;
+ optional uint32 random_name_c8 = 30;
+ optional uint32 random_name_c9 = 31;
+ optional string version = 32;
+}
diff --git a/upb/lua/test_upb.lua b/upb/lua/test_upb.lua
new file mode 100644
index 0000000..f74d988
--- /dev/null
+++ b/upb/lua/test_upb.lua
@@ -0,0 +1,852 @@
+--[[--------------------------------------------------------------------------
+
+Protocol Buffers - Google's data interchange format
+Copyright 2023 Google LLC. All rights reserved.
+https://developers.google.com/protocol-buffers/
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google LLC nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+--]]--------------------------------------------------------------------------
+
+local upb = require "lupb"
+local lunit = require "lunit"
+local upb_test = require "lua.test_pb"
+local test_messages_proto3 = require "google.protobuf.test_messages_proto3_pb"
+local test_messages_proto2 = require "google.protobuf.test_messages_proto2_pb"
+local descriptor = require "google.protobuf.descriptor_pb"
+local empty = require "google.protobuf.empty_pb"
+
+if _VERSION >= 'Lua 5.2' then
+ _ENV = lunit.module("testupb", "seeall")
+else
+ module("testupb", lunit.testcase, package.seeall)
+end
+
+function iter_to_array(iter)
+ local arr = {}
+ for v in iter do
+ arr[#arr + 1] = v
+ end
+ return arr
+end
+
+function test_def_readers()
+ local m = test_messages_proto3.TestAllTypesProto3
+ assert_equal("TestAllTypesProto3", m:name())
+ assert_equal("protobuf_test_messages.proto3.TestAllTypesProto3", m:full_name())
+
+ -- field
+ local f = m:field("optional_int32")
+ local f2 = m:field(1)
+ assert_equal(f, f2)
+ assert_equal(1, f:number())
+ assert_equal("optional_int32", f:name())
+ assert_equal(upb.LABEL_OPTIONAL, f:label())
+ assert_equal(upb.DESCRIPTOR_TYPE_INT32, f:descriptor_type())
+ assert_equal(upb.TYPE_INT32, f:type())
+ assert_nil(f:containing_oneof())
+ assert_equal(m, f:containing_type())
+ assert_equal(0, f:default())
+ local message_field_count = 0
+ for field in m:fields() do
+ message_field_count = message_field_count + 1
+ end
+ assert_equal(message_field_count, #m)
+
+ local message_oneof_count = 0
+ for oneof in m:oneofs() do
+ message_oneof_count = message_oneof_count + 1
+ end
+ assert_equal(message_oneof_count, m:oneof_count())
+
+ -- oneof
+ local o = m:lookup_name("oneof_field")
+ assert_equal("oneof_field", o:name())
+ assert_equal(m, o:containing_type())
+ local oneof_field_count = 0
+ for field in o:fields() do
+ oneof_field_count = oneof_field_count + 1
+ end
+ assert_equal(oneof_field_count, #o)
+
+ -- enum
+ local e = test_messages_proto3['TestAllTypesProto3.NestedEnum']
+ assert_true(#e > 3 and #e < 10)
+ assert_equal(2, e:value("BAZ"):number())
+end
+
+function test_msg_map()
+ msg = test_messages_proto3.TestAllTypesProto3()
+ msg.map_int32_int32[5] = 10
+ msg.map_int32_int32[6] = 12
+ assert_equal(10, msg.map_int32_int32[5])
+ assert_equal(12, msg.map_int32_int32[6])
+
+ -- Test overwrite.
+ msg.map_int32_int32[5] = 20
+ assert_equal(20, msg.map_int32_int32[5])
+ assert_equal(12, msg.map_int32_int32[6])
+ msg.map_int32_int32[5] = 10
+
+ -- Test delete.
+ msg.map_int32_int32[5] = nil
+ assert_nil(msg.map_int32_int32[5])
+ assert_equal(12, msg.map_int32_int32[6])
+ msg.map_int32_int32[5] = 10
+
+ local serialized = upb.encode(msg)
+ assert_true(#serialized > 0)
+ local msg2 = upb.decode(test_messages_proto3.TestAllTypesProto3, serialized)
+ assert_equal(10, msg2.map_int32_int32[5])
+ assert_equal(12, msg2.map_int32_int32[6])
+end
+
+function test_map_sorting()
+ function msg_with_int32_entries(start, expand)
+ local msg = test_messages_proto3.TestAllTypesProto3()
+ for i=start,start + 8 do
+ msg.map_int32_int32[i] = i * 2
+ end
+
+ if expand then
+ for i=start+20,200 do
+ msg.map_int32_int32[i] = i
+ end
+ for i=start+20,200 do
+ msg.map_int32_int32[i] = nil
+ end
+ end
+ return msg
+ end
+
+ function msg_with_msg_entries(expand)
+ local msg = test_messages_proto3.TestAllTypesProto3()
+ -- 8! = 40320 possible orderings makes it overwhelmingly likely that two
+ -- random orderings will be different.
+ for i=1,8 do
+ local submsg = test_messages_proto3.TestAllTypesProto3.NestedMessage()
+ submsg.corecursive = msg_with_int32_entries(i, expand)
+ msg.map_string_nested_message[tostring(i)] = submsg
+ end
+
+ expand = false
+ if expand then
+ for i=21,2000 do
+ local submsg = test_messages_proto3.TestAllTypesProto3.NestedMessage()
+ submsg.corecursive = msg_with_int32_entries(i, expand)
+ msg.map_string_nested_message[tostring(i)] = submsg
+ end
+ for i=21,2000 do
+ msg.map_string_nested_message[tostring(i)] = nil
+ end
+ end
+ return msg
+ end
+
+ -- Create two messages with the same contents but (hopefully) different
+ -- map table orderings.
+ local msg = msg_with_msg_entries(false)
+ local msg2 = msg_with_msg_entries(true)
+
+ local text1 = upb.text_encode(msg)
+ local text2 = upb.text_encode(msg2)
+ assert_equal(text1, text2)
+
+ local binary1 = upb.encode(msg, {upb.ENCODE_DETERMINISTIC})
+ local binary2 = upb.encode(msg2, {upb.ENCODE_DETERMINISTIC})
+ assert_equal(binary1, binary2)
+
+ -- Non-sorted map should compare different.
+ local text3 = upb.text_encode(msg, {upb.TXTENC_NOSORT})
+ assert_not_equal(text1, text3)
+
+ local binary3 = upb.encode(msg)
+ assert_not_equal(binary1, binary3)
+end
+
+function test_utf8()
+ local proto2_msg = test_messages_proto2.TestAllTypesProto2()
+ proto2_msg.optional_string = "\xff"
+ local serialized = upb.encode(proto2_msg)
+
+ -- Decoding invalid UTF-8 succeeds in proto2.
+ upb.decode(test_messages_proto2.TestAllTypesProto2, serialized)
+
+ -- Decoding invalid UTF-8 fails in proto2.
+ assert_error_match("Error decoding protobuf", function()
+ upb.decode(test_messages_proto3.TestAllTypesProto3, serialized)
+ end)
+
+ -- TODO(haberman): should proto3 accessors also check UTF-8 at set time?
+end
+
+function test_string_double_map()
+ msg = upb_test.MapTest()
+ msg.map_string_double["one"] = 1.0
+ msg.map_string_double["two point five"] = 2.5
+ assert_equal(1, msg.map_string_double["one"])
+ assert_equal(2.5, msg.map_string_double["two point five"])
+
+ -- Test overwrite.
+ msg.map_string_double["one"] = 2
+ assert_equal(2, msg.map_string_double["one"])
+ assert_equal(2.5, msg.map_string_double["two point five"])
+ msg.map_string_double["one"] = 1.0
+
+ -- Test delete.
+ msg.map_string_double["one"] = nil
+ assert_nil(msg.map_string_double["one"])
+ assert_equal(2.5, msg.map_string_double["two point five"])
+ msg.map_string_double["one"] = 1
+
+ local serialized = upb.encode(msg)
+ assert_true(#serialized > 0)
+ local msg2 = upb.decode(upb_test.MapTest, serialized)
+ assert_equal(1, msg2.map_string_double["one"])
+ assert_equal(2.5, msg2.map_string_double["two point five"])
+end
+
+function test_string_double_map()
+ local function fill_msg(msg)
+ msg.i32_packed[1] = 100
+ msg.i32_packed[2] = 200
+ msg.i32_packed[3] = 50000
+
+ msg.i64_packed[1] = 101
+ msg.i64_packed[2] = 201
+ msg.i64_packed[3] = 50001
+
+ msg.f32_packed[1] = 102
+ msg.f32_packed[2] = 202
+ msg.f32_packed[3] = 50002
+
+ msg.f64_packed[1] = 103
+ msg.f64_packed[2] = 203
+ msg.f64_packed[3] = 50003
+ end
+
+ local function check_msg(msg)
+ assert_equal(100, msg.i32_packed[1])
+ assert_equal(200, msg.i32_packed[2])
+ assert_equal(50000, msg.i32_packed[3])
+ assert_equal(3, #msg.i32_packed)
+
+ assert_equal(101, msg.i64_packed[1])
+ assert_equal(201, msg.i64_packed[2])
+ assert_equal(50001, msg.i64_packed[3])
+ assert_equal(3, #msg.i64_packed)
+
+ assert_equal(102, msg.f32_packed[1])
+ assert_equal(202, msg.f32_packed[2])
+ assert_equal(50002, msg.f32_packed[3])
+ assert_equal(3, #msg.f32_packed)
+
+ assert_equal(103, msg.f64_packed[1])
+ assert_equal(203, msg.f64_packed[2])
+ assert_equal(50003, msg.f64_packed[3])
+ assert_equal(3, #msg.f64_packed)
+ end
+
+ local msg = upb_test.PackedTest()
+ fill_msg(msg)
+ check_msg(msg)
+
+ local serialized_packed = upb.encode(msg)
+ local msg2 = upb.decode(upb_test.PackedTest, serialized_packed)
+ local msg3 = upb.decode(upb_test.UnpackedTest, serialized_packed)
+ check_msg(msg2)
+ check_msg(msg3)
+
+ serialized_unpacked = upb.encode(msg3)
+ local msg4 = upb.decode(upb_test.PackedTest, serialized_unpacked)
+ local msg5 = upb.decode(upb_test.PackedTest, serialized_unpacked)
+ check_msg(msg4)
+ check_msg(msg5)
+
+end
+
+function test_msg_string_map()
+ msg = test_messages_proto3.TestAllTypesProto3()
+ msg.map_string_string["foo"] = "bar"
+ msg.map_string_string["baz"] = "quux"
+ assert_nil(msg.map_string_string["abc"])
+ assert_equal("bar", msg.map_string_string["foo"])
+ assert_equal("quux", msg.map_string_string["baz"])
+
+ -- Test overwrite.
+ msg.map_string_string["foo"] = "123"
+ assert_equal("123", msg.map_string_string["foo"])
+ assert_equal("quux", msg.map_string_string["baz"])
+ msg.map_string_string["foo"] = "bar"
+
+ -- Test delete
+ msg.map_string_string["foo"] = nil
+ assert_nil(msg.map_string_string["foo"])
+ assert_equal("quux", msg.map_string_string["baz"])
+ msg.map_string_string["foo"] = "bar"
+
+ local serialized = upb.encode(msg)
+ assert_true(#serialized > 0)
+ local msg2 = upb.decode(test_messages_proto3.TestAllTypesProto3, serialized)
+ assert_equal("bar", msg2.map_string_string["foo"])
+ assert_equal("quux", msg2.map_string_string["baz"])
+end
+
+function test_msg_array()
+ msg = test_messages_proto3.TestAllTypesProto3()
+
+ assert_not_nil(msg.repeated_int32)
+ assert_equal(msg.repeated_int32, msg.repeated_int32)
+ assert_equal(0, #msg.repeated_int32)
+
+ msg.repeated_int32[1] = 2
+ assert_equal(1, #msg.repeated_int32);
+ assert_equal(2, msg.repeated_int32[1]);
+
+ -- Can't assign a scalar; array is expected.
+ assert_error_match("lupb.array expected", function() msg.repeated_int32 = 5 end)
+
+ -- Can't assign array of the wrong type.
+ local function assign_int64()
+ msg.repeated_int32 = upb.Array(upb.TYPE_INT64)
+ end
+ assert_error_match("array type mismatch", assign_int64)
+
+ local arr = upb.Array(upb.TYPE_INT32)
+ arr[1] = 6
+ assert_equal(1, #arr)
+ msg.repeated_int32 = arr
+ assert_equal(msg.repeated_int32, msg.repeated_int32)
+ assert_equal(arr, msg.repeated_int32)
+ assert_equal(1, #msg.repeated_int32)
+ assert_equal(6, msg.repeated_int32[1])
+
+ -- Can't assign other Lua types.
+ assert_error_match("array expected", function() msg.repeated_int32 = "abc" end)
+ assert_error_match("array expected", function() msg.repeated_int32 = true end)
+ assert_error_match("array expected", function() msg.repeated_int32 = false end)
+ assert_error_match("array expected", function() msg.repeated_int32 = nil end)
+ assert_error_match("array expected", function() msg.repeated_int32 = {} end)
+ assert_error_match("array expected", function() msg.repeated_int32 = print end)
+end
+
+function test_array_append()
+ local arr = upb.Array(upb.TYPE_INT32)
+ for i=1,200000 do
+ arr[i] = i
+ end
+ for i=1,200000 do
+ assert_equal(i, arr[i])
+ end
+end
+
+function test_msg_submsg()
+ --msg = test_messages_proto3.TestAllTypesProto3()
+ msg = test_messages_proto3['TestAllTypesProto3']()
+
+ assert_nil(msg.optional_nested_message)
+
+ -- Can't assign message of the wrong type.
+ local function assign_int64()
+ msg.optional_nested_message = test_messages_proto3.TestAllTypesProto3()
+ end
+ assert_error_match("message type mismatch", assign_int64)
+
+ local nested = test_messages_proto3['TestAllTypesProto3.NestedMessage']()
+ msg.optional_nested_message = nested
+ assert_equal(nested, msg.optional_nested_message)
+
+ -- Can't assign other Lua types.
+ assert_error_match("msg expected", function() msg.optional_nested_message = "abc" end)
+ assert_error_match("msg expected", function() msg.optional_nested_message = true end)
+ assert_error_match("msg expected", function() msg.optional_nested_message = false end)
+ assert_error_match("msg expected", function() msg.optional_nested_message = nil end)
+ assert_error_match("msg expected", function() msg.optional_nested_message = {} end)
+ assert_error_match("msg expected", function() msg.optional_nested_message = print end)
+end
+
+-- Lua 5.1 and 5.2 have slightly different semantics for how a finalizer
+-- can be defined in Lua.
+if _VERSION >= 'Lua 5.2' then
+ function defer(fn)
+ setmetatable({}, { __gc = fn })
+ end
+else
+ function defer(fn)
+ getmetatable(newproxy(true)).__gc = fn
+ end
+end
+
+function test_finalizer()
+ -- Tests that we correctly handle a call into an already-finalized object.
+ -- Collectible objects are finalized in the opposite order of creation.
+ do
+ local t = {}
+ defer(function()
+ assert_error_match("called into dead object", function()
+ -- Generic def call.
+ t[1]:lookup_msg("abc")
+ end)
+ end)
+ t = {
+ upb.DefPool(),
+ }
+ end
+ collectgarbage()
+end
+
+-- in-range of 64-bit types but not exactly representable as double
+local bad64 = 2^68 - 1
+
+local numeric_types = {
+ [upb.TYPE_UINT32] = {
+ valid_val = 2^32 - 1,
+ too_big = 2^32,
+ too_small = -1,
+ other_bad = 5.1
+ },
+ [upb.TYPE_UINT64] = {
+ valid_val = 2^63,
+ too_big = 2^64,
+ too_small = -1,
+ other_bad = bad64
+ },
+ [upb.TYPE_INT32] = {
+ valid_val = 2^31 - 1,
+ too_big = 2^31,
+ too_small = -2^31 - 1,
+ other_bad = 5.1
+ },
+ -- Enums don't exist at a language level in Lua, so we just represent enum
+ -- values as int32s.
+ [upb.TYPE_ENUM] = {
+ valid_val = 2^31 - 1,
+ too_big = 2^31,
+ too_small = -2^31 - 1,
+ other_bad = 5.1
+ },
+ [upb.TYPE_INT64] = {
+ valid_val = 2^62,
+ too_big = 2^63,
+ too_small = -2^64,
+ other_bad = bad64
+ },
+ [upb.TYPE_FLOAT] = {
+ valid_val = 340282306073709652508363335590014353408
+ },
+ [upb.TYPE_DOUBLE] = {
+ valid_val = 10^101
+ },
+}
+
+function test_utf8()
+ local invalid_utf8 = "\xff"
+ local proto2_msg = test_messages_proto2.TestAllTypesProto2{
+ optional_string = invalid_utf8,
+ }
+
+ -- As proto2, invalid UTF-8 parses and serializes fine.
+ local serialized = upb.encode(proto2_msg)
+ local proto2_msg2 = upb.decode(test_messages_proto2.TestAllTypesProto2, serialized)
+
+ -- Decoding as proto3 fails.
+ assert_error(function()
+ upb.decode(test_messages_proto3.TestAllTypesProto3, serialized)
+ end)
+end
+
+function test_msg_primitives()
+ local msg = test_messages_proto3.TestAllTypesProto3{
+ optional_int32 = 10,
+ optional_uint32 = 20,
+ optional_int64 = 30,
+ optional_uint64 = 40,
+ optional_double = 50,
+ optional_float = 60,
+ optional_sint32 = 70,
+ optional_sint64 = 80,
+ optional_fixed32 = 90,
+ optional_fixed64 = 100,
+ optional_sfixed32 = 110,
+ optional_sfixed64 = 120,
+ optional_bool = true,
+ optional_string = "abc",
+ optional_nested_message = test_messages_proto3['TestAllTypesProto3.NestedMessage']{a = 123},
+ }
+
+ -- Attempts to access non-existent fields fail.
+ assert_error_match("no such field", function() msg.no_such = 1 end)
+
+ assert_equal(10, msg.optional_int32)
+ assert_equal(20, msg.optional_uint32)
+ assert_equal(30, msg.optional_int64)
+ assert_equal(40, msg.optional_uint64)
+ assert_equal(50, msg.optional_double)
+ assert_equal(60, msg.optional_float)
+ assert_equal(70, msg.optional_sint32)
+ assert_equal(80, msg.optional_sint64)
+ assert_equal(90, msg.optional_fixed32)
+ assert_equal(100, msg.optional_fixed64)
+ assert_equal(110, msg.optional_sfixed32)
+ assert_equal(120, msg.optional_sfixed64)
+ assert_equal(true, msg.optional_bool)
+ assert_equal("abc", msg.optional_string)
+ assert_equal(123, msg.optional_nested_message.a)
+end
+
+
+function test_string_array()
+ local function test_for_string_type(upb_type)
+ local array = upb.Array(upb_type)
+ assert_equal(0, #array)
+
+ -- 0 is never a valid index in Lua.
+ assert_error_match("array index", function() return array[0] end)
+ -- Past the end of the array.
+ assert_error_match("array index", function() return array[1] end)
+
+ array[1] = "foo"
+ assert_equal("foo", array[1])
+ assert_equal(1, #array)
+ -- Past the end of the array.
+ assert_error_match("array index", function() return array[2] end)
+
+ local array2 = upb.Array(upb_type)
+ assert_equal(0, #array2)
+
+ array[2] = "bar"
+ assert_equal("foo", array[1])
+ assert_equal("bar", array[2])
+ assert_equal(2, #array)
+ -- Past the end of the array.
+ assert_error_match("array index", function() return array[3] end)
+
+ -- Can't assign other Lua types.
+ assert_error_match("Expected string", function() array[3] = 123 end)
+ assert_error_match("Expected string", function() array[3] = true end)
+ assert_error_match("Expected string", function() array[3] = false end)
+ assert_error_match("Expected string", function() array[3] = nil end)
+ assert_error_match("Expected string", function() array[3] = {} end)
+ assert_error_match("Expected string", function() array[3] = print end)
+ assert_error_match("Expected string", function() array[3] = array end)
+ end
+
+ test_for_string_type(upb.TYPE_STRING)
+ test_for_string_type(upb.TYPE_BYTES)
+end
+
+function test_numeric_array()
+ local function test_for_numeric_type(upb_type)
+ local array = upb.Array(upb_type)
+ local vals = numeric_types[upb_type]
+ assert_equal(0, #array)
+
+ -- 0 is never a valid index in Lua.
+ assert_error_match("array index", function() return array[0] end)
+ -- Past the end of the array.
+ assert_error_match("array index", function() return array[1] end)
+
+ array[1] = vals.valid_val
+ assert_equal(vals.valid_val, array[1])
+ assert_equal(1, #array)
+ assert_equal(vals.valid_val, array[1])
+ -- Past the end of the array.
+ assert_error_match("array index", function() return array[2] end)
+
+ array[2] = 10
+ assert_equal(vals.valid_val, array[1])
+ assert_equal(10, array[2])
+ assert_equal(2, #array)
+ -- Past the end of the array.
+ assert_error_match("array index", function() return array[3] end)
+
+ -- Values that are out of range.
+ local errmsg = "not an integer or out of range"
+ if vals.too_small then
+ assert_error_match(errmsg, function() array[3] = vals.too_small end)
+ end
+ if vals.too_big then
+ assert_error_match(errmsg, function() array[3] = vals.too_big end)
+ end
+ if vals.other_bad then
+ assert_error_match(errmsg, function() array[3] = vals.other_bad end)
+ end
+
+ -- Can't assign other Lua types.
+ errmsg = "bad argument #3"
+ assert_error_match(errmsg, function() array[3] = "abc" end)
+ assert_error_match(errmsg, function() array[3] = true end)
+ assert_error_match(errmsg, function() array[3] = false end)
+ assert_error_match(errmsg, function() array[3] = nil end)
+ assert_error_match(errmsg, function() array[3] = {} end)
+ assert_error_match(errmsg, function() array[3] = print end)
+ assert_error_match(errmsg, function() array[3] = array end)
+ end
+
+ for k in pairs(numeric_types) do
+ test_for_numeric_type(k)
+ end
+end
+
+function test_numeric_map()
+ local function test_for_numeric_types(key_type, val_type)
+ local map = upb.Map(key_type, val_type)
+ local key_vals = numeric_types[key_type]
+ local val_vals = numeric_types[val_type]
+
+ assert_equal(0, #map)
+
+ -- Unset keys return nil
+ assert_nil(map[key_vals.valid_val])
+
+ map[key_vals.valid_val] = val_vals.valid_val
+ assert_equal(1, #map)
+ assert_equal(val_vals.valid_val, map[key_vals.valid_val])
+
+ i = 0
+ for k, v in pairs(map) do
+ assert_equal(key_vals.valid_val, k)
+ assert_equal(val_vals.valid_val, v)
+ end
+
+ -- Out of range key/val
+ local errmsg = "not an integer or out of range"
+ if key_vals.too_small then
+ assert_error_match(errmsg, function() map[key_vals.too_small] = 1 end)
+ end
+ if key_vals.too_big then
+ assert_error_match(errmsg, function() map[key_vals.too_big] = 1 end)
+ end
+ if key_vals.other_bad then
+ assert_error_match(errmsg, function() map[key_vals.other_bad] = 1 end)
+ end
+
+ if val_vals.too_small then
+ assert_error_match(errmsg, function() map[1] = val_vals.too_small end)
+ end
+ if val_vals.too_big then
+ assert_error_match(errmsg, function() map[1] = val_vals.too_big end)
+ end
+ if val_vals.other_bad then
+ assert_error_match(errmsg, function() map[1] = val_vals.other_bad end)
+ end
+ end
+
+ for k in pairs(numeric_types) do
+ for v in pairs(numeric_types) do
+ test_for_numeric_types(k, v)
+ end
+ end
+end
+
+function test_unknown()
+ local bytes = string.rep("\x38\x00", 1000)
+ for i=1,1000 do
+ local msg = upb.decode(test_messages_proto3.TestAllTypesProto3, bytes)
+ end
+end
+
+function test_foo()
+ local defpool = upb.DefPool()
+ local filename = "external/com_google_protobuf/src/google/protobuf/descriptor_proto-descriptor-set.proto.bin"
+ local alternate_filename = "src/google/protobuf/descriptor_proto-descriptor-set.proto.bin"
+ local file = io.open(filename, "rb") or io.open("bazel-bin/" .. filename, "rb") or io.open(alternate_filename, "rb")
+ assert_not_nil(file)
+ local descriptor = file:read("*a")
+ assert_true(#descriptor > 0)
+ defpool:add_set(descriptor)
+ local FileDescriptorSet = defpool:lookup_msg("google.protobuf.FileDescriptorSet")
+ assert_not_nil(FileDescriptorSet)
+ set = FileDescriptorSet()
+ assert_equal(#set.file, 0)
+ assert_error_match("lupb.array expected", function () set.file = 1 end)
+
+ set = upb.decode(FileDescriptorSet, descriptor)
+
+ -- Test that we can at least call this without crashing.
+ set_textformat = tostring(set)
+
+ -- print(set_textformat)
+ assert_equal(#set.file, 1)
+ assert_equal(set.file[1].name, "google/protobuf/descriptor.proto")
+end
+
+function test_descriptor()
+ local defpool = upb.DefPool()
+ local file_proto = descriptor.FileDescriptorProto {
+ name = "test.proto",
+ message_type = upb.Array(descriptor.DescriptorProto, {
+ descriptor.DescriptorProto{
+ name = "ABC",
+ },
+ })
+ }
+ local file = defpool:add_file(upb.encode(file_proto))
+ assert_equal(file:defpool(), defpool)
+end
+
+function test_descriptor_error()
+ local defpool = upb.DefPool()
+ local file = descriptor.FileDescriptorProto()
+ file.name = "test.proto"
+ file.message_type[1] = descriptor.DescriptorProto{
+ name = "ABC"
+ }
+ file.message_type[2] = descriptor.DescriptorProto{
+ name = "BC."
+ }
+ assert_error(function () defpool:add_file(upb.encode(file)) end)
+ assert_nil(defpool:lookup_msg("ABC"))
+end
+
+function test_duplicate_enumval()
+ local defpool = upb.DefPool()
+ local file_proto = descriptor.FileDescriptorProto {
+ name = "test.proto",
+ message_type = upb.Array(descriptor.DescriptorProto, {
+ descriptor.DescriptorProto{
+ name = "ABC",
+ },
+ }),
+ enum_type = upb.Array(descriptor.EnumDescriptorProto, {
+ descriptor.EnumDescriptorProto{
+ name = "MyEnum",
+ value = upb.Array(descriptor.EnumValueDescriptorProto, {
+ descriptor.EnumValueDescriptorProto{
+ name = "ABC",
+ number = 1,
+ }
+ }),
+ },
+ })
+ }
+ assert_error(function () defpool:add_file(upb.encode(file_proto)) end)
+end
+
+function test_duplicate_filename_error()
+ local defpool = upb.DefPool()
+ local file = descriptor.FileDescriptorProto()
+ file.name = "test.proto"
+ defpool:add_file(upb.encode(file))
+ -- Second add with the same filename fails.
+ assert_error(function () defpool:add_file(upb.encode(file)) end)
+end
+
+function test_encode_skipunknown()
+ -- Test that upb.ENCODE_SKIPUNKNOWN does not encode unknown fields.
+ local msg = test_messages_proto3.TestAllTypesProto3{
+ optional_int32 = 10,
+ optional_uint32 = 20,
+ optional_int64 = 30,
+ }
+ -- SKIPUNKNOWN here tests that it does *not* affect regular fields.
+ local serialized = upb.encode(msg, {upb.ENCODE_SKIPUNKNOWN})
+ assert_true(#serialized > 0)
+ local empty_with_unknown = upb.decode(empty.Empty, serialized)
+ assert_true(#upb.encode(empty_with_unknown) > 0)
+ -- Verify that unknown fields are not serialized.
+ assert_true(#upb.encode(empty_with_unknown, {upb.ENCODE_SKIPUNKNOWN}) == 0)
+end
+
+function test_json_emit_defaults()
+ local msg = test_messages_proto3.TestAllTypesProto3()
+ local json = upb.json_encode(msg, {upb.JSONENC_EMITDEFAULTS})
+end
+
+function test_json_locale()
+ local msg = test_messages_proto3.TestAllTypesProto3()
+ msg.optional_double = 1.1
+ local original_locale = os.setlocale(nil)
+ os.setlocale("C")
+ local json = upb.json_encode(msg)
+ os.setlocale("de_DE.utf8")
+ assert_equal(json, upb.json_encode(msg))
+ os.setlocale(original_locale) -- Restore.
+end
+
+function test_encode_depth_limit()
+ local msg = test_messages_proto3.TestAllTypesProto3()
+ msg.recursive_message = msg
+ assert_error(function() upb.encode(msg) end)
+end
+
+function test_large_field_number()
+ local msg = upb_test.TestLargeFieldNumber()
+ msg.i32 = 5
+ local serialized = upb.encode(msg)
+ local msg2 = upb.decode(upb_test.TestLargeFieldNumber, serialized)
+ assert_equal(msg.i32, msg2.i32)
+end
+
+function test_timestamp_minutes()
+ local msg = upb.json_decode(upb_test.TestTimestamp, '{"ts": "2000-01-01T00:00:00-06:59"}')
+ assert_equal(msg.ts.seconds, 946684800 + ((6 * 60) + 59) * 60)
+end
+
+function test_gc()
+ local top = test_messages_proto3.TestAllTypesProto3()
+ local n = 100
+ local m
+
+ for i=1,n do
+ local inner = test_messages_proto3.TestAllTypesProto3()
+ m = inner
+ for j=1,n do
+ local tmp = m
+ m = test_messages_proto3.TestAllTypesProto3()
+ -- This will cause the arenas to fuse. But we stop referring to the child,
+ -- so the Lua object is eligible for collection (and therefore its original
+ -- arena can be collected too). Only the fusing will keep the C mem alivd.
+ m.recursive_message = tmp
+
+ end
+ top.recursive_message = m
+ end
+
+ collectgarbage()
+
+ for i=1,n do
+ -- Verify we can touch all the messages again and without accessing freed
+ -- memory.
+ m = m.recursive_message
+ assert_not_nil(m)
+ end
+end
+
+function test_b9440()
+ local m = upb_test.HelloRequest()
+ m.id = 8
+ assert_equal(8, m.id)
+ m.version = "1"
+ assert_equal(8, m.id)
+end
+
+local stats = lunit.main()
+
+if stats.failed > 0 or stats.errors > 0 then
+ error("One or more errors in test suite")
+end
diff --git a/upb/lua/upb.c b/upb/lua/upb.c
new file mode 100644
index 0000000..4500fb4
--- /dev/null
+++ b/upb/lua/upb.c
@@ -0,0 +1,261 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+/*
+ * require("lua") -- A Lua extension for upb.
+ *
+ * Exposes only the core library
+ * (sub-libraries are exposed in other extensions).
+ *
+ * 64-bit woes: Lua can only represent numbers of type lua_Number (which is
+ * double unless the user specifically overrides this). Doubles can represent
+ * the entire range of 64-bit integers, but lose precision once the integers are
+ * greater than 2^53.
+ *
+ * Lua 5.3 is adding support for integers, which will allow for 64-bit
+ * integers (which can be interpreted as signed or unsigned).
+ *
+ * LuaJIT supports 64-bit signed and unsigned boxed representations
+ * through its "cdata" mechanism, but this is not portable to regular Lua.
+ *
+ * Hopefully Lua 5.3 will come soon enough that we can either use Lua 5.3
+ * integer support or LuaJIT 64-bit cdata for users that need the entire
+ * domain of [u]int64 values.
+ */
+
+#include "lua/upb.h"
+
+#include <float.h>
+#include <math.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "lauxlib.h"
+#include "upb/message/message.h"
+
+/* Lua compatibility code *****************************************************/
+
+/* Shims for upcoming Lua 5.3 functionality. */
+static bool lua_isinteger(lua_State* L, int argn) {
+ LUPB_UNUSED(L);
+ LUPB_UNUSED(argn);
+ return false;
+}
+
+/* Utility functions **********************************************************/
+
+void lupb_checkstatus(lua_State* L, upb_Status* s) {
+ if (!upb_Status_IsOk(s)) {
+ lua_pushstring(L, upb_Status_ErrorMessage(s));
+ lua_error(L);
+ }
+}
+
+/* Pushes a new userdata with the given metatable. */
+void* lupb_newuserdata(lua_State* L, size_t size, int n, const char* type) {
+#if LUA_VERSION_NUM >= 504
+ void* ret = lua_newuserdatauv(L, size, n);
+#else
+ void* ret = lua_newuserdata(L, size);
+ lua_createtable(L, 0, n);
+ lua_setuservalue(L, -2);
+#endif
+
+ /* Set metatable. */
+ luaL_getmetatable(L, type);
+ assert(!lua_isnil(L, -1)); /* Should have been created by luaopen_upb. */
+ lua_setmetatable(L, -2);
+
+ return ret;
+}
+
+#if LUA_VERSION_NUM < 504
+int lua_setiuservalue(lua_State* L, int index, int n) {
+ lua_getuservalue(L, index);
+ lua_insert(L, -2);
+ lua_rawseti(L, -2, n);
+ lua_pop(L, 1);
+ return 1;
+}
+
+int lua_getiuservalue(lua_State* L, int index, int n) {
+ lua_getuservalue(L, index);
+ lua_rawgeti(L, -1, n);
+ lua_replace(L, -2);
+ return 1;
+}
+#endif
+
+/* We use this function as the __index metamethod when a type has both methods
+ * and an __index metamethod. */
+int lupb_indexmm(lua_State* L) {
+ /* Look up in __index table (which is a closure param). */
+ lua_pushvalue(L, 2);
+ lua_rawget(L, lua_upvalueindex(1));
+ if (!lua_isnil(L, -1)) {
+ return 1;
+ }
+
+ /* Not found, chain to user __index metamethod. */
+ lua_pushvalue(L, lua_upvalueindex(2));
+ lua_pushvalue(L, 1);
+ lua_pushvalue(L, 2);
+ lua_call(L, 2, 1);
+ return 1;
+}
+
+void lupb_register_type(lua_State* L, const char* name, const luaL_Reg* m,
+ const luaL_Reg* mm) {
+ luaL_newmetatable(L, name);
+
+ if (mm) {
+ lupb_setfuncs(L, mm);
+ }
+
+ if (m) {
+ lua_createtable(L, 0, 0); /* __index table */
+ lupb_setfuncs(L, m);
+
+ /* Methods go in the mt's __index slot. If the user also specified an
+ * __index metamethod, use our custom lupb_indexmm() that can check both. */
+ lua_getfield(L, -2, "__index");
+ if (lua_isnil(L, -1)) {
+ lua_pop(L, 1);
+ } else {
+ lua_pushcclosure(L, &lupb_indexmm, 2);
+ }
+ lua_setfield(L, -2, "__index");
+ }
+
+ lua_pop(L, 1); /* The mt. */
+}
+
+/* Scalar type mapping ********************************************************/
+
+/* Functions that convert scalar/primitive values (numbers, strings, bool)
+ * between Lua and C/upb. Handles type/range checking. */
+
+bool lupb_checkbool(lua_State* L, int narg) {
+ if (!lua_isboolean(L, narg)) {
+ luaL_error(L, "must be true or false");
+ }
+ return lua_toboolean(L, narg);
+}
+
+/* Unlike luaL_checkstring(), this does not allow implicit conversion to
+ * string. */
+const char* lupb_checkstring(lua_State* L, int narg, size_t* len) {
+ if (lua_type(L, narg) != LUA_TSTRING) {
+ luaL_error(L, "Expected string");
+ }
+
+ return lua_tolstring(L, narg, len);
+}
+
+/* Unlike luaL_checkinteger, these do not implicitly convert from string or
+ * round an existing double value. We allow floating-point input, but only if
+ * the actual value is integral. */
+#define INTCHECK(type, ctype, min, max) \
+ ctype lupb_check##type(lua_State* L, int narg) { \
+ double n; \
+ if (lua_isinteger(L, narg)) { \
+ return lua_tointeger(L, narg); \
+ } \
+ \
+ /* Prevent implicit conversion from string. */ \
+ luaL_checktype(L, narg, LUA_TNUMBER); \
+ n = lua_tonumber(L, narg); \
+ \
+ /* Check this double has no fractional part and remains in bounds. \
+ * Consider INT64_MIN and INT64_MAX: \
+ * 1. INT64_MIN -(2^63) is a power of 2, so this converts to a double. \
+ * 2. INT64_MAX (2^63 - 1) is not a power of 2, and conversion of \
+ * out-of-range integer values to a double can lead to undefined behavior. \
+ * On some compilers, this conversion can return 0, but it also can return \
+ * the max value. To deal with this, we can first divide by 2 to prevent \
+ * the overflow, multiply it back, and add 1 to find the true limit. */ \
+ double i; \
+ double max_value = (((double)max / 2) * 2) + 1; \
+ if ((modf(n, &i) != 0.0) || n < min || n >= max_value) { \
+ luaL_error(L, "number %f was not an integer or out of range for " #type, \
+ n); \
+ } \
+ return (ctype)n; \
+ } \
+ void lupb_push##type(lua_State* L, ctype val) { \
+ /* TODO: push integer for Lua >= 5.3, 64-bit cdata for LuaJIT. */ \
+ /* This is lossy for some [u]int64 values, which isn't great, but */ \
+ /* crashing when we encounter these values seems worse. */ \
+ lua_pushnumber(L, val); \
+ }
+
+INTCHECK(int64, int64_t, INT64_MIN, INT64_MAX)
+INTCHECK(int32, int32_t, INT32_MIN, INT32_MAX)
+INTCHECK(uint64, uint64_t, 0, UINT64_MAX)
+INTCHECK(uint32, uint32_t, 0, UINT32_MAX)
+
+double lupb_checkdouble(lua_State* L, int narg) {
+ /* If we were being really hard-nosed here, we'd check whether the input was
+ * an integer that has no precise double representation. But doubles aren't
+ * generally expected to be exact like integers are, and worse this could
+ * cause data-dependent runtime errors: one run of the program could work fine
+ * because the integer calculations happened to be exactly representable in
+ * double, while the next could crash because of subtly different input. */
+
+ luaL_checktype(L, narg, LUA_TNUMBER); /* lua_tonumber() auto-converts. */
+ return lua_tonumber(L, narg);
+}
+
+float lupb_checkfloat(lua_State* L, int narg) {
+ /* We don't worry about checking whether the input can be exactly converted to
+ * float -- see above. */
+
+ luaL_checktype(L, narg, LUA_TNUMBER); /* lua_tonumber() auto-converts. */
+ return lua_tonumber(L, narg);
+}
+
+void lupb_pushdouble(lua_State* L, double d) { lua_pushnumber(L, d); }
+
+void lupb_pushfloat(lua_State* L, float d) { lua_pushnumber(L, d); }
+
+/* Library entry point ********************************************************/
+
+int luaopen_lupb(lua_State* L) {
+#if LUA_VERSION_NUM == 501
+ const struct luaL_Reg funcs[] = {{NULL, NULL}};
+ luaL_register(L, "upb_c", funcs);
+#else
+ lua_createtable(L, 0, 8);
+#endif
+ lupb_def_registertypes(L);
+ lupb_msg_registertypes(L);
+ return 1; /* Return package table. */
+}
diff --git a/upb/lua/upb.h b/upb/lua/upb.h
new file mode 100644
index 0000000..46ec911
--- /dev/null
+++ b/upb/lua/upb.h
@@ -0,0 +1,135 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+/*
+ * Shared definitions for upb Lua modules.
+ */
+
+#ifndef UPB_LUA_UPB_H_
+#define UPB_LUA_UPB_H_
+
+#include "lauxlib.h"
+#include "upb/message/message.h"
+#include "upb/reflection/def.h"
+#include "upb/reflection/message.h"
+
+/* Lua changes its API in incompatible ways in every minor release.
+ * This is some shim code to paper over the differences. */
+
+#if LUA_VERSION_NUM == 501
+#define lua_rawlen lua_objlen
+#define lua_setuservalue(L, idx) lua_setfenv(L, idx)
+#define lua_getuservalue(L, idx) lua_getfenv(L, idx)
+#define lupb_setfuncs(L, l) luaL_register(L, NULL, l)
+#elif LUA_VERSION_NUM >= 502 && LUA_VERSION_NUM <= 504
+#define lupb_setfuncs(L, l) luaL_setfuncs(L, l, 0)
+#else
+#error Only Lua 5.1-5.4 are supported
+#endif
+
+/* Create a new userdata with the given type and |n| uservals, which are popped
+ * from the stack to initialize the userdata. */
+void* lupb_newuserdata(lua_State* L, size_t size, int n, const char* type);
+
+#if LUA_VERSION_NUM < 504
+/* Polyfills for this Lua 5.4 function. Pushes userval |n| for the userdata at
+ * |index|. */
+int lua_setiuservalue(lua_State* L, int index, int n);
+int lua_getiuservalue(lua_State* L, int index, int n);
+#endif
+
+/* Registers a type with the given name, methods, and metamethods. */
+void lupb_register_type(lua_State* L, const char* name, const luaL_Reg* m,
+ const luaL_Reg* mm);
+
+/* Checks the given upb_Status and throws a Lua error if it is not ok. */
+void lupb_checkstatus(lua_State* L, upb_Status* s);
+
+int luaopen_lupb(lua_State* L);
+
+/* C <-> Lua value conversions. ***********************************************/
+
+/* Custom check/push functions. Unlike the Lua equivalents, they are pinned to
+ * specific C types (instead of lua_Number, etc), and do not allow any implicit
+ * conversion or data loss. */
+int64_t lupb_checkint64(lua_State* L, int narg);
+int32_t lupb_checkint32(lua_State* L, int narg);
+uint64_t lupb_checkuint64(lua_State* L, int narg);
+uint32_t lupb_checkuint32(lua_State* L, int narg);
+double lupb_checkdouble(lua_State* L, int narg);
+float lupb_checkfloat(lua_State* L, int narg);
+bool lupb_checkbool(lua_State* L, int narg);
+const char* lupb_checkstring(lua_State* L, int narg, size_t* len);
+const char* lupb_checkname(lua_State* L, int narg);
+
+void lupb_pushint64(lua_State* L, int64_t val);
+void lupb_pushint32(lua_State* L, int32_t val);
+void lupb_pushuint64(lua_State* L, uint64_t val);
+void lupb_pushuint32(lua_State* L, uint32_t val);
+
+/** From def.c. ***************************************************************/
+
+const upb_MessageDef* lupb_MessageDef_check(lua_State* L, int narg);
+const upb_EnumDef* lupb_EnumDef_check(lua_State* L, int narg);
+const upb_FieldDef* lupb_FieldDef_check(lua_State* L, int narg);
+upb_DefPool* lupb_DefPool_check(lua_State* L, int narg);
+void lupb_MessageDef_pushsubmsgdef(lua_State* L, const upb_FieldDef* f);
+
+void lupb_def_registertypes(lua_State* L);
+
+/** From msg.c. ***************************************************************/
+
+void lupb_pushmsgval(lua_State* L, int container, upb_CType type,
+ upb_MessageValue val);
+int lupb_MessageDef_call(lua_State* L);
+upb_Arena* lupb_Arena_pushnew(lua_State* L);
+
+void lupb_msg_registertypes(lua_State* L);
+
+#define lupb_assert(L, predicate) \
+ if (!(predicate)) \
+ luaL_error(L, "internal error: %s, %s:%d ", #predicate, __FILE__, __LINE__);
+
+#define LUPB_UNUSED(var) (void)var
+
+#if defined(__GNUC__) || defined(__clang__)
+#define LUPB_UNREACHABLE() \
+ do { \
+ assert(0); \
+ __builtin_unreachable(); \
+ } while (0)
+#else
+#define LUPB_UNREACHABLE() \
+ do { \
+ assert(0); \
+ } while (0)
+#endif
+
+#endif /* UPB_LUA_UPB_H_ */
diff --git a/upb/lua/upb.lua b/upb/lua/upb.lua
new file mode 100644
index 0000000..3533309
--- /dev/null
+++ b/upb/lua/upb.lua
@@ -0,0 +1,58 @@
+--[[--------------------------------------------------------------------------
+
+ Copyright (c) 2009-2021, Google LLC
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Google LLC nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+--]]--------------------------------------------------------------------------
+
+local upb = require("lupb")
+
+upb.generated_pool = upb.DefPool()
+
+local module_metatable = {
+ __index = function(t, k)
+ local package = t._filedef:package()
+ if package then
+ k = package .. "." .. k
+ end
+ local pool = upb.generated_pool
+ local def = pool:lookup_msg(k) or pool:lookup_enum(k)
+ local v = nil
+ if def and def:file():name() == t._filedef:name() then
+ v = def
+ t[k] = v
+ end
+ return v
+ end
+}
+
+function upb._generated_module(desc_string)
+ local file = upb.generated_pool:add_file(desc_string)
+ local module = {_filedef = file}
+ setmetatable(module, module_metatable)
+ return module
+end
+
+return upb
diff --git a/upb/lua/upbc.cc b/upb/lua/upbc.cc
new file mode 100644
index 0000000..4a274f2
--- /dev/null
+++ b/upb/lua/upbc.cc
@@ -0,0 +1,139 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "google/protobuf/descriptor.pb.h"
+#include "absl/strings/str_replace.h"
+#include "absl/strings/string_view.h"
+#include "absl/strings/substitute.h"
+#include "google/protobuf/compiler/code_generator.h"
+#include "google/protobuf/compiler/plugin.h"
+#include "google/protobuf/descriptor.h"
+#include "google/protobuf/io/printer.h"
+
+namespace protoc = ::google::protobuf::compiler;
+namespace protobuf = ::google::protobuf;
+
+class LuaGenerator : public protoc::CodeGenerator {
+ bool Generate(const protobuf::FileDescriptor* file,
+ const std::string& parameter, protoc::GeneratorContext* context,
+ std::string* error) const override;
+};
+
+static std::string StripExtension(absl::string_view fname) {
+ size_t lastdot = fname.find_last_of('.');
+ if (lastdot == std::string::npos) {
+ return std::string(fname);
+ }
+ return std::string(fname.substr(0, lastdot));
+}
+
+static std::string Filename(const protobuf::FileDescriptor* file) {
+ return StripExtension(file->name()) + "_pb.lua";
+}
+
+static std::string ModuleName(const protobuf::FileDescriptor* file) {
+ std::string ret = StripExtension(file->name()) + "_pb";
+ return absl::StrReplaceAll(ret, {{"/", "."}});
+}
+
+static void PrintHexDigit(char digit, protobuf::io::Printer* printer) {
+ char text;
+ if (digit < 10) {
+ text = '0' + digit;
+ } else {
+ text = 'A' + (digit - 10);
+ }
+ printer->WriteRaw(&text, 1);
+}
+
+static void PrintString(int max_cols, absl::string_view* str,
+ protobuf::io::Printer* printer) {
+ printer->Print("\'");
+ while (max_cols > 0 && !str->empty()) {
+ char ch = (*str)[0];
+ if (ch == '\\') {
+ printer->PrintRaw("\\\\");
+ max_cols--;
+ } else if (ch == '\'') {
+ printer->PrintRaw("\\'");
+ max_cols--;
+ } else if (isprint(ch)) {
+ printer->WriteRaw(&ch, 1);
+ max_cols--;
+ } else {
+ unsigned char byte = ch;
+ printer->PrintRaw("\\x");
+ PrintHexDigit(byte >> 4, printer);
+ PrintHexDigit(byte & 15, printer);
+ max_cols -= 4;
+ }
+ str->remove_prefix(1);
+ }
+ printer->Print("\'");
+}
+
+bool LuaGenerator::Generate(const protobuf::FileDescriptor* file,
+ const std::string& /* parameter */,
+ protoc::GeneratorContext* context,
+ std::string* /* error */) const {
+ std::string filename = Filename(file);
+ protobuf::io::ZeroCopyOutputStream* out = context->Open(filename);
+ protobuf::io::Printer printer(out, '$');
+
+ for (int i = 0; i < file->dependency_count(); i++) {
+ const protobuf::FileDescriptor* dep = file->dependency(i);
+ printer.Print("require('$name$')\n", "name", ModuleName(dep));
+ }
+
+ printer.Print("local upb = require('upb')\n");
+
+ protobuf::FileDescriptorProto file_proto;
+ file->CopyTo(&file_proto);
+ std::string file_data;
+ file_proto.SerializeToString(&file_data);
+
+ printer.Print("local descriptor = table.concat({\n");
+ absl::string_view data(file_data);
+ while (!data.empty()) {
+ printer.Print(" ");
+ PrintString(72, &data, &printer);
+ printer.Print(",\n");
+ }
+ printer.Print("})\n");
+
+ printer.Print("return upb._generated_module(descriptor)\n");
+
+ return true;
+}
+
+int main(int argc, char** argv) {
+ LuaGenerator generator;
+ return google::protobuf::compiler::PluginMain(argc, argv, &generator);
+}
diff --git a/upb/protos/BUILD b/upb/protos/BUILD
new file mode 100644
index 0000000..be97361
--- /dev/null
+++ b/upb/protos/BUILD
@@ -0,0 +1,188 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load(
+ "//bazel:build_defs.bzl",
+ "UPB_DEFAULT_CPPOPTS",
+)
+load(
+ "//protos/bazel:upb_cc_proto_library.bzl",
+ "upb_cc_proto_library_copts",
+)
+
+# begin:google_only
+# package(default_applicable_licenses = ["//:license"])
+# end:google_only
+
+licenses(["notice"])
+
+cc_library(
+ name = "repeated_field",
+ hdrs = [
+ "repeated_field.h",
+ "repeated_field_iterator.h",
+ ],
+ copts = UPB_DEFAULT_CPPOPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":protos",
+ ":protos_traits",
+ "//:base",
+ "//:collections",
+ "//:collections_internal",
+ "//:mem",
+ "//:message_copy",
+ "//:mini_table",
+ "//:port",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/strings",
+ ],
+)
+
+cc_library(
+ name = "protos",
+ srcs = [
+ "protos.cc",
+ ],
+ hdrs = [
+ "protos.h",
+ ],
+ copts = UPB_DEFAULT_CPPOPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":protos_extension_lock",
+ "//:base",
+ "//:mem",
+ "//:message",
+ "//:message_copy",
+ "//:message_internal",
+ "//:message_promote",
+ "//:mini_table",
+ "//:wire",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/status",
+ "@com_google_absl//absl/status:statusor",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/strings:str_format",
+ ],
+)
+
+# Internally used type traits.
+cc_library(
+ name = "protos_traits",
+ hdrs = [
+ "protos_traits.h",
+ ],
+ copts = UPB_DEFAULT_CPPOPTS,
+ visibility = ["//visibility:private"],
+)
+
+cc_library(
+ name = "protos_internal",
+ hdrs = ["protos_internal.h"],
+ copts = UPB_DEFAULT_CPPOPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":protos",
+ "//:mem",
+ "//:message",
+ "//:mini_table",
+ "@com_google_absl//absl/status",
+ "@com_google_absl//absl/status:statusor",
+ "@com_google_absl//absl/strings:str_format",
+ ],
+)
+
+cc_library(
+ name = "protos_extension_lock",
+ srcs = ["protos_extension_lock.cc"],
+ hdrs = ["protos_extension_lock.h"],
+ copts = UPB_DEFAULT_CPPOPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ "//:message",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/synchronization",
+ ],
+)
+
+# Common support code for C++ generated code.
+cc_library(
+ name = "generated_protos_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me",
+ hdrs = [
+ "protos_internal.h",
+ ],
+ copts = UPB_DEFAULT_CPPOPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":protos",
+ ":protos_internal",
+ ":repeated_field",
+ "//:mem",
+ "//:message",
+ ],
+)
+
+cc_test(
+ name = "protos_internal_test",
+ srcs = ["protos_internal_test.cc"],
+ copts = UPB_DEFAULT_CPPOPTS,
+ deps = [
+ ":protos_internal",
+ "//:mem",
+ "//protos_generator/tests:test_model_upb_cc_proto",
+ "//protos_generator/tests:test_model_upb_proto",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+upb_cc_proto_library_copts(
+ name = "upb_cc_proto_library_copts__for_generated_code_only_do_not_use",
+ copts = UPB_DEFAULT_CPPOPTS,
+ visibility = ["//visibility:public"],
+)
+
+cc_test(
+ name = "repeated_field_iterator_test",
+ srcs = ["repeated_field_iterator_test.cc"],
+ deps = [
+ ":repeated_field",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "protos_extension_lock_test",
+ srcs = ["protos_extension_lock_test.cc"],
+ deps = [
+ "//:mem",
+ "//protos",
+ "//protos:protos_extension_lock",
+ "//protos_generator/tests:test_model_upb_cc_proto",
+ "@com_google_absl//absl/hash",
+ "@com_google_absl//absl/log:absl_check",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
diff --git a/upb/protos/bazel/BUILD b/upb/protos/bazel/BUILD
new file mode 100644
index 0000000..e56be4c
--- /dev/null
+++ b/upb/protos/bazel/BUILD
@@ -0,0 +1,43 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load("@bazel_skylib//:bzl_library.bzl", "bzl_library")
+
+# begin:google_only
+# package(default_applicable_licenses = ["//:license"])
+# end:google_only
+
+licenses(["notice"])
+
+bzl_library(
+ name = "upb_cc_proto_library_bzl",
+ srcs = ["upb_cc_proto_library.bzl"],
+ visibility = ["//visibility:public"],
+ deps = [
+ "@bazel_skylib//lib:paths",
+ "//bazel:upb_proto_library_bzl",
+ "@bazel_tools//tools/cpp:toolchain_utils.bzl",
+ ],
+)
diff --git a/upb/protos/bazel/upb_cc_proto_library.bzl b/upb/protos/bazel/upb_cc_proto_library.bzl
new file mode 100644
index 0000000..f109f9d
--- /dev/null
+++ b/upb/protos/bazel/upb_cc_proto_library.bzl
@@ -0,0 +1,307 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Public rules for using upb protos:
+ - upb_cc_proto_library()
+"""
+
+load("@bazel_skylib//lib:paths.bzl", "paths")
+load("//bazel:upb_proto_library.bzl", "GeneratedSrcsInfo", "UpbWrappedCcInfo", "upb_proto_library_aspect")
+
+# begin:google_only
+# load("@bazel_tools//tools/cpp:toolchain_utils.bzl", "find_cpp_toolchain", "use_cpp_toolchain")
+#
+# end:google_only
+# begin:github_only
+# Compatibility code for Bazel 4.x. Remove this when we drop support for Bazel 4.x.
+load("@bazel_tools//tools/cpp:toolchain_utils.bzl", "find_cpp_toolchain")
+
+def use_cpp_toolchain():
+ return ["@bazel_tools//tools/cpp:toolchain_type"]
+# end:github_only
+
+# Generic support code #########################################################
+
+# begin:github_only
+_is_google3 = False
+# end:github_only
+
+# begin:google_only
+# _is_google3 = True
+# end:google_only
+
+def _get_real_short_path(file):
+ # For some reason, files from other archives have short paths that look like:
+ # ../com_google_protobuf/google/protobuf/descriptor.proto
+ short_path = file.short_path
+ if short_path.startswith("../"):
+ second_slash = short_path.index("/", 3)
+ short_path = short_path[second_slash + 1:]
+
+ # Sometimes it has another few prefixes like:
+ # _virtual_imports/any_proto/google/protobuf/any.proto
+ # benchmarks/_virtual_imports/100_msgs_proto/benchmarks/100_msgs.proto
+ # We want just google/protobuf/any.proto.
+ virtual_imports = "_virtual_imports/"
+ if virtual_imports in short_path:
+ short_path = short_path.split(virtual_imports)[1].split("/", 1)[1]
+ return short_path
+
+def _get_real_root(file):
+ real_short_path = _get_real_short_path(file)
+ return file.path[:-len(real_short_path) - 1]
+
+def _generate_output_file(ctx, src, extension):
+ real_short_path = _get_real_short_path(src)
+ real_short_path = paths.relativize(real_short_path, ctx.label.package)
+ output_filename = paths.replace_extension(real_short_path, extension)
+ ret = ctx.actions.declare_file(output_filename)
+ return ret
+
+def _filter_none(elems):
+ out = []
+ for elem in elems:
+ if elem:
+ out.append(elem)
+ return out
+
+def _cc_library_func(ctx, name, hdrs, srcs, copts, dep_ccinfos):
+ """Like cc_library(), but callable from rules.
+
+ Args:
+ ctx: Rule context.
+ name: Unique name used to generate output files.
+ hdrs: Public headers that can be #included from other rules.
+ srcs: C/C++ source files.
+ copts: Additional options for cc compilation.
+ dep_ccinfos: CcInfo providers of dependencies we should build/link against.
+
+ Returns:
+ CcInfo provider for this compilation.
+ """
+
+ compilation_contexts = [info.compilation_context for info in dep_ccinfos]
+ linking_contexts = [info.linking_context for info in dep_ccinfos]
+ toolchain = find_cpp_toolchain(ctx)
+ feature_configuration = cc_common.configure_features(
+ ctx = ctx,
+ cc_toolchain = toolchain,
+ requested_features = ctx.features,
+ unsupported_features = ctx.disabled_features,
+ )
+
+ (compilation_context, compilation_outputs) = cc_common.compile(
+ actions = ctx.actions,
+ feature_configuration = feature_configuration,
+ cc_toolchain = toolchain,
+ name = name,
+ srcs = srcs,
+ public_hdrs = hdrs,
+ user_compile_flags = copts,
+ compilation_contexts = compilation_contexts,
+ )
+
+ # buildifier: disable=unused-variable
+ (linking_context, linking_outputs) = cc_common.create_linking_context_from_compilation_outputs(
+ actions = ctx.actions,
+ name = name,
+ feature_configuration = feature_configuration,
+ cc_toolchain = toolchain,
+ compilation_outputs = compilation_outputs,
+ linking_contexts = linking_contexts,
+ )
+
+ return CcInfo(
+ compilation_context = compilation_context,
+ linking_context = linking_context,
+ )
+
+# Dummy rule to expose select() copts to aspects ##############################
+
+UpbCcProtoLibraryCoptsInfo = provider(
+ "Provides copts for upb cc proto targets",
+ fields = {
+ "copts": "copts for upb_cc_proto_library()",
+ },
+)
+
+def upb_cc_proto_library_copts_impl(ctx):
+ return UpbCcProtoLibraryCoptsInfo(copts = ctx.attr.copts)
+
+upb_cc_proto_library_copts = rule(
+ implementation = upb_cc_proto_library_copts_impl,
+ attrs = {"copts": attr.string_list(default = [])},
+)
+
+_UpbCcWrappedCcInfo = provider("Provider for cc_info for protos", fields = ["cc_info"])
+_WrappedCcGeneratedSrcsInfo = provider("Provider for generated sources", fields = ["srcs"])
+
+def _compile_upb_cc_protos(ctx, generator, proto_info, proto_sources):
+ if len(proto_sources) == 0:
+ return GeneratedSrcsInfo(srcs = [], hdrs = [])
+
+ tool = getattr(ctx.executable, "_gen_" + generator)
+ srcs = [_generate_output_file(ctx, name, ".upb.proto.cc") for name in proto_sources]
+ hdrs = [_generate_output_file(ctx, name, ".upb.proto.h") for name in proto_sources]
+ hdrs += [_generate_output_file(ctx, name, ".upb.fwd.h") for name in proto_sources]
+ transitive_sets = proto_info.transitive_descriptor_sets.to_list()
+
+ args = ctx.actions.args()
+ args.use_param_file(param_file_arg = "@%s")
+ args.set_param_file_format("multiline")
+
+ args.add("--" + generator + "_out=" + _get_real_root(srcs[0]))
+ args.add("--plugin=protoc-gen-" + generator + "=" + tool.path)
+ args.add("--descriptor_set_in=" + ctx.configuration.host_path_separator.join([f.path for f in transitive_sets]))
+ args.add_all(proto_sources, map_each = _get_real_short_path)
+
+ ctx.actions.run(
+ inputs = depset(
+ direct = [proto_info.direct_descriptor_set],
+ transitive = [proto_info.transitive_descriptor_sets],
+ ),
+ tools = [tool],
+ outputs = srcs + hdrs,
+ executable = ctx.executable._protoc,
+ arguments = [args],
+ progress_message = "Generating upb cc protos for :" + ctx.label.name,
+ )
+ return GeneratedSrcsInfo(srcs = srcs, hdrs = hdrs)
+
+def _upb_cc_proto_rule_impl(ctx):
+ if len(ctx.attr.deps) != 1:
+ fail("only one deps dependency allowed.")
+ dep = ctx.attr.deps[0]
+
+ if _WrappedCcGeneratedSrcsInfo in dep:
+ srcs = dep[_WrappedCcGeneratedSrcsInfo].srcs
+ else:
+ fail("proto_library rule must generate _WrappedCcGeneratedSrcsInfo (aspect should have " +
+ "handled this).")
+
+ if _UpbCcWrappedCcInfo in dep:
+ cc_info = dep[_UpbCcWrappedCcInfo].cc_info
+ elif UpbWrappedCcInfo in dep:
+ cc_info = dep[UpbWrappedCcInfo].cc_info
+ else:
+ fail("proto_library rule must generate UpbWrappedCcInfo or " +
+ "_UpbCcWrappedCcInfo (aspect should have handled this).")
+
+ lib = cc_info.linking_context.linker_inputs.to_list()[0].libraries[0]
+ files = _filter_none([
+ lib.static_library,
+ lib.pic_static_library,
+ lib.dynamic_library,
+ ])
+ return [
+ DefaultInfo(files = depset(files + srcs.hdrs + srcs.srcs)),
+ srcs,
+ cc_info,
+ ]
+
+def _upb_cc_proto_aspect_impl(target, ctx, generator, cc_provider, file_provider):
+ proto_info = target[ProtoInfo]
+ files = _compile_upb_cc_protos(ctx, generator, proto_info, proto_info.direct_sources)
+ deps = ctx.rule.attr.deps + getattr(ctx.attr, "_" + generator)
+ dep_ccinfos = [dep[CcInfo] for dep in deps if CcInfo in dep]
+ dep_ccinfos += [dep[UpbWrappedCcInfo].cc_info for dep in deps if UpbWrappedCcInfo in dep]
+ dep_ccinfos += [dep[_UpbCcWrappedCcInfo].cc_info for dep in deps if _UpbCcWrappedCcInfo in dep]
+ if UpbWrappedCcInfo not in target:
+ fail("Target should have UpbWrappedCcInfo provider")
+ dep_ccinfos.append(target[UpbWrappedCcInfo].cc_info)
+ cc_info = _cc_library_func(
+ ctx = ctx,
+ name = ctx.rule.attr.name + "." + generator,
+ hdrs = files.hdrs,
+ srcs = files.srcs,
+ copts = ctx.attr._ccopts[UpbCcProtoLibraryCoptsInfo].copts,
+ dep_ccinfos = dep_ccinfos,
+ )
+ return [cc_provider(cc_info = cc_info), file_provider(srcs = files)]
+
+def _upb_cc_proto_library_aspect_impl(target, ctx):
+ return _upb_cc_proto_aspect_impl(target, ctx, "upbprotos", _UpbCcWrappedCcInfo, _WrappedCcGeneratedSrcsInfo)
+
+_upb_cc_proto_library_aspect = aspect(
+ attrs = {
+ "_ccopts": attr.label(
+ default = "//protos:upb_cc_proto_library_copts__for_generated_code_only_do_not_use",
+ ),
+ "_gen_upbprotos": attr.label(
+ executable = True,
+ cfg = "exec",
+ default = "//protos_generator:protoc-gen-upb-protos",
+ ),
+ "_protoc": attr.label(
+ executable = True,
+ cfg = "exec",
+ default = "@com_google_protobuf//:protoc",
+ ),
+ "_cc_toolchain": attr.label(
+ default = "@bazel_tools//tools/cpp:current_cc_toolchain",
+ ),
+ "_upbprotos": attr.label_list(
+ default = [
+ # TODO: Add dependencies for cc runtime (absl/string etc..)
+ "//:generated_cpp_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me",
+ "//protos:generated_protos_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/status:statusor",
+ "//protos",
+ "//protos:repeated_field",
+ ],
+ ),
+ },
+ implementation = _upb_cc_proto_library_aspect_impl,
+ provides = [
+ _UpbCcWrappedCcInfo,
+ _WrappedCcGeneratedSrcsInfo,
+ ],
+ required_aspect_providers = [
+ UpbWrappedCcInfo,
+ ],
+ attr_aspects = ["deps"],
+ fragments = ["cpp"],
+ toolchains = use_cpp_toolchain(),
+ incompatible_use_toolchain_transition = True,
+)
+
+upb_cc_proto_library = rule(
+ output_to_genfiles = True,
+ implementation = _upb_cc_proto_rule_impl,
+ attrs = {
+ "deps": attr.label_list(
+ aspects = [
+ upb_proto_library_aspect,
+ _upb_cc_proto_library_aspect,
+ ],
+ allow_rules = ["proto_library"],
+ providers = [ProtoInfo],
+ ),
+ "_ccopts": attr.label(
+ default = "//protos:upb_cc_proto_library_copts__for_generated_code_only_do_not_use",
+ ),
+ },
+)
diff --git a/upb/protos/protos.cc b/upb/protos/protos.cc
new file mode 100644
index 0000000..79535f5
--- /dev/null
+++ b/upb/protos/protos.cc
@@ -0,0 +1,187 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "protos/protos.h"
+
+#include <atomic>
+#include <cstddef>
+
+#include "absl/status/status.h"
+#include "absl/status/statusor.h"
+#include "absl/strings/str_format.h"
+#include "absl/strings/string_view.h"
+#include "protos/protos_extension_lock.h"
+#include "upb/mem/arena.h"
+#include "upb/message/copy.h"
+#include "upb/message/internal/extension.h"
+#include "upb/message/promote.h"
+#include "upb/mini_table/extension.h"
+#include "upb/mini_table/extension_registry.h"
+#include "upb/mini_table/message.h"
+#include "upb/wire/decode.h"
+#include "upb/wire/encode.h"
+
+namespace protos {
+
+// begin:google_only
+// absl::Status MessageAllocationError(SourceLocation loc) {
+// return absl::Status(absl::StatusCode::kInternal,
+// "Upb message allocation error", loc);
+// }
+//
+// absl::Status ExtensionNotFoundError(int extension_number, SourceLocation loc) {
+// return absl::Status(
+// absl::StatusCode::kInternal,
+// absl::StrFormat("Extension %d not found", extension_number), loc);
+// }
+//
+// absl::Status MessageEncodeError(upb_EncodeStatus status, SourceLocation loc) {
+// return absl::Status(absl::StatusCode::kInternal,
+// absl::StrFormat("Upb message encoding error %d", status),
+// loc
+//
+// );
+// }
+//
+// absl::Status MessageDecodeError(upb_DecodeStatus status, SourceLocation loc
+//
+// ) {
+// return absl::Status(absl::StatusCode::kInternal,
+// absl::StrFormat("Upb message parse error %d", status), loc
+//
+// );
+// }
+// end:google_only
+
+// begin:github_only
+absl::Status MessageAllocationError(SourceLocation loc) {
+ return absl::Status(absl::StatusCode::kUnknown,
+ "Upb message allocation error");
+}
+
+absl::Status ExtensionNotFoundError(int ext_number, SourceLocation loc) {
+ return absl::Status(absl::StatusCode::kUnknown,
+ absl::StrFormat("Extension %d not found", ext_number));
+}
+
+absl::Status MessageEncodeError(upb_EncodeStatus s, SourceLocation loc) {
+ return absl::Status(absl::StatusCode::kUnknown, "Encoding error");
+}
+
+absl::Status MessageDecodeError(upb_DecodeStatus status, SourceLocation loc
+
+) {
+ return absl::Status(absl::StatusCode::kUnknown, "Upb message parse error");
+}
+// end:github_only
+
+namespace internal {
+
+upb_ExtensionRegistry* GetUpbExtensions(
+ const ExtensionRegistry& extension_registry) {
+ return extension_registry.registry_;
+}
+
+/**
+ * MessageLock(msg) acquires lock on msg when constructed and releases it when
+ * destroyed.
+ */
+class MessageLock {
+ public:
+ explicit MessageLock(const upb_Message* msg) : msg_(msg) {
+ UpbExtensionLocker locker =
+ upb_extension_locker_global.load(std::memory_order_acquire);
+ unlocker_ = (locker != nullptr) ? locker(msg) : nullptr;
+ }
+ MessageLock(const MessageLock&) = delete;
+ void operator=(const MessageLock&) = delete;
+ ~MessageLock() {
+ if (unlocker_ != nullptr) {
+ unlocker_(msg_);
+ }
+ }
+
+ private:
+ const upb_Message* msg_;
+ UpbExtensionUnlocker unlocker_;
+};
+
+bool HasExtensionOrUnknown(const upb_Message* msg,
+ const upb_MiniTableExtension* eid) {
+ MessageLock msg_lock(msg);
+ return _upb_Message_Getext(msg, eid) != nullptr ||
+ upb_MiniTable_FindUnknown(msg, eid->field.number,
+ kUpb_WireFormat_DefaultDepthLimit)
+ .status == kUpb_FindUnknown_Ok;
+}
+
+const upb_Message_Extension* GetOrPromoteExtension(
+ upb_Message* msg, const upb_MiniTableExtension* eid, upb_Arena* arena) {
+ MessageLock msg_lock(msg);
+ const upb_Message_Extension* ext = _upb_Message_Getext(msg, eid);
+ if (ext == nullptr) {
+ upb_GetExtension_Status ext_status = upb_MiniTable_GetOrPromoteExtension(
+ (upb_Message*)msg, eid, kUpb_WireFormat_DefaultDepthLimit, arena, &ext);
+ if (ext_status != kUpb_GetExtension_Ok) {
+ ext = nullptr;
+ }
+ }
+ return ext;
+}
+
+absl::StatusOr<absl::string_view> Serialize(const upb_Message* message,
+ const upb_MiniTable* mini_table,
+ upb_Arena* arena, int options) {
+ MessageLock msg_lock(message);
+ size_t len;
+ char* ptr;
+ upb_EncodeStatus status =
+ upb_Encode(message, mini_table, options, arena, &ptr, &len);
+ if (status == kUpb_EncodeStatus_Ok) {
+ return absl::string_view(ptr, len);
+ }
+ return MessageEncodeError(status);
+}
+
+void DeepCopy(upb_Message* target, const upb_Message* source,
+ const upb_MiniTable* mini_table, upb_Arena* arena) {
+ MessageLock msg_lock(source);
+ upb_Message_DeepCopy(target, source, mini_table, arena);
+}
+
+upb_Message* DeepClone(const upb_Message* source,
+ const upb_MiniTable* mini_table, upb_Arena* arena) {
+ MessageLock msg_lock(source);
+ return upb_Message_DeepClone(source, mini_table, arena);
+}
+
+} // namespace internal
+
+} // namespace protos
diff --git a/upb/protos/protos.h b/upb/protos/protos.h
new file mode 100644
index 0000000..8d1adae
--- /dev/null
+++ b/upb/protos/protos.h
@@ -0,0 +1,520 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_PROTOS_PROTOS_H_
+#define UPB_PROTOS_PROTOS_H_
+
+#include <type_traits>
+#include <vector>
+
+#include "absl/status/status.h"
+#include "absl/status/statusor.h"
+#include "upb/base/status.hpp"
+#include "upb/mem/arena.hpp"
+#include "upb/message/copy.h"
+#include "upb/message/internal/extension.h"
+#include "upb/wire/decode.h"
+#include "upb/wire/encode.h"
+
+namespace protos {
+
+using Arena = ::upb::Arena;
+class ExtensionRegistry;
+
+template <typename T>
+using Proxy = std::conditional_t<std::is_const<T>::value,
+ typename std::remove_const_t<T>::CProxy,
+ typename T::Proxy>;
+
+// Provides convenient access to Proxy and CProxy message types.
+//
+// Using rebinding and handling of const, Ptr<Message> and Ptr<const Message>
+// allows copying const with T* const and avoids using non-copyable Proxy types
+// directly.
+template <typename T>
+class Ptr final {
+ public:
+ Ptr() = delete;
+
+ // Implicit conversions
+ Ptr(T* m) : p_(m) {} // NOLINT
+ Ptr(const Proxy<T>* p) : p_(*p) {} // NOLINT
+ Ptr(Proxy<T> p) : p_(p) {} // NOLINT
+ Ptr(const Ptr& m) = default;
+
+ Ptr& operator=(Ptr v) & {
+ Proxy<T>::Rebind(p_, v.p_);
+ return *this;
+ }
+
+ Proxy<T> operator*() const { return p_; }
+ Proxy<T>* operator->() const {
+ return const_cast<Proxy<T>*>(std::addressof(p_));
+ }
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wclass-conversion"
+#endif
+ template <typename U = T, std::enable_if_t<!std::is_const<U>::value, int> = 0>
+ operator Ptr<const T>() const {
+ Proxy<const T> p(p_);
+ return Ptr<const T>(&p);
+ }
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+ private:
+ Ptr(void* msg, upb_Arena* arena) : p_(msg, arena) {} // NOLINT
+
+ friend class Ptr<const T>;
+ friend typename T::Access;
+
+ Proxy<T> p_;
+};
+
+namespace internal {
+struct PrivateAccess {
+ template <typename T>
+ static auto* GetInternalMsg(T&& message) {
+ return message->msg();
+ }
+};
+
+template <typename T>
+auto* GetInternalMsg(T&& message) {
+ return PrivateAccess::GetInternalMsg(std::forward<T>(message));
+}
+
+} // namespace internal
+
+inline absl::string_view UpbStrToStringView(upb_StringView str) {
+ return absl::string_view(str.data, str.size);
+}
+
+// TODO: update bzl and move to upb runtime / protos.cc.
+inline upb_StringView UpbStrFromStringView(absl::string_view str,
+ upb_Arena* arena) {
+ const size_t str_size = str.size();
+ char* buffer = static_cast<char*>(upb_Arena_Malloc(arena, str_size));
+ memcpy(buffer, str.data(), str_size);
+ return upb_StringView_FromDataAndSize(buffer, str_size);
+}
+
+template <typename T>
+typename T::Proxy CreateMessage(::protos::Arena& arena) {
+ return typename T::Proxy(upb_Message_New(T::minitable(), arena.ptr()),
+ arena.ptr());
+}
+
+// begin:github_only
+// This type exists to work around an absl type that has not yet been
+// released.
+struct SourceLocation {
+ static SourceLocation current() { return {}; }
+ absl::string_view file_name() { return "<unknown>"; }
+ int line() { return 0; }
+};
+// end:github_only
+
+// begin:google_only
+// using SourceLocation = absl::SourceLocation;
+// end:google_only
+
+absl::Status MessageAllocationError(
+ SourceLocation loc = SourceLocation::current());
+
+absl::Status ExtensionNotFoundError(
+ int extension_number, SourceLocation loc = SourceLocation::current());
+
+absl::Status MessageDecodeError(upb_DecodeStatus status,
+ SourceLocation loc = SourceLocation::current());
+
+absl::Status MessageEncodeError(upb_EncodeStatus status,
+ SourceLocation loc = SourceLocation::current());
+
+namespace internal {
+template <typename T>
+T CreateMessage() {
+ return T();
+}
+
+template <typename T>
+typename T::Proxy CreateMessageProxy(void* msg, upb_Arena* arena) {
+ return typename T::Proxy(msg, arena);
+}
+
+template <typename T>
+typename T::CProxy CreateMessage(upb_Message* msg, upb_Arena* arena) {
+ return typename T::CProxy(msg, arena);
+}
+
+class ExtensionMiniTableProvider {
+ public:
+ constexpr explicit ExtensionMiniTableProvider(
+ const upb_MiniTableExtension* mini_table_ext)
+ : mini_table_ext_(mini_table_ext) {}
+ const upb_MiniTableExtension* mini_table_ext() const {
+ return mini_table_ext_;
+ }
+
+ private:
+ const upb_MiniTableExtension* mini_table_ext_;
+};
+
+// -------------------------------------------------------------------
+// ExtensionIdentifier
+// This is the type of actual extension objects. E.g. if you have:
+// extend Foo {
+// optional MyExtension bar = 1234;
+// }
+// then "bar" will be defined in C++ as:
+// ExtensionIdentifier<Foo, MyExtension> bar(&namespace_bar_ext);
+template <typename ExtendeeType, typename ExtensionType>
+class ExtensionIdentifier : public ExtensionMiniTableProvider {
+ public:
+ using Extension = ExtensionType;
+ using Extendee = ExtendeeType;
+
+ constexpr explicit ExtensionIdentifier(
+ const upb_MiniTableExtension* mini_table_ext)
+ : ExtensionMiniTableProvider(mini_table_ext) {}
+};
+
+template <typename T>
+upb_Arena* GetArena(Ptr<T> message) {
+ return static_cast<upb_Arena*>(message->GetInternalArena());
+}
+
+template <typename T>
+upb_Arena* GetArena(T* message) {
+ return static_cast<upb_Arena*>(message->GetInternalArena());
+}
+
+template <typename T>
+const upb_MiniTable* GetMiniTable(const T*) {
+ return T::minitable();
+}
+
+template <typename T>
+const upb_MiniTable* GetMiniTable(Ptr<T>) {
+ return T::minitable();
+}
+
+upb_ExtensionRegistry* GetUpbExtensions(
+ const ExtensionRegistry& extension_registry);
+
+absl::StatusOr<absl::string_view> Serialize(const upb_Message* message,
+ const upb_MiniTable* mini_table,
+ upb_Arena* arena, int options);
+
+bool HasExtensionOrUnknown(const upb_Message* msg,
+ const upb_MiniTableExtension* eid);
+
+const upb_Message_Extension* GetOrPromoteExtension(
+ upb_Message* msg, const upb_MiniTableExtension* eid, upb_Arena* arena);
+
+void DeepCopy(upb_Message* target, const upb_Message* source,
+ const upb_MiniTable* mini_table, upb_Arena* arena);
+
+upb_Message* DeepClone(const upb_Message* source,
+ const upb_MiniTable* mini_table, upb_Arena* arena);
+
+} // namespace internal
+
+template <typename T>
+void DeepCopy(Ptr<const T> source_message, Ptr<T> target_message) {
+ static_assert(!std::is_const_v<T>);
+ ::protos::internal::DeepCopy(
+ internal::GetInternalMsg(target_message),
+ internal::GetInternalMsg(source_message), T::minitable(),
+ static_cast<upb_Arena*>(target_message->GetInternalArena()));
+}
+
+template <typename T>
+typename T::Proxy CloneMessage(Ptr<T> message, upb::Arena& arena) {
+ return typename T::Proxy(
+ ::protos::internal::DeepClone(internal::GetInternalMsg(message),
+ T::minitable(), arena.ptr()),
+ arena.ptr());
+}
+
+template <typename T>
+void DeepCopy(Ptr<const T> source_message, T* target_message) {
+ static_assert(!std::is_const_v<T>);
+ DeepCopy(source_message, protos::Ptr(target_message));
+}
+
+template <typename T>
+void DeepCopy(const T* source_message, Ptr<T> target_message) {
+ static_assert(!std::is_const_v<T>);
+ DeepCopy(protos::Ptr(source_message), target_message);
+}
+
+template <typename T>
+void DeepCopy(const T* source_message, T* target_message) {
+ static_assert(!std::is_const_v<T>);
+ DeepCopy(protos::Ptr(source_message), protos::Ptr(target_message));
+}
+
+template <typename T>
+void ClearMessage(Ptr<T> message) {
+ static_assert(!std::is_const_v<T>, "");
+ upb_Message_Clear(internal::GetInternalMsg(message), T::minitable());
+}
+
+template <typename T>
+void ClearMessage(T* message) {
+ ClearMessage(protos::Ptr(message));
+}
+
+class ExtensionRegistry {
+ public:
+ ExtensionRegistry(
+ const std::vector<const ::protos::internal::ExtensionMiniTableProvider*>&
+ extensions,
+ const upb::Arena& arena)
+ : registry_(upb_ExtensionRegistry_New(arena.ptr())) {
+ if (registry_) {
+ for (const auto& ext_provider : extensions) {
+ const auto* ext = ext_provider->mini_table_ext();
+ bool success = upb_ExtensionRegistry_AddArray(registry_, &ext, 1);
+ if (!success) {
+ registry_ = nullptr;
+ break;
+ }
+ }
+ }
+ }
+
+ private:
+ friend upb_ExtensionRegistry* ::protos::internal::GetUpbExtensions(
+ const ExtensionRegistry& extension_registry);
+ upb_ExtensionRegistry* registry_;
+};
+
+template <typename T>
+using EnableIfProtosClass = std::enable_if_t<
+ std::is_base_of<typename T::Access, T>::value &&
+ std::is_base_of<typename T::Access, typename T::ExtendableType>::value>;
+
+template <typename T>
+using EnableIfMutableProto = std::enable_if_t<!std::is_const<T>::value>;
+
+template <typename T, typename Extendee, typename Extension,
+ typename = EnableIfProtosClass<T>>
+ABSL_MUST_USE_RESULT bool HasExtension(
+ Ptr<T> message,
+ const ::protos::internal::ExtensionIdentifier<Extendee, Extension>& id) {
+ return ::protos::internal::HasExtensionOrUnknown(
+ ::protos::internal::GetInternalMsg(message), id.mini_table_ext());
+}
+
+template <typename T, typename Extendee, typename Extension,
+ typename = EnableIfProtosClass<T>>
+ABSL_MUST_USE_RESULT bool HasExtension(
+ const T* message,
+ const ::protos::internal::ExtensionIdentifier<Extendee, Extension>& id) {
+ return HasExtension(protos::Ptr(message), id);
+}
+
+template <typename T, typename Extendee, typename Extension,
+ typename = EnableIfProtosClass<T>, typename = EnableIfMutableProto<T>>
+void ClearExtension(
+ Ptr<T> message,
+ const ::protos::internal::ExtensionIdentifier<Extendee, Extension>& id) {
+ static_assert(!std::is_const_v<T>, "");
+ _upb_Message_ClearExtensionField(internal::GetInternalMsg(message),
+ id.mini_table_ext());
+}
+
+template <typename T, typename Extendee, typename Extension,
+ typename = EnableIfProtosClass<T>>
+void ClearExtension(
+ T* message,
+ const ::protos::internal::ExtensionIdentifier<Extendee, Extension>& id) {
+ ClearExtension(::protos::Ptr(message), id);
+}
+
+template <typename T, typename Extendee, typename Extension,
+ typename = EnableIfProtosClass<T>, typename = EnableIfMutableProto<T>>
+absl::Status SetExtension(
+ Ptr<T> message,
+ const ::protos::internal::ExtensionIdentifier<Extendee, Extension>& id,
+ Extension& value) {
+ static_assert(!std::is_const_v<T>);
+ auto* message_arena = static_cast<upb_Arena*>(message->GetInternalArena());
+ upb_Message_Extension* msg_ext = _upb_Message_GetOrCreateExtension(
+ internal::GetInternalMsg(message), id.mini_table_ext(), message_arena);
+ if (!msg_ext) {
+ return MessageAllocationError();
+ }
+ auto* extension_arena = static_cast<upb_Arena*>(message->GetInternalArena());
+ if (message_arena != extension_arena) {
+ upb_Arena_Fuse(message_arena, extension_arena);
+ }
+ msg_ext->data.ptr = internal::GetInternalMsg(&value);
+ return absl::OkStatus();
+}
+
+template <typename T, typename Extendee, typename Extension,
+ typename = EnableIfProtosClass<T>>
+absl::Status SetExtension(
+ T* message,
+ const ::protos::internal::ExtensionIdentifier<Extendee, Extension>& id,
+ Extension& value) {
+ return ::protos::SetExtension(::protos::Ptr(message), id, value);
+}
+
+template <typename T, typename Extendee, typename Extension,
+ typename = EnableIfProtosClass<T>>
+absl::StatusOr<Ptr<const Extension>> GetExtension(
+ Ptr<T> message,
+ const ::protos::internal::ExtensionIdentifier<Extendee, Extension>& id) {
+ // TODO(b/294089233): Fix const correctness issues.
+ const upb_Message_Extension* ext = ::protos::internal::GetOrPromoteExtension(
+ const_cast<upb_Message*>(internal::GetInternalMsg(message)),
+ id.mini_table_ext(), ::protos::internal::GetArena(message));
+ if (!ext) {
+ return ExtensionNotFoundError(id.mini_table_ext()->field.number);
+ }
+ return Ptr<const Extension>(::protos::internal::CreateMessage<Extension>(
+ ext->data.ptr, ::protos::internal::GetArena(message)));
+}
+
+template <typename T, typename Extendee, typename Extension,
+ typename = EnableIfProtosClass<T>>
+absl::StatusOr<Ptr<const Extension>> GetExtension(
+ const T* message,
+ const ::protos::internal::ExtensionIdentifier<Extendee, Extension>& id) {
+ return GetExtension(protos::Ptr(message), id);
+}
+
+template <typename T>
+ABSL_MUST_USE_RESULT bool Parse(Ptr<T> message, absl::string_view bytes) {
+ static_assert(!std::is_const_v<T>);
+ upb_Message_Clear(internal::GetInternalMsg(message),
+ ::protos::internal::GetMiniTable(message));
+ auto* arena = static_cast<upb_Arena*>(message->GetInternalArena());
+ return upb_Decode(bytes.data(), bytes.size(),
+ internal::GetInternalMsg(message),
+ ::protos::internal::GetMiniTable(message),
+ /* extreg= */ nullptr, /* options= */ 0,
+ arena) == kUpb_DecodeStatus_Ok;
+}
+
+template <typename T>
+ABSL_MUST_USE_RESULT bool Parse(
+ Ptr<T> message, absl::string_view bytes,
+ const ::protos::ExtensionRegistry& extension_registry) {
+ static_assert(!std::is_const_v<T>);
+ upb_Message_Clear(internal::GetInternalMsg(message),
+ ::protos::internal::GetMiniTable(message));
+ auto* arena = static_cast<upb_Arena*>(message->GetInternalArena());
+ return upb_Decode(bytes.data(), bytes.size(),
+ internal::GetInternalMsg(message),
+ ::protos::internal::GetMiniTable(message),
+ /* extreg= */
+ ::protos::internal::GetUpbExtensions(extension_registry),
+ /* options= */ 0, arena) == kUpb_DecodeStatus_Ok;
+}
+
+template <typename T>
+ABSL_MUST_USE_RESULT bool Parse(
+ T* message, absl::string_view bytes,
+ const ::protos::ExtensionRegistry& extension_registry) {
+ static_assert(!std::is_const_v<T>);
+ return Parse(protos::Ptr(message, bytes, extension_registry));
+}
+
+template <typename T>
+ABSL_MUST_USE_RESULT bool Parse(T* message, absl::string_view bytes) {
+ static_assert(!std::is_const_v<T>);
+ upb_Message_Clear(internal::GetInternalMsg(message),
+ ::protos::internal::GetMiniTable(message));
+ auto* arena = static_cast<upb_Arena*>(message->GetInternalArena());
+ return upb_Decode(bytes.data(), bytes.size(),
+ internal::GetInternalMsg(message),
+ ::protos::internal::GetMiniTable(message),
+ /* extreg= */ nullptr, /* options= */ 0,
+ arena) == kUpb_DecodeStatus_Ok;
+}
+
+template <typename T>
+absl::StatusOr<T> Parse(absl::string_view bytes, int options = 0) {
+ T message;
+ auto* arena = static_cast<upb_Arena*>(message.GetInternalArena());
+ upb_DecodeStatus status =
+ upb_Decode(bytes.data(), bytes.size(), message.msg(),
+ ::protos::internal::GetMiniTable(&message),
+ /* extreg= */ nullptr, /* options= */ 0, arena);
+ if (status == kUpb_DecodeStatus_Ok) {
+ return message;
+ }
+ return MessageDecodeError(status);
+}
+
+template <typename T>
+absl::StatusOr<T> Parse(absl::string_view bytes,
+ const ::protos::ExtensionRegistry& extension_registry,
+ int options = 0) {
+ T message;
+ auto* arena = static_cast<upb_Arena*>(message.GetInternalArena());
+ upb_DecodeStatus status =
+ upb_Decode(bytes.data(), bytes.size(), message.msg(),
+ ::protos::internal::GetMiniTable(&message),
+ ::protos::internal::GetUpbExtensions(extension_registry),
+ /* options= */ 0, arena);
+ if (status == kUpb_DecodeStatus_Ok) {
+ return message;
+ }
+ return MessageDecodeError(status);
+}
+
+template <typename T>
+absl::StatusOr<absl::string_view> Serialize(const T* message, upb::Arena& arena,
+ int options = 0) {
+ return ::protos::internal::Serialize(
+ internal::GetInternalMsg(message),
+ ::protos::internal::GetMiniTable(message), arena.ptr(), options);
+}
+
+template <typename T>
+absl::StatusOr<absl::string_view> Serialize(Ptr<T> message, upb::Arena& arena,
+ int options = 0) {
+ return ::protos::internal::Serialize(
+ internal::GetInternalMsg(message),
+ ::protos::internal::GetMiniTable(message), arena.ptr(), options);
+}
+
+} // namespace protos
+
+#endif // UPB_PROTOS_PROTOS_H_
diff --git a/upb/protos/protos_extension_lock.cc b/upb/protos/protos_extension_lock.cc
new file mode 100644
index 0000000..dbb2fc2
--- /dev/null
+++ b/upb/protos/protos_extension_lock.cc
@@ -0,0 +1,39 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "protos/protos_extension_lock.h"
+
+#include <atomic>
+
+namespace protos::internal {
+
+std::atomic<UpbExtensionLocker> upb_extension_locker_global;
+
+} // namespace protos::internal
diff --git a/upb/protos/protos_extension_lock.h b/upb/protos/protos_extension_lock.h
new file mode 100644
index 0000000..04a268d
--- /dev/null
+++ b/upb/protos/protos_extension_lock.h
@@ -0,0 +1,54 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_PROTOS_PROTOS_EXTENSION_LOCK_H_
+#define UPB_PROTOS_PROTOS_EXTENSION_LOCK_H_
+
+#include <atomic>
+
+namespace protos::internal {
+
+// TODO(b/295355754): Temporary locking api for cross-language
+// concurrency issue around extension api that uses lazy promotion
+// from unknown data to upb_MiniTableExtension. Will be replaced by
+// a core runtime solution in the future.
+//
+// Any api(s) using unknown or extension data (GetOrPromoteExtension,
+// Serialize and others) call lock/unlock to provide a way for
+// mixed language implementations to avoid race conditions)
+using UpbExtensionUnlocker = void (*)(const void*);
+using UpbExtensionLocker = UpbExtensionUnlocker (*)(const void*);
+
+// TODO(b/295355754): Expose as function instead of global.
+extern std::atomic<UpbExtensionLocker> upb_extension_locker_global;
+
+} // namespace protos::internal
+
+#endif // UPB_PROTOS_PROTOS_EXTENSION_LOCK_H_
diff --git a/upb/protos/protos_extension_lock_test.cc b/upb/protos/protos_extension_lock_test.cc
new file mode 100644
index 0000000..774e407
--- /dev/null
+++ b/upb/protos/protos_extension_lock_test.cc
@@ -0,0 +1,140 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "protos/protos_extension_lock.h"
+
+#include <atomic>
+#include <mutex>
+#include <string>
+#include <thread>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/hash/hash.h"
+#include "absl/log/absl_check.h"
+#include "protos/protos.h"
+#include "protos_generator/tests/test_model.upb.proto.h"
+#include "upb/mem/arena.hpp"
+
+#ifndef ASSERT_OK
+#define ASSERT_OK(x) ASSERT_TRUE(x.ok())
+#endif // ASSERT_OK
+#ifndef EXPECT_OK
+#define EXPECT_OK(x) EXPECT_TRUE(x.ok())
+#endif // EXPECT_OK
+
+namespace protos_generator::test::protos {
+
+namespace {
+
+std::string GenerateTestData() {
+ TestModel model;
+ model.set_str1("str");
+ ThemeExtension extension1;
+ extension1.set_ext_name("theme");
+ ABSL_CHECK_OK(::protos::SetExtension(&model, theme, extension1));
+ ThemeExtension extension2;
+ extension2.set_ext_name("theme_extension");
+ ABSL_CHECK_OK(::protos::SetExtension(&model, ThemeExtension::theme_extension,
+ extension2));
+ ::upb::Arena arena;
+ auto bytes = ::protos::Serialize(&model, arena);
+ ABSL_CHECK_OK(bytes);
+ return std::string(bytes->data(), bytes->size());
+}
+
+std::mutex m[8];
+void unlock_func(const void* msg) { m[absl::HashOf(msg) & 0x7].unlock(); }
+::protos::internal::UpbExtensionUnlocker lock_func(const void* msg) {
+ m[absl::HashOf(msg) & 0x7].lock();
+ return &unlock_func;
+}
+
+void TestConcurrentExtensionAccess(::protos::ExtensionRegistry registry) {
+ ::protos::internal::upb_extension_locker_global.store(
+ &lock_func, std::memory_order_release);
+ const std::string payload = GenerateTestData();
+ TestModel parsed_model =
+ ::protos::Parse<TestModel>(payload, registry).value();
+ const auto test_main = [&] { EXPECT_EQ("str", parsed_model.str1()); };
+ const auto test_theme = [&] {
+ ASSERT_TRUE(::protos::HasExtension(&parsed_model, theme));
+ auto ext = ::protos::GetExtension(&parsed_model, theme);
+ ASSERT_OK(ext);
+ EXPECT_EQ((*ext)->ext_name(), "theme");
+ };
+ const auto test_theme_extension = [&] {
+ auto ext =
+ ::protos::GetExtension(&parsed_model, ThemeExtension::theme_extension);
+ ASSERT_OK(ext);
+ EXPECT_EQ((*ext)->ext_name(), "theme_extension");
+ };
+ const auto test_serialize = [&] {
+ ::upb::Arena arena;
+ EXPECT_OK(::protos::Serialize(&parsed_model, arena));
+ };
+ std::thread t1(test_main);
+ std::thread t2(test_main);
+ std::thread t3(test_theme);
+ std::thread t4(test_theme);
+ std::thread t5(test_theme_extension);
+ std::thread t6(test_theme_extension);
+ std::thread t7(test_serialize);
+ t1.join();
+ t2.join();
+ t3.join();
+ t4.join();
+ t5.join();
+ t6.join();
+ t7.join();
+ test_main();
+ test_theme();
+ test_theme_extension();
+}
+
+TEST(CppGeneratedCode, ConcurrentAccessDoesNotRaceBothLazy) {
+ ::upb::Arena arena;
+ TestConcurrentExtensionAccess({{}, arena});
+}
+
+TEST(CppGeneratedCode, ConcurrentAccessDoesNotRaceOneLazyOneEager) {
+ ::upb::Arena arena;
+ TestConcurrentExtensionAccess({{&theme}, arena});
+ TestConcurrentExtensionAccess({{&ThemeExtension::theme_extension}, arena});
+}
+
+TEST(CppGeneratedCode, ConcurrentAccessDoesNotRaceBothEager) {
+ ::upb::Arena arena;
+ TestConcurrentExtensionAccess(
+ {{&theme, &ThemeExtension::theme_extension}, arena});
+}
+
+} // namespace
+} // namespace protos_generator::test::protos
diff --git a/upb/protos/protos_internal.h b/upb/protos/protos_internal.h
new file mode 100644
index 0000000..0a1f194
--- /dev/null
+++ b/upb/protos/protos_internal.h
@@ -0,0 +1,48 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_PROTOS_PROTOS_INTERNAL_H_
+#define UPB_PROTOS_PROTOS_INTERNAL_H_
+
+#include "upb/mem/arena.h"
+#include "upb/message/message.h"
+
+namespace protos::internal {
+
+// Moves ownership of a message created in a source arena.
+//
+// Utility function to provide a way to move ownership across languages or VMs.
+template <typename T>
+T MoveMessage(upb_Message* msg, upb_Arena* arena) {
+ return T(msg, arena);
+}
+
+} // namespace protos::internal
+#endif
diff --git a/upb/protos/protos_internal_test.cc b/upb/protos/protos_internal_test.cc
new file mode 100644
index 0000000..e73954a
--- /dev/null
+++ b/upb/protos/protos_internal_test.cc
@@ -0,0 +1,60 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "protos/protos_internal.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "protos_generator/tests/test_model.upb.h"
+#include "protos_generator/tests/test_model.upb.proto.h"
+#include "upb/mem/arena.h"
+
+namespace protos::testing {
+namespace {
+using ::protos_generator::test::protos::TestModel;
+
+TEST(CppGeneratedCode, InternalMoveMessage) {
+ // Generate message (simulating message created in another VM/language)
+ upb_Arena* source_arena = upb_Arena_New();
+ protos_generator_test_TestModel* message =
+ protos_generator_test_TestModel_new(source_arena);
+ ASSERT_NE(message, nullptr);
+ protos_generator_test_TestModel_set_int_value_with_default(message, 123);
+
+ // Move ownership.
+ TestModel model =
+ protos::internal::MoveMessage<TestModel>(message, source_arena);
+ // Now that we have moved ownership, free original arena.
+ upb_Arena_Free(source_arena);
+ EXPECT_EQ(model.int_value_with_default(), 123);
+}
+
+} // namespace
+} // namespace protos::testing
diff --git a/upb/protos/protos_traits.h b/upb/protos/protos_traits.h
new file mode 100644
index 0000000..92b9dc3
--- /dev/null
+++ b/upb/protos/protos_traits.h
@@ -0,0 +1,44 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef THIRD_PARTY_UPB_PROTOS_PROTOS_TRAITS_H_
+#define THIRD_PARTY_UPB_PROTOS_PROTOS_TRAITS_H_
+
+#include <type_traits>
+
+namespace protos::internal {
+
+template <typename T, typename T2>
+using add_const_if_T_is_const =
+ std::conditional_t<std::is_const_v<T>, const T2, T2>;
+
+} // namespace protos::internal
+
+#endif // THIRD_PARTY_UPB_PROTOS_PROTOS_TRAITS_H_
diff --git a/upb/protos/repeated_field.h b/upb/protos/repeated_field.h
new file mode 100644
index 0000000..9d66e15
--- /dev/null
+++ b/upb/protos/repeated_field.h
@@ -0,0 +1,309 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_PROTOS_REPEATED_FIELD_H_
+#define UPB_PROTOS_REPEATED_FIELD_H_
+
+#include <cstddef>
+#include <iterator>
+#include <type_traits>
+
+#include "absl/base/attributes.h"
+#include "absl/strings/string_view.h"
+#include "protos/protos.h"
+#include "protos/protos_traits.h"
+#include "protos/repeated_field_iterator.h"
+#include "upb/base/string_view.h"
+#include "upb/collections/array.h"
+#include "upb/collections/internal/array.h"
+#include "upb/mem/arena.h"
+#include "upb/message/copy.h"
+
+// Must be last:
+#include "upb/port/def.inc"
+
+namespace protos {
+
+namespace internal {
+
+// Shared implementation of repeated fields for absl::string_view and
+// message types for mutable and immutable variants.
+//
+// Immutable (const accessor), constructs this class with a nullptr upb_Array*
+// when the underlying array in the message is empty.
+//
+// Mutable accessors on the other hand, will allocate a new empty non-null
+// upb_Array* for the message when the RepeatedFieldProxy is constructed.
+template <class T>
+class RepeatedFieldProxyBase {
+ using Array = add_const_if_T_is_const<T, upb_Array>;
+
+ public:
+ explicit RepeatedFieldProxyBase(Array* arr, upb_Arena* arena)
+ : arr_(arr), arena_(arena) {}
+
+ size_t size() const { return arr_ != nullptr ? upb_Array_Size(arr_) : 0; }
+
+ bool empty() const { return size() == 0; }
+
+ protected:
+ // Returns upb_Array message member.
+ inline upb_Message* GetMessage(size_t n) const;
+
+ Array* arr_;
+ upb_Arena* arena_;
+};
+
+template <class T>
+upb_Message* RepeatedFieldProxyBase<T>::GetMessage(size_t n) const {
+ auto** messages =
+ static_cast<upb_Message**>(upb_Array_MutableDataPtr(this->arr_));
+ return messages[n];
+}
+
+template <class T>
+class RepeatedFieldProxyMutableBase : public RepeatedFieldProxyBase<T> {
+ public:
+ RepeatedFieldProxyMutableBase(upb_Array* arr, upb_Arena* arena)
+ : RepeatedFieldProxyBase<T>(arr, arena) {}
+
+ void clear() { upb_Array_Resize(this->arr_, 0, this->arena_); }
+};
+
+// RepeatedField proxy for repeated messages.
+template <class T>
+class RepeatedFieldProxy
+ : public std::conditional_t<std::is_const_v<T>, RepeatedFieldProxyBase<T>,
+ RepeatedFieldProxyMutableBase<T>> {
+ static_assert(!std::is_same_v<T, absl::string_view>, "");
+ static_assert(!std::is_same_v<T, const absl::string_view>, "");
+ static_assert(!std::is_arithmetic_v<T>, "");
+ static constexpr bool kIsConst = std::is_const_v<T>;
+
+ public:
+ explicit RepeatedFieldProxy(const upb_Array* arr, upb_Arena* arena)
+ : RepeatedFieldProxyBase<T>(arr, arena) {}
+ RepeatedFieldProxy(upb_Array* arr, upb_Arena* arena)
+ : RepeatedFieldProxyMutableBase<T>(arr, arena) {}
+ // Constructor used by ::protos::Ptr.
+ RepeatedFieldProxy(const RepeatedFieldProxy&) = default;
+
+ // T::CProxy [] operator specialization.
+ typename T::CProxy operator[](size_t n) const {
+ upb_MessageValue message_value = upb_Array_Get(this->arr_, n);
+ return ::protos::internal::CreateMessage<typename std::remove_const_t<T>>(
+ (upb_Message*)message_value.msg_val, this->arena_);
+ }
+
+ // TODO(b:/280069986) : Audit/Finalize based on Iterator Design.
+ // T::Proxy [] operator specialization.
+ template <int&... DeductionBlocker, bool b = !kIsConst,
+ typename = std::enable_if_t<b>>
+ typename T::Proxy operator[](size_t n) {
+ return ::protos::internal::CreateMessageProxy<T>(this->GetMessage(n),
+ this->arena_);
+ }
+
+ // Mutable message reference specialization.
+ template <int&... DeductionBlocker, bool b = !kIsConst,
+ typename = std::enable_if_t<b>>
+ void push_back(const T& t) {
+ upb_MessageValue message_value;
+ message_value.msg_val = upb_Message_DeepClone(
+ PrivateAccess::GetInternalMsg(&t), ::protos::internal::GetMiniTable(&t),
+ this->arena_);
+ upb_Array_Append(this->arr_, message_value, this->arena_);
+ }
+
+ // Mutable message add using move.
+ template <int&... DeductionBlocker, bool b = !kIsConst,
+ typename = std::enable_if_t<b>>
+ void push_back(T&& msg) {
+ upb_MessageValue message_value;
+ message_value.msg_val = PrivateAccess::GetInternalMsg(&msg);
+ upb_Arena_Fuse(GetArena(&msg), this->arena_);
+ upb_Array_Append(this->arr_, message_value, this->arena_);
+ T moved_msg = std::move(msg);
+ }
+
+ private:
+ friend class ::protos::Ptr<T>;
+};
+
+// RepeatedField proxy for repeated strings.
+template <class T>
+class RepeatedFieldStringProxy
+ : public std::conditional_t<std::is_const_v<T>, RepeatedFieldProxyBase<T>,
+ RepeatedFieldProxyMutableBase<T>> {
+ static_assert(std::is_same_v<T, absl::string_view> ||
+ std::is_same_v<T, const absl::string_view>,
+ "");
+ static constexpr bool kIsConst = std::is_const_v<T>;
+
+ public:
+ using value_type = std::remove_const_t<T>;
+ using size_type = size_t;
+ using difference_type = ptrdiff_t;
+ using iterator = internal::Iterator<StringIteratorPolicy<T>>;
+ using reference = typename iterator::reference;
+ using pointer = typename iterator::pointer;
+ using reverse_iterator = std::reverse_iterator<iterator>;
+
+ // Immutable constructor.
+ explicit RepeatedFieldStringProxy(const upb_Array* arr, upb_Arena* arena)
+ : RepeatedFieldProxyBase<T>(arr, arena) {}
+ // Mutable constructor.
+ RepeatedFieldStringProxy(upb_Array* arr, upb_Arena* arena)
+ : RepeatedFieldProxyMutableBase<T>(arr, arena) {}
+ // Constructor used by ::protos::Ptr.
+ RepeatedFieldStringProxy(const RepeatedFieldStringProxy&) = default;
+
+ reference operator[](size_t n) const { return begin()[n]; }
+
+ template <int&... DeductionBlocker, bool b = !kIsConst,
+ typename = std::enable_if_t<b>>
+ void push_back(T t) {
+ upb_MessageValue message_value;
+ // Copy string to arena.
+ UPB_ASSERT(this->arena_);
+ char* data = (char*)upb_Arena_Malloc(this->arena_, t.size());
+ UPB_ASSERT(data);
+ memcpy(data, t.data(), t.size());
+ message_value.str_val = upb_StringView_FromDataAndSize(data, t.size());
+ upb_Array_Append(this->arr_, message_value, this->arena_);
+ }
+
+ iterator begin() const { return iterator({this->arr_, this->arena_, 0}); }
+ iterator end() const {
+ return iterator({this->arr_, this->arena_, this->size()});
+ }
+ reverse_iterator rbegin() const { return reverse_iterator(end()); }
+ reverse_iterator rend() const { return reverse_iterator(begin()); }
+};
+
+// RepeatedField proxy for repeated scalar types.
+template <typename T>
+class RepeatedFieldScalarProxy
+ : public std::conditional_t<std::is_const_v<T>, RepeatedFieldProxyBase<T>,
+ RepeatedFieldProxyMutableBase<T>> {
+ static_assert(std::is_arithmetic_v<T>, "");
+ static constexpr bool kIsConst = std::is_const_v<T>;
+
+ public:
+ using value_type = std::remove_const_t<T>;
+ using size_type = size_t;
+ using difference_type = ptrdiff_t;
+ using iterator = internal::Iterator<ScalarIteratorPolicy<T>>;
+ using reference = typename iterator::reference;
+ using pointer = typename iterator::pointer;
+ using reverse_iterator = std::reverse_iterator<iterator>;
+
+ explicit RepeatedFieldScalarProxy(const upb_Array* arr, upb_Arena* arena)
+ : RepeatedFieldProxyBase<T>(arr, arena) {}
+ RepeatedFieldScalarProxy(upb_Array* arr, upb_Arena* arena)
+ : RepeatedFieldProxyMutableBase<T>(arr, arena) {}
+ // Constructor used by ::protos::Ptr.
+ RepeatedFieldScalarProxy(const RepeatedFieldScalarProxy&) = default;
+
+ T operator[](size_t n) const {
+ upb_MessageValue message_value = upb_Array_Get(this->arr_, n);
+ typename std::remove_const_t<T> val;
+ memcpy(&val, &message_value, sizeof(T));
+ return val;
+ }
+
+ template <int&... DeductionBlocker, bool b = !kIsConst,
+ typename = std::enable_if_t<b>>
+ void push_back(T t) {
+ upb_MessageValue message_value;
+ memcpy(&message_value, &t, sizeof(T));
+ upb_Array_Append(this->arr_, message_value, this->arena_);
+ }
+
+ iterator begin() const { return iterator({unsafe_array()}); }
+ iterator cbegin() const { return begin(); }
+ iterator end() const { return iterator({unsafe_array() + this->size()}); }
+ iterator cend() const { return end(); }
+
+ // Reverse iterator support.
+ reverse_iterator rbegin() const { return reverse_iterator(end()); }
+ reverse_iterator rend() const { return reverse_iterator(begin()); }
+ reverse_iterator crbegin() const { return reverse_iterator(end()); }
+ reverse_iterator crend() const { return reverse_iterator(begin()); }
+
+ private:
+ T* unsafe_array() const {
+ if (kIsConst) {
+ const void* unsafe_ptr = ::upb_Array_DataPtr(this->arr_);
+ return static_cast<T*>(const_cast<void*>(unsafe_ptr));
+ }
+ if (!kIsConst) {
+ void* unsafe_ptr =
+ ::upb_Array_MutableDataPtr(const_cast<upb_Array*>(this->arr_));
+ return static_cast<T*>(unsafe_ptr);
+ }
+ }
+};
+
+} // namespace internal
+
+template <typename T>
+class RepeatedField {
+ static constexpr bool kIsString = std::is_same_v<T, absl::string_view>;
+ static constexpr bool kIsScalar = std::is_arithmetic_v<T>;
+
+ public:
+ using Proxy = std::conditional_t<
+ kIsScalar, internal::RepeatedFieldScalarProxy<T>,
+ std::conditional_t<kIsString, internal::RepeatedFieldStringProxy<T>,
+ internal::RepeatedFieldProxy<T>>>;
+ using CProxy = std::conditional_t<
+ kIsScalar, internal::RepeatedFieldScalarProxy<const T>,
+ std::conditional_t<kIsString, internal::RepeatedFieldStringProxy<const T>,
+ internal::RepeatedFieldProxy<const T>>>;
+ // TODO(b/286451125): T supports incomplete type from fwd.h forwarding headers
+ // We would like to reference T::CProxy. Validate forwarding header design.
+ using ValueProxy = std::conditional_t<
+ kIsScalar, T,
+ std::conditional_t<kIsString, absl::string_view, ::protos::Ptr<T>>>;
+ using ValueCProxy = std::conditional_t<
+ kIsScalar, const T,
+ std::conditional_t<kIsString, absl::string_view, ::protos::Ptr<const T>>>;
+ using Access = std::conditional_t<
+ kIsScalar, internal::RepeatedFieldScalarProxy<T>,
+ std::conditional_t<kIsString, internal::RepeatedFieldStringProxy<T>,
+ internal::RepeatedFieldProxy<T>>>;
+};
+
+} // namespace protos
+
+#include "upb/port/undef.inc"
+
+#endif // UPB_PROTOS_REPEATED_FIELD_H_
diff --git a/upb/protos/repeated_field_iterator.h b/upb/protos/repeated_field_iterator.h
new file mode 100644
index 0000000..66227ba
--- /dev/null
+++ b/upb/protos/repeated_field_iterator.h
@@ -0,0 +1,356 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#ifndef UPB_PROTOS_REPEATED_FIELD_ITERATOR_H_
+#define UPB_PROTOS_REPEATED_FIELD_ITERATOR_H_
+
+#include <cstddef>
+#include <cstring>
+#include <iterator>
+#include <type_traits>
+
+#include "absl/strings/string_view.h"
+#include "protos/protos.h"
+#include "upb/base/string_view.h"
+#include "upb/collections/array.h"
+#include "upb/mem/arena.h"
+#include "upb/message/copy.h"
+
+// Must be last:
+#include "upb/port/def.inc"
+
+namespace protos {
+namespace internal {
+
+// TODO(b/279086429): Implement std iterator for messages
+template <typename T>
+class RepeatedFieldScalarProxy;
+template <typename T>
+class RepeatedFieldStringProxy;
+
+struct IteratorTestPeer;
+
+template <typename T>
+class Iterator;
+
+template <typename PolicyT>
+class ReferenceProxy;
+
+template <typename PolicyT>
+class InjectedRelationalsImpl {
+ using RP = ReferenceProxy<PolicyT>;
+ using V = typename PolicyT::value_type;
+ friend bool operator==(RP a, V b) { return static_cast<V>(a) == b; }
+ friend bool operator==(V a, RP b) { return a == static_cast<V>(b); }
+ friend bool operator==(RP a, RP b) {
+ return static_cast<V>(a) == static_cast<V>(b);
+ }
+ friend bool operator!=(RP a, V b) { return static_cast<V>(a) != b; }
+ friend bool operator!=(V a, RP b) { return a != static_cast<V>(b); }
+ friend bool operator!=(RP a, RP b) {
+ return static_cast<V>(a) != static_cast<V>(b);
+ }
+ friend bool operator<(RP a, V b) { return static_cast<V>(a) < b; }
+ friend bool operator<(V a, RP b) { return a < static_cast<V>(b); }
+ friend bool operator<(RP a, RP b) {
+ return static_cast<V>(a) < static_cast<V>(b);
+ }
+ friend bool operator<=(RP a, V b) { return static_cast<V>(a) <= b; }
+ friend bool operator<=(V a, RP b) { return a <= static_cast<V>(b); }
+ friend bool operator<=(RP a, RP b) {
+ return static_cast<V>(a) <= static_cast<V>(b);
+ }
+ friend bool operator>(RP a, V b) { return static_cast<V>(a) > b; }
+ friend bool operator>(V a, RP b) { return a > static_cast<V>(b); }
+ friend bool operator>(RP a, RP b) {
+ return static_cast<V>(a) > static_cast<V>(b);
+ }
+ friend bool operator>=(RP a, V b) { return static_cast<V>(a) >= b; }
+ friend bool operator>=(V a, RP b) { return a >= static_cast<V>(b); }
+ friend bool operator>=(RP a, RP b) {
+ return static_cast<V>(a) >= static_cast<V>(b);
+ }
+};
+class NoInjectedRelationalsImpl {};
+
+// We need to inject relationals for the string references because the
+// relationals for string_view are templates and won't allow for implicit
+// conversions from ReferenceProxy to string_view before deduction.
+template <typename PolicyT>
+using InjectedRelationals = std::conditional_t<
+ std::is_same_v<std::remove_const_t<typename PolicyT::value_type>,
+ absl::string_view>,
+ InjectedRelationalsImpl<PolicyT>, NoInjectedRelationalsImpl>;
+
+template <typename PolicyT>
+class ReferenceProxy : InjectedRelationals<PolicyT> {
+ using value_type = typename PolicyT::value_type;
+
+ public:
+ ReferenceProxy(const ReferenceProxy&) = default;
+ ReferenceProxy& operator=(const ReferenceProxy& other) {
+ // Assign through the references
+ // TODO(sbenza): Make this better for strings to avoid the copy.
+ it_.Set(other.it_.Get());
+ return *this;
+ }
+ friend void swap(ReferenceProxy a, ReferenceProxy b) { a.it_.swap(b.it_); }
+
+ operator value_type() const { return it_.Get(); }
+ void operator=(const value_type& value) const { it_.Set(value); }
+ void operator=(value_type&& value) const { it_.Set(std::move(value)); }
+ Iterator<PolicyT> operator&() const { return Iterator<PolicyT>(it_); }
+
+ private:
+ friend IteratorTestPeer;
+ friend ReferenceProxy<typename PolicyT::AddConst>;
+ friend Iterator<PolicyT>;
+
+ explicit ReferenceProxy(typename PolicyT::Payload elem) : it_(elem) {}
+ typename PolicyT::Payload it_;
+};
+
+template <template <typename> class PolicyTemplate, typename T>
+class ReferenceProxy<PolicyTemplate<const T>>
+ : InjectedRelationals<PolicyTemplate<const T>> {
+ using PolicyT = PolicyTemplate<const T>;
+ using value_type = typename PolicyT::value_type;
+
+ public:
+ ReferenceProxy(ReferenceProxy<PolicyTemplate<T>> p) : it_(p.it_) {}
+ ReferenceProxy(const ReferenceProxy&) = default;
+ ReferenceProxy& operator=(const ReferenceProxy&) = delete;
+
+ operator value_type() const { return it_.Get(); }
+ Iterator<PolicyT> operator&() const { return Iterator<PolicyT>(it_); }
+
+ private:
+ friend IteratorTestPeer;
+ friend Iterator<PolicyT>;
+
+ explicit ReferenceProxy(typename PolicyT::Payload elem) : it_(elem) {}
+ typename PolicyT::Payload it_;
+};
+
+template <typename PolicyT>
+class Iterator {
+ public:
+ using iterator_category = std::random_access_iterator_tag;
+ using value_type = std::remove_const_t<typename PolicyT::value_type>;
+ using difference_type = std::ptrdiff_t;
+ using pointer = Iterator;
+ using reference = ReferenceProxy<PolicyT>;
+
+ constexpr Iterator() noexcept : it_(nullptr) {}
+ Iterator(const Iterator& other) = default;
+ Iterator& operator=(const Iterator& other) = default;
+ template <
+ typename P = PolicyT,
+ typename = std::enable_if_t<std::is_const<typename P::value_type>::value>>
+ Iterator(const Iterator<typename P::RemoveConst>& other) : it_(other.it_) {}
+
+ constexpr reference operator*() const noexcept { return reference(it_); }
+ // No operator-> needed because T is a scalar.
+
+ private:
+ // Hide the internal type.
+ using iterator = Iterator;
+
+ public:
+ // {inc,dec}rementable
+ constexpr iterator& operator++() noexcept {
+ it_.AddOffset(1);
+ return *this;
+ }
+ constexpr iterator operator++(int) noexcept {
+ auto copy = *this;
+ ++*this;
+ return copy;
+ }
+ constexpr iterator& operator--() noexcept {
+ it_.AddOffset(-1);
+ return *this;
+ }
+ constexpr iterator operator--(int) noexcept {
+ auto copy = *this;
+ --*this;
+ return copy;
+ }
+
+ // equality_comparable
+ friend constexpr bool operator==(const iterator& x,
+ const iterator& y) noexcept {
+ return x.it_.Index() == y.it_.Index();
+ }
+ friend constexpr bool operator!=(const iterator& x,
+ const iterator& y) noexcept {
+ return !(x == y);
+ }
+
+ // less_than_comparable
+ friend constexpr bool operator<(const iterator& x,
+ const iterator& y) noexcept {
+ return x.it_.Index() < y.it_.Index();
+ }
+ friend constexpr bool operator<=(const iterator& x,
+ const iterator& y) noexcept {
+ return !(y < x);
+ }
+ friend constexpr bool operator>(const iterator& x,
+ const iterator& y) noexcept {
+ return y < x;
+ }
+ friend constexpr bool operator>=(const iterator& x,
+ const iterator& y) noexcept {
+ return !(x < y);
+ }
+
+ constexpr iterator& operator+=(difference_type d) noexcept {
+ it_.AddOffset(d);
+ return *this;
+ }
+ constexpr iterator operator+(difference_type d) const noexcept {
+ auto copy = *this;
+ copy += d;
+ return copy;
+ }
+ friend constexpr iterator operator+(const difference_type d,
+ iterator it) noexcept {
+ return it + d;
+ }
+
+ constexpr iterator& operator-=(difference_type d) noexcept {
+ it_.AddOffset(-d);
+ return *this;
+ }
+ constexpr iterator operator-(difference_type d) const noexcept {
+ auto copy = *this;
+ copy -= d;
+ return copy;
+ }
+
+ // indexable
+ constexpr reference operator[](difference_type d) const noexcept {
+ auto copy = *this;
+ copy += d;
+ return *copy;
+ }
+
+ // random access iterator
+ friend constexpr difference_type operator-(iterator x, iterator y) noexcept {
+ return x.it_.Index() - y.it_.Index();
+ }
+
+ private:
+ friend IteratorTestPeer;
+ friend ReferenceProxy<PolicyT>;
+ friend Iterator<typename PolicyT::AddConst>;
+ template <typename U>
+ friend class RepeatedFieldScalarProxy;
+ template <typename U>
+ friend class RepeatedFieldStringProxy;
+
+ // Create from internal::RepeatedFieldScalarProxy.
+ explicit Iterator(typename PolicyT::Payload it) noexcept : it_(it) {}
+
+ // The internal iterator.
+ typename PolicyT::Payload it_;
+};
+
+template <typename T>
+struct ScalarIteratorPolicy {
+ using value_type = T;
+ using RemoveConst = ScalarIteratorPolicy<std::remove_const_t<T>>;
+ using AddConst = ScalarIteratorPolicy<const T>;
+
+ struct Payload {
+ T* value;
+ void AddOffset(ptrdiff_t offset) { value += offset; }
+ T Get() const { return *value; }
+ void Set(T new_value) const { *value = new_value; }
+ T* Index() const { return value; }
+
+ void swap(Payload& other) {
+ using std::swap;
+ swap(*value, *other.value);
+ }
+
+ operator typename ScalarIteratorPolicy<const T>::Payload() const {
+ return {value};
+ }
+ };
+};
+
+template <typename T>
+struct StringIteratorPolicy {
+ using value_type = T;
+ using RemoveConst = StringIteratorPolicy<std::remove_const_t<T>>;
+ using AddConst = StringIteratorPolicy<const T>;
+
+ struct Payload {
+ using Array =
+ std::conditional_t<std::is_const_v<T>, const upb_Array, upb_Array>;
+ Array* arr;
+ upb_Arena* arena;
+ size_t index;
+
+ void AddOffset(ptrdiff_t offset) { index += offset; }
+ absl::string_view Get() const {
+ upb_MessageValue message_value = upb_Array_Get(arr, index);
+ return absl::string_view(message_value.str_val.data,
+ message_value.str_val.size);
+ }
+ void Set(absl::string_view new_value) const {
+ char* data =
+ static_cast<char*>(upb_Arena_Malloc(arena, new_value.size()));
+ memcpy(data, new_value.data(), new_value.size());
+ upb_MessageValue message_value;
+ message_value.str_val =
+ upb_StringView_FromDataAndSize(data, new_value.size());
+ upb_Array_Set(arr, index, message_value);
+ }
+ size_t Index() const { return index; }
+
+ void swap(Payload& other) {
+ upb_MessageValue a = upb_Array_Get(this->arr, this->index);
+ upb_MessageValue b = upb_Array_Get(other.arr, other.index);
+ upb_Array_Set(this->arr, this->index, b);
+ upb_Array_Set(other.arr, other.index, a);
+ }
+
+ operator typename StringIteratorPolicy<const T>::Payload() const {
+ return {arr, arena, index};
+ }
+ };
+};
+
+} // namespace internal
+} // namespace protos
+
+#endif // UPB_PROTOS_REPEATED_FIELD_ITERATOR_H_
diff --git a/upb/protos/repeated_field_iterator_test.cc b/upb/protos/repeated_field_iterator_test.cc
new file mode 100644
index 0000000..c8e0a60
--- /dev/null
+++ b/upb/protos/repeated_field_iterator_test.cc
@@ -0,0 +1,478 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "protos/repeated_field_iterator.h"
+
+#include <algorithm>
+#include <array>
+#include <numeric>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using ::testing::ElementsAre;
+
+namespace protos {
+namespace internal {
+
+template <typename T>
+using ScalarRef = ReferenceProxy<ScalarIteratorPolicy<T>>;
+template <typename T>
+using ScalarIterator = Iterator<ScalarIteratorPolicy<T>>;
+
+template <typename T>
+using StringRef = ReferenceProxy<StringIteratorPolicy<T>>;
+template <typename T>
+using StringIterator = Iterator<StringIteratorPolicy<T>>;
+
+struct IteratorTestPeer {
+ template <typename T>
+ static ScalarRef<T> MakeScalarRefProxy(T& ref) {
+ return ScalarRef<T>({&ref});
+ }
+
+ template <typename T>
+ static ScalarIterator<T> MakeScalarIterator(T* ptr) {
+ return ScalarIterator<T>({ptr});
+ }
+
+ template <typename T>
+ static StringRef<T> MakeStringRefProxy(upb_Array* arr, protos::Arena& arena) {
+ return StringRef<T>({arr, arena.ptr(), 0});
+ }
+
+ template <typename T>
+ static StringIterator<T> MakeStringIterator(upb_Array* arr,
+ protos::Arena& arena) {
+ return StringIterator<T>({arr, arena.ptr()});
+ }
+};
+
+namespace {
+
+TEST(ScalarReferenceTest, BasicOperationsWork) {
+ int i = 0;
+ ScalarRef<int> p = IteratorTestPeer::MakeScalarRefProxy(i);
+ ScalarRef<const int> cp =
+ IteratorTestPeer::MakeScalarRefProxy(std::as_const(i));
+ EXPECT_EQ(i, 0);
+ p = 17;
+ EXPECT_EQ(i, 17);
+ EXPECT_EQ(p, 17);
+ EXPECT_EQ(cp, 17);
+ i = 13;
+ EXPECT_EQ(p, 13);
+ EXPECT_EQ(cp, 13);
+
+ EXPECT_FALSE((std::is_assignable<decltype(cp), int>::value));
+
+ // Check that implicit conversion works T -> const T
+ ScalarRef<const int> cp2 = p;
+ EXPECT_EQ(cp2, 13);
+
+ EXPECT_FALSE((std::is_convertible<decltype(cp), ScalarRef<int>>::value));
+}
+
+TEST(ScalarReferenceTest, AssignmentAndSwap) {
+ int i = 3;
+ int j = 5;
+ ScalarRef<int> p = IteratorTestPeer::MakeScalarRefProxy(i);
+ ScalarRef<int> p2 = IteratorTestPeer::MakeScalarRefProxy(j);
+
+ EXPECT_EQ(p, 3);
+ EXPECT_EQ(p2, 5);
+ swap(p, p2);
+ EXPECT_EQ(p, 5);
+ EXPECT_EQ(p2, 3);
+
+ p = p2;
+ EXPECT_EQ(p, 3);
+ EXPECT_EQ(p2, 3);
+}
+
+template <typename T, typename U>
+std::array<bool, 6> RunCompares(const T& a, const U& b) {
+ // Verify some basic properties here.
+ // Equivalencies
+ EXPECT_EQ((a == b), (b == a));
+ EXPECT_EQ((a != b), (b != a));
+ EXPECT_EQ((a < b), (b > a));
+ EXPECT_EQ((a > b), (b < a));
+ EXPECT_EQ((a <= b), (b >= a));
+ EXPECT_EQ((a >= b), (b <= a));
+
+ // Opposites
+ EXPECT_NE((a == b), (a != b));
+ EXPECT_NE((a < b), (a >= b));
+ EXPECT_NE((a > b), (a <= b));
+
+ return {{
+ (a == b),
+ (a != b),
+ (a < b),
+ (a <= b),
+ (a > b),
+ (a >= b),
+ }};
+}
+
+template <typename T>
+void TestScalarIterator(T* array) {
+ ScalarIterator<T> it = IteratorTestPeer::MakeScalarIterator(array);
+ // Copy
+ auto it2 = it;
+
+ EXPECT_THAT(RunCompares(it, it2),
+ ElementsAre(true, false, false, true, false, true));
+
+ // Increment
+ EXPECT_EQ(*++it, 11);
+ EXPECT_EQ(*it2, 10);
+ EXPECT_EQ(*it++, 11);
+ EXPECT_EQ(*it2, 10);
+ EXPECT_EQ(*it, 12);
+ EXPECT_EQ(*it2, 10);
+
+ EXPECT_THAT(RunCompares(it, it2),
+ ElementsAre(false, true, false, false, true, true));
+
+ // Assign
+ it2 = it;
+ EXPECT_EQ(*it, 12);
+ EXPECT_EQ(*it2, 12);
+
+ // Decrement
+ EXPECT_EQ(*--it, 11);
+ EXPECT_EQ(*it--, 11);
+ EXPECT_EQ(*it, 10);
+
+ it += 5;
+ EXPECT_EQ(*it, 15);
+ EXPECT_EQ(it - it2, 3);
+ EXPECT_EQ(it2 - it, -3);
+ it -= 3;
+ EXPECT_EQ(*it, 12);
+ EXPECT_EQ(it[6], 18);
+ EXPECT_EQ(it[-1], 11);
+}
+
+TEST(ScalarIteratorTest, BasicOperationsWork) {
+ int array[10] = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
+ TestScalarIterator<const int>(array);
+ TestScalarIterator<int>(array);
+}
+
+TEST(ScalarIteratorTest, Convertibility) {
+ int array[10] = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
+ ScalarIterator<int> it = IteratorTestPeer::MakeScalarIterator(array);
+ it += 4;
+ ScalarIterator<const int> cit = it;
+ EXPECT_EQ(*it, 14);
+ EXPECT_EQ(*cit, 14);
+ it += 2;
+ EXPECT_EQ(*it, 16);
+ EXPECT_EQ(*cit, 14);
+ cit = it;
+ EXPECT_EQ(*it, 16);
+ EXPECT_EQ(*cit, 16);
+
+ EXPECT_FALSE((std::is_convertible<ScalarIterator<const int>,
+ ScalarIterator<int>>::value));
+ EXPECT_FALSE((std::is_assignable<ScalarIterator<int>,
+ ScalarIterator<const int>>::value));
+}
+
+TEST(ScalarIteratorTest, MutabilityOnlyWorksOnMutable) {
+ int array[10] = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
+ ScalarIterator<int> it = IteratorTestPeer::MakeScalarIterator(array);
+ EXPECT_EQ(array[3], 13);
+ it[3] = 113;
+ EXPECT_EQ(array[3], 113);
+ ScalarIterator<const int> cit = it;
+ EXPECT_FALSE((std::is_assignable<decltype(*cit), int>::value));
+ EXPECT_FALSE((std::is_assignable<decltype(cit[1]), int>::value));
+}
+
+TEST(ScalarIteratorTest, IteratorReferenceInteraction) {
+ int array[10] = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
+ ScalarIterator<int> it = IteratorTestPeer::MakeScalarIterator(array);
+ EXPECT_EQ(it[4], 14);
+ // op& from references goes back to iterator.
+ ScalarIterator<int> it2 = &it[4];
+ EXPECT_EQ(it + 4, it2);
+}
+
+TEST(ScalarIteratorTest, IteratorBasedAlgorithmsWork) {
+ // We use a vector here to make testing it easier.
+ std::vector<int> v(10, 0);
+ ScalarIterator<int> it = IteratorTestPeer::MakeScalarIterator(v.data());
+ EXPECT_THAT(v, ElementsAre(0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+ std::iota(it, it + 10, 10);
+ EXPECT_THAT(v, ElementsAre(10, 11, 12, 13, 14, 15, 16, 17, 18, 19));
+ EXPECT_EQ(it + 5, std::find(it, it + 10, 15));
+ EXPECT_EQ(145, std::accumulate(it, it + 10, 0));
+ std::sort(it, it + 10, [](int a, int b) {
+ return std::tuple(a % 2, a) < std::tuple(b % 2, b);
+ });
+ EXPECT_THAT(v, ElementsAre(10, 12, 14, 16, 18, 11, 13, 15, 17, 19));
+}
+
+const char* CloneString(protos::Arena& arena, absl::string_view str) {
+ char* data = (char*)upb_Arena_Malloc(arena.ptr(), str.size());
+ memcpy(data, str.data(), str.size());
+ return data;
+}
+upb_Array* MakeStringArray(protos::Arena& arena,
+ const std::vector<std::string>& input) {
+ upb_Array* arr = upb_Array_New(arena.ptr(), kUpb_CType_String);
+ for (absl::string_view str : input) {
+ upb_MessageValue message_value;
+ message_value.str_val =
+ upb_StringView_FromDataAndSize(CloneString(arena, str), str.size());
+ upb_Array_Append(arr, message_value, arena.ptr());
+ }
+ return arr;
+}
+
+TEST(StringReferenceTest, BasicOperationsWork) {
+ protos::Arena arena;
+ upb_Array* arr = MakeStringArray(arena, {""});
+
+ auto read = [&] {
+ upb_MessageValue message_value = upb_Array_Get(arr, 0);
+ return absl::string_view(message_value.str_val.data,
+ message_value.str_val.size);
+ };
+
+ StringRef<absl::string_view> p =
+ IteratorTestPeer::MakeStringRefProxy<absl::string_view>(arr, arena);
+ StringRef<const absl::string_view> cp =
+ IteratorTestPeer::MakeStringRefProxy<const absl::string_view>(arr, arena);
+ EXPECT_EQ(read(), "");
+ EXPECT_EQ(p, "");
+ p = "ABC";
+ EXPECT_EQ(read(), "ABC");
+ EXPECT_EQ(p, "ABC");
+ EXPECT_EQ(cp, "ABC");
+ const_cast<char*>(read().data())[0] = 'X';
+ EXPECT_EQ(read(), "XBC");
+ EXPECT_EQ(p, "XBC");
+ EXPECT_EQ(cp, "XBC");
+
+ EXPECT_FALSE((std::is_assignable<decltype(cp), int>::value));
+
+ // Check that implicit conversion works T -> const T
+ StringRef<const absl::string_view> cp2 = p;
+ EXPECT_EQ(cp2, "XBC");
+
+ EXPECT_FALSE(
+ (std::is_convertible<decltype(cp), StringRef<absl::string_view>>::value));
+
+ EXPECT_THAT(RunCompares(p, "XBC"),
+ ElementsAre(true, false, false, true, false, true));
+ EXPECT_THAT(RunCompares(p, "YBC"),
+ ElementsAre(false, true, true, true, false, false));
+ EXPECT_THAT(RunCompares(p, "RBC"),
+ ElementsAre(false, true, false, false, true, true));
+ EXPECT_THAT(RunCompares(p, "XB"),
+ ElementsAre(false, true, false, false, true, true));
+ EXPECT_THAT(RunCompares(p, "XBCD"),
+ ElementsAre(false, true, true, true, false, false));
+}
+
+TEST(StringReferenceTest, AssignmentAndSwap) {
+ protos::Arena arena;
+ upb_Array* arr1 = MakeStringArray(arena, {"ABC"});
+ upb_Array* arr2 = MakeStringArray(arena, {"DEF"});
+
+ auto p = IteratorTestPeer::MakeStringRefProxy<absl::string_view>(arr1, arena);
+ auto p2 =
+ IteratorTestPeer::MakeStringRefProxy<absl::string_view>(arr2, arena);
+
+ EXPECT_EQ(p, "ABC");
+ EXPECT_EQ(p2, "DEF");
+ swap(p, p2);
+ EXPECT_EQ(p, "DEF");
+ EXPECT_EQ(p2, "ABC");
+
+ p = p2;
+ EXPECT_EQ(p, "ABC");
+ EXPECT_EQ(p2, "ABC");
+}
+
+template <typename T>
+void TestStringIterator(protos::Arena& arena, upb_Array* array) {
+ StringIterator<T> it = IteratorTestPeer::MakeStringIterator<T>(array, arena);
+ // Copy
+ auto it2 = it;
+
+ EXPECT_THAT(RunCompares(it, it2),
+ ElementsAre(true, false, false, true, false, true));
+
+ // Increment
+ EXPECT_EQ(*++it, "11");
+ EXPECT_EQ(*it2, "10");
+ EXPECT_EQ(*it++, "11");
+ EXPECT_EQ(*it2, "10");
+ EXPECT_EQ(*it, "12");
+ EXPECT_EQ(*it2, "10");
+
+ EXPECT_THAT(RunCompares(it, it2),
+ ElementsAre(false, true, false, false, true, true));
+
+ // Assign
+ it2 = it;
+ EXPECT_EQ(*it, "12");
+ EXPECT_EQ(*it2, "12");
+
+ // Decrement
+ EXPECT_EQ(*--it, "11");
+ EXPECT_EQ(*it--, "11");
+ EXPECT_EQ(*it, "10");
+
+ it += 5;
+ EXPECT_EQ(*it, "15");
+ EXPECT_EQ(it - it2, 3);
+ EXPECT_EQ(it2 - it, -3);
+ it -= 3;
+ EXPECT_EQ(*it, "12");
+ EXPECT_EQ(it[6], "18");
+ EXPECT_EQ(it[-1], "11");
+}
+
+TEST(StringIteratorTest, BasicOperationsWork) {
+ protos::Arena arena;
+ auto* array = MakeStringArray(
+ arena, {"10", "11", "12", "13", "14", "15", "16", "17", "18", "19"});
+ TestStringIterator<const absl::string_view>(arena, array);
+ TestStringIterator<absl::string_view>(arena, array);
+}
+
+TEST(StringIteratorTest, Convertibility) {
+ protos::Arena arena;
+ auto* array = MakeStringArray(
+ arena, {"10", "11", "12", "13", "14", "15", "16", "17", "18", "19"});
+ StringIterator<absl::string_view> it =
+ IteratorTestPeer::MakeStringIterator<absl::string_view>(array, arena);
+ it += 4;
+ StringIterator<const absl::string_view> cit = it;
+ EXPECT_EQ(*it, "14");
+ EXPECT_EQ(*cit, "14");
+ it += 2;
+ EXPECT_EQ(*it, "16");
+ EXPECT_EQ(*cit, "14");
+ cit = it;
+ EXPECT_EQ(*it, "16");
+ EXPECT_EQ(*cit, "16");
+
+ EXPECT_FALSE((std::is_convertible<StringIterator<const absl::string_view>,
+ StringIterator<absl::string_view>>::value));
+ EXPECT_FALSE(
+ (std::is_assignable<StringIterator<absl::string_view>,
+ StringIterator<const absl::string_view>>::value));
+}
+
+TEST(StringIteratorTest, MutabilityOnlyWorksOnMutable) {
+ protos::Arena arena;
+ auto* array = MakeStringArray(
+ arena, {"10", "11", "12", "13", "14", "15", "16", "17", "18", "19"});
+ StringIterator<absl::string_view> it =
+ IteratorTestPeer::MakeStringIterator<absl::string_view>(array, arena);
+
+ auto read = [&] {
+ upb_MessageValue message_value = upb_Array_Get(array, 3);
+ return absl::string_view(message_value.str_val.data,
+ message_value.str_val.size);
+ };
+
+ EXPECT_EQ(read(), "13");
+ it[3] = "113";
+ EXPECT_EQ(read(), "113");
+ StringIterator<const absl::string_view> cit = it;
+ EXPECT_FALSE((std::is_assignable<decltype(*cit), absl::string_view>::value));
+ EXPECT_FALSE(
+ (std::is_assignable<decltype(cit[1]), absl::string_view>::value));
+}
+
+TEST(StringIteratorTest, IteratorReferenceInteraction) {
+ protos::Arena arena;
+ auto* array = MakeStringArray(
+ arena, {"10", "11", "12", "13", "14", "15", "16", "17", "18", "19"});
+ StringIterator<absl::string_view> it =
+ IteratorTestPeer::MakeStringIterator<absl::string_view>(array, arena);
+ EXPECT_EQ(it[4], "14");
+ // op& from references goes back to iterator.
+ StringIterator<absl::string_view> it2 = &it[4];
+ EXPECT_EQ(it + 4, it2);
+}
+
+TEST(StringIteratorTest, IteratorBasedAlgorithmsWork) {
+ protos::Arena arena;
+ auto* array = MakeStringArray(
+ arena, {"10", "11", "12", "13", "14", "15", "16", "17", "18", "19"});
+ StringIterator<absl::string_view> it =
+ IteratorTestPeer::MakeStringIterator<absl::string_view>(array, arena);
+
+ auto read = [&] {
+ std::vector<absl::string_view> v;
+ for (int i = 0; i < 10; ++i) {
+ upb_MessageValue message_value = upb_Array_Get(array, i);
+ v.emplace_back(message_value.str_val.data, message_value.str_val.size);
+ }
+ return v;
+ };
+
+ EXPECT_THAT(read(), ElementsAre("10", "11", "12", "13", "14", //
+ "15", "16", "17", "18", "19"));
+ std::sort(it, it + 10, [](absl::string_view a, absl::string_view b) {
+ return std::tuple(a[1] % 2, a) < std::tuple(b[1] % 2, b);
+ });
+ EXPECT_THAT(read(), ElementsAre("10", "12", "14", "16", "18", //
+ "11", "13", "15", "17", "19"));
+ // Now sort with the default less.
+ std::sort(it, it + 10);
+ EXPECT_THAT(read(), ElementsAre("10", "11", "12", "13", "14", //
+ "15", "16", "17", "18", "19"));
+
+ // Mutable algorithm
+ std::generate(it, it + 10,
+ [i = 0]() mutable { return std::string(i++, 'x'); });
+ EXPECT_THAT(read(),
+ ElementsAre("", "x", "xx", "xxx", "xxxx", "xxxxx", "xxxxxx",
+ "xxxxxxx", "xxxxxxxx", "xxxxxxxxx"));
+}
+
+} // namespace
+} // namespace internal
+} // namespace protos
diff --git a/upb/protos_generator/BUILD b/upb/protos_generator/BUILD
new file mode 100644
index 0000000..acc0c1a
--- /dev/null
+++ b/upb/protos_generator/BUILD
@@ -0,0 +1,119 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load(
+ "//bazel:build_defs.bzl",
+ "UPB_DEFAULT_CPPOPTS",
+)
+
+# begin:google_only
+# package(default_applicable_licenses = ["//:license"])
+# end:google_only
+
+licenses(["notice"])
+
+cc_binary(
+ name = "protoc-gen-upb-protos",
+ srcs = [
+ "protoc-gen-upb-protos.cc",
+ ],
+ copts = UPB_DEFAULT_CPPOPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":gen_utils",
+ ":generator",
+ ":names",
+ ":output",
+ "//upbc:file_layout",
+ "@com_google_protobuf//:protobuf",
+ "@com_google_protobuf//src/google/protobuf/compiler:code_generator",
+ ],
+)
+
+cc_library(
+ name = "generator",
+ srcs = [
+ "gen_accessors.cc",
+ "gen_enums.cc",
+ "gen_extensions.cc",
+ "gen_messages.cc",
+ "gen_repeated_fields.cc",
+ ],
+ hdrs = [
+ "gen_accessors.h",
+ "gen_enums.h",
+ "gen_extensions.h",
+ "gen_messages.h",
+ "gen_repeated_fields.h",
+ ],
+ visibility = ["//visibility:private"],
+ deps = [
+ ":gen_utils",
+ ":names",
+ ":output",
+ "@com_google_absl//absl/container:flat_hash_set",
+ "@com_google_absl//absl/strings",
+ "//upbc:common",
+ "//upbc:file_layout",
+ "//upbc:keywords",
+ "//upbc:names",
+ "@com_google_protobuf//:protobuf",
+ ],
+)
+
+cc_library(
+ name = "output",
+ srcs = ["output.cc"],
+ hdrs = ["output.h"],
+ visibility = ["//visibility:private"],
+ deps = [
+ "@com_google_absl//absl/log:absl_log",
+ "@com_google_absl//absl/strings",
+ "@com_google_protobuf//:protobuf",
+ ],
+)
+
+cc_library(
+ name = "gen_utils",
+ srcs = ["gen_utils.cc"],
+ hdrs = ["gen_utils.h"],
+ visibility = ["//visibility:public"],
+ deps = [
+ "@com_google_absl//absl/strings",
+ "@com_google_protobuf//:protobuf",
+ "@com_google_protobuf//src/google/protobuf/compiler:code_generator",
+ ],
+)
+
+cc_library(
+ name = "names",
+ srcs = ["names.cc"],
+ hdrs = ["names.h"],
+ visibility = ["//visibility:private"],
+ deps = [
+ ":output",
+ "//upbc:keywords",
+ ],
+)
diff --git a/upb/protos_generator/gen_accessors.cc b/upb/protos_generator/gen_accessors.cc
new file mode 100644
index 0000000..ac7186f
--- /dev/null
+++ b/upb/protos_generator/gen_accessors.cc
@@ -0,0 +1,584 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "protos_generator/gen_accessors.h"
+
+#include <string>
+
+#include "absl/container/flat_hash_set.h"
+#include "absl/strings/match.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
+#include "google/protobuf/descriptor.h"
+#include "protos_generator/gen_repeated_fields.h"
+#include "protos_generator/gen_utils.h"
+#include "protos_generator/names.h"
+#include "protos_generator/output.h"
+#include "upbc/common.h"
+#include "upbc/keywords.h"
+#include "upbc/names.h"
+
+namespace protos_generator {
+
+namespace protobuf = ::google::protobuf;
+
+using NameToFieldDescriptorMap =
+ absl::flat_hash_map<absl::string_view, const protobuf::FieldDescriptor*>;
+
+void WriteFieldAccessorHazzer(const protobuf::Descriptor* desc,
+ const protobuf::FieldDescriptor* field,
+ absl::string_view resolved_field_name,
+ absl::string_view resolved_upbc_name,
+ Output& output);
+void WriteFieldAccessorClear(const protobuf::Descriptor* desc,
+ const protobuf::FieldDescriptor* field,
+ absl::string_view resolved_field_name,
+ absl::string_view resolved_upbc_name,
+ Output& output);
+void WriteMapFieldAccessors(const protobuf::Descriptor* desc,
+ const protobuf::FieldDescriptor* field,
+ absl::string_view resolved_field_name,
+ absl::string_view resolved_upbc_name,
+ Output& output);
+
+void WriteMapAccessorDefinitions(const protobuf::Descriptor* message,
+ const protobuf::FieldDescriptor* field,
+ absl::string_view resolved_field_name,
+ absl::string_view class_name, Output& output);
+
+// Returns C++ class member name by resolving naming conflicts across
+// proto field names (such as clear_ prefixes) and keyword collisions.
+//
+// The Upb C generator prefixes all accessors with package and class names
+// avoiding collisions. Therefore we need to use raw field names when calling
+// into C accessors but need to fully resolve conflicts for C++ class members.
+std::string ResolveFieldName(const protobuf::FieldDescriptor* field,
+ const NameToFieldDescriptorMap& field_names);
+
+NameToFieldDescriptorMap CreateFieldNameMap(
+ const protobuf::Descriptor* message) {
+ NameToFieldDescriptorMap field_names;
+ for (int i = 0; i < message->field_count(); i++) {
+ const protobuf::FieldDescriptor* field = message->field(i);
+ field_names.emplace(field->name(), field);
+ }
+ return field_names;
+}
+
+void WriteFieldAccessorsInHeader(const protobuf::Descriptor* desc,
+ Output& output) {
+ // Generate const methods.
+ OutputIndenter i(output);
+
+ auto field_names = CreateFieldNameMap(desc);
+ auto upbc_field_names = upbc::CreateFieldNameMap(desc);
+
+ for (const auto* field : FieldNumberOrder(desc)) {
+ std::string resolved_field_name = ResolveFieldName(field, field_names);
+ std::string resolved_upbc_name =
+ upbc::ResolveFieldName(field, upbc_field_names);
+ WriteFieldAccessorHazzer(desc, field, resolved_field_name,
+ resolved_upbc_name, output);
+ WriteFieldAccessorClear(desc, field, resolved_field_name,
+ resolved_upbc_name, output);
+
+ if (field->is_map()) {
+ WriteMapFieldAccessors(desc, field, resolved_field_name,
+ resolved_upbc_name, output);
+ } else if (desc->options().map_entry()) {
+ // TODO(b/237399867) Implement map entry
+ } else if (field->is_repeated()) {
+ WriteRepeatedFieldsInMessageHeader(desc, field, resolved_field_name,
+ resolved_upbc_name, output);
+ } else {
+ // non-repeated.
+ if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING) {
+ output(R"cc(
+ $0 $1() const;
+ void set_$1($0 value);
+ )cc",
+ CppConstType(field), resolved_field_name);
+ } else if (field->cpp_type() ==
+ protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
+ output(R"cc(
+ $1 $2() const;
+ $0 mutable_$2();
+ )cc",
+ MessagePtrConstType(field, /* const */ false),
+ MessagePtrConstType(field, /* const */ true),
+ resolved_field_name, resolved_upbc_name);
+ } else {
+ output(
+ R"cc(
+ inline $0 $1() const { return $2_$3(msg_); }
+ inline void set_$1($0 value) { return $2_set_$3(msg_, value); }
+ )cc",
+ CppConstType(field), resolved_field_name, MessageName(desc),
+ resolved_upbc_name);
+ }
+ }
+ }
+}
+
+void WriteFieldAccessorHazzer(const protobuf::Descriptor* desc,
+ const protobuf::FieldDescriptor* field,
+ const absl::string_view resolved_field_name,
+ const absl::string_view resolved_upbc_name,
+ Output& output) {
+ // Generate hazzer (if any).
+ if (field->has_presence()) {
+ // Has presence.
+ output("inline bool has_$0() const { return $1_has_$2(msg_); }\n",
+ resolved_field_name, MessageName(desc), resolved_upbc_name);
+ }
+}
+
+void WriteFieldAccessorClear(const protobuf::Descriptor* desc,
+ const protobuf::FieldDescriptor* field,
+ const absl::string_view resolved_field_name,
+ const absl::string_view resolved_upbc_name,
+ Output& output) {
+ if (field->has_presence()) {
+ output("void clear_$0() { $2_clear_$1(msg_); }\n", resolved_field_name,
+ resolved_upbc_name, MessageName(desc));
+ }
+}
+
+void WriteMapFieldAccessors(const protobuf::Descriptor* desc,
+ const protobuf::FieldDescriptor* field,
+ const absl::string_view resolved_field_name,
+ const absl::string_view resolved_upbc_name,
+ Output& output) {
+ const protobuf::Descriptor* entry = field->message_type();
+ const protobuf::FieldDescriptor* key = entry->FindFieldByNumber(1);
+ const protobuf::FieldDescriptor* val = entry->FindFieldByNumber(2);
+ output(
+ R"cc(
+ inline size_t $0_size() const { return $1_$3_size(msg_); }
+ inline void clear_$0() { $1_clear_$3(msg_); }
+ void delete_$0($2 key);
+ )cc",
+ resolved_field_name, MessageName(desc), CppConstType(key),
+ resolved_upbc_name);
+
+ if (val->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
+ output(
+ R"cc(
+ bool set_$0($1 key, $3 value);
+ bool set_$0($1 key, $4 value);
+ absl::StatusOr<$3> get_$0($1 key);
+ )cc",
+ resolved_field_name, CppConstType(key), CppConstType(val),
+ MessagePtrConstType(val, /* is_const */ true),
+ MessagePtrConstType(val, /* is_const */ false));
+ } else {
+ output(
+ R"cc(
+ bool set_$0($1 key, $2 value);
+ absl::StatusOr<$2> get_$0($1 key);
+ )cc",
+ resolved_field_name, CppConstType(key), CppConstType(val));
+ }
+}
+
+void WriteAccessorsInSource(const protobuf::Descriptor* desc, Output& output) {
+ std::string class_name = ClassName(desc);
+ absl::StrAppend(&class_name, "Access");
+ output("namespace internal {\n");
+ const char arena_expression[] = "arena_";
+ auto field_names = CreateFieldNameMap(desc);
+ auto upbc_field_names = upbc::CreateFieldNameMap(desc);
+
+ // Generate const methods.
+ OutputIndenter i(output);
+ for (const auto* field : FieldNumberOrder(desc)) {
+ std::string resolved_field_name = ResolveFieldName(field, field_names);
+ std::string resolved_upbc_name =
+ upbc::ResolveFieldName(field, upbc_field_names);
+ if (field->is_map()) {
+ WriteMapAccessorDefinitions(desc, field, resolved_field_name, class_name,
+ output);
+ } else if (desc->options().map_entry()) {
+ // TODO(b/237399867) Implement map entry
+ } else if (field->is_repeated()) {
+ if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
+ WriteRepeatedMessageAccessor(desc, field, resolved_field_name,
+ class_name, output);
+ } else if (field->cpp_type() ==
+ protobuf::FieldDescriptor::CPPTYPE_STRING) {
+ WriteRepeatedStringAccessor(desc, field, resolved_field_name,
+ class_name, output);
+ } else {
+ WriteRepeatedScalarAccessor(desc, field, resolved_field_name,
+ class_name, output);
+ }
+ } else {
+ // non-repeated field.
+ if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING) {
+ output(
+ R"cc(
+ $1 $0::$2() const {
+ return ::protos::UpbStrToStringView($3_$4(msg_));
+ }
+ )cc",
+ class_name, CppConstType(field), resolved_field_name,
+ MessageName(desc), resolved_upbc_name);
+ // Set string.
+ output(
+ R"cc(
+ void $0::set_$2($1 value) {
+ $4_set_$3(msg_, ::protos::UpbStrFromStringView(value, $5));
+ }
+ )cc",
+ class_name, CppConstType(field), resolved_field_name,
+ resolved_upbc_name, MessageName(desc), arena_expression);
+ } else if (field->cpp_type() ==
+ protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
+ output(
+ R"cc(
+ $1 $0::$2() const {
+ if (!has_$2()) {
+ return $4::default_instance();
+ }
+ return ::protos::internal::CreateMessage<$4>(
+ (upb_Message*)($3_$5(msg_)), arena_);
+ }
+ )cc",
+ class_name, MessagePtrConstType(field, /* is_const */ true),
+ resolved_field_name, MessageName(desc),
+ MessageBaseType(field, /* maybe_const */ false),
+ resolved_upbc_name);
+
+ output(
+ R"cc(
+ $1 $0::mutable_$2() {
+ return ::protos::internal::CreateMessageProxy<$4>(
+ (upb_Message*)($3_mutable_$5(msg_, $6)), $6);
+ }
+ )cc",
+ class_name, MessagePtrConstType(field, /* is_const */ false),
+ resolved_field_name, MessageName(desc),
+ MessageBaseType(field, /* maybe_const */ false), resolved_upbc_name,
+ arena_expression);
+ }
+ }
+ }
+ output("\n");
+ output("} // namespace internal\n\n");
+}
+
+void WriteMapAccessorDefinitions(const protobuf::Descriptor* message,
+ const protobuf::FieldDescriptor* field,
+ const absl::string_view resolved_field_name,
+ const absl::string_view class_name,
+ Output& output) {
+ const protobuf::Descriptor* entry = field->message_type();
+ const protobuf::FieldDescriptor* key = entry->FindFieldByNumber(1);
+ const protobuf::FieldDescriptor* val = entry->FindFieldByNumber(2);
+ absl::string_view upbc_name = field->name();
+ absl::string_view converted_key_name = "key";
+ absl::string_view optional_conversion_code = "";
+
+ if (key->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING) {
+ // Insert conversion from absl::string_view to upb_StringView.
+ // Creates upb_StringView on stack to prevent allocation.
+ converted_key_name = "upb_key";
+ optional_conversion_code =
+ "upb_StringView upb_key = {key.data(), key.size()};\n";
+ }
+ if (val->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
+ output(
+ R"cc(
+ bool $0::set_$1($2 key, $3 value) {
+ upb_Message* clone = upb_Message_DeepClone(
+ ::protos::internal::PrivateAccess::GetInternalMsg(value), &$9,
+ arena_);
+ $6return $4_$8_set(msg_, $7, ($5*)clone, arena_);
+ }
+ )cc",
+ class_name, resolved_field_name, CppConstType(key),
+ MessagePtrConstType(val, /* is_const */ true), MessageName(message),
+ MessageName(val->message_type()), optional_conversion_code,
+ converted_key_name, upbc_name,
+ ::upbc::MessageInit(val->message_type()->full_name()));
+ output(
+ R"cc(
+ bool $0::set_$1($2 key, $3 value) {
+ upb_Message* clone = upb_Message_DeepClone(
+ ::protos::internal::PrivateAccess::GetInternalMsg(value), &$9,
+ arena_);
+ $6return $4_$8_set(msg_, $7, ($5*)clone, arena_);
+ }
+ )cc",
+ class_name, resolved_field_name, CppConstType(key),
+ MessagePtrConstType(val, /* is_const */ false), MessageName(message),
+ MessageName(val->message_type()), optional_conversion_code,
+ converted_key_name, upbc_name,
+ ::upbc::MessageInit(val->message_type()->full_name()));
+ output(
+ R"cc(
+ absl::StatusOr<$3> $0::get_$1($2 key) {
+ $5* msg_value;
+ $7bool success = $4_$9_get(msg_, $8, &msg_value);
+ if (success) {
+ return ::protos::internal::CreateMessage<$6>(msg_value, arena_);
+ }
+ return absl::NotFoundError("");
+ }
+ )cc",
+ class_name, resolved_field_name, CppConstType(key),
+ MessagePtrConstType(val, /* is_const */ true), MessageName(message),
+ MessageName(val->message_type()),
+ QualifiedClassName(val->message_type()), optional_conversion_code,
+ converted_key_name, upbc_name);
+ output(
+ R"cc(
+ void $0::delete_$1($2 key) { $6$4_$8_delete(msg_, $7); }
+ )cc",
+ class_name, resolved_field_name, CppConstType(key),
+ MessagePtrConstType(val, /* is_const */ false), MessageName(message),
+ MessageName(val->message_type()), optional_conversion_code,
+ converted_key_name, upbc_name);
+ } else if (val->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING) {
+ output(
+ R"cc(
+ bool $0::set_$1($2 key, $3 value) {
+ $5return $4_$7_set(msg_, $6,
+ ::protos::UpbStrFromStringView(value, arena_),
+ arena_);
+ }
+ )cc",
+ class_name, resolved_field_name, CppConstType(key), CppConstType(val),
+ MessageName(message), optional_conversion_code, converted_key_name,
+ upbc_name);
+ output(
+ R"cc(
+ absl::StatusOr<$3> $0::get_$1($2 key) {
+ upb_StringView value;
+ $5bool success = $4_$7_get(msg_, $6, &value);
+ if (success) {
+ return absl::string_view(value.data, value.size);
+ }
+ return absl::NotFoundError("");
+ }
+ )cc",
+ class_name, resolved_field_name, CppConstType(key), CppConstType(val),
+ MessageName(message), optional_conversion_code, converted_key_name,
+ upbc_name);
+ output(
+ R"cc(
+ void $0::delete_$1($2 key) { $5$4_$7_delete(msg_, $6); }
+ )cc",
+ class_name, resolved_field_name, CppConstType(key), CppConstType(val),
+ MessageName(message), optional_conversion_code, converted_key_name,
+ upbc_name);
+ } else {
+ output(
+ R"cc(
+ bool $0::set_$1($2 key, $3 value) {
+ $5return $4_$7_set(msg_, $6, value, arena_);
+ }
+ )cc",
+ class_name, resolved_field_name, CppConstType(key), CppConstType(val),
+ MessageName(message), optional_conversion_code, converted_key_name,
+ upbc_name);
+ output(
+ R"cc(
+ absl::StatusOr<$3> $0::get_$1($2 key) {
+ $3 value;
+ $5bool success = $4_$7_get(msg_, $6, &value);
+ if (success) {
+ return value;
+ }
+ return absl::NotFoundError("");
+ }
+ )cc",
+ class_name, resolved_field_name, CppConstType(key), CppConstType(val),
+ MessageName(message), optional_conversion_code, converted_key_name,
+ upbc_name);
+ output(
+ R"cc(
+ void $0::delete_$1($2 key) { $5$4_$7_delete(msg_, $6); }
+ )cc",
+ class_name, resolved_field_name, CppConstType(key), CppConstType(val),
+ MessageName(message), optional_conversion_code, converted_key_name,
+ upbc_name);
+ }
+}
+
+void WriteUsingAccessorsInHeader(const protobuf::Descriptor* desc,
+ MessageClassType handle_type, Output& output) {
+ bool read_only = handle_type == MessageClassType::kMessageCProxy;
+
+ // Generate const methods.
+ OutputIndenter i(output);
+ std::string class_name = ClassName(desc);
+ auto field_names = CreateFieldNameMap(desc);
+
+ for (const auto* field : FieldNumberOrder(desc)) {
+ std::string resolved_field_name = ResolveFieldName(field, field_names);
+ // Generate hazzer (if any).
+ if (field->has_presence()) {
+ output("using $0Access::has_$1;\n", class_name, resolved_field_name);
+ output("using $0Access::clear_$1;\n", class_name, resolved_field_name);
+ }
+ if (field->is_map()) {
+ output(
+ R"cc(
+ using $0Access::$1_size;
+ using $0Access::clear_$1;
+ using $0Access::delete_$1;
+ using $0Access::get_$1;
+ using $0Access::set_$1;
+ )cc",
+ class_name, resolved_field_name);
+ } else if (desc->options().map_entry()) {
+ // TODO(b/237399867) Implement map entry
+ } else if (field->is_repeated()) {
+ WriteRepeatedFieldUsingAccessors(field, class_name, resolved_field_name,
+ output, read_only);
+ } else {
+ if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
+ output("using $0Access::$1;\n", ClassName(desc), resolved_field_name);
+ if (!read_only) {
+ output("using $0Access::mutable_$1;\n", class_name,
+ resolved_field_name);
+ }
+ } else {
+ output("using $0Access::$1;\n", class_name, resolved_field_name);
+ if (!read_only) {
+ output("using $0Access::set_$1;\n", class_name, resolved_field_name);
+ }
+ }
+ }
+ }
+ for (int i = 0; i < desc->real_oneof_decl_count(); ++i) {
+ const protobuf::OneofDescriptor* oneof = desc->oneof_decl(i);
+ output("using $0Access::$1_case;\n", class_name, oneof->name());
+ output("using $0Access::$1Case;\n", class_name,
+ ToCamelCase(oneof->name(), /*lower_first=*/false));
+ for (int j = 0; j < oneof->field_count(); ++j) {
+ const protobuf::FieldDescriptor* field = oneof->field(j);
+ output("using $0Access::k$1;\n", class_name,
+ ToCamelCase(field->name(), /*lower_first=*/false),
+ field->number());
+ }
+ output("using $0Access::$1_NOT_SET;\n", class_name,
+ absl::AsciiStrToUpper(oneof->name()));
+ }
+}
+
+void WriteOneofAccessorsInHeader(const protobuf::Descriptor* desc,
+ Output& output) {
+ // Generate const methods.
+ OutputIndenter i(output);
+ std::string class_name = ClassName(desc);
+ auto field_names = CreateFieldNameMap(desc);
+ for (int i = 0; i < desc->real_oneof_decl_count(); ++i) {
+ const protobuf::OneofDescriptor* oneof = desc->oneof_decl(i);
+ output("enum $0Case {\n",
+ ToCamelCase(oneof->name(), /*lower_first=*/false));
+ for (int j = 0; j < oneof->field_count(); ++j) {
+ const protobuf::FieldDescriptor* field = oneof->field(j);
+ output(" k$0 = $1,\n", ToCamelCase(field->name(), /*lower_first=*/false),
+ field->number());
+ }
+ output(" $0_NOT_SET = 0,\n", absl::AsciiStrToUpper(oneof->name()));
+ output("};\n\n");
+ output("$0Case $1_case() const {\n",
+ ToCamelCase(oneof->name(), /*lower_first=*/false), oneof->name());
+ for (int j = 0; j < oneof->field_count(); ++j) {
+ const protobuf::FieldDescriptor* field = oneof->field(j);
+ std::string resolved_field_name = ResolveFieldName(field, field_names);
+ output(" if (has_$0()) { return k$1; }\n", resolved_field_name,
+ ToCamelCase(field->name(), /*lower_first=*/false));
+ }
+ output(" return $0_NOT_SET;\n", absl::AsciiStrToUpper(oneof->name()));
+ output("}\n;");
+ }
+}
+
+std::string ResolveFieldName(const protobuf::FieldDescriptor* field,
+ const NameToFieldDescriptorMap& field_names) {
+ // C++ implementation specific reserved names.
+ static const auto& kReservedNames =
+ *new absl::flat_hash_set<absl::string_view>({
+ "msg",
+ "msg_",
+ "arena",
+ "arena_",
+ });
+
+ // C++ specific prefixes used by code generator for field access.
+ static constexpr absl::string_view kClearMethodPrefix = "clear_";
+ static constexpr absl::string_view kSetMethodPrefix = "set_";
+ static constexpr absl::string_view kHasMethodPrefix = "has_";
+ static constexpr absl::string_view kDeleteMethodPrefix = "delete_";
+ static constexpr absl::string_view kAddToRepeatedMethodPrefix = "add_";
+ static constexpr absl::string_view kResizeArrayMethodPrefix = "resize_";
+
+ // List of generated accessor prefixes to check against.
+ // Example:
+ // optional repeated string phase = 236;
+ // optional bool clear_phase = 237;
+ static constexpr absl::string_view kAccessorPrefixes[] = {
+ kClearMethodPrefix, kDeleteMethodPrefix, kAddToRepeatedMethodPrefix,
+ kResizeArrayMethodPrefix, kSetMethodPrefix, kHasMethodPrefix};
+
+ absl::string_view field_name = field->name();
+ if (kReservedNames.count(field_name) > 0) {
+ if (absl::EndsWith(field_name, "_")) {
+ return absl::StrCat(field_name, "_");
+ } else {
+ return absl::StrCat(field_name, "__");
+ }
+ }
+ for (const auto prefix : kAccessorPrefixes) {
+ // If field name starts with a prefix such as clear_ and the proto
+ // contains a field name with trailing end, depending on type of field
+ // (repeated, map, message) we have a conflict to resolve.
+ if (absl::StartsWith(field_name, prefix)) {
+ auto match = field_names.find(field_name.substr(prefix.size()));
+ if (match != field_names.end()) {
+ const auto* candidate = match->second;
+ if (candidate->is_repeated() || candidate->is_map() ||
+ (candidate->cpp_type() ==
+ protobuf::FieldDescriptor::CPPTYPE_STRING &&
+ prefix == kClearMethodPrefix) ||
+ prefix == kSetMethodPrefix || prefix == kHasMethodPrefix) {
+ return absl::StrCat(field_name, "_");
+ }
+ }
+ }
+ }
+ return upbc::ResolveKeywordConflict(std::string(field_name));
+}
+
+} // namespace protos_generator
diff --git a/upb/protos_generator/gen_accessors.h b/upb/protos_generator/gen_accessors.h
new file mode 100644
index 0000000..8372fc3
--- /dev/null
+++ b/upb/protos_generator/gen_accessors.h
@@ -0,0 +1,51 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_PROTOS_GENERATOR_ACCESSORS_H_
+#define UPB_PROTOS_GENERATOR_ACCESSORS_H_
+
+#include "google/protobuf/descriptor.h"
+#include "protos_generator/gen_utils.h"
+#include "protos_generator/output.h"
+
+namespace protos_generator {
+
+namespace protobuf = ::google::protobuf;
+
+void WriteFieldAccessorsInHeader(const protobuf::Descriptor* desc,
+ Output& output);
+void WriteAccessorsInSource(const protobuf::Descriptor* desc, Output& output);
+void WriteUsingAccessorsInHeader(const protobuf::Descriptor* desc,
+ MessageClassType handle_type, Output& output);
+void WriteOneofAccessorsInHeader(const protobuf::Descriptor* desc,
+ Output& output);
+} // namespace protos_generator
+
+#endif // UPB_PROTOS_GENERATOR_ACCESSORS_H_
diff --git a/upb/protos_generator/gen_enums.cc b/upb/protos_generator/gen_enums.cc
new file mode 100644
index 0000000..7c535a7
--- /dev/null
+++ b/upb/protos_generator/gen_enums.cc
@@ -0,0 +1,144 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "protos_generator/gen_enums.h"
+
+#include <algorithm>
+#include <limits>
+#include <string>
+#include <vector>
+
+#include "google/protobuf/descriptor.pb.h"
+#include "google/protobuf/descriptor.h"
+#include "protos_generator/gen_utils.h"
+#include "protos_generator/names.h"
+
+namespace protos_generator {
+
+namespace protobuf = ::google::protobuf;
+
+// Convert enum value to C++ literal.
+//
+// In C++, an value of -2147483648 gets interpreted as the negative of
+// 2147483648, and since 2147483648 can't fit in an integer, this produces a
+// compiler warning. This works around that issue.
+std::string EnumInt32ToString(int number) {
+ if (number == std::numeric_limits<int32_t>::min()) {
+ // This needs to be special-cased, see explanation here:
+ // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
+ return absl::StrCat(number + 1, " - 1");
+ } else {
+ return absl::StrCat(number);
+ }
+}
+
+std::string EnumTypeName(const protobuf::EnumDescriptor* enum_descriptor) {
+ auto containing_type = enum_descriptor->containing_type();
+ if (containing_type == nullptr) {
+ // enums types with no package name are prefixed with protos_ to prevent
+ // conflicts with generated C headers.
+ if (enum_descriptor->file()->package().empty()) {
+ return absl::StrCat(kNoPackageNamePrefix,
+ ToCIdent(enum_descriptor->name()));
+ }
+ return ToCIdent(enum_descriptor->name());
+ } else {
+ // Since the enum is in global name space (no package), it will have the
+ // same classified name as the C header include, to prevent collision
+ // rename as above.
+ if (containing_type->file()->package().empty()) {
+ return ToCIdent(absl::StrCat(containing_type->name(), "_",
+ kNoPackageNamePrefix,
+ enum_descriptor->name()));
+ } else {
+ return ToCIdent(
+ absl::StrCat(containing_type->name(), "_", enum_descriptor->name()));
+ }
+ }
+}
+
+std::string EnumValueSymbolInNameSpace(
+ const protobuf::EnumDescriptor* desc,
+ const protobuf::EnumValueDescriptor* value) {
+ auto containing_type = desc->containing_type();
+ if (containing_type != nullptr) {
+ return ToCIdent(absl::StrCat(containing_type->name(), "_", desc->name(),
+ "_", value->name()));
+ } else {
+ // protos enum values with no package name are prefixed with protos_ to
+ // prevent conflicts with generated C headers.
+ if (desc->file()->package().empty()) {
+ return absl::StrCat(kNoPackageNamePrefix, ToCIdent(value->name()));
+ }
+ return ToCIdent(value->name());
+ }
+}
+
+void WriteEnumValues(const protobuf::EnumDescriptor* desc, Output& output) {
+ std::vector<const protobuf::EnumValueDescriptor*> values;
+ auto value_count = desc->value_count();
+ values.reserve(value_count);
+ for (int i = 0; i < value_count; i++) {
+ values.push_back(desc->value(i));
+ }
+ std::sort(values.begin(), values.end(),
+ [](const protobuf::EnumValueDescriptor* a,
+ const protobuf::EnumValueDescriptor* b) {
+ return a->number() < b->number();
+ });
+
+ for (size_t i = 0; i < values.size(); i++) {
+ auto value = values[i];
+ output(" $0", EnumValueSymbolInNameSpace(desc, value));
+ output(" = $0", EnumInt32ToString(value->number()));
+ if (i != values.size() - 1) {
+ output(",");
+ }
+ output("\n");
+ }
+}
+
+void WriteEnumDeclarations(
+ const std::vector<const protobuf::EnumDescriptor*>& enums, Output& output) {
+ for (auto enumdesc : enums) {
+ output("enum $0 : int {\n", EnumTypeName(enumdesc));
+ WriteEnumValues(enumdesc, output);
+ output("};\n\n");
+ }
+}
+
+void WriteHeaderEnumForwardDecls(
+ std::vector<const protobuf::EnumDescriptor*>& enums, Output& output) {
+ for (const auto* enumdesc : enums) {
+ output("enum $0 : int;\n", EnumTypeName(enumdesc));
+ }
+}
+
+} // namespace protos_generator
diff --git a/upb/protos_generator/gen_enums.h b/upb/protos_generator/gen_enums.h
new file mode 100644
index 0000000..6078762
--- /dev/null
+++ b/upb/protos_generator/gen_enums.h
@@ -0,0 +1,52 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_PROTOS_GENERATOR_ENUMS_H_
+#define UPB_PROTOS_GENERATOR_ENUMS_H_
+
+#include "google/protobuf/descriptor.h"
+#include "protos_generator/output.h"
+
+namespace protos_generator {
+
+namespace protobuf = ::google::protobuf;
+
+std::string EnumTypeName(const protobuf::EnumDescriptor* enum_descriptor);
+std::string EnumValueSymbolInNameSpace(
+ const protobuf::EnumDescriptor* desc,
+ const protobuf::EnumValueDescriptor* value);
+void WriteHeaderEnumForwardDecls(
+ std::vector<const protobuf::EnumDescriptor*>& enums, Output& output);
+void WriteEnumDeclarations(
+ const std::vector<const protobuf::EnumDescriptor*>& enums, Output& output);
+
+} // namespace protos_generator
+
+#endif // UPB_PROTOS_GENERATOR_ENUMS_H_
diff --git a/upb/protos_generator/gen_extensions.cc b/upb/protos_generator/gen_extensions.cc
new file mode 100644
index 0000000..be0cbc5
--- /dev/null
+++ b/upb/protos_generator/gen_extensions.cc
@@ -0,0 +1,117 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "protos_generator/gen_extensions.h"
+
+#include "absl/strings/str_cat.h"
+#include "protos_generator/gen_utils.h"
+#include "protos_generator/names.h"
+
+namespace protos_generator {
+
+namespace protobuf = ::google::protobuf;
+
+std::string ExtensionIdentifierBase(const protobuf::FieldDescriptor* ext) {
+ assert(ext->is_extension());
+ std::string ext_scope;
+ if (ext->extension_scope()) {
+ return MessageName(ext->extension_scope());
+ } else {
+ return ToCIdent(ext->file()->package());
+ }
+}
+
+std::string ContainingTypeName(const protobuf::FieldDescriptor* ext) {
+ return ext->containing_type()->file() != ext->file()
+ ? QualifiedClassName(ext->containing_type())
+ : ClassName(ext->containing_type());
+}
+
+void WriteExtensionIdentifierHeader(const protobuf::FieldDescriptor* ext,
+ Output& output) {
+ std::string mini_table_name =
+ absl::StrCat(ExtensionIdentifierBase(ext), "_", ext->name(), "_ext");
+ if (ext->extension_scope()) {
+ output(
+ R"cc(
+ static const ::protos::internal::ExtensionIdentifier<$0, $1> $2;
+ )cc",
+ ContainingTypeName(ext), CppTypeParameterName(ext), ext->name());
+ } else {
+ output(
+ R"cc(
+ extern const ::protos::internal::ExtensionIdentifier<$0, $1> $2;
+ )cc",
+ ContainingTypeName(ext), CppTypeParameterName(ext), ext->name());
+ }
+}
+
+void WriteExtensionIdentifiersHeader(
+ const std::vector<const protobuf::FieldDescriptor*>& extensions,
+ Output& output) {
+ for (const auto* ext : extensions) {
+ if (!ext->extension_scope()) {
+ WriteExtensionIdentifierHeader(ext, output);
+ }
+ }
+}
+
+void WriteExtensionIdentifier(const protobuf::FieldDescriptor* ext,
+ Output& output) {
+ std::string mini_table_name =
+ absl::StrCat(ExtensionIdentifierBase(ext), "_", ext->name(), "_ext");
+ if (ext->extension_scope()) {
+ output(
+ R"cc(
+ const ::protos::internal::ExtensionIdentifier<$0, $3> $4::$2(&$1);
+ )cc",
+ ContainingTypeName(ext), mini_table_name, ext->name(),
+ CppTypeParameterName(ext), ClassName(ext->extension_scope()));
+ } else {
+ output(
+ R"cc(
+ const ::protos::internal::ExtensionIdentifier<$0, $3> $2(&$1);
+ )cc",
+ ContainingTypeName(ext), mini_table_name, ext->name(),
+ CppTypeParameterName(ext));
+ }
+}
+
+void WriteExtensionIdentifiers(
+ const std::vector<const protobuf::FieldDescriptor*>& extensions,
+ Output& output) {
+ for (const auto* ext : extensions) {
+ if (!ext->extension_scope()) {
+ WriteExtensionIdentifier(ext, output);
+ }
+ }
+}
+
+} // namespace protos_generator
diff --git a/upb/protos_generator/gen_extensions.h b/upb/protos_generator/gen_extensions.h
new file mode 100644
index 0000000..8ec740a
--- /dev/null
+++ b/upb/protos_generator/gen_extensions.h
@@ -0,0 +1,54 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_PROTOS_GENERATOR_GEN_EXTENSIONS_H_
+#define UPB_PROTOS_GENERATOR_GEN_EXTENSIONS_H_
+
+#include "google/protobuf/descriptor.h"
+#include "protos_generator/output.h"
+
+namespace protos_generator {
+
+namespace protobuf = ::google::protobuf;
+
+void WriteExtensionIdentifiersHeader(
+ const std::vector<const protobuf::FieldDescriptor*>& extensions,
+ Output& output);
+void WriteExtensionIdentifierHeader(const protobuf::FieldDescriptor* ext,
+ Output& output);
+void WriteExtensionIdentifiers(
+ const std::vector<const protobuf::FieldDescriptor*>& extensions,
+ Output& output);
+void WriteExtensionIdentifier(const protobuf::FieldDescriptor* ext,
+ Output& output);
+
+} // namespace protos_generator
+
+#endif // UPB_PROTOS_GENERATOR_GEN_EXTENSIONS_H_
diff --git a/upb/protos_generator/gen_messages.cc b/upb/protos_generator/gen_messages.cc
new file mode 100644
index 0000000..4ece015
--- /dev/null
+++ b/upb/protos_generator/gen_messages.cc
@@ -0,0 +1,511 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "protos_generator/gen_messages.h"
+
+#include <string>
+#include <vector>
+
+#include "google/protobuf/descriptor.pb.h"
+#include "absl/strings/str_cat.h"
+#include "google/protobuf/descriptor.h"
+#include "protos_generator/gen_accessors.h"
+#include "protos_generator/gen_enums.h"
+#include "protos_generator/gen_extensions.h"
+#include "protos_generator/gen_utils.h"
+#include "protos_generator/names.h"
+#include "protos_generator/output.h"
+#include "upbc/common.h"
+#include "upbc/file_layout.h"
+
+namespace protos_generator {
+
+namespace protobuf = ::google::protobuf;
+
+void WriteModelAccessDeclaration(const protobuf::Descriptor* descriptor,
+ Output& output);
+void WriteModelPublicDeclaration(
+ const protobuf::Descriptor* descriptor,
+ const std::vector<const protobuf::FieldDescriptor*>& file_exts,
+ const std::vector<const protobuf::EnumDescriptor*>& file_enums,
+ Output& output);
+void WriteExtensionIdentifiersInClassHeader(
+ const protobuf::Descriptor* message,
+ const std::vector<const protobuf::FieldDescriptor*>& file_exts,
+ Output& output);
+void WriteModelProxyDeclaration(const protobuf::Descriptor* descriptor,
+ Output& output);
+void WriteModelCProxyDeclaration(const protobuf::Descriptor* descriptor,
+ Output& output);
+void WriteInternalForwardDeclarationsInHeader(
+ const protobuf::Descriptor* message, Output& output);
+void WriteDefaultInstanceHeader(const protobuf::Descriptor* message,
+ Output& output);
+void WriteExtensionIdentifiersImplementation(
+ const protobuf::Descriptor* message,
+ const std::vector<const protobuf::FieldDescriptor*>& file_exts,
+ Output& output);
+void WriteUsingEnumsInHeader(
+ const protobuf::Descriptor* message,
+ const std::vector<const protobuf::EnumDescriptor*>& file_enums,
+ Output& output);
+
+// Writes message class declarations into .upb.proto.h.
+//
+// For each proto Foo, FooAccess and FooProxy/FooCProxy are generated
+// that are exposed to users as Foo , Ptr<Foo> and Ptr<const Foo>.
+void WriteMessageClassDeclarations(
+ const protobuf::Descriptor* descriptor,
+ const std::vector<const protobuf::FieldDescriptor*>& file_exts,
+ const std::vector<const protobuf::EnumDescriptor*>& file_enums,
+ Output& output) {
+ if (IsMapEntryMessage(descriptor)) {
+ // Skip map entry generation. Low level accessors for maps are
+ // generated that don't require a separate map type.
+ return;
+ }
+
+ // Forward declaration of Proto Class for GCC handling of free friend method.
+ output("class $0;\n", ClassName(descriptor));
+ output("namespace internal {\n\n");
+ WriteModelAccessDeclaration(descriptor, output);
+ output("\n");
+ WriteInternalForwardDeclarationsInHeader(descriptor, output);
+ output("\n");
+ output("} // namespace internal\n\n");
+ WriteModelPublicDeclaration(descriptor, file_exts, file_enums, output);
+ output("namespace internal {\n");
+ WriteModelCProxyDeclaration(descriptor, output);
+ WriteModelProxyDeclaration(descriptor, output);
+ output("} // namespace internal\n\n");
+}
+
+void WriteModelAccessDeclaration(const protobuf::Descriptor* descriptor,
+ Output& output) {
+ output(
+ R"cc(
+ class $0Access {
+ public:
+ $0Access() {}
+ $0Access($1* msg, upb_Arena* arena) : msg_(msg), arena_(arena) {
+ assert(arena != nullptr);
+ } // NOLINT
+ $0Access(const $1* msg, upb_Arena* arena)
+ : msg_(const_cast<$1*>(msg)), arena_(arena) {
+ assert(arena != nullptr);
+ } // NOLINT
+ void* GetInternalArena() const { return arena_; }
+ )cc",
+ ClassName(descriptor), MessageName(descriptor));
+ WriteFieldAccessorsInHeader(descriptor, output);
+ WriteOneofAccessorsInHeader(descriptor, output);
+ output.Indent();
+ output(
+ R"cc(
+ private:
+ friend class $2;
+ friend class $0Proxy;
+ friend class $0CProxy;
+ friend struct ::protos::internal::PrivateAccess;
+ $1* msg_;
+ upb_Arena* arena_;
+ )cc",
+ ClassName(descriptor), MessageName(descriptor),
+ QualifiedClassName(descriptor));
+ output.Outdent();
+ output("};\n");
+}
+
+void WriteModelPublicDeclaration(
+ const protobuf::Descriptor* descriptor,
+ const std::vector<const protobuf::FieldDescriptor*>& file_exts,
+ const std::vector<const protobuf::EnumDescriptor*>& file_enums,
+ Output& output) {
+ output(
+ R"cc(
+ class $0 final : private internal::$0Access {
+ public:
+ using Access = internal::$0Access;
+ using Proxy = internal::$0Proxy;
+ using CProxy = internal::$0CProxy;
+
+ $0();
+
+ $0(const $0& from);
+ $0& operator=(const $3& from);
+ $0(const CProxy& from);
+ $0(const Proxy& from);
+ $0& operator=(const CProxy& from);
+
+ $0($0&& m)
+ : Access(absl::exchange(m.msg_, nullptr),
+ absl::exchange(m.arena_, nullptr)),
+ owned_arena_(std::move(m.owned_arena_)) {}
+
+ $0& operator=($0&& m) {
+ msg_ = absl::exchange(m.msg_, nullptr);
+ arena_ = absl::exchange(m.arena_, nullptr);
+ owned_arena_ = std::move(m.owned_arena_);
+ return *this;
+ }
+ )cc",
+ ClassName(descriptor), ::upbc::MessageInit(descriptor->full_name()),
+ MessageName(descriptor), QualifiedClassName(descriptor));
+
+ WriteUsingAccessorsInHeader(descriptor, MessageClassType::kMessage, output);
+ WriteUsingEnumsInHeader(descriptor, file_enums, output);
+ WriteDefaultInstanceHeader(descriptor, output);
+ WriteExtensionIdentifiersInClassHeader(descriptor, file_exts, output);
+ if (descriptor->extension_range_count()) {
+ // for typetrait checking
+ output("using ExtendableType = $0;\n", ClassName(descriptor));
+ }
+ // Note: free function friends that are templates such as ::protos::Parse
+ // require explicit <$2> type parameter in declaration to be able to compile
+ // with gcc otherwise the compiler will fail with
+ // "has not been declared within namespace" error. Even though there is a
+ // namespace qualifier, cross namespace matching fails.
+ output.Indent();
+ output(
+ R"cc(
+ static const upb_MiniTable* minitable();
+ using $0Access::GetInternalArena;
+ )cc",
+ ClassName(descriptor));
+ output("\n");
+ output(
+ R"cc(
+ private:
+ const void* msg() const { return msg_; }
+ void* msg() { return msg_; }
+
+ $0(upb_Message* msg, upb_Arena* arena) : $0Access() {
+ msg_ = ($1*)msg;
+ arena_ = owned_arena_.ptr();
+ upb_Arena_Fuse(arena_, arena);
+ }
+ ::protos::Arena owned_arena_;
+ friend struct ::protos::internal::PrivateAccess;
+ friend Proxy;
+ friend CProxy;
+ friend absl::StatusOr<$2>(::protos::Parse<$2>(absl::string_view bytes,
+ int options));
+ friend absl::StatusOr<$2>(::protos::Parse<$2>(
+ absl::string_view bytes,
+ const ::protos::ExtensionRegistry& extension_registry,
+ int options));
+ friend upb_Arena* ::protos::internal::GetArena<$0>($0* message);
+ friend upb_Arena* ::protos::internal::GetArena<$0>(::protos::Ptr<$0> message);
+ friend $0(::protos::internal::MoveMessage<$0>(upb_Message* msg,
+ upb_Arena* arena));
+ )cc",
+ ClassName(descriptor), MessageName(descriptor),
+ QualifiedClassName(descriptor));
+ output.Outdent();
+ output("};\n\n");
+}
+
+void WriteModelProxyDeclaration(const protobuf::Descriptor* descriptor,
+ Output& output) {
+ // Foo::Proxy.
+ output(
+ R"cc(
+ class $0Proxy final : private internal::$0Access {
+ public:
+ $0Proxy() = delete;
+ $0Proxy(const $0Proxy& m) : internal::$0Access() {
+ msg_ = m.msg_;
+ arena_ = m.arena_;
+ }
+ $0Proxy($0* m) : internal::$0Access() {
+ msg_ = m->msg_;
+ arena_ = m->arena_;
+ }
+ $0Proxy operator=(const $0Proxy& m) {
+ msg_ = m.msg_;
+ arena_ = m.arena_;
+ return *this;
+ }
+ using $0Access::GetInternalArena;
+ )cc",
+ ClassName(descriptor));
+
+ WriteUsingAccessorsInHeader(descriptor, MessageClassType::kMessageProxy,
+ output);
+ output("\n");
+ output.Indent(1);
+ output(
+ R"cc(
+ private:
+ void* msg() const { return msg_; }
+
+ $0Proxy(void* msg, upb_Arena* arena) : internal::$0Access(($1*)msg, arena) {}
+ friend $0::Proxy(::protos::CreateMessage<$0>(::protos::Arena& arena));
+ friend $0::Proxy(::protos::internal::CreateMessageProxy<$0>(
+ upb_Message*, upb_Arena*));
+ friend struct ::protos::internal::PrivateAccess;
+ friend class RepeatedFieldProxy;
+ friend class $0CProxy;
+ friend class $0Access;
+ friend class ::protos::Ptr<$0>;
+ friend class ::protos::Ptr<const $0>;
+ static const upb_MiniTable* minitable() { return $0::minitable(); }
+ friend const upb_MiniTable* ::protos::internal::GetMiniTable<$0Proxy>(
+ const $0Proxy* message);
+ friend const upb_MiniTable* ::protos::internal::GetMiniTable<$0Proxy>(
+ ::protos::Ptr<$0Proxy> message);
+ friend upb_Arena* ::protos::internal::GetArena<$2>($2* message);
+ friend upb_Arena* ::protos::internal::GetArena<$2>(::protos::Ptr<$2> message);
+ friend $0Proxy(::protos::CloneMessage(::protos::Ptr<$2> message,
+ ::upb::Arena& arena));
+
+ static void Rebind($0Proxy& lhs, const $0Proxy& rhs) {
+ lhs.msg_ = rhs.msg_;
+ lhs.arena_ = rhs.arena_;
+ }
+ )cc",
+ ClassName(descriptor), MessageName(descriptor),
+ QualifiedClassName(descriptor));
+ output.Outdent(1);
+ output("};\n\n");
+}
+
+void WriteModelCProxyDeclaration(const protobuf::Descriptor* descriptor,
+ Output& output) {
+ // Foo::CProxy.
+ output(
+ R"cc(
+ class $0CProxy final : private internal::$0Access {
+ public:
+ $0CProxy() = delete;
+ $0CProxy(const $0* m)
+ : internal::$0Access(m->msg_, ::protos::internal::GetArena(m)) {}
+ $0CProxy($0Proxy m);
+ using $0Access::GetInternalArena;
+ )cc",
+ ClassName(descriptor), MessageName(descriptor));
+
+ WriteUsingAccessorsInHeader(descriptor, MessageClassType::kMessageProxy,
+ output);
+
+ output.Indent(1);
+ output(
+ R"cc(
+ private:
+ using AsNonConst = $0Proxy;
+ const void* msg() const { return msg_; }
+
+ $0CProxy(void* msg, upb_Arena* arena) : internal::$0Access(($1*)msg, arena){};
+ friend $0::CProxy(::protos::internal::CreateMessage<$0>(
+ upb_Message* msg, upb_Arena* arena));
+ friend struct ::protos::internal::PrivateAccess;
+ friend class RepeatedFieldProxy;
+ friend class ::protos::Ptr<$0>;
+ friend class ::protos::Ptr<const $0>;
+ static const upb_MiniTable* minitable() { return $0::minitable(); }
+ friend const upb_MiniTable* ::protos::internal::GetMiniTable<$0CProxy>(
+ const $0CProxy* message);
+ friend const upb_MiniTable* ::protos::internal::GetMiniTable<$0CProxy>(
+ ::protos::Ptr<$0CProxy> message);
+
+ static void Rebind($0CProxy& lhs, const $0CProxy& rhs) {
+ lhs.msg_ = rhs.msg_;
+ lhs.arena_ = rhs.arena_;
+ }
+ )cc",
+ ClassName(descriptor), MessageName(descriptor));
+ output.Outdent(1);
+ output("};\n\n");
+}
+
+void WriteDefaultInstanceHeader(const protobuf::Descriptor* message,
+ Output& output) {
+ output(" static ::protos::Ptr<const $0> default_instance();\n",
+ ClassName(message));
+}
+
+void WriteMessageImplementation(
+ const protobuf::Descriptor* descriptor,
+ const std::vector<const protobuf::FieldDescriptor*>& file_exts,
+ Output& output) {
+ bool message_is_map_entry = descriptor->options().map_entry();
+ if (!message_is_map_entry) {
+ // Constructor.
+ output(
+ R"cc(
+ $0::$0() : $0Access() {
+ arena_ = owned_arena_.ptr();
+ msg_ = $1_new(arena_);
+ }
+ $0::$0(const $0& from) : $0Access() {
+ arena_ = owned_arena_.ptr();
+ msg_ = ($1*)upb_Message_DeepClone(from.msg_, &$2, arena_);
+ }
+ $0::$0(const CProxy& from) : $0Access() {
+ arena_ = owned_arena_.ptr();
+ msg_ = ($1*)upb_Message_DeepClone(
+ ::protos::internal::GetInternalMsg(&from), &$2, arena_);
+ }
+ $0::$0(const Proxy& from) : $0(static_cast<const CProxy&>(from)) {}
+ internal::$0CProxy::$0CProxy($0Proxy m) : $0Access() {
+ arena_ = m.arena_;
+ msg_ = ($1*)::protos::internal::GetInternalMsg(&m);
+ }
+ $0& $0::operator=(const $3& from) {
+ arena_ = owned_arena_.ptr();
+ msg_ = ($1*)upb_Message_DeepClone(from.msg_, &$2, arena_);
+ return *this;
+ }
+ $0& $0::operator=(const CProxy& from) {
+ arena_ = owned_arena_.ptr();
+ msg_ = ($1*)upb_Message_DeepClone(
+ ::protos::internal::GetInternalMsg(&from), &$2, arena_);
+ return *this;
+ }
+ )cc",
+ ClassName(descriptor), MessageName(descriptor),
+ ::upbc::MessageInit(descriptor->full_name()),
+ QualifiedClassName(descriptor));
+ output("\n");
+ // Minitable
+ output(
+ R"cc(
+ const upb_MiniTable* $0::minitable() { return &$1; }
+ )cc",
+ ClassName(descriptor), ::upbc::MessageInit(descriptor->full_name()));
+ output("\n");
+ }
+
+ WriteAccessorsInSource(descriptor, output);
+
+ if (!message_is_map_entry) {
+ output(
+ R"cc(
+ struct $0DefaultTypeInternal {
+ $1* msg;
+ upb_Arena* arena;
+ };
+ static $0DefaultTypeInternal _$0DefaultTypeBuilder() {
+ upb_Arena* arena = upb_Arena_New();
+ return $0DefaultTypeInternal{$1_new(arena), arena};
+ }
+ $0DefaultTypeInternal _$0_default_instance_ = _$0DefaultTypeBuilder();
+ )cc",
+ ClassName(descriptor), MessageName(descriptor));
+
+ output(
+ R"cc(
+ ::protos::Ptr<const $0> $0::default_instance() {
+ return ::protos::internal::CreateMessage<$0>(
+ (upb_Message *)_$0_default_instance_.msg,
+ _$0_default_instance_.arena);
+ }
+ )cc",
+ ClassName(descriptor));
+
+ WriteExtensionIdentifiersImplementation(descriptor, file_exts, output);
+ }
+}
+
+void WriteInternalForwardDeclarationsInHeader(
+ const protobuf::Descriptor* message, Output& output) {
+ // Write declaration for internal re-usable default_instance without
+ // leaking implementation.
+ output(
+ R"cc(
+ struct $0DefaultTypeInternal;
+ extern $0DefaultTypeInternal _$0_default_instance_;
+ )cc",
+ ClassName(message));
+}
+
+void WriteExtensionIdentifiersInClassHeader(
+ const protobuf::Descriptor* message,
+ const std::vector<const protobuf::FieldDescriptor*>& file_exts,
+ Output& output) {
+ for (auto* ext : file_exts) {
+ if (ext->extension_scope() &&
+ ext->extension_scope()->full_name() == message->full_name()) {
+ WriteExtensionIdentifierHeader(ext, output);
+ }
+ }
+}
+
+void WriteExtensionIdentifiersImplementation(
+ const protobuf::Descriptor* message,
+ const std::vector<const protobuf::FieldDescriptor*>& file_exts,
+ Output& output) {
+ for (auto* ext : file_exts) {
+ if (ext->extension_scope() &&
+ ext->extension_scope()->full_name() == message->full_name()) {
+ WriteExtensionIdentifier(ext, output);
+ }
+ }
+}
+
+void WriteUsingEnumsInHeader(
+ const protobuf::Descriptor* message,
+ const std::vector<const protobuf::EnumDescriptor*>& file_enums,
+ Output& output) {
+ for (auto* enum_descriptor : file_enums) {
+ std::string enum_type_name = EnumTypeName(enum_descriptor);
+ std::string enum_resolved_type_name =
+ enum_descriptor->file()->package().empty() &&
+ enum_descriptor->containing_type() == nullptr
+ ? absl::StrCat(kNoPackageNamePrefix,
+ ToCIdent(enum_descriptor->name()))
+ : enum_type_name;
+ if (enum_descriptor->containing_type() == nullptr ||
+ enum_descriptor->containing_type()->full_name() !=
+ message->full_name()) {
+ continue;
+ }
+ output("using $0", enum_descriptor->name());
+ if (enum_descriptor->options().deprecated()) {
+ output(" ABSL_DEPRECATED(\"Proto enum $0\")", enum_descriptor->name());
+ }
+ output(" = $0;", enum_resolved_type_name);
+ output("\n");
+ int value_count = enum_descriptor->value_count();
+ for (int i = 0; i < value_count; i++) {
+ output("static constexpr $0 $1", enum_descriptor->name(),
+ enum_descriptor->value(i)->name());
+ if (enum_descriptor->options().deprecated() ||
+ enum_descriptor->value(i)->options().deprecated()) {
+ output(" ABSL_DEPRECATED(\"Proto enum value $0\") ",
+ enum_descriptor->value(i)->name());
+ }
+ output(" = $0;\n", EnumValueSymbolInNameSpace(enum_descriptor,
+ enum_descriptor->value(i)));
+ }
+ }
+}
+
+} // namespace protos_generator
diff --git a/upb/protos_generator/gen_messages.h b/upb/protos_generator/gen_messages.h
new file mode 100644
index 0000000..3236964
--- /dev/null
+++ b/upb/protos_generator/gen_messages.h
@@ -0,0 +1,51 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_PROTOS_GENERATOR_GEN_MESSAGES_H_
+#define UPB_PROTOS_GENERATOR_GEN_MESSAGES_H_
+
+#include "google/protobuf/descriptor.h"
+#include "protos_generator/output.h"
+
+namespace protos_generator {
+namespace protobuf = ::google::protobuf;
+
+void WriteMessageClassDeclarations(
+ const protobuf::Descriptor* descriptor,
+ const std::vector<const protobuf::FieldDescriptor*>& file_exts,
+ const std::vector<const protobuf::EnumDescriptor*>& file_enums,
+ Output& output);
+void WriteMessageImplementation(
+ const protobuf::Descriptor* descriptor,
+ const std::vector<const protobuf::FieldDescriptor*>& file_exts,
+ Output& output);
+} // namespace protos_generator
+
+#endif // UPB_PROTOS_GENERATOR_GEN_MESSAGES_H_
diff --git a/upb/protos_generator/gen_repeated_fields.cc b/upb/protos_generator/gen_repeated_fields.cc
new file mode 100644
index 0000000..d5b4bab
--- /dev/null
+++ b/upb/protos_generator/gen_repeated_fields.cc
@@ -0,0 +1,347 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#include "protos_generator/gen_repeated_fields.h"
+
+#include <string>
+#include <vector>
+
+#include "google/protobuf/descriptor.pb.h"
+#include "absl/strings/string_view.h"
+#include "google/protobuf/descriptor.h"
+#include "protos_generator/gen_accessors.h"
+#include "protos_generator/gen_enums.h"
+#include "protos_generator/gen_extensions.h"
+#include "protos_generator/gen_utils.h"
+#include "protos_generator/names.h"
+#include "protos_generator/output.h"
+#include "upbc/common.h"
+#include "upbc/file_layout.h"
+#include "upbc/names.h"
+
+namespace protos_generator {
+namespace protobuf = ::google::protobuf;
+
+// Adds using accessors to reuse base Access class members from a Proxy/CProxy.
+void WriteRepeatedFieldUsingAccessors(const protobuf::FieldDescriptor* field,
+ absl::string_view class_name,
+ absl::string_view resolved_field_name,
+ Output& output, bool read_only) {
+ if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
+ output(
+ R"cc(
+ using $0Access::$1;
+ using $0Access::$1_size;
+ )cc",
+ class_name, resolved_field_name);
+ if (!read_only) {
+ output(
+ R"cc(
+ using $0Access::add_$1;
+ using $0Access::mutable_$1;
+ )cc",
+ class_name, resolved_field_name);
+ }
+ } else {
+ output(
+ R"cc(
+ using $0Access::$1;
+ using $0Access::$1_size;
+ )cc",
+ class_name, resolved_field_name);
+ if (!read_only) {
+ output(
+ R"cc(
+ using $0Access::add_$1;
+ using $0Access::mutable_$1;
+ using $0Access::resize_$1;
+ using $0Access::set_$1;
+ )cc",
+ class_name, resolved_field_name);
+ }
+ }
+}
+
+void WriteRepeatedFieldsInMessageHeader(const protobuf::Descriptor* desc,
+ const protobuf::FieldDescriptor* field,
+ absl::string_view resolved_field_name,
+ absl::string_view resolved_upbc_name,
+ Output& output) {
+ output(
+ R"cc(
+ inline size_t $1_size() const {
+ size_t len;
+ $0_$2(msg_, &len);
+ return len;
+ }
+ )cc",
+ MessageName(desc), resolved_field_name, resolved_upbc_name);
+
+ if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
+ output(
+ R"cc(
+ $1 $2(size_t index) const;
+ const ::protos::RepeatedField<const $4>::CProxy $2() const;
+ ::protos::Ptr<::protos::RepeatedField<$4>> mutable_$2();
+ absl::StatusOr<$0> add_$2();
+ $0 mutable_$2(size_t index) const;
+ )cc",
+ MessagePtrConstType(field, /* const */ false), // $0
+ MessagePtrConstType(field, /* const */ true), // $1
+ resolved_field_name, // $2
+ resolved_upbc_name, // $3
+ MessageBaseType(field, /* maybe_const */ false) // $4
+ );
+ } else if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING) {
+ output(
+ R"cc(
+ $0 $1(size_t index) const;
+ const ::protos::RepeatedField<$0>::CProxy $1() const;
+ ::protos::Ptr<::protos::RepeatedField<$0>> mutable_$1();
+ bool add_$1($0 val);
+ void set_$1(size_t index, $0 val);
+ bool resize_$1(size_t len);
+ )cc",
+ CppConstType(field), resolved_field_name);
+ } else {
+ output(
+ R"cc(
+ $0 $1(size_t index) const;
+ const ::protos::RepeatedField<$0>::CProxy $1() const;
+ ::protos::Ptr<::protos::RepeatedField<$0>> mutable_$1();
+ bool add_$1($0 val);
+ void set_$1(size_t index, $0 val);
+ bool resize_$1(size_t len);
+ )cc",
+ CppConstType(field), resolved_field_name);
+ }
+}
+
+void WriteRepeatedMessageAccessor(const protobuf::Descriptor* message,
+ const protobuf::FieldDescriptor* field,
+ const absl::string_view resolved_field_name,
+ const absl::string_view class_name,
+ Output& output) {
+ const char arena_expression[] = "arena_";
+ absl::string_view upbc_name = field->name();
+ output(
+ R"cc(
+ $1 $0::$2(size_t index) const {
+ size_t len;
+ auto* ptr = $3_$5(msg_, &len);
+ assert(index < len);
+ return ::protos::internal::CreateMessage<$4>(
+ (upb_Message*)*(ptr + index), arena_);
+ }
+ )cc",
+ class_name, MessagePtrConstType(field, /* is_const */ true),
+ resolved_field_name, MessageName(message),
+ MessageBaseType(field, /* maybe_const */ false), upbc_name);
+ output(
+ R"cc(
+ absl::StatusOr<$1> $0::add_$2() {
+ auto new_msg = $3_add_$6(msg_, $5);
+ if (!new_msg) {
+ return ::protos::MessageAllocationError();
+ }
+ return ::protos::internal::CreateMessageProxy<$4>((upb_Message*)new_msg, $5);
+ }
+ )cc",
+ class_name, MessagePtrConstType(field, /* const */ false),
+ resolved_field_name, MessageName(message),
+ MessageBaseType(field, /* maybe_const */ false), arena_expression,
+ upbc_name);
+ output(
+ R"cc(
+ $1 $0::mutable_$2(size_t index) const {
+ size_t len;
+ auto* ptr = $3_$6(msg_, &len);
+ assert(index < len);
+ return ::protos::internal::CreateMessageProxy<$4>(
+ (upb_Message*)*(ptr + index), $5);
+ }
+ )cc",
+ class_name, MessagePtrConstType(field, /* is_const */ false),
+ resolved_field_name, MessageName(message),
+ MessageBaseType(field, /* maybe_const */ false), arena_expression,
+ upbc_name);
+ output(
+ R"cc(
+ const ::protos::RepeatedField<const $1>::CProxy $0::$2() const {
+ size_t size;
+ const upb_Array* arr = _$3_$4_$5(msg_, &size);
+ return ::protos::RepeatedField<const $1>::CProxy(arr, arena_);
+ };
+ ::protos::Ptr<::protos::RepeatedField<$1>> $0::mutable_$2() {
+ size_t size;
+ upb_Array* arr = _$3_$4_$6(msg_, &size, arena_);
+ return ::protos::RepeatedField<$1>::Proxy(arr, arena_);
+ }
+ )cc",
+ class_name, // $0
+ MessageBaseType(field, /* maybe_const */ false), // $1
+ resolved_field_name, // $2
+ MessageName(message), // $3
+ upbc_name, // $4
+ upbc::kRepeatedFieldArrayGetterPostfix, // $5
+ upbc::kRepeatedFieldMutableArrayGetterPostfix // $6
+ );
+}
+
+void WriteRepeatedStringAccessor(const protobuf::Descriptor* message,
+ const protobuf::FieldDescriptor* field,
+ const absl::string_view resolved_field_name,
+ const absl::string_view class_name,
+ Output& output) {
+ absl::string_view upbc_name = field->name();
+ output(
+ R"cc(
+ $1 $0::$2(size_t index) const {
+ size_t len;
+ auto* ptr = $3_mutable_$4(msg_, &len);
+ assert(index < len);
+ return ::protos::UpbStrToStringView(*(ptr + index));
+ }
+ )cc",
+ class_name, CppConstType(field), resolved_field_name,
+ MessageName(message), upbc_name);
+ output(
+ R"cc(
+ bool $0::resize_$1(size_t len) {
+ return $2_resize_$3(msg_, len, arena_);
+ }
+ )cc",
+ class_name, resolved_field_name, MessageName(message), upbc_name);
+ output(
+ R"cc(
+ bool $0::add_$2($1 val) {
+ return $3_add_$4(msg_, ::protos::UpbStrFromStringView(val, arena_), arena_);
+ }
+ )cc",
+ class_name, CppConstType(field), resolved_field_name,
+ MessageName(message), upbc_name);
+ output(
+ R"cc(
+ void $0::set_$2(size_t index, $1 val) {
+ size_t len;
+ auto* ptr = $3_mutable_$4(msg_, &len);
+ assert(index < len);
+ *(ptr + index) = ::protos::UpbStrFromStringView(val, arena_);
+ }
+ )cc",
+ class_name, CppConstType(field), resolved_field_name,
+ MessageName(message), upbc_name);
+ output(
+ R"cc(
+ const ::protos::RepeatedField<$1>::CProxy $0::$2() const {
+ size_t size;
+ const upb_Array* arr = _$3_$4_$5(msg_, &size);
+ return ::protos::RepeatedField<$1>::CProxy(arr, arena_);
+ };
+ ::protos::Ptr<::protos::RepeatedField<$1>> $0::mutable_$2() {
+ size_t size;
+ upb_Array* arr = _$3_$4_$6(msg_, &size, arena_);
+ return ::protos::RepeatedField<$1>::Proxy(arr, arena_);
+ }
+ )cc",
+ class_name, // $0
+ CppConstType(field), // $1
+ resolved_field_name, // $2
+ MessageName(message), // $3
+ upbc_name, // $4
+ upbc::kRepeatedFieldArrayGetterPostfix, // $5
+ upbc::kRepeatedFieldMutableArrayGetterPostfix // $6
+ );
+}
+
+void WriteRepeatedScalarAccessor(const protobuf::Descriptor* message,
+ const protobuf::FieldDescriptor* field,
+ const absl::string_view resolved_field_name,
+ const absl::string_view class_name,
+ Output& output) {
+ absl::string_view upbc_name = field->name();
+ output(
+ R"cc(
+ $1 $0::$2(size_t index) const {
+ size_t len;
+ auto* ptr = $3_mutable_$4(msg_, &len);
+ assert(index < len);
+ return *(ptr + index);
+ }
+ )cc",
+ class_name, CppConstType(field), resolved_field_name,
+ MessageName(message), upbc_name);
+ output(
+ R"cc(
+ bool $0::resize_$1(size_t len) {
+ return $2_resize_$3(msg_, len, arena_);
+ }
+ )cc",
+ class_name, resolved_field_name, MessageName(message), upbc_name);
+ output(
+ R"cc(
+ bool $0::add_$2($1 val) { return $3_add_$4(msg_, val, arena_); }
+ )cc",
+ class_name, CppConstType(field), resolved_field_name,
+ MessageName(message), upbc_name);
+ output(
+ R"cc(
+ void $0::set_$2(size_t index, $1 val) {
+ size_t len;
+ auto* ptr = $3_mutable_$4(msg_, &len);
+ assert(index < len);
+ *(ptr + index) = val;
+ }
+ )cc",
+ class_name, CppConstType(field), resolved_field_name,
+ MessageName(message), upbc_name);
+ output(
+ R"cc(
+ const ::protos::RepeatedField<$1>::CProxy $0::$2() const {
+ size_t size;
+ const upb_Array* arr = _$3_$4_$5(msg_, &size);
+ return ::protos::RepeatedField<$1>::CProxy(arr, arena_);
+ };
+ ::protos::Ptr<::protos::RepeatedField<$1>> $0::mutable_$2() {
+ size_t size;
+ upb_Array* arr = _$3_$4_$6(msg_, &size, arena_);
+ return ::protos::RepeatedField<$1>::Proxy(arr, arena_);
+ }
+ )cc",
+ class_name, // $0
+ CppConstType(field), // $1
+ resolved_field_name, // $2
+ MessageName(message), // $3
+ upbc_name, // $4
+ upbc::kRepeatedFieldArrayGetterPostfix, // $5
+ upbc::kRepeatedFieldMutableArrayGetterPostfix // $6
+ );
+}
+
+} // namespace protos_generator
diff --git a/upb/protos_generator/gen_repeated_fields.h b/upb/protos_generator/gen_repeated_fields.h
new file mode 100644
index 0000000..1650eb0
--- /dev/null
+++ b/upb/protos_generator/gen_repeated_fields.h
@@ -0,0 +1,69 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef THIRD_PARTY_UPB_PROTOS_GENERATOR_GEN_REPEATED_FIELDS_H_
+#define THIRD_PARTY_UPB_PROTOS_GENERATOR_GEN_REPEATED_FIELDS_H_
+
+#include "absl/strings/string_view.h"
+#include "google/protobuf/descriptor.h"
+#include "protos_generator/output.h"
+
+namespace protos_generator {
+namespace protobuf = ::google::protobuf;
+
+void WriteRepeatedFieldUsingAccessors(const protobuf::FieldDescriptor* field,
+ absl::string_view class_name,
+ absl::string_view resolved_field_name,
+ Output& output, bool read_only);
+
+void WriteRepeatedFieldsInMessageHeader(const protobuf::Descriptor* desc,
+ const protobuf::FieldDescriptor* field,
+ absl::string_view resolved_field_name,
+ absl::string_view resolved_upbc_name,
+ Output& output);
+
+void WriteRepeatedMessageAccessor(const protobuf::Descriptor* message,
+ const protobuf::FieldDescriptor* field,
+ absl::string_view resolved_field_name,
+ absl::string_view class_name, Output& output);
+
+void WriteRepeatedStringAccessor(const protobuf::Descriptor* message,
+ const protobuf::FieldDescriptor* field,
+ absl::string_view resolved_field_name,
+ absl::string_view class_name, Output& output);
+
+void WriteRepeatedScalarAccessor(const protobuf::Descriptor* message,
+ const protobuf::FieldDescriptor* field,
+ absl::string_view resolved_field_name,
+ absl::string_view class_name, Output& output);
+
+} // namespace protos_generator
+
+#endif // THIRD_PARTY_UPB_PROTOS_GENERATOR_GEN_REPEATED_FIELDS_H_
diff --git a/upb/protos_generator/gen_utils.cc b/upb/protos_generator/gen_utils.cc
new file mode 100644
index 0000000..79dd5a1
--- /dev/null
+++ b/upb/protos_generator/gen_utils.cc
@@ -0,0 +1,152 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "protos_generator/gen_utils.h"
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "absl/strings/ascii.h"
+
+namespace protos_generator {
+
+namespace protobuf = ::google::protobuf;
+
+void AddEnums(const protobuf::Descriptor* message,
+ std::vector<const protobuf::EnumDescriptor*>* enums) {
+ enums->reserve(enums->size() + message->enum_type_count());
+ for (int i = 0; i < message->enum_type_count(); i++) {
+ enums->push_back(message->enum_type(i));
+ }
+ for (int i = 0; i < message->nested_type_count(); i++) {
+ AddEnums(message->nested_type(i), enums);
+ }
+}
+
+std::vector<const protobuf::EnumDescriptor*> SortedEnums(
+ const protobuf::FileDescriptor* file) {
+ std::vector<const protobuf::EnumDescriptor*> enums;
+ enums.reserve(file->enum_type_count());
+ for (int i = 0; i < file->enum_type_count(); i++) {
+ enums.push_back(file->enum_type(i));
+ }
+ for (int i = 0; i < file->message_type_count(); i++) {
+ AddEnums(file->message_type(i), &enums);
+ }
+ return enums;
+}
+
+void AddMessages(const protobuf::Descriptor* message,
+ std::vector<const protobuf::Descriptor*>* messages) {
+ messages->push_back(message);
+ for (int i = 0; i < message->nested_type_count(); i++) {
+ AddMessages(message->nested_type(i), messages);
+ }
+}
+
+std::vector<const protobuf::Descriptor*> SortedMessages(
+ const protobuf::FileDescriptor* file) {
+ std::vector<const protobuf::Descriptor*> messages;
+ for (int i = 0; i < file->message_type_count(); i++) {
+ AddMessages(file->message_type(i), &messages);
+ }
+ return messages;
+}
+
+void AddExtensionsFromMessage(
+ const protobuf::Descriptor* message,
+ std::vector<const protobuf::FieldDescriptor*>* exts) {
+ for (int i = 0; i < message->extension_count(); i++) {
+ exts->push_back(message->extension(i));
+ }
+ for (int i = 0; i < message->nested_type_count(); i++) {
+ AddExtensionsFromMessage(message->nested_type(i), exts);
+ }
+}
+
+std::vector<const protobuf::FieldDescriptor*> SortedExtensions(
+ const protobuf::FileDescriptor* file) {
+ const int extension_count = file->extension_count();
+ const int message_type_count = file->message_type_count();
+
+ std::vector<const protobuf::FieldDescriptor*> ret;
+ ret.reserve(extension_count + message_type_count);
+
+ for (int i = 0; i < extension_count; i++) {
+ ret.push_back(file->extension(i));
+ }
+ for (int i = 0; i < message_type_count; i++) {
+ AddExtensionsFromMessage(file->message_type(i), &ret);
+ }
+
+ return ret;
+}
+
+std::vector<const protobuf::FieldDescriptor*> FieldNumberOrder(
+ const protobuf::Descriptor* message) {
+ std::vector<const protobuf::FieldDescriptor*> fields;
+ fields.reserve(message->field_count());
+ for (int i = 0; i < message->field_count(); i++) {
+ fields.push_back(message->field(i));
+ }
+ std::sort(fields.begin(), fields.end(),
+ [](const protobuf::FieldDescriptor* a,
+ const protobuf::FieldDescriptor* b) {
+ return a->number() < b->number();
+ });
+ return fields;
+}
+
+std::string ToCamelCase(const std::string& input, bool lower_first) {
+ bool capitalize_next = !lower_first;
+ std::string result;
+ result.reserve(input.size());
+
+ for (char character : input) {
+ if (character == '_') {
+ capitalize_next = true;
+ } else if (capitalize_next) {
+ result.push_back(absl::ascii_toupper(character));
+ capitalize_next = false;
+ } else {
+ result.push_back(character);
+ }
+ }
+
+ // Lower-case the first letter.
+ if (lower_first && !result.empty()) {
+ result[0] = absl::ascii_tolower(result[0]);
+ }
+
+ return result;
+}
+
+} // namespace protos_generator
diff --git a/upb/protos_generator/gen_utils.h b/upb/protos_generator/gen_utils.h
new file mode 100644
index 0000000..71f325f
--- /dev/null
+++ b/upb/protos_generator/gen_utils.h
@@ -0,0 +1,68 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_PROTOS_GENERATOR_GEN_UTILS_H_
+#define UPB_PROTOS_GENERATOR_GEN_UTILS_H_
+
+#include <string>
+#include <vector>
+
+#include "google/protobuf/descriptor.pb.h"
+#include "google/protobuf/compiler/code_generator.h"
+#include "google/protobuf/descriptor.h"
+
+namespace protos_generator {
+
+namespace protobuf = ::google::protobuf;
+
+enum class MessageClassType {
+ kMessage,
+ kMessageCProxy,
+ kMessageProxy,
+ kMessageAccess,
+};
+
+inline bool IsMapEntryMessage(const protobuf::Descriptor* descriptor) {
+ return descriptor->options().map_entry();
+}
+std::vector<const protobuf::EnumDescriptor*> SortedEnums(
+ const protobuf::FileDescriptor* file);
+std::vector<const protobuf::Descriptor*> SortedMessages(
+ const protobuf::FileDescriptor* file);
+std::vector<const protobuf::FieldDescriptor*> SortedExtensions(
+ const protobuf::FileDescriptor* file);
+std::vector<const protobuf::FieldDescriptor*> FieldNumberOrder(
+ const protobuf::Descriptor* message);
+
+std::string ToCamelCase(const std::string& input, bool lower_first);
+
+} // namespace protos_generator
+
+#endif // UPB_PROTOS_GENERATOR_GEN_UTILS_H_
diff --git a/upb/protos_generator/names.cc b/upb/protos_generator/names.cc
new file mode 100644
index 0000000..eb3fbee
--- /dev/null
+++ b/upb/protos_generator/names.cc
@@ -0,0 +1,201 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "protos_generator/names.h"
+
+#include <string>
+
+#include "upbc/keywords.h"
+
+namespace protos_generator {
+
+namespace protobuf = ::google::protobuf;
+
+namespace {
+
+std::string NamespaceFromPackageName(absl::string_view package_name) {
+ return absl::StrCat(absl::StrReplaceAll(package_name, {{".", "::"}}),
+ "::protos");
+}
+
+std::string DotsToColons(const std::string& name) {
+ return absl::StrReplaceAll(name, {{".", "::"}});
+}
+
+std::string Namespace(const std::string& package) {
+ if (package.empty()) return "";
+ return "::" + DotsToColons(package);
+}
+
+// Return the qualified C++ name for a file level symbol.
+std::string QualifiedFileLevelSymbol(const protobuf::FileDescriptor* file,
+ const std::string& name) {
+ if (file->package().empty()) {
+ return absl::StrCat("::", name);
+ }
+ // Append ::protos postfix to package name.
+ return absl::StrCat(Namespace(file->package()), "::protos::", name);
+}
+
+std::string CppTypeInternal(const protobuf::FieldDescriptor* field,
+ bool is_const, bool is_type_parameter) {
+ std::string maybe_const = is_const ? "const " : "";
+ switch (field->cpp_type()) {
+ case protobuf::FieldDescriptor::CPPTYPE_MESSAGE: {
+ if (is_type_parameter) {
+ return absl::StrCat(maybe_const,
+ QualifiedClassName(field->message_type()));
+ } else {
+ return absl::StrCat(maybe_const,
+ QualifiedClassName(field->message_type()), "*");
+ }
+ }
+ case protobuf::FieldDescriptor::CPPTYPE_BOOL:
+ return "bool";
+ case protobuf::FieldDescriptor::CPPTYPE_FLOAT:
+ return "float";
+ case protobuf::FieldDescriptor::CPPTYPE_INT32:
+ case protobuf::FieldDescriptor::CPPTYPE_ENUM:
+ return "int32_t";
+ case protobuf::FieldDescriptor::CPPTYPE_UINT32:
+ return "uint32_t";
+ case protobuf::FieldDescriptor::CPPTYPE_DOUBLE:
+ return "double";
+ case protobuf::FieldDescriptor::CPPTYPE_INT64:
+ return "int64_t";
+ case protobuf::FieldDescriptor::CPPTYPE_UINT64:
+ return "uint64_t";
+ case protobuf::FieldDescriptor::CPPTYPE_STRING:
+ return "absl::string_view";
+ default:
+ ABSL_LOG(FATAL) << "Unexpected type: " << field->cpp_type();
+ }
+}
+
+} // namespace
+
+std::string ClassName(const protobuf::Descriptor* descriptor) {
+ const protobuf::Descriptor* parent = descriptor->containing_type();
+ std::string res;
+ // Classes in global namespace without package names are prefixed
+ // by protos_ to avoid collision with C compiler structs defined in
+ // proto.upb.h.
+ if ((parent && parent->file()->package().empty()) ||
+ descriptor->file()->package().empty()) {
+ res = std::string(kNoPackageNamePrefix);
+ }
+ if (parent) res += ClassName(parent) + "_";
+ absl::StrAppend(&res, descriptor->name());
+ return ::upbc::ResolveKeywordConflict(res);
+}
+
+std::string QualifiedClassName(const protobuf::Descriptor* descriptor) {
+ return QualifiedFileLevelSymbol(descriptor->file(), ClassName(descriptor));
+}
+
+std::string QualifiedInternalClassName(const protobuf::Descriptor* descriptor) {
+ return QualifiedFileLevelSymbol(
+ descriptor->file(), absl::StrCat("internal::", ClassName(descriptor)));
+}
+
+std::string CppSourceFilename(const google::protobuf::FileDescriptor* file) {
+ return StripExtension(file->name()) + ".upb.proto.cc";
+}
+
+std::string ForwardingHeaderFilename(const google::protobuf::FileDescriptor* file) {
+ return StripExtension(file->name()) + ".upb.fwd.h";
+}
+
+std::string UpbCFilename(const google::protobuf::FileDescriptor* file) {
+ return StripExtension(file->name()) + ".upb.h";
+}
+
+std::string CppHeaderFilename(const google::protobuf::FileDescriptor* file) {
+ return StripExtension(file->name()) + ".upb.proto.h";
+}
+
+void WriteStartNamespace(const protobuf::FileDescriptor* file, Output& output) {
+ // Skip namespace generation if package name is not specified.
+ if (file->package().empty()) {
+ return;
+ }
+
+ output("namespace $0 {\n\n", NamespaceFromPackageName(file->package()));
+}
+
+void WriteEndNamespace(const protobuf::FileDescriptor* file, Output& output) {
+ if (file->package().empty()) {
+ return;
+ }
+ output("} // namespace $0\n\n", NamespaceFromPackageName(file->package()));
+}
+
+std::string CppConstType(const protobuf::FieldDescriptor* field) {
+ return CppTypeInternal(field, /* is_const= */ true,
+ /* is_type_parameter= */ false);
+}
+
+std::string CppTypeParameterName(const protobuf::FieldDescriptor* field) {
+ return CppTypeInternal(field, /* is_const= */ false,
+ /* is_type_parameter= */ true);
+}
+
+std::string MessageBaseType(const protobuf::FieldDescriptor* field,
+ bool is_const) {
+ ABSL_DCHECK(field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE);
+ std::string maybe_const = is_const ? "const " : "";
+ return maybe_const + QualifiedClassName(field->message_type());
+}
+
+std::string MessagePtrConstType(const protobuf::FieldDescriptor* field,
+ bool is_const) {
+ ABSL_DCHECK(field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE);
+ std::string maybe_const = is_const ? "const " : "";
+ return "::protos::Ptr<" + maybe_const +
+ QualifiedClassName(field->message_type()) + ">";
+}
+
+std::string MessageCProxyType(const protobuf::FieldDescriptor* field,
+ bool is_const) {
+ ABSL_DCHECK(field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE);
+ std::string maybe_const = is_const ? "const " : "";
+ return maybe_const + QualifiedInternalClassName(field->message_type()) +
+ "CProxy";
+}
+
+std::string MessageProxyType(const protobuf::FieldDescriptor* field,
+ bool is_const) {
+ ABSL_DCHECK(field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE);
+ std::string maybe_const = is_const ? "const " : "";
+ return maybe_const + QualifiedInternalClassName(field->message_type()) +
+ "Proxy";
+}
+
+} // namespace protos_generator
diff --git a/upb/protos_generator/names.h b/upb/protos_generator/names.h
new file mode 100644
index 0000000..efa01ce
--- /dev/null
+++ b/upb/protos_generator/names.h
@@ -0,0 +1,73 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_PROTOS_GENERATOR_NAMES_H_
+#define UPB_PROTOS_GENERATOR_NAMES_H_
+
+#include <string>
+
+#include "google/protobuf/descriptor.pb.h"
+#include "protos_generator/output.h"
+
+namespace protos_generator {
+
+namespace protobuf = ::google::protobuf;
+
+inline constexpr absl::string_view kNoPackageNamePrefix = "protos_";
+
+std::string ClassName(const protobuf::Descriptor* descriptor);
+std::string QualifiedClassName(const protobuf::Descriptor* descriptor);
+std::string QualifiedInternalClassName(const protobuf::Descriptor* descriptor);
+
+std::string CppSourceFilename(const google::protobuf::FileDescriptor* file);
+std::string ForwardingHeaderFilename(const google::protobuf::FileDescriptor* file);
+std::string UpbCFilename(const google::protobuf::FileDescriptor* file);
+std::string CppHeaderFilename(const google::protobuf::FileDescriptor* file);
+
+void WriteStartNamespace(const protobuf::FileDescriptor* file, Output& output);
+void WriteEndNamespace(const protobuf::FileDescriptor* file, Output& output);
+
+std::string CppConstType(const protobuf::FieldDescriptor* field);
+std::string CppTypeParameterName(const protobuf::FieldDescriptor* field);
+
+std::string MessageBaseType(const protobuf::FieldDescriptor* field,
+ bool is_const);
+// Generate protos::Ptr<const Model> to be used in accessors as public
+// signatures.
+std::string MessagePtrConstType(const protobuf::FieldDescriptor* field,
+ bool is_const);
+std::string MessageCProxyType(const protobuf::FieldDescriptor* field,
+ bool is_const);
+std::string MessageProxyType(const protobuf::FieldDescriptor* field,
+ bool is_const);
+
+} // namespace protos_generator
+
+#endif // UPB_PROTOS_GENERATOR_NAMES_H_
diff --git a/upb/protos_generator/output.cc b/upb/protos_generator/output.cc
new file mode 100644
index 0000000..72cd4e5
--- /dev/null
+++ b/upb/protos_generator/output.cc
@@ -0,0 +1,92 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "protos_generator/output.h"
+
+#include <string>
+
+#include "absl/strings/str_replace.h"
+
+namespace protos_generator {
+namespace {
+
+namespace protobuf = ::google::protobuf;
+
+} // namespace
+
+std::string StripExtension(absl::string_view fname) {
+ size_t lastdot = fname.find_last_of('.');
+ if (lastdot == std::string::npos) {
+ return std::string(fname);
+ }
+ return std::string(fname.substr(0, lastdot));
+}
+
+std::string ToCIdent(absl::string_view str) {
+ return absl::StrReplaceAll(str, {{".", "_"}, {"/", "_"}, {"-", "_"}});
+}
+
+std::string ToPreproc(absl::string_view str) {
+ return absl::AsciiStrToUpper(ToCIdent(str));
+}
+
+void EmitFileWarning(const protobuf::FileDescriptor* file, Output& output) {
+ output(
+ R"cc(
+ /* This file was generated by protos_generator (the upb C++ compiler) "
+ from the input
+ * file:
+ *
+ * $0
+ *
+ * Do not edit -- your changes will be discarded when the file is
+ * regenerated. */
+ )cc",
+ file->name());
+ output("\n");
+}
+
+std::string MessageName(const protobuf::Descriptor* descriptor) {
+ return ToCIdent(descriptor->full_name());
+}
+
+std::string FileLayoutName(const google::protobuf::FileDescriptor* file) {
+ return ToCIdent(file->name()) + "_upb_file_layout";
+}
+
+std::string CHeaderFilename(const google::protobuf::FileDescriptor* file) {
+ return StripExtension(file->name()) + ".upb.h";
+}
+
+std::string CSourceFilename(const google::protobuf::FileDescriptor* file) {
+ return StripExtension(file->name()) + ".upb.c";
+}
+
+} // namespace protos_generator
diff --git a/upb/protos_generator/output.h b/upb/protos_generator/output.h
new file mode 100644
index 0000000..53f0720
--- /dev/null
+++ b/upb/protos_generator/output.h
@@ -0,0 +1,174 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_PROTOS_GENERATOR_OUTPUT_H
+#define UPB_PROTOS_GENERATOR_OUTPUT_H
+
+#include <vector>
+
+#include "absl/log/absl_log.h"
+#include "absl/strings/str_replace.h"
+#include "absl/strings/substitute.h"
+#include "google/protobuf/descriptor.h"
+#include "google/protobuf/io/zero_copy_stream.h"
+
+namespace protos_generator {
+
+class Output {
+ public:
+ Output(google::protobuf::io::ZeroCopyOutputStream* stream) : stream_(stream) {}
+ ~Output() { stream_->BackUp((int)buffer_size_); }
+
+ template <class... Arg>
+ void operator()(absl::string_view format, const Arg&... arg) {
+ Write(absl::Substitute(format, arg...));
+ }
+
+ // Indentation size in characters.
+ static constexpr size_t kIndentationSize = 2;
+
+ void Indent() { Indent(kIndentationSize); }
+ void Indent(size_t size) { indent_ += size; }
+
+ void Outdent() { Outdent(kIndentationSize); }
+ void Outdent(size_t size) {
+ if (indent_ < size) {
+ ABSL_LOG(FATAL) << "mismatched Output indent/unindent calls";
+ }
+ indent_ -= size;
+ }
+
+ private:
+ void Write(absl::string_view data) {
+ std::string stripped;
+ if (absl::StartsWith(data, "\n ")) {
+ size_t indent = data.substr(1).find_first_not_of(' ');
+ if (indent > indent_) {
+ indent -= indent_;
+ }
+ if (indent != absl::string_view::npos) {
+ // Remove indentation from all lines.
+ auto line_prefix = data.substr(0, indent + 1);
+ // The final line has an extra newline and is indented two less, eg.
+ // R"cc(
+ // UPB_INLINE $0 $1_$2(const $1 *msg) {
+ // return $1_has_$2(msg) ? *UPB_PTR_AT(msg, $3, $0) : $4;
+ // }
+ // )cc",
+ std::string last_line_prefix = std::string(line_prefix);
+ last_line_prefix.resize(last_line_prefix.size() - 2);
+ data.remove_prefix(line_prefix.size());
+ stripped = absl::StrReplaceAll(
+ data, {{line_prefix, "\n"}, {last_line_prefix, "\n"}});
+ data = stripped;
+ }
+ } else {
+ WriteIndent();
+ }
+ WriteRaw(data);
+ }
+
+ void WriteRaw(absl::string_view data) {
+ while (!data.empty()) {
+ RefreshOutput();
+ size_t to_write = std::min(data.size(), buffer_size_);
+ memcpy(output_buffer_, data.data(), to_write);
+ data.remove_prefix(to_write);
+ output_buffer_ += to_write;
+ buffer_size_ -= to_write;
+ }
+ }
+
+ void WriteIndent() {
+ if (indent_ == 0) {
+ return;
+ }
+ size_t size = indent_;
+ while (size > buffer_size_) {
+ if (buffer_size_ > 0) {
+ memset(output_buffer_, ' ', buffer_size_);
+ }
+ size -= buffer_size_;
+ buffer_size_ = 0;
+ RefreshOutput();
+ }
+ memset(output_buffer_, ' ', size);
+ output_buffer_ += size;
+ buffer_size_ -= size;
+ }
+
+ void RefreshOutput() {
+ while (buffer_size_ == 0) {
+ void* void_buffer;
+ int size;
+ if (!stream_->Next(&void_buffer, &size)) {
+ fprintf(stderr, "upbc: Failed to write to to output\n");
+ abort();
+ }
+ output_buffer_ = static_cast<char*>(void_buffer);
+ buffer_size_ = size;
+ }
+ }
+
+ google::protobuf::io::ZeroCopyOutputStream* stream_;
+ char* output_buffer_ = nullptr;
+ size_t buffer_size_ = 0;
+ // Current indentation size in characters.
+ size_t indent_ = 0;
+ friend class OutputIndenter;
+};
+
+class OutputIndenter {
+ public:
+ OutputIndenter(Output& output)
+ : OutputIndenter(output, Output::kIndentationSize) {}
+ OutputIndenter(Output& output, size_t indent_size)
+ : indent_size_(indent_size), output_(output) {
+ output.Indent(indent_size);
+ }
+ ~OutputIndenter() { output_.Outdent(indent_size_); }
+
+ private:
+ size_t indent_size_;
+ Output& output_;
+};
+
+std::string StripExtension(absl::string_view fname);
+std::string ToCIdent(absl::string_view str);
+std::string ToPreproc(absl::string_view str);
+void EmitFileWarning(const google::protobuf::FileDescriptor* file, Output& output);
+std::string MessageName(const google::protobuf::Descriptor* descriptor);
+std::string FileLayoutName(const google::protobuf::FileDescriptor* file);
+std::string CHeaderFilename(const google::protobuf::FileDescriptor* file);
+std::string CSourceFilename(const google::protobuf::FileDescriptor* file);
+
+} // namespace protos_generator
+
+#endif // UPB_PROTOS_GENERATOR_OUTPUT_H
diff --git a/upb/protos_generator/protoc-gen-upb-protos.cc b/upb/protos_generator/protoc-gen-upb-protos.cc
new file mode 100644
index 0000000..58c3712
--- /dev/null
+++ b/upb/protos_generator/protoc-gen-upb-protos.cc
@@ -0,0 +1,282 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <memory>
+
+#include "google/protobuf/descriptor.pb.h"
+#include "google/protobuf/compiler/code_generator.h"
+#include "google/protobuf/compiler/plugin.h"
+#include "google/protobuf/descriptor.h"
+#include "protos_generator/gen_enums.h"
+#include "protos_generator/gen_extensions.h"
+#include "protos_generator/gen_messages.h"
+#include "protos_generator/gen_utils.h"
+#include "protos_generator/names.h"
+#include "protos_generator/output.h"
+#include "upbc/file_layout.h"
+
+namespace protos_generator {
+namespace {
+
+namespace protoc = ::google::protobuf::compiler;
+namespace protobuf = ::google::protobuf;
+using FileDescriptor = ::google::protobuf::FileDescriptor;
+
+void WriteSource(const protobuf::FileDescriptor* file, Output& output,
+ bool fasttable_enabled);
+void WriteHeader(const protobuf::FileDescriptor* file, Output& output);
+void WriteForwardingHeader(const protobuf::FileDescriptor* file,
+ Output& output);
+void WriteMessageImplementations(const protobuf::FileDescriptor* file,
+ Output& output);
+void WriteTypedefForwardingHeader(
+ const protobuf::FileDescriptor* file,
+ const std::vector<const protobuf::Descriptor*>& file_messages,
+ Output& output);
+void WriteHeaderMessageForwardDecls(
+ const protobuf::FileDescriptor* file,
+ Output& output);
+
+class Generator : public protoc::CodeGenerator {
+ public:
+ ~Generator() override {}
+ bool Generate(const protobuf::FileDescriptor* file,
+ const std::string& parameter, protoc::GeneratorContext* context,
+ std::string* error) const override;
+ uint64_t GetSupportedFeatures() const override {
+ return FEATURE_PROTO3_OPTIONAL;
+ }
+};
+
+bool Generator::Generate(const protobuf::FileDescriptor* file,
+ const std::string& parameter,
+ protoc::GeneratorContext* context,
+ std::string* error) const {
+ bool fasttable_enabled = false;
+ std::vector<std::pair<std::string, std::string>> params;
+ google::protobuf::compiler::ParseGeneratorParameter(parameter, ¶ms);
+
+ for (const auto& pair : params) {
+ if (pair.first == "fasttable") {
+ fasttable_enabled = true;
+ } else {
+ *error = "Unknown parameter: " + pair.first;
+ return false;
+ }
+ }
+
+ // Write model.upb.fwd.h
+ Output forwarding_header_output(
+ context->Open(ForwardingHeaderFilename(file)));
+ WriteForwardingHeader(file, forwarding_header_output);
+ // Write model.upb.proto.h
+ Output header_output(context->Open(CppHeaderFilename(file)));
+ WriteHeader(file, header_output);
+ // Write model.upb.proto.cc
+ Output cc_output(context->Open(CppSourceFilename(file)));
+ WriteSource(file, cc_output, fasttable_enabled);
+ return true;
+}
+
+// The forwarding header defines Access/Proxy/CProxy for message classes
+// used to include when referencing dependencies to prevent transitive
+// dependency headers from being included.
+void WriteForwardingHeader(const protobuf::FileDescriptor* file,
+ Output& output) {
+ EmitFileWarning(file, output);
+ output(
+ R"cc(
+#ifndef $0_UPB_FWD_H_
+#define $0_UPB_FWD_H_
+ )cc",
+ ToPreproc(file->name()));
+ output("\n");
+ for (int i = 0; i < file->public_dependency_count(); ++i) {
+ output("#include \"$0\"\n",
+ ForwardingHeaderFilename(file->public_dependency(i)));
+ }
+ if (file->public_dependency_count() > 0) {
+ output("\n");
+ }
+ const std::vector<const protobuf::Descriptor*> this_file_messages =
+ SortedMessages(file);
+ WriteTypedefForwardingHeader(file, this_file_messages, output);
+ output("#endif /* $0_UPB_FWD_H_ */\n", ToPreproc(file->name()));
+}
+
+void WriteHeader(const protobuf::FileDescriptor* file, Output& output) {
+ EmitFileWarning(file, output);
+ output(
+ R"cc(
+#ifndef $0_UPB_PROTO_H_
+#define $0_UPB_PROTO_H_
+
+#include "protos/protos.h"
+#include "protos/protos_internal.h"
+#include "protos/repeated_field.h"
+
+#include "absl/strings/string_view.h"
+#include "absl/status/statusor.h"
+ )cc",
+ ToPreproc(file->name()));
+
+ // Import headers for proto public dependencies.
+ for (int i = 0; i < file->public_dependency_count(); i++) {
+ if (i == 0) {
+ output("// Public Imports.\n");
+ }
+ output("#include \"$0\"\n", CppHeaderFilename(file->public_dependency(i)));
+ if (i == file->public_dependency_count() - 1) {
+ output("\n");
+ }
+ }
+
+ output("#include \"upb/port/def.inc\"\n");
+
+ const std::vector<const protobuf::Descriptor*> this_file_messages =
+ SortedMessages(file);
+ const std::vector<const protobuf::FieldDescriptor*> this_file_exts =
+ SortedExtensions(file);
+
+ if (!this_file_messages.empty()) {
+ output("\n");
+ }
+
+ WriteHeaderMessageForwardDecls(file, output);
+ WriteStartNamespace(file, output);
+
+ std::vector<const protobuf::EnumDescriptor*> this_file_enums =
+ SortedEnums(file);
+
+ // Write Class and Enums.
+ WriteEnumDeclarations(this_file_enums, output);
+ output("\n");
+
+ for (auto message : this_file_messages) {
+ WriteMessageClassDeclarations(message, this_file_exts, this_file_enums,
+ output);
+ }
+ output("\n");
+
+ WriteExtensionIdentifiersHeader(this_file_exts, output);
+ output("\n");
+
+ WriteEndNamespace(file, output);
+
+ output("\n#include \"upb/port/undef.inc\"\n\n");
+ // End of "C" section.
+
+ output("#endif /* $0_UPB_PROTO_H_ */\n", ToPreproc(file->name()));
+}
+
+// Writes a .upb.cc source file.
+void WriteSource(const protobuf::FileDescriptor* file, Output& output,
+ bool fasttable_enabled) {
+ EmitFileWarning(file, output);
+
+ output(
+ R"cc(
+#include <stddef.h>
+#include "absl/strings/string_view.h"
+#include "protos/protos.h"
+#include "$0"
+ )cc",
+ CppHeaderFilename(file));
+
+ for (int i = 0; i < file->dependency_count(); i++) {
+ output("#include \"$0\"\n", CppHeaderFilename(file->dependency(i)));
+ }
+ output("#include \"upb/port/def.inc\"\n");
+
+ WriteStartNamespace(file, output);
+ WriteMessageImplementations(file, output);
+ const std::vector<const protobuf::FieldDescriptor*> this_file_exts =
+ SortedExtensions(file);
+ WriteExtensionIdentifiers(this_file_exts, output);
+ WriteEndNamespace(file, output);
+
+ output("#include \"upb/port/undef.inc\"\n\n");
+}
+
+void WriteMessageImplementations(const protobuf::FileDescriptor* file,
+ Output& output) {
+ const std::vector<const protobuf::FieldDescriptor*> file_exts =
+ SortedExtensions(file);
+ const std::vector<const protobuf::Descriptor*> this_file_messages =
+ SortedMessages(file);
+ for (auto message : this_file_messages) {
+ WriteMessageImplementation(message, file_exts, output);
+ }
+}
+
+void WriteTypedefForwardingHeader(
+ const protobuf::FileDescriptor* file,
+ const std::vector<const protobuf::Descriptor*>& file_messages,
+ Output& output) {
+ WriteStartNamespace(file, output);
+
+ // Forward-declare types defined in this file.
+ for (auto message : file_messages) {
+ output(
+ R"cc(
+ class $0;
+ namespace internal {
+ class $0Access;
+ class $0Proxy;
+ class $0CProxy;
+ } // namespace internal
+ )cc",
+ ClassName(message));
+ }
+ output("\n");
+ WriteEndNamespace(file, output);
+}
+
+/// Writes includes for upb C minitables and fwd.h for transitive typedefs.
+void WriteHeaderMessageForwardDecls(
+ const protobuf::FileDescriptor* file,
+ Output& output) {
+ // Import forward-declaration of types defined in this file.
+ output("#include \"$0\"\n", UpbCFilename(file));
+ output("#include \"$0\"\n", ForwardingHeaderFilename(file));
+ // Import forward-declaration of types in dependencies.
+ for (int i = 0; i < file->dependency_count(); ++i) {
+ output("#include \"$0\"\n", ForwardingHeaderFilename(file->dependency(i)));
+ }
+ output("\n");
+}
+
+} // namespace
+} // namespace protos_generator
+
+int main(int argc, char** argv) {
+ protos_generator::Generator generator_cc;
+ return google::protobuf::compiler::PluginMain(argc, argv, &generator_cc);
+}
diff --git a/upb/protos_generator/tests/BUILD b/upb/protos_generator/tests/BUILD
new file mode 100644
index 0000000..71f5f3c
--- /dev/null
+++ b/upb/protos_generator/tests/BUILD
@@ -0,0 +1,157 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load(
+ "//bazel:build_defs.bzl",
+ "UPB_DEFAULT_CPPOPTS",
+)
+load(
+ "//protos/bazel:upb_cc_proto_library.bzl",
+ "upb_cc_proto_library",
+)
+load(
+ "//bazel:upb_proto_library.bzl",
+ "upb_proto_library",
+)
+load(
+ "@rules_cc//cc:defs.bzl",
+ "cc_proto_library",
+)
+
+# begin:google_only
+# package(default_applicable_licenses = ["//:license"])
+# end:google_only
+
+licenses(["notice"])
+
+proto_library(
+ name = "test_model_proto",
+ srcs = [
+ "child_model.proto",
+ "test_enum.proto",
+ "test_extension.proto",
+ "test_model.proto",
+ ],
+)
+
+proto_library(
+ name = "no_package_proto",
+ srcs = [
+ "no_package.proto",
+ ],
+)
+
+proto_library(
+ name = "naming_conflict_proto",
+ srcs = [
+ "naming_conflict.proto",
+ ],
+)
+
+proto_library(
+ name = "no_package_enum_user_proto",
+ srcs = [
+ "no_package_enum_user.proto",
+ ],
+ deps = [":no_package_proto"],
+)
+
+upb_proto_library(
+ name = "test_model_upb_proto",
+ visibility = [
+ "//protos:__pkg__",
+ ],
+ deps = [":test_model_proto"],
+)
+
+upb_cc_proto_library(
+ name = "test_model_upb_cc_proto",
+ visibility = ["//protos:__pkg__"],
+ deps = [":test_model_proto"],
+)
+
+upb_cc_proto_library(
+ name = "naming_conflict_upb_cc_proto",
+ visibility = [
+ "//visibility:private", # Only private by automation, not intent. Owner may accept CLs adding visibility. See go/scheuklappen#explicit-private.
+ ],
+ deps = [":naming_conflict_proto"],
+)
+
+upb_cc_proto_library(
+ name = "no_package_upb_cc_proto",
+ deps = [
+ ":no_package_proto",
+ ],
+)
+
+upb_cc_proto_library(
+ name = "no_package_enum_user_upb_cc_proto",
+ deps = [
+ ":no_package_enum_user_proto",
+ ],
+)
+
+cc_proto_library(
+ name = "test_model_cc_proto",
+ deps = [":test_model_proto"],
+)
+
+# begin:google_only
+# proto_library(
+# name = "legacy_name_proto",
+# srcs = [
+# "legacy-name.proto",
+# ],
+# )
+#
+# upb_cc_proto_library(
+# name = "legacy_name_test_proto",
+# visibility = [
+# "//visibility:private", # Only private by automation, not intent. Owner may accept CLs adding visibility. See go/scheuklappen#explicit-private.
+# ],
+# deps = [":legacy_name_proto"],
+# )
+# end:google_only
+
+cc_test(
+ name = "test_generated_cc_code",
+ srcs = ["test_generated.cc"],
+ copts = UPB_DEFAULT_CPPOPTS,
+ deps = [
+ # begin:google_only
+# ":legacy_name_test_proto",
+ # end:google_only
+ ":no_package_upb_cc_proto",
+ ":test_model_upb_cc_proto",
+ ":test_model_upb_proto",
+ ":naming_conflict_upb_cc_proto",
+ "@com_google_googletest//:gtest_main",
+ "@com_google_absl//absl/status:statusor",
+ "@com_google_absl//absl/strings",
+ "//protos",
+ "//protos:repeated_field",
+ ],
+)
diff --git a/upb/protos_generator/tests/child_model.proto b/upb/protos_generator/tests/child_model.proto
new file mode 100644
index 0000000..c7af6f1
--- /dev/null
+++ b/upb/protos_generator/tests/child_model.proto
@@ -0,0 +1,49 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto3";
+
+package protos_generator.test;
+
+import public "protos_generator/tests/test_enum.proto";
+
+message ChildModel1 {
+ optional bool child_b1 = 44;
+ optional string child_str1 = 56;
+}
+
+message ChildModel3 {
+ string sub_key = 1;
+ bool bool1 = 2;
+ int32 i32 = 3;
+ optional string opt_str = 4;
+ optional bool opt_bool = 5;
+ optional int32 opt_i32 = 6;
+}
diff --git a/upb/protos_generator/tests/legacy-name.proto b/upb/protos_generator/tests/legacy-name.proto
new file mode 100644
index 0000000..0f256ad
--- /dev/null
+++ b/upb/protos_generator/tests/legacy-name.proto
@@ -0,0 +1,40 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto3";
+
+package protos_generator.test;
+
+// option java_multiple_files = true;
+
+enum LegacyEnum {
+ PHASE_DEFAULT = 0;
+ PHASE_BUSY = 1;
+}
diff --git a/upb/protos_generator/tests/naming_conflict.proto b/upb/protos_generator/tests/naming_conflict.proto
new file mode 100644
index 0000000..414f7c6
--- /dev/null
+++ b/upb/protos_generator/tests/naming_conflict.proto
@@ -0,0 +1,38 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto3";
+
+package protos_generator.test;
+
+message HasChildCount {
+ optional HasChildCount has_child_count = 1;
+ optional int32 child_count = 2;
+}
diff --git a/upb/protos_generator/tests/no_package.proto b/upb/protos_generator/tests/no_package.proto
new file mode 100644
index 0000000..37d6df8
--- /dev/null
+++ b/upb/protos_generator/tests/no_package.proto
@@ -0,0 +1,48 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto2";
+
+// option java_multiple_files = true;
+
+enum EnumWithNoPackage {
+ CELSIUS = 1;
+ FAHRENHEIT = 2;
+}
+
+message MessageWithEnumUpbTest {
+ enum EnumWithNoPackageInMessage {
+ UNKNOWN = 0;
+ AB_1 = 1;
+ CD_2 = 2;
+ EF_3 = 3;
+ GH_4 = 4;
+ }
+}
diff --git a/upb/protos_generator/tests/no_package_enum_user.proto b/upb/protos_generator/tests/no_package_enum_user.proto
new file mode 100644
index 0000000..ebf8c67
--- /dev/null
+++ b/upb/protos_generator/tests/no_package_enum_user.proto
@@ -0,0 +1,41 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto2";
+
+package protos_generator.tests;
+
+import "protos_generator/tests/no_package.proto";
+
+// option java_multiple_files = true;
+
+message MyMessage {
+ optional MessageWithEnumUpbTest my_type = 1;
+}
diff --git a/upb/protos_generator/tests/test_enum.proto b/upb/protos_generator/tests/test_enum.proto
new file mode 100644
index 0000000..ed41c3c
--- /dev/null
+++ b/upb/protos_generator/tests/test_enum.proto
@@ -0,0 +1,40 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto3";
+
+package protos_generator.test;
+
+enum TestEnum {
+ DEVICE_UNKNOWN = 0;
+ DEVICE_KEYBOARD = 1;
+ DEVICE_MOUSE = 2;
+ DEVICE_MONITOR = 3;
+}
diff --git a/upb/protos_generator/tests/test_extension.proto b/upb/protos_generator/tests/test_extension.proto
new file mode 100644
index 0000000..3a1a3c6
--- /dev/null
+++ b/upb/protos_generator/tests/test_extension.proto
@@ -0,0 +1,42 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto2";
+
+package protos_generator.test.someotherpackage;
+
+import "protos_generator/tests/test_model.proto";
+
+// Define extension that is extending proto outside this package with a type
+// defined in different file.
+
+extend TestModel {
+ optional ThemeExtension styling = 13001;
+}
diff --git a/upb/protos_generator/tests/test_generated.cc b/upb/protos_generator/tests/test_generated.cc
new file mode 100644
index 0000000..6bfc855
--- /dev/null
+++ b/upb/protos_generator/tests/test_generated.cc
@@ -0,0 +1,1011 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/status/statusor.h"
+#include "absl/strings/string_view.h"
+#include "protos/protos.h"
+#include "protos/repeated_field.h"
+#include "protos/repeated_field_iterator.h"
+#include "protos_generator/tests/child_model.upb.proto.h"
+#include "protos_generator/tests/no_package.upb.proto.h"
+#include "protos_generator/tests/test_model.upb.proto.h"
+
+using ::protos_generator::test::protos::ChildModel1;
+using ::protos_generator::test::protos::other_ext;
+using ::protos_generator::test::protos::RED;
+using ::protos_generator::test::protos::TestEnum;
+using ::protos_generator::test::protos::TestModel;
+using ::protos_generator::test::protos::TestModel_Category;
+using ::protos_generator::test::protos::TestModel_Category_IMAGES;
+using ::protos_generator::test::protos::TestModel_Category_NEWS;
+using ::protos_generator::test::protos::TestModel_Category_VIDEO;
+using ::protos_generator::test::protos::theme;
+using ::protos_generator::test::protos::ThemeExtension;
+using ::testing::ElementsAre;
+
+TEST(CppGeneratedCode, Constructor) { TestModel test_model; }
+
+TEST(CppGeneratedCode, MessageEnum) { EXPECT_EQ(5, TestModel_Category_IMAGES); }
+
+TEST(CppGeneratedCode, ImportedEnum) { EXPECT_EQ(3, TestEnum::DEVICE_MONITOR); }
+
+TEST(CppGeneratedCode, Enum) { EXPECT_EQ(1, RED); }
+
+TEST(CppGeneratedCode, EnumNoPackage) { EXPECT_EQ(1, ::protos_CELSIUS); }
+
+TEST(CppGeneratedCode, MessageEnumType) {
+ TestModel_Category category1 = TestModel_Category_IMAGES;
+ TestModel::Category category2 = TestModel::IMAGES;
+ EXPECT_EQ(category1, category2);
+}
+
+TEST(CppGeneratedCode, MessageEnumValue) {
+ EXPECT_EQ(TestModel_Category_IMAGES, TestModel::IMAGES);
+}
+
+TEST(CppGeneratedCode, ArenaConstructor) {
+ ::protos::Arena arena;
+ auto testModel = ::protos::CreateMessage<TestModel>(arena);
+ EXPECT_EQ(false, testModel.has_b1());
+}
+
+TEST(CppGeneratedCode, Booleans) {
+ ::protos::Arena arena;
+ auto testModel = ::protos::CreateMessage<TestModel>(arena);
+ EXPECT_FALSE(testModel.b1());
+ testModel.set_b1(true);
+ EXPECT_TRUE(testModel.b1());
+ testModel.set_b1(false);
+ EXPECT_FALSE(testModel.b1());
+ testModel.set_b1(true);
+ EXPECT_TRUE(testModel.b1());
+ testModel.clear_b1();
+ EXPECT_FALSE(testModel.has_b1());
+}
+
+TEST(CppGeneratedCode, ScalarInt32) {
+ ::protos::Arena arena;
+ auto testModel = ::protos::CreateMessage<TestModel>(arena);
+ // Test int32 defaults.
+ EXPECT_EQ(testModel.value(), 0);
+ EXPECT_FALSE(testModel.has_value());
+ // Floating point defautls.
+ EXPECT_EQ(std::numeric_limits<float>::infinity(),
+ testModel.float_value_with_default());
+ EXPECT_EQ(-std::numeric_limits<double>::infinity(),
+ testModel.double_value_with_default());
+
+ // Set value.
+ testModel.set_value(5);
+ EXPECT_TRUE(testModel.has_value());
+ EXPECT_EQ(testModel.value(), 5);
+ // Change value.
+ testModel.set_value(10);
+ EXPECT_TRUE(testModel.has_value());
+ EXPECT_EQ(testModel.value(), 10);
+ // Clear value.
+ testModel.clear_value();
+ EXPECT_FALSE(testModel.has_value());
+ EXPECT_EQ(testModel.value(), 0);
+}
+
+const char kTestStr1[] = "abcdefg";
+const char kTestStr2[] = "just another test string";
+
+TEST(CppGeneratedCode, Strings) {
+ TestModel testModel;
+ testModel.set_str1(kTestStr1);
+ testModel.set_str2(kTestStr2);
+ EXPECT_EQ(testModel.str1(), kTestStr1);
+ EXPECT_EQ(testModel.str2(), kTestStr2);
+ EXPECT_TRUE(testModel.has_str1());
+ EXPECT_TRUE(testModel.has_str2());
+
+ testModel.clear_str1();
+ EXPECT_FALSE(testModel.has_str1());
+ EXPECT_TRUE(testModel.has_str2());
+}
+
+TEST(CppGeneratedCode, ScalarUInt32) {
+ ::protos::Arena arena;
+ auto testModel = ::protos::CreateMessage<TestModel>(arena);
+ // Test defaults.
+ EXPECT_EQ(testModel.optional_uint32(), 0);
+ EXPECT_FALSE(testModel.has_optional_uint32());
+ // Set value.
+ testModel.set_optional_uint32(0xA0001000);
+ EXPECT_TRUE(testModel.has_optional_uint32());
+ EXPECT_EQ(testModel.optional_uint32(), 0xA0001000);
+ // Change value.
+ testModel.set_optional_uint32(0x70002000);
+ EXPECT_TRUE(testModel.has_optional_uint32());
+ EXPECT_EQ(testModel.optional_uint32(), 0x70002000);
+ // Clear value.
+ testModel.clear_optional_uint32();
+ EXPECT_FALSE(testModel.has_optional_uint32());
+ EXPECT_EQ(testModel.optional_uint32(), 0);
+}
+
+TEST(CppGeneratedCode, ScalarInt64) {
+ ::protos::Arena arena;
+ auto testModel = ::protos::CreateMessage<TestModel>(arena);
+ // Test defaults.
+ EXPECT_EQ(testModel.optional_int64(), 0);
+ EXPECT_FALSE(testModel.has_optional_int64());
+ // Set value.
+ testModel.set_optional_int64(0xFF00CCDDA0001000);
+ EXPECT_TRUE(testModel.has_optional_int64());
+ EXPECT_EQ(testModel.optional_int64(), 0xFF00CCDDA0001000);
+ // Change value.
+ testModel.set_optional_int64(0xFF00CCDD70002000);
+ EXPECT_TRUE(testModel.has_optional_int64());
+ EXPECT_EQ(testModel.optional_int64(), 0xFF00CCDD70002000);
+ // Clear value.
+ testModel.clear_optional_int64();
+ EXPECT_FALSE(testModel.has_optional_int64());
+ EXPECT_EQ(testModel.optional_int64(), 0);
+ // Set after clear.
+ testModel.set_optional_int64(0xFF00CCDDA0001000);
+ EXPECT_TRUE(testModel.has_optional_int64());
+ EXPECT_EQ(testModel.optional_int64(), 0xFF00CCDDA0001000);
+}
+
+TEST(CppGeneratedCode, ScalarFloat) {
+ ::protos::Arena arena;
+ auto testModel = ::protos::CreateMessage<TestModel>(arena);
+ // Test defaults.
+ EXPECT_EQ(testModel.optional_float(), 0.0f);
+ EXPECT_FALSE(testModel.has_optional_float());
+ EXPECT_EQ(std::numeric_limits<float>::infinity(),
+ testModel.float_value_with_default());
+ EXPECT_EQ(-std::numeric_limits<double>::infinity(),
+ testModel.double_value_with_default());
+ // Set value.
+ testModel.set_optional_float(3.14159265f);
+ EXPECT_TRUE(testModel.has_optional_float());
+ EXPECT_NEAR(testModel.optional_float(), 3.14159265f, 1e-9f);
+ // Change value.
+ testModel.set_optional_float(-2.0f);
+ EXPECT_TRUE(testModel.has_optional_float());
+ EXPECT_NEAR(testModel.optional_float(), -2, 1e-9f);
+ // Clear value.
+ testModel.clear_optional_float();
+ EXPECT_FALSE(testModel.has_optional_float());
+ EXPECT_EQ(testModel.optional_float(), 0.0f);
+ // Set after clear.
+ testModel.set_optional_float(3.14159265f);
+ EXPECT_TRUE(testModel.has_optional_float());
+ EXPECT_NEAR(testModel.optional_float(), 3.14159265f, 1e-9f);
+}
+
+TEST(CppGeneratedCode, ScalarDouble) {
+ ::protos::Arena arena;
+ auto testModel = ::protos::CreateMessage<TestModel>(arena);
+ // Test defaults.
+ EXPECT_EQ(testModel.optional_double(), 0.0);
+ EXPECT_FALSE(testModel.has_optional_double());
+ // Set value.
+ testModel.set_optional_double(3.141592653589793);
+ EXPECT_TRUE(testModel.has_optional_double());
+ EXPECT_NEAR(testModel.optional_double(), 3.141592653589793, 1e-16f);
+ // Change value.
+ testModel.set_optional_double(-1.0);
+ EXPECT_TRUE(testModel.has_optional_double());
+ EXPECT_NEAR(testModel.optional_double(), -1.0, 1e-16f);
+ // Clear value.
+ testModel.clear_optional_double();
+ EXPECT_FALSE(testModel.has_optional_double());
+ EXPECT_EQ(testModel.optional_double(), 0.0f);
+ // Set after clear.
+ testModel.set_optional_double(3.141592653589793);
+ EXPECT_TRUE(testModel.has_optional_double());
+ EXPECT_NEAR(testModel.optional_double(), 3.141592653589793, 1e-16f);
+}
+
+TEST(CppGeneratedCode, Enums) {
+ ::protos::Arena arena;
+ auto testModel = ::protos::CreateMessage<TestModel>(arena);
+
+ // Check enum default value.
+ EXPECT_EQ(TestModel_Category_IMAGES, 5);
+
+ // Test defaults.
+ EXPECT_FALSE(testModel.has_category());
+ EXPECT_EQ(testModel.category(), TestModel_Category_IMAGES);
+ // Set value.
+ testModel.set_category(TestModel_Category_NEWS);
+ EXPECT_TRUE(testModel.has_category());
+ EXPECT_EQ(testModel.category(), TestModel_Category_NEWS);
+ // Change value.
+ testModel.set_category(TestModel_Category_VIDEO);
+ EXPECT_TRUE(testModel.has_category());
+ EXPECT_EQ(testModel.category(), TestModel_Category_VIDEO);
+ // Clear value.
+ testModel.clear_category();
+ EXPECT_FALSE(testModel.has_category());
+ EXPECT_EQ(testModel.category(), TestModel_Category_IMAGES);
+ // Set after clear.
+ testModel.set_category(TestModel_Category_VIDEO);
+ EXPECT_TRUE(testModel.has_category());
+ EXPECT_EQ(testModel.category(), TestModel_Category_VIDEO);
+}
+
+TEST(CppGeneratedCode, FieldWithDefaultValue) {
+ ::protos::Arena arena;
+ auto testModel = ::protos::CreateMessage<TestModel>(arena);
+
+ EXPECT_FALSE(testModel.has_int_value_with_default());
+ EXPECT_EQ(testModel.int_value_with_default(), 65);
+ testModel.set_int_value_with_default(10);
+ EXPECT_EQ(testModel.int_value_with_default(), 10);
+
+ EXPECT_FALSE(testModel.has_string_value_with_default());
+ EXPECT_EQ(testModel.string_value_with_default(), "hello");
+ testModel.set_string_value_with_default("new string");
+ EXPECT_EQ(testModel.string_value_with_default(), "new string");
+}
+
+TEST(CppGeneratedCode, OneOfFields) {
+ ::protos::Arena arena;
+ auto test_model = ::protos::CreateMessage<TestModel>(arena);
+
+ EXPECT_FALSE(test_model.has_oneof_member1());
+ EXPECT_FALSE(test_model.has_oneof_member2());
+ EXPECT_EQ(TestModel::CHILD_ONEOF1_NOT_SET, test_model.child_oneof1_case());
+
+ test_model.set_oneof_member1("one of string");
+ EXPECT_TRUE(test_model.has_oneof_member1());
+ EXPECT_FALSE(test_model.has_oneof_member2());
+ EXPECT_EQ(test_model.oneof_member1(), "one of string");
+ EXPECT_EQ(TestModel::kOneofMember1, test_model.child_oneof1_case());
+
+ test_model.set_oneof_member2(true);
+ EXPECT_FALSE(test_model.has_oneof_member1());
+ EXPECT_TRUE(test_model.has_oneof_member2());
+ EXPECT_EQ(test_model.oneof_member2(), true);
+ EXPECT_EQ(TestModel::kOneofMember2, test_model.child_oneof1_case());
+
+ test_model.clear_oneof_member2();
+ EXPECT_FALSE(test_model.has_oneof_member1());
+ EXPECT_FALSE(test_model.has_oneof_member2());
+ EXPECT_EQ(test_model.oneof_member1(), "");
+ EXPECT_EQ(test_model.oneof_member2(), false);
+ EXPECT_EQ(TestModel::CHILD_ONEOF1_NOT_SET, test_model.child_oneof1_case());
+}
+
+TEST(CppGeneratedCode, Messages) {
+ ::protos::Arena arena;
+ auto test_model = ::protos::CreateMessage<TestModel>(arena);
+ EXPECT_EQ(false, test_model.has_child_model_1());
+ auto child_model = test_model.child_model_1();
+ EXPECT_EQ(false, child_model->has_child_b1());
+ EXPECT_EQ(false, child_model->child_b1());
+ auto mutable_child = test_model.mutable_child_model_1();
+ mutable_child->set_child_b1(true);
+ EXPECT_EQ(true, mutable_child->has_child_b1());
+ EXPECT_EQ(true, mutable_child->child_b1());
+ // The View should not change due to mutation since it
+ // is default_instance.
+ EXPECT_EQ(false, child_model->has_child_b1());
+ // Readonly View should now show change.
+ child_model = test_model.child_model_1();
+ EXPECT_EQ(true, child_model->has_child_b1());
+ EXPECT_EQ(true, child_model->child_b1());
+ // Clear message field.
+ EXPECT_EQ(true, test_model.has_child_model_1());
+ test_model.clear_child_model_1();
+ EXPECT_EQ(false, test_model.has_child_model_1());
+}
+
+TEST(CppGeneratedCode, NestedMessages) {
+ ::protos::Arena arena;
+ auto test_model = ::protos::CreateMessage<TestModel>(arena);
+ auto nested_child = test_model.nested_child_1();
+ EXPECT_EQ(0, nested_child->nested_child_name().size());
+ auto mutable_nested_child = test_model.mutable_nested_child_1();
+ EXPECT_EQ(false, mutable_nested_child->has_nested_child_name());
+ mutable_nested_child->set_nested_child_name(kTestStr1);
+ EXPECT_EQ(true, mutable_nested_child->has_nested_child_name());
+}
+
+TEST(CppGeneratedCode, RepeatedMessages) {
+ ::protos::Arena arena;
+ auto test_model = ::protos::CreateMessage<TestModel>(arena);
+ EXPECT_EQ(0, test_model.child_models_size());
+ // Should be able to clear repeated field when empty.
+ test_model.mutable_child_models()->clear();
+ EXPECT_EQ(0, test_model.child_models_size());
+ // Add 2 children.
+ auto new_child = test_model.add_child_models();
+ EXPECT_EQ(true, new_child.ok());
+ new_child.value()->set_child_str1(kTestStr1);
+ new_child = test_model.add_child_models();
+ EXPECT_EQ(true, new_child.ok());
+ new_child.value()->set_child_str1(kTestStr2);
+ EXPECT_EQ(2, test_model.child_models_size());
+ // Mutable access.
+ auto mutable_first = test_model.mutable_child_models(0);
+ EXPECT_EQ(mutable_first->child_str1(), kTestStr1);
+ mutable_first->set_child_str1("change1");
+ auto mutable_second = test_model.mutable_child_models(1);
+ EXPECT_EQ(mutable_second->child_str1(), kTestStr2);
+ mutable_second->set_child_str1("change2");
+ // Check mutations using views.
+ auto view_first = test_model.child_models(0);
+ EXPECT_EQ(view_first->child_str1(), "change1");
+ auto view_second = test_model.child_models(1);
+ EXPECT_EQ(view_second->child_str1(), "change2");
+}
+
+TEST(CppGeneratedCode, RepeatedScalar) {
+ ::protos::Arena arena;
+ auto test_model = ::protos::CreateMessage<TestModel>(arena);
+ EXPECT_EQ(0, test_model.value_array_size());
+ // Should be able to clear repeated field when empty.
+ test_model.mutable_value_array()->clear();
+ EXPECT_EQ(0, test_model.value_array_size());
+ // Add 2 children.
+ EXPECT_EQ(true, test_model.add_value_array(5));
+ EXPECT_EQ(true, test_model.add_value_array(6));
+ EXPECT_EQ(2, test_model.value_array_size());
+ EXPECT_EQ(5, test_model.value_array(0));
+ EXPECT_EQ(6, test_model.value_array(1));
+ EXPECT_EQ(true, test_model.resize_value_array(3));
+ EXPECT_EQ(3, test_model.value_array_size());
+ test_model.set_value_array(2, 7);
+ EXPECT_EQ(5, test_model.value_array(0));
+ EXPECT_EQ(6, test_model.value_array(1));
+ EXPECT_EQ(7, test_model.value_array(2));
+}
+
+TEST(CppGeneratedCode, RepeatedFieldClear) {
+ ::protos::Arena arena;
+ auto test_model = ::protos::CreateMessage<TestModel>(arena);
+ test_model.mutable_value_array()->push_back(5);
+ test_model.mutable_value_array()->push_back(16);
+ test_model.mutable_value_array()->push_back(27);
+ ASSERT_EQ(test_model.mutable_value_array()->size(), 3);
+ test_model.mutable_value_array()->clear();
+ EXPECT_EQ(test_model.mutable_value_array()->size(), 0);
+}
+
+TEST(CppGeneratedCode, RepeatedFieldProxyForScalars) {
+ ::protos::Arena arena;
+ auto test_model = ::protos::CreateMessage<TestModel>(arena);
+ EXPECT_EQ(0, test_model.value_array().size());
+ EXPECT_EQ(0, test_model.mutable_value_array()->size());
+
+ test_model.mutable_value_array()->push_back(5);
+ test_model.mutable_value_array()->push_back(16);
+ test_model.mutable_value_array()->push_back(27);
+
+ ASSERT_EQ(test_model.mutable_value_array()->size(), 3);
+ EXPECT_EQ((*test_model.mutable_value_array())[0], 5);
+ EXPECT_EQ((*test_model.mutable_value_array())[1], 16);
+ EXPECT_EQ((*test_model.mutable_value_array())[2], 27);
+
+ const auto value_array = test_model.value_array();
+ ASSERT_EQ(value_array.size(), 3);
+ EXPECT_EQ(value_array[0], 5);
+ EXPECT_EQ(value_array[1], 16);
+ EXPECT_EQ(value_array[2], 27);
+
+ EXPECT_THAT(value_array, ElementsAre(5, 16, 27));
+
+ EXPECT_THAT(std::vector(value_array.begin(), value_array.end()),
+ ElementsAre(5, 16, 27));
+ EXPECT_THAT(std::vector(value_array.cbegin(), value_array.cend()),
+ ElementsAre(5, 16, 27));
+ EXPECT_THAT(std::vector(value_array.rbegin(), value_array.rend()),
+ ElementsAre(27, 16, 5));
+ EXPECT_THAT(std::vector(value_array.crbegin(), value_array.crend()),
+ ElementsAre(27, 16, 5));
+}
+
+TEST(CppGeneratedCode, RepeatedScalarIterator) {
+ ::protos::Arena arena;
+ auto test_model = ::protos::CreateMessage<TestModel>(arena);
+ test_model.mutable_value_array()->push_back(5);
+ test_model.mutable_value_array()->push_back(16);
+ test_model.mutable_value_array()->push_back(27);
+ int sum = 0;
+ // Access by value.
+ const ::protos::RepeatedField<int32_t>::CProxy rep1 =
+ test_model.value_array();
+ for (auto i : rep1) {
+ sum += i;
+ }
+ EXPECT_EQ(sum, 5 + 16 + 27);
+ // Access by const reference.
+ sum = 0;
+ for (const int& i : *test_model.mutable_value_array()) {
+ sum += i;
+ }
+ EXPECT_EQ(sum, 5 + 16 + 27);
+ // Access by forwarding reference.
+ sum = 0;
+ for (auto&& i : *test_model.mutable_value_array()) {
+ sum += i;
+ }
+ EXPECT_EQ(sum, 5 + 16 + 27);
+ // Test iterator operators.
+ auto begin = test_model.value_array().begin();
+ auto end = test_model.value_array().end();
+ sum = 0;
+ for (auto it = begin; it != end; ++it) {
+ sum += *it;
+ }
+ EXPECT_EQ(sum, 5 + 16 + 27);
+ auto it = begin;
+ ++it;
+ EXPECT_TRUE(begin < it);
+ EXPECT_TRUE(begin <= it);
+ it = end;
+ EXPECT_TRUE(it == end);
+ EXPECT_TRUE(it > begin);
+ EXPECT_TRUE(it >= begin);
+ EXPECT_TRUE(it != begin);
+ // difference type
+ it = end;
+ --it;
+ --it;
+ EXPECT_EQ(end - it, 2);
+ it = begin;
+ EXPECT_EQ(it[0], 5);
+ EXPECT_EQ(it[1], 16);
+ EXPECT_EQ(it[2], 27);
+ // ValueProxy.
+ sum = 0;
+ for (::protos::RepeatedField<int32_t>::ValueCProxy c :
+ test_model.value_array()) {
+ sum += c;
+ }
+ EXPECT_EQ(sum, 5 + 16 + 27);
+ sum = 0;
+ for (::protos::RepeatedField<int32_t>::ValueProxy c :
+ *test_model.mutable_value_array()) {
+ sum += c;
+ }
+ EXPECT_EQ(sum, 5 + 16 + 27);
+}
+
+TEST(CppGeneratedCode, RepeatedFieldProxyForStrings) {
+ ::protos::Arena arena;
+ auto test_model = ::protos::CreateMessage<TestModel>(arena);
+ EXPECT_EQ(0, test_model.repeated_string().size());
+ EXPECT_EQ(0, test_model.mutable_repeated_string()->size());
+
+ test_model.mutable_repeated_string()->push_back("a");
+ test_model.mutable_repeated_string()->push_back("b");
+ test_model.mutable_repeated_string()->push_back("c");
+
+ ASSERT_EQ(test_model.repeated_string().size(), 3);
+ EXPECT_EQ(test_model.repeated_string()[0], "a");
+ EXPECT_EQ(test_model.repeated_string()[1], "b");
+ EXPECT_EQ(test_model.repeated_string()[2], "c");
+
+ EXPECT_THAT(test_model.repeated_string(), ElementsAre("a", "b", "c"));
+ EXPECT_THAT(*test_model.mutable_repeated_string(),
+ ElementsAre("a", "b", "c"));
+
+ ASSERT_EQ(test_model.mutable_repeated_string()->size(), 3);
+ EXPECT_EQ((*test_model.mutable_repeated_string())[0], "a");
+ EXPECT_EQ((*test_model.mutable_repeated_string())[1], "b");
+ EXPECT_EQ((*test_model.mutable_repeated_string())[2], "c");
+
+ // The const accessor can't be used to modify the element
+ EXPECT_FALSE((std::is_assignable<decltype(test_model.repeated_string()[1]),
+ absl::string_view>::value));
+ // But the mutable one is fine.
+ (*test_model.mutable_repeated_string())[1] = "other";
+ EXPECT_THAT(test_model.repeated_string(), ElementsAre("a", "other", "c"));
+
+ test_model.mutable_repeated_string()->clear();
+ EXPECT_EQ(test_model.mutable_repeated_string()->size(), 0);
+}
+
+TEST(CppGeneratedCode, RepeatedFieldProxyForMessages) {
+ ::protos::Arena arena;
+ auto test_model = ::protos::CreateMessage<TestModel>(arena);
+ EXPECT_EQ(0, test_model.child_models().size());
+ ChildModel1 child1;
+ child1.set_child_str1(kTestStr1);
+ test_model.mutable_child_models()->push_back(child1);
+ ChildModel1 child2;
+ child2.set_child_str1(kTestStr2);
+ test_model.mutable_child_models()->push_back(std::move(child2));
+ EXPECT_EQ(test_model.child_models().size(), 2);
+ EXPECT_EQ(test_model.child_models()[0].child_str1(), kTestStr1);
+ EXPECT_EQ(test_model.child_models()[1].child_str1(), kTestStr2);
+ EXPECT_EQ((*test_model.mutable_child_models())[0].child_str1(), kTestStr1);
+ EXPECT_EQ((*test_model.mutable_child_models())[1].child_str1(), kTestStr2);
+ (*test_model.mutable_child_models())[0].set_child_str1("change1");
+ EXPECT_EQ((*test_model.mutable_child_models())[0].child_str1(), "change1");
+ test_model.mutable_child_models()->clear();
+ EXPECT_EQ(test_model.mutable_child_models()->size(), 0);
+}
+
+TEST(CppGeneratedCode, RepeatedFieldProxyForMessagesIndexOperator) {
+ ::protos::Arena arena;
+ auto test_model = ::protos::CreateMessage<TestModel>(arena);
+ EXPECT_EQ(0, test_model.child_models().size());
+ ChildModel1 child1;
+ child1.set_child_str1(kTestStr1);
+ test_model.mutable_child_models()->push_back(child1);
+ ChildModel1 child2;
+
+ child2.set_child_str1(kTestStr2);
+ test_model.mutable_child_models()->push_back(std::move(child2));
+ ASSERT_EQ(test_model.child_models().size(), 2);
+
+ // test_model.child_models()[0].set_child_str1("change1");
+ (*test_model.mutable_child_models())[0].set_child_str1("change1");
+ EXPECT_EQ((*test_model.mutable_child_models())[0].child_str1(), "change1");
+}
+
+TEST(CppGeneratedCode, RepeatedStrings) {
+ ::protos::Arena arena;
+ auto test_model = ::protos::CreateMessage<TestModel>(arena);
+ EXPECT_EQ(0, test_model.repeated_string_size());
+ // Should be able to clear repeated field when empty.
+ test_model.mutable_repeated_string()->clear();
+ EXPECT_EQ(0, test_model.repeated_string_size());
+ // Add 2 children.
+ EXPECT_EQ(true, test_model.add_repeated_string("Hello"));
+ EXPECT_EQ(true, test_model.add_repeated_string("World"));
+ EXPECT_EQ(2, test_model.repeated_string_size());
+ EXPECT_EQ("Hello", test_model.repeated_string(0));
+ EXPECT_EQ("World", test_model.repeated_string(1));
+ EXPECT_EQ(true, test_model.resize_repeated_string(3));
+ EXPECT_EQ(3, test_model.repeated_string_size());
+ test_model.set_repeated_string(2, "Test");
+ EXPECT_EQ("Hello", test_model.repeated_string(0));
+ EXPECT_EQ("World", test_model.repeated_string(1));
+ EXPECT_EQ("Test", test_model.repeated_string(2));
+}
+
+TEST(CppGeneratedCode, MessageMapInt32KeyMessageValue) {
+ const int key_test_value = 3;
+ ::protos::Arena arena;
+ ::protos::Arena child_arena;
+ auto test_model = ::protos::CreateMessage<TestModel>(arena);
+ EXPECT_EQ(0, test_model.child_map_size());
+ test_model.clear_child_map();
+ EXPECT_EQ(0, test_model.child_map_size());
+ auto child_model1 = ::protos::CreateMessage<ChildModel1>(child_arena);
+ child_model1.set_child_str1("abc");
+ test_model.set_child_map(key_test_value, child_model1);
+ auto map_result = test_model.get_child_map(key_test_value);
+ EXPECT_EQ(true, map_result.ok());
+ EXPECT_EQ("abc", map_result.value()->child_str1());
+ // Now mutate original child model to verify that value semantics are
+ // preserved.
+ child_model1.set_child_str1("abc V2");
+ EXPECT_EQ("abc", map_result.value()->child_str1());
+ test_model.delete_child_map(key_test_value);
+ auto map_result_after_delete = test_model.get_child_map(key_test_value);
+ EXPECT_EQ(false, map_result_after_delete.ok());
+}
+
+TEST(CppGeneratedCode, MessageMapStringKeyAndStringValue) {
+ ::protos::Arena arena;
+ auto test_model = ::protos::CreateMessage<TestModel>(arena);
+ EXPECT_EQ(0, test_model.str_to_str_map_size());
+ test_model.clear_str_to_str_map();
+ EXPECT_EQ(0, test_model.str_to_str_map_size());
+ test_model.set_str_to_str_map("first", "abc");
+ test_model.set_str_to_str_map("second", "def");
+ auto result = test_model.get_str_to_str_map("second");
+ EXPECT_EQ(true, result.ok());
+ EXPECT_EQ("def", result.value());
+ test_model.delete_str_to_str_map("first");
+ auto result_after_delete = test_model.get_str_to_str_map("first");
+ EXPECT_EQ(false, result_after_delete.ok());
+}
+
+TEST(CppGeneratedCode, MessageMapStringKeyAndInt32Value) {
+ ::protos::Arena arena;
+ auto test_model = ::protos::CreateMessage<TestModel>(arena);
+ EXPECT_EQ(0, test_model.str_to_int_map_size());
+ test_model.clear_str_to_int_map();
+ EXPECT_EQ(0, test_model.str_to_int_map_size());
+ test_model.set_str_to_int_map("first", 10);
+ EXPECT_EQ(1, test_model.str_to_int_map_size());
+ test_model.set_str_to_int_map("second", 20);
+ EXPECT_EQ(2, test_model.str_to_int_map_size());
+ auto result = test_model.get_str_to_int_map("second");
+ EXPECT_EQ(true, result.ok());
+ EXPECT_EQ(20, result.value());
+ test_model.delete_str_to_int_map("first");
+ auto result_after_delete = test_model.get_str_to_int_map("first");
+ EXPECT_EQ(false, result_after_delete.ok());
+}
+
+TEST(CppGeneratedCode, HasExtension) {
+ TestModel model;
+ EXPECT_EQ(false, ::protos::HasExtension(&model, theme));
+}
+
+TEST(CppGeneratedCode, HasExtensionPtr) {
+ TestModel model;
+ EXPECT_EQ(false, ::protos::HasExtension(model.recursive_child(), theme));
+}
+
+TEST(CppGeneratedCode, ClearExtensionWithEmptyExtension) {
+ TestModel model;
+ EXPECT_EQ(false, ::protos::HasExtension(&model, theme));
+ ::protos::ClearExtension(&model, theme);
+ EXPECT_EQ(false, ::protos::HasExtension(&model, theme));
+}
+
+TEST(CppGeneratedCode, ClearExtensionWithEmptyExtensionPtr) {
+ TestModel model;
+ ::protos::Ptr<TestModel> recursive_child = model.mutable_recursive_child();
+ ::protos::ClearExtension(recursive_child, theme);
+ EXPECT_EQ(false, ::protos::HasExtension(recursive_child, theme));
+}
+
+TEST(CppGeneratedCode, SetExtension) {
+ TestModel model;
+ ThemeExtension extension1;
+ extension1.set_ext_name("Hello World");
+ EXPECT_EQ(false, ::protos::HasExtension(&model, theme));
+ EXPECT_EQ(true, ::protos::SetExtension(&model, theme, extension1).ok());
+ EXPECT_EQ(true, ::protos::HasExtension(&model, theme));
+}
+
+TEST(CppGeneratedCode, SetExtensionOnMutableChild) {
+ TestModel model;
+ ThemeExtension extension1;
+ extension1.set_ext_name("Hello World");
+ EXPECT_EQ(false,
+ ::protos::HasExtension(model.mutable_recursive_child(), theme));
+ EXPECT_EQ(true, ::protos::SetExtension(model.mutable_recursive_child(), theme,
+ extension1)
+ .ok());
+ EXPECT_EQ(true,
+ ::protos::HasExtension(model.mutable_recursive_child(), theme));
+}
+
+TEST(CppGeneratedCode, GetExtension) {
+ TestModel model;
+ ThemeExtension extension1;
+ extension1.set_ext_name("Hello World");
+ EXPECT_EQ(false, ::protos::HasExtension(&model, theme));
+ EXPECT_EQ(true, ::protos::SetExtension(&model, theme, extension1).ok());
+ EXPECT_EQ("Hello World",
+ ::protos::GetExtension(&model, theme).value()->ext_name());
+}
+
+TEST(CppGeneratedCode, GetExtensionOnMutableChild) {
+ TestModel model;
+ ThemeExtension extension1;
+ extension1.set_ext_name("Hello World");
+ ::protos::Ptr<TestModel> mutable_recursive_child =
+ model.mutable_recursive_child();
+ EXPECT_EQ(false, ::protos::HasExtension(mutable_recursive_child, theme));
+ EXPECT_EQ(
+ true,
+ ::protos::SetExtension(mutable_recursive_child, theme, extension1).ok());
+ EXPECT_EQ("Hello World",
+ ::protos::GetExtension(mutable_recursive_child, theme)
+ .value()
+ ->ext_name());
+}
+
+TEST(CppGeneratedCode, GetExtensionOnImmutableChild) {
+ TestModel model;
+ ThemeExtension extension1;
+ extension1.set_ext_name("Hello World");
+ ::protos::Ptr<TestModel> mutable_recursive_child =
+ model.mutable_recursive_child();
+ EXPECT_EQ(false, ::protos::HasExtension(mutable_recursive_child, theme));
+ EXPECT_EQ(
+ true,
+ ::protos::SetExtension(mutable_recursive_child, theme, extension1).ok());
+ ::protos::Ptr<const TestModel> recursive_child = model.recursive_child();
+ EXPECT_EQ("Hello World",
+ ::protos::GetExtension(recursive_child, theme).value()->ext_name());
+}
+
+TEST(CppGeneratedCode, SerializeUsingArena) {
+ TestModel model;
+ model.set_str1("Hello World");
+ ::upb::Arena arena;
+ absl::StatusOr<absl::string_view> bytes = ::protos::Serialize(&model, arena);
+ EXPECT_EQ(true, bytes.ok());
+ TestModel parsed_model = ::protos::Parse<TestModel>(bytes.value()).value();
+ EXPECT_EQ("Hello World", parsed_model.str1());
+}
+
+TEST(CppGeneratedCode, SerializeProxyUsingArena) {
+ ::upb::Arena message_arena;
+ TestModel::Proxy model_proxy =
+ ::protos::CreateMessage<TestModel>(message_arena);
+ model_proxy.set_str1("Hello World");
+ ::upb::Arena arena;
+ absl::StatusOr<absl::string_view> bytes =
+ ::protos::Serialize(&model_proxy, arena);
+ EXPECT_EQ(true, bytes.ok());
+ TestModel parsed_model = ::protos::Parse<TestModel>(bytes.value()).value();
+ EXPECT_EQ("Hello World", parsed_model.str1());
+}
+
+TEST(CppGeneratedCode, SerializeNestedMessageUsingArena) {
+ TestModel model;
+ model.mutable_recursive_child()->set_str1("Hello World");
+ ::upb::Arena arena;
+ ::protos::Ptr<const TestModel> child = model.recursive_child();
+ absl::StatusOr<absl::string_view> bytes = ::protos::Serialize(child, arena);
+ EXPECT_EQ(true, bytes.ok());
+ TestModel parsed_model = ::protos::Parse<TestModel>(bytes.value()).value();
+ EXPECT_EQ("Hello World", parsed_model.str1());
+}
+
+TEST(CppGeneratedCode, Parse) {
+ TestModel model;
+ model.set_str1("Test123");
+ ThemeExtension extension1;
+ extension1.set_ext_name("Hello World");
+ EXPECT_EQ(true, ::protos::SetExtension(&model, theme, extension1).ok());
+ ::upb::Arena arena;
+ auto bytes = ::protos::Serialize(&model, arena);
+ EXPECT_EQ(true, bytes.ok());
+ TestModel parsed_model = ::protos::Parse<TestModel>(bytes.value()).value();
+ EXPECT_EQ("Test123", parsed_model.str1());
+ EXPECT_EQ(true, ::protos::GetExtension(&parsed_model, theme).ok());
+}
+
+TEST(CppGeneratedCode, ParseIntoPtrToModel) {
+ TestModel model;
+ model.set_str1("Test123");
+ ThemeExtension extension1;
+ extension1.set_ext_name("Hello World");
+ EXPECT_EQ(true, ::protos::SetExtension(&model, theme, extension1).ok());
+ ::upb::Arena arena;
+ auto bytes = ::protos::Serialize(&model, arena);
+ EXPECT_EQ(true, bytes.ok());
+ ::protos::Ptr<TestModel> parsed_model =
+ ::protos::CreateMessage<TestModel>(arena);
+ EXPECT_TRUE(::protos::Parse(parsed_model, bytes.value()));
+ EXPECT_EQ("Test123", parsed_model->str1());
+ // Should return an extension even if we don't pass ExtensionRegistry
+ // by promoting unknown.
+ EXPECT_EQ(true, ::protos::GetExtension(parsed_model, theme).ok());
+}
+
+TEST(CppGeneratedCode, ParseWithExtensionRegistry) {
+ TestModel model;
+ model.set_str1("Test123");
+ ThemeExtension extension1;
+ extension1.set_ext_name("Hello World");
+ EXPECT_EQ(true, ::protos::SetExtension(&model, theme, extension1).ok());
+ EXPECT_EQ(true, ::protos::SetExtension(
+ &model, ThemeExtension::theme_extension, extension1)
+ .ok());
+ ::upb::Arena arena;
+ auto bytes = ::protos::Serialize(&model, arena);
+ EXPECT_EQ(true, bytes.ok());
+ ::protos::ExtensionRegistry extensions(
+ {&theme, &other_ext, &ThemeExtension::theme_extension}, arena);
+ TestModel parsed_model =
+ ::protos::Parse<TestModel>(bytes.value(), extensions).value();
+ EXPECT_EQ("Test123", parsed_model.str1());
+ EXPECT_EQ(true, ::protos::GetExtension(&parsed_model, theme).ok());
+ EXPECT_EQ(true, ::protos::GetExtension(&parsed_model,
+ ThemeExtension::theme_extension)
+ .ok());
+ EXPECT_EQ("Hello World", ::protos::GetExtension(
+ &parsed_model, ThemeExtension::theme_extension)
+ .value()
+ ->ext_name());
+}
+
+TEST(CppGeneratedCode, NameCollisions) {
+ TestModel model;
+ model.set_template_("test");
+ EXPECT_EQ("test", model.template_());
+ model.set_arena__("test");
+ EXPECT_EQ("test", model.arena__());
+}
+
+TEST(CppGeneratedCode, SharedPointer) {
+ std::shared_ptr<TestModel> model = std::make_shared<TestModel>();
+ ::upb::Arena arena;
+ auto bytes = protos::Serialize(model.get(), arena);
+ EXPECT_TRUE(protos::Parse(model.get(), bytes.value()));
+}
+
+TEST(CppGeneratedCode, UniquePointer) {
+ auto model = std::make_unique<TestModel>();
+ ::upb::Arena arena;
+ auto bytes = protos::Serialize(model.get(), arena);
+ EXPECT_TRUE(protos::Parse(model.get(), bytes.value()));
+}
+
+TEST(CppGeneratedCode, Assignment) {
+ TestModel model;
+ model.set_category(5);
+ model.mutable_child_model_1()->set_child_str1("text in child");
+ TestModel model2 = model;
+ EXPECT_EQ(5, model2.category());
+ EXPECT_EQ(model2.child_model_1()->child_str1(), "text in child");
+}
+
+TEST(CppGeneratedCode, PtrAssignment) {
+ TestModel model;
+ model.mutable_child_model_1()->set_child_str1("text in child");
+ ChildModel1 child_from_const_ptr = *model.child_model_1();
+ EXPECT_EQ(child_from_const_ptr.child_str1(), "text in child");
+ ChildModel1 child_from_ptr = *model.mutable_child_model_1();
+ EXPECT_EQ(child_from_ptr.child_str1(), "text in child");
+}
+
+TEST(CppGeneratedCode, CopyConstructor) {
+ TestModel model;
+ model.set_category(6);
+ TestModel model2(model);
+ EXPECT_EQ(6, model2.category());
+}
+
+TEST(CppGeneratedCode, PtrConstructor) {
+ TestModel model;
+ model.mutable_child_model_1()->set_child_str1("text in child");
+ ChildModel1 child_from_ptr(*model.mutable_child_model_1());
+ EXPECT_EQ(child_from_ptr.child_str1(), "text in child");
+ ChildModel1 child_from_const_ptr(*model.child_model_1());
+ EXPECT_EQ(child_from_const_ptr.child_str1(), "text in child");
+}
+
+TEST(CppGeneratedCode, MutableToProxy) {
+ TestModel model;
+ ::protos::Ptr<ChildModel1> child = model.mutable_child_model_1();
+ (void)child;
+}
+
+TEST(CppGeneratedCode, ProxyToCProxy) {
+ TestModel model;
+ ::protos::Ptr<ChildModel1> child = model.mutable_child_model_1();
+ ::protos::Ptr<const ChildModel1> child2 = child;
+ (void)child2;
+}
+
+bool ProxyToCProxyMethod(::protos::Ptr<const ChildModel1> child) {
+ return child->child_str1() == "text in child";
+}
+
+TEST(CppGeneratedCode, PassProxyToCProxy) {
+ TestModel model;
+ model.mutable_child_model_1()->set_child_str1("text in child");
+ EXPECT_TRUE(ProxyToCProxyMethod(model.mutable_child_model_1()));
+}
+
+TEST(CppGeneratedCode, PtrImplicitConversion) {
+ TestModel model;
+ model.set_int64(5);
+ ::protos::Ptr<TestModel> model_ptr = &model;
+ EXPECT_EQ(model_ptr->int64(), 5);
+}
+
+TEST(CppGeneratedCode, ClearSubMessage) {
+ // Fill model.
+ TestModel model;
+ model.set_int64(5);
+ auto new_child = model.mutable_child_model_1();
+ new_child->set_child_str1("text in child");
+ ThemeExtension extension1;
+ extension1.set_ext_name("name in extension");
+ EXPECT_TRUE(::protos::SetExtension(&model, theme, extension1).ok());
+ EXPECT_TRUE(model.mutable_child_model_1()->has_child_str1());
+ // Clear using Ptr<T>
+ ::protos::ClearMessage(model.mutable_child_model_1());
+ EXPECT_FALSE(model.mutable_child_model_1()->has_child_str1());
+}
+
+TEST(CppGeneratedCode, ClearMessage) {
+ // Fill model.
+ TestModel model;
+ model.set_int64(5);
+ model.set_str2("Hello");
+ auto new_child = model.add_child_models();
+ ASSERT_TRUE(new_child.ok());
+ new_child.value()->set_child_str1("text in child");
+ ThemeExtension extension1;
+ extension1.set_ext_name("name in extension");
+ EXPECT_TRUE(::protos::SetExtension(&model, theme, extension1).ok());
+ // Clear using T*
+ ::protos::ClearMessage(&model);
+ // Verify that scalars, repeated fields and extensions are cleared.
+ EXPECT_FALSE(model.has_int64());
+ EXPECT_FALSE(model.has_str2());
+ EXPECT_TRUE(model.child_models().empty());
+ EXPECT_FALSE(::protos::HasExtension(&model, theme));
+}
+
+TEST(CppGeneratedCode, DeepCopy) {
+ // Fill model.
+ TestModel model;
+ model.set_int64(5);
+ model.set_str2("Hello");
+ auto new_child = model.add_child_models();
+ ASSERT_TRUE(new_child.ok());
+ new_child.value()->set_child_str1("text in child");
+ ThemeExtension extension1;
+ extension1.set_ext_name("name in extension");
+ EXPECT_TRUE(::protos::SetExtension(&model, theme, extension1).ok());
+ TestModel target;
+ target.set_b1(true);
+ ::protos::DeepCopy(&model, &target);
+ EXPECT_FALSE(target.b1()) << "Target was not cleared before copying content ";
+ EXPECT_EQ(target.str2(), "Hello");
+ EXPECT_TRUE(::protos::HasExtension(&target, theme));
+}
+
+TEST(CppGeneratedCode, HasExtensionAndRegistry) {
+ // Fill model.
+ TestModel source;
+ source.set_int64(5);
+ source.set_str2("Hello");
+ auto new_child = source.add_child_models();
+ ASSERT_TRUE(new_child.ok());
+ new_child.value()->set_child_str1("text in child");
+ ThemeExtension extension1;
+ extension1.set_ext_name("name in extension");
+ ASSERT_TRUE(::protos::SetExtension(&source, theme, extension1).ok());
+
+ // Now that we have a source model with extension data, serialize.
+ ::protos::Arena arena;
+ std::string data = std::string(::protos::Serialize(&source, arena).value());
+
+ // Test with ExtensionRegistry
+ ::protos::ExtensionRegistry extensions({&theme}, arena);
+ TestModel parsed_model = ::protos::Parse<TestModel>(data, extensions).value();
+ EXPECT_TRUE(::protos::HasExtension(&parsed_model, theme));
+}
+
+// TODO(b/288491350) : Add BUILD rule to test failures below.
+#ifdef TEST_CLEAR_MESSAGE_FAILURE
+TEST(CppGeneratedCode, ClearConstMessageShouldFail) {
+ // Fill model.
+ TestModel model;
+ model.set_int64(5);
+ model.set_str2("Hello");
+ // Only mutable_ can be cleared not Ptr<const T>.
+ ::protos::ClearMessage(model.child_model_1());
+}
+#endif
diff --git a/upb/protos_generator/tests/test_model.proto b/upb/protos_generator/tests/test_model.proto
new file mode 100644
index 0000000..e133624
--- /dev/null
+++ b/upb/protos_generator/tests/test_model.proto
@@ -0,0 +1,184 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto2";
+
+package protos_generator.test;
+
+import "protos_generator/tests/child_model.proto";
+
+message TestModelContainer {
+ repeated TestModel models = 1;
+ optional ChildModel3 proto_3_child = 2;
+}
+
+message TestModel {
+ optional int32 value = 1;
+ repeated int32 value_array = 2; // _UPB_MODE_ARRAY
+ repeated int32 value_packed_array = 3
+ [packed = true]; // _UPB_MODE_ARRAY | _UPB_MODE_IS_PACKED
+ repeated int32 value_deprec = 4 [deprecated = true];
+ optional string str1 = 115;
+ optional bool b1 = 9;
+ optional bool b2 = 10;
+ optional string str2 = 50;
+ optional string str3 = 11;
+ optional float optional_float = 14;
+ optional double optional_double = 15;
+ optional int64 optional_int64 = 16;
+ optional uint32 optional_uint32 = 17;
+ optional uint64 optional_uint64 = 18;
+ optional sint32 optional_sint32 = 19;
+ optional sint64 optional_sint64 = 20;
+ optional fixed32 optional_fixed32 = 21;
+ optional fixed64 optional_fixed64 = 22;
+ optional sfixed32 optional_sfixed32 = 23;
+ optional sfixed64 optional_sfixed64 = 24;
+ repeated int64 repeated_int64 = 25;
+ repeated uint64 repeated_uint64 = 26;
+ repeated fixed64 repeated_fixed64 = 27;
+ repeated sfixed64 repeated_sfixed64 = 28;
+ repeated bool repeated_bool = 29;
+ repeated string repeated_string = 35;
+ optional bytes optional_bytes = 36;
+ message NestedChild {
+ optional string nested_child_name = 211;
+ }
+ optional NestedChild nested_child_1 = 212;
+ optional ChildModel1 child_model_1 = 222;
+ repeated ChildModel1 child_models = 223;
+ optional ChildModel1 bar = 224;
+ oneof child_oneof1 {
+ string oneof_member1 = 98;
+ bool oneof_member2 = 99;
+ }
+ optional int32 int_value_with_default = 31
+ [default = 65]; // Not supported yet
+ optional string string_value_with_default = 32
+ [default = "hello"]; // Not supported yet
+ optional float float_value_with_default = 33 [default = inf];
+ optional float double_value_with_default = 34 [default = -inf];
+
+ map<int32, ChildModel1> child_map = 225;
+ optional TestModel recursive_child = 226;
+ map<string, ChildModel1> child_str_map = 227;
+ map<string, int32> str_to_int_map = 228;
+ map<string, string> str_to_str_map = 229;
+
+ extend TestAnnotation {
+ optional OtherExtension in_message_ext = 15000;
+ }
+
+ enum Category {
+ IMAGES = 5;
+ NEWS = 6;
+ VIDEO = 7;
+ RADIO = 8 [deprecated = true];
+ }
+ optional Category category = 37;
+
+ // keyword collisions (double, template, ...)
+ oneof type {
+ string string = 230;
+ int64 int64 = 231;
+ double double = 232;
+ }
+ optional string template = 233;
+ optional string msg = 234;
+ optional string arena = 235;
+
+ // Tests publicly imported enum.
+ optional TestEnum imported_enum = 238;
+
+ optional string phase = 239;
+ optional bool clear_phase = 240;
+
+ optional string doc_id = 241;
+ optional bool set_doc_id = 242;
+
+ extensions 10000 to max;
+}
+
+// Old version with fewer fields to test backward/forward compatibility.
+message TestModelContainerV1 {
+ repeated TestModelV1 models = 1;
+}
+
+message TestModelV1 {
+ optional int32 value = 1;
+ repeated int32 value2 = 2;
+ repeated int32 value3 = 3 [packed = true];
+ repeated int32 value4 = 4 [deprecated = true];
+ optional bool b1 = 9;
+ optional bool b2 = 10;
+ optional string str2 = 50;
+}
+
+enum PrimaryColors {
+ RED = 1;
+ GREEN = 2;
+ BLUE = 3;
+}
+
+// TestModel extension.
+message ThemeExtension {
+ extend TestModel {
+ optional ThemeExtension theme_extension = 12003;
+ }
+ optional string ext_name = 1;
+ optional bool ext_bool = 2;
+}
+
+extend TestModel {
+ optional ThemeExtension theme = 12001;
+}
+
+message OtherExtension {
+ optional string ext2_name = 1;
+}
+
+extend TestModel {
+ optional OtherExtension other_ext = 12002;
+}
+
+message TestAnnotation {
+ extensions 10000 to max;
+}
+
+message TestMessageHasEnum {
+ optional EnumDeclaredAfterMessage enum_declared_after_message = 1;
+}
+
+enum EnumDeclaredAfterMessage {
+ ZERO = 0;
+ ONE = 1;
+ TWO = 2;
+ THREE = 3;
+}
diff --git a/upb/python/BUILD b/upb/python/BUILD
new file mode 100644
index 0000000..1adb2ba
--- /dev/null
+++ b/upb/python/BUILD
@@ -0,0 +1,252 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# begin:github_only
+load("//python:py_extension.bzl", "py_extension")
+# end:github_only
+
+load("@bazel_skylib//lib:selects.bzl", "selects")
+load("@bazel_skylib//rules:common_settings.bzl", "bool_flag", "string_flag")
+load("//bazel:build_defs.bzl", "UPB_DEFAULT_COPTS")
+
+# begin:github_only
+load("@rules_pkg//:mappings.bzl", "pkg_files")
+# end:github_only
+
+licenses(["notice"])
+
+package(
+ # begin:google_only
+# default_applicable_licenses = ["//:license"],
+ # end:google_only
+ default_visibility = ["//python/dist:__pkg__"],
+)
+
+LIMITED_API_FLAG_SELECT = {
+ ":limited_api_3.7": ["-DPy_LIMITED_API=0x03070000"],
+ ":limited_api_3.10": ["-DPy_LIMITED_API=0x030a0000"],
+ "//conditions:default": [],
+}
+
+bool_flag(
+ name = "limited_api",
+ build_setting_default = True,
+)
+
+string_flag(
+ name = "python_version",
+ build_setting_default = "system",
+ values = [
+ "system",
+ "37",
+ "38",
+ "39",
+ "310",
+ ],
+)
+
+config_setting(
+ name = "limited_api_3.7",
+ flag_values = {
+ ":limited_api": "True",
+ ":python_version": "37",
+ },
+)
+
+config_setting(
+ name = "full_api_3.7_win32",
+ flag_values = {
+ ":limited_api": "False",
+ ":python_version": "37",
+ },
+ values = {"cpu": "win32"},
+)
+
+config_setting(
+ name = "full_api_3.7_win64",
+ flag_values = {
+ ":limited_api": "False",
+ ":python_version": "37",
+ },
+ values = {"cpu": "win64"},
+)
+
+selects.config_setting_group(
+ name = "full_api_3.7",
+ match_any = [
+ ":full_api_3.7_win32",
+ ":full_api_3.7_win64",
+ ],
+)
+
+config_setting(
+ name = "full_api_3.8_win32",
+ flag_values = {
+ ":limited_api": "False",
+ ":python_version": "38",
+ },
+ values = {"cpu": "win32"},
+)
+
+config_setting(
+ name = "full_api_3.8_win64",
+ flag_values = {
+ ":limited_api": "False",
+ ":python_version": "38",
+ },
+ values = {"cpu": "win64"},
+)
+
+selects.config_setting_group(
+ name = "full_api_3.8",
+ match_any = [
+ ":full_api_3.8_win32",
+ ":full_api_3.8_win64",
+ ],
+)
+
+config_setting(
+ name = "full_api_3.9_win32",
+ flag_values = {
+ ":limited_api": "False",
+ ":python_version": "39",
+ },
+ values = {"cpu": "win32"},
+)
+
+config_setting(
+ name = "full_api_3.9_win64",
+ flag_values = {
+ ":limited_api": "False",
+ ":python_version": "39",
+ },
+ values = {"cpu": "win64"},
+)
+
+selects.config_setting_group(
+ name = "full_api_3.9",
+ match_any = [
+ "full_api_3.9_win32",
+ ":full_api_3.9_win64",
+ ],
+)
+
+config_setting(
+ name = "limited_api_3.10_win32",
+ flag_values = {
+ ":limited_api": "True",
+ ":python_version": "310",
+ },
+ values = {"cpu": "win32"},
+)
+
+config_setting(
+ name = "limited_api_3.10_win64",
+ flag_values = {
+ ":limited_api": "True",
+ ":python_version": "310",
+ },
+ values = {"cpu": "win64"},
+)
+
+selects.config_setting_group(
+ name = "limited_api_3.10",
+ match_any = [
+ ":limited_api_3.10_win32",
+ ":limited_api_3.10_win64",
+ ],
+)
+
+# begin:github_only
+_message_target_compatible_with = {
+ "@platforms//os:windows": ["@platforms//:incompatible"],
+ "@system_python//:none": ["@platforms//:incompatible"],
+ "@system_python//:unsupported": ["@platforms//:incompatible"],
+ "//conditions:default": [],
+}
+
+# end:github_only
+# begin:google_only
+# _message_target_compatible_with = {
+# "@platforms//os:windows": ["@platforms//:incompatible"],
+# "//conditions:default": [],
+# }
+# end:google_only
+
+filegroup(
+ name = "message_srcs",
+ srcs = [
+ "convert.c",
+ "convert.h",
+ "descriptor.c",
+ "descriptor.h",
+ "descriptor_containers.c",
+ "descriptor_containers.h",
+ "descriptor_pool.c",
+ "descriptor_pool.h",
+ "extension_dict.c",
+ "extension_dict.h",
+ "map.c",
+ "map.h",
+ "message.c",
+ "message.h",
+ "protobuf.c",
+ "protobuf.h",
+ "python_api.h",
+ "repeated.c",
+ "repeated.h",
+ "unknown_fields.c",
+ "unknown_fields.h",
+ ],
+ # begin:google_only
+# compatible_with = ["//buildenv/target:non_prod"],
+ # end:google_only
+)
+
+py_extension(
+ name = "_message",
+ srcs = [":message_srcs"],
+ copts = UPB_DEFAULT_COPTS + select(LIMITED_API_FLAG_SELECT) + [
+ # The Python API requires patterns that are ISO C incompatible, like
+ # casts between function pointers and object pointers.
+ "-Wno-pedantic",
+ ],
+ target_compatible_with = select(_message_target_compatible_with),
+ deps = [
+ "//:collections",
+ "//:descriptor_upb_proto_reflection",
+ "//:eps_copy_input_stream",
+ "//:hash",
+ "//:message_copy",
+ "//:port",
+ "//:reflection",
+ "//:text",
+ "//:wire_reader",
+ "//:wire_types",
+ "//upb/util:compare",
+ "//upb/util:def_to_proto",
+ "//upb/util:required_fields",
+ ],
+)
diff --git a/upb/python/convert.c b/upb/python/convert.c
new file mode 100644
index 0000000..98d9b75
--- /dev/null
+++ b/upb/python/convert.c
@@ -0,0 +1,446 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "python/convert.h"
+
+#include "python/message.h"
+#include "python/protobuf.h"
+#include "upb/collections/map.h"
+#include "upb/reflection/message.h"
+#include "upb/util/compare.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+PyObject* PyUpb_UpbToPy(upb_MessageValue val, const upb_FieldDef* f,
+ PyObject* arena) {
+ switch (upb_FieldDef_CType(f)) {
+ case kUpb_CType_Enum:
+ case kUpb_CType_Int32:
+ return PyLong_FromLong(val.int32_val);
+ case kUpb_CType_Int64:
+ return PyLong_FromLongLong(val.int64_val);
+ case kUpb_CType_UInt32:
+ return PyLong_FromSize_t(val.uint32_val);
+ case kUpb_CType_UInt64:
+ return PyLong_FromUnsignedLongLong(val.uint64_val);
+ case kUpb_CType_Float:
+ return PyFloat_FromDouble(val.float_val);
+ case kUpb_CType_Double:
+ return PyFloat_FromDouble(val.double_val);
+ case kUpb_CType_Bool:
+ return PyBool_FromLong(val.bool_val);
+ case kUpb_CType_Bytes:
+ return PyBytes_FromStringAndSize(val.str_val.data, val.str_val.size);
+ case kUpb_CType_String: {
+ PyObject* ret =
+ PyUnicode_DecodeUTF8(val.str_val.data, val.str_val.size, NULL);
+ // If the string can't be decoded in UTF-8, just return a bytes object
+ // that contains the raw bytes. This can't happen if the value was
+ // assigned using the members of the Python message object, but can happen
+ // if the values were parsed from the wire (binary).
+ if (ret == NULL) {
+ PyErr_Clear();
+ ret = PyBytes_FromStringAndSize(val.str_val.data, val.str_val.size);
+ }
+ return ret;
+ }
+ case kUpb_CType_Message:
+ return PyUpb_Message_Get((upb_Message*)val.msg_val,
+ upb_FieldDef_MessageSubDef(f), arena);
+ default:
+ PyErr_Format(PyExc_SystemError,
+ "Getting a value from a field of unknown type %d",
+ upb_FieldDef_CType(f));
+ return NULL;
+ }
+}
+
+static bool PyUpb_GetInt64(PyObject* obj, int64_t* val) {
+ // We require that the value is either an integer or has an __index__
+ // conversion.
+ obj = PyNumber_Index(obj);
+ if (!obj) return false;
+ // If the value is already a Python long, PyLong_AsLongLong() retrieves it.
+ // Otherwise is converts to integer using __int__.
+ *val = PyLong_AsLongLong(obj);
+ bool ok = true;
+ if (PyErr_Occurred()) {
+ assert(PyErr_ExceptionMatches(PyExc_OverflowError));
+ PyErr_Clear();
+ PyErr_Format(PyExc_ValueError, "Value out of range: %S", obj);
+ ok = false;
+ }
+ Py_DECREF(obj);
+ return ok;
+}
+
+static bool PyUpb_GetUint64(PyObject* obj, uint64_t* val) {
+ // We require that the value is either an integer or has an __index__
+ // conversion.
+ obj = PyNumber_Index(obj);
+ if (!obj) return false;
+ *val = PyLong_AsUnsignedLongLong(obj);
+ bool ok = true;
+ if (PyErr_Occurred()) {
+ assert(PyErr_ExceptionMatches(PyExc_OverflowError));
+ PyErr_Clear();
+ PyErr_Format(PyExc_ValueError, "Value out of range: %S", obj);
+ ok = false;
+ }
+ Py_DECREF(obj);
+ return ok;
+}
+
+static bool PyUpb_GetInt32(PyObject* obj, int32_t* val) {
+ int64_t i64;
+ if (!PyUpb_GetInt64(obj, &i64)) return false;
+ if (i64 < INT32_MIN || i64 > INT32_MAX) {
+ PyErr_Format(PyExc_ValueError, "Value out of range: %S", obj);
+ return false;
+ }
+ *val = i64;
+ return true;
+}
+
+static bool PyUpb_GetUint32(PyObject* obj, uint32_t* val) {
+ uint64_t u64;
+ if (!PyUpb_GetUint64(obj, &u64)) return false;
+ if (u64 > UINT32_MAX) {
+ PyErr_Format(PyExc_ValueError, "Value out of range: %S", obj);
+ return false;
+ }
+ *val = u64;
+ return true;
+}
+
+// If `arena` is specified, copies the string data into the given arena.
+// Otherwise aliases the given data.
+static upb_MessageValue PyUpb_MaybeCopyString(const char* ptr, size_t size,
+ upb_Arena* arena) {
+ upb_MessageValue ret;
+ ret.str_val.size = size;
+ if (arena) {
+ char* buf = upb_Arena_Malloc(arena, size);
+ memcpy(buf, ptr, size);
+ ret.str_val.data = buf;
+ } else {
+ ret.str_val.data = ptr;
+ }
+ return ret;
+}
+
+const char* upb_FieldDef_TypeString(const upb_FieldDef* f) {
+ switch (upb_FieldDef_CType(f)) {
+ case kUpb_CType_Double:
+ return "double";
+ case kUpb_CType_Float:
+ return "float";
+ case kUpb_CType_Int64:
+ return "int64";
+ case kUpb_CType_Int32:
+ return "int32";
+ case kUpb_CType_UInt64:
+ return "uint64";
+ case kUpb_CType_UInt32:
+ return "uint32";
+ case kUpb_CType_Enum:
+ return "enum";
+ case kUpb_CType_Bool:
+ return "bool";
+ case kUpb_CType_String:
+ return "string";
+ case kUpb_CType_Bytes:
+ return "bytes";
+ case kUpb_CType_Message:
+ return "message";
+ }
+ UPB_UNREACHABLE();
+}
+
+static bool PyUpb_PyToUpbEnum(PyObject* obj, const upb_EnumDef* e,
+ upb_MessageValue* val) {
+ if (PyUnicode_Check(obj)) {
+ Py_ssize_t size;
+ const char* name = PyUnicode_AsUTF8AndSize(obj, &size);
+ const upb_EnumValueDef* ev =
+ upb_EnumDef_FindValueByNameWithSize(e, name, size);
+ if (!ev) {
+ PyErr_Format(PyExc_ValueError, "unknown enum label \"%s\"", name);
+ return false;
+ }
+ val->int32_val = upb_EnumValueDef_Number(ev);
+ return true;
+ } else {
+ int32_t i32;
+ if (!PyUpb_GetInt32(obj, &i32)) return false;
+ if (upb_FileDef_Syntax(upb_EnumDef_File(e)) == kUpb_Syntax_Proto2 &&
+ !upb_EnumDef_CheckNumber(e, i32)) {
+ PyErr_Format(PyExc_ValueError, "invalid enumerator %d", (int)i32);
+ return false;
+ }
+ val->int32_val = i32;
+ return true;
+ }
+}
+
+bool PyUpb_IsNumpyNdarray(PyObject* obj, const upb_FieldDef* f) {
+ PyObject* type_name_obj =
+ PyObject_GetAttrString((PyObject*)Py_TYPE(obj), "__name__");
+ bool is_ndarray = false;
+ if (!strcmp(PyUpb_GetStrData(type_name_obj), "ndarray")) {
+ PyErr_Format(PyExc_TypeError,
+ "%S has type ndarray, but expected one of: %s", obj,
+ upb_FieldDef_TypeString(f));
+ is_ndarray = true;
+ }
+ Py_DECREF(type_name_obj);
+ return is_ndarray;
+}
+
+bool PyUpb_PyToUpb(PyObject* obj, const upb_FieldDef* f, upb_MessageValue* val,
+ upb_Arena* arena) {
+ switch (upb_FieldDef_CType(f)) {
+ case kUpb_CType_Enum:
+ return PyUpb_PyToUpbEnum(obj, upb_FieldDef_EnumSubDef(f), val);
+ case kUpb_CType_Int32:
+ return PyUpb_GetInt32(obj, &val->int32_val);
+ case kUpb_CType_Int64:
+ return PyUpb_GetInt64(obj, &val->int64_val);
+ case kUpb_CType_UInt32:
+ return PyUpb_GetUint32(obj, &val->uint32_val);
+ case kUpb_CType_UInt64:
+ return PyUpb_GetUint64(obj, &val->uint64_val);
+ case kUpb_CType_Float:
+ if (PyUpb_IsNumpyNdarray(obj, f)) return false;
+ val->float_val = PyFloat_AsDouble(obj);
+ return !PyErr_Occurred();
+ case kUpb_CType_Double:
+ if (PyUpb_IsNumpyNdarray(obj, f)) return false;
+ val->double_val = PyFloat_AsDouble(obj);
+ return !PyErr_Occurred();
+ case kUpb_CType_Bool:
+ if (PyUpb_IsNumpyNdarray(obj, f)) return false;
+ val->bool_val = PyLong_AsLong(obj);
+ return !PyErr_Occurred();
+ case kUpb_CType_Bytes: {
+ char* ptr;
+ Py_ssize_t size;
+ if (PyBytes_AsStringAndSize(obj, &ptr, &size) < 0) return false;
+ *val = PyUpb_MaybeCopyString(ptr, size, arena);
+ return true;
+ }
+ case kUpb_CType_String: {
+ Py_ssize_t size;
+ const char* ptr;
+ PyObject* unicode = NULL;
+ if (PyBytes_Check(obj)) {
+ unicode = obj = PyUnicode_FromEncodedObject(obj, "utf-8", NULL);
+ if (!obj) return false;
+ }
+ ptr = PyUnicode_AsUTF8AndSize(obj, &size);
+ if (PyErr_Occurred()) {
+ Py_XDECREF(unicode);
+ return false;
+ }
+ *val = PyUpb_MaybeCopyString(ptr, size, arena);
+ Py_XDECREF(unicode);
+ return true;
+ }
+ case kUpb_CType_Message:
+ PyErr_Format(PyExc_ValueError, "Message objects may not be assigned");
+ return false;
+ default:
+ PyErr_Format(PyExc_SystemError,
+ "Getting a value from a field of unknown type %d",
+ upb_FieldDef_CType(f));
+ return false;
+ }
+}
+
+bool upb_Message_IsEqual(const upb_Message* msg1, const upb_Message* msg2,
+ const upb_MessageDef* m);
+
+// -----------------------------------------------------------------------------
+// Equal
+// -----------------------------------------------------------------------------
+
+bool PyUpb_ValueEq(upb_MessageValue val1, upb_MessageValue val2,
+ const upb_FieldDef* f) {
+ switch (upb_FieldDef_CType(f)) {
+ case kUpb_CType_Bool:
+ return val1.bool_val == val2.bool_val;
+ case kUpb_CType_Int32:
+ case kUpb_CType_UInt32:
+ case kUpb_CType_Enum:
+ return val1.int32_val == val2.int32_val;
+ case kUpb_CType_Int64:
+ case kUpb_CType_UInt64:
+ return val1.int64_val == val2.int64_val;
+ case kUpb_CType_Float:
+ return val1.float_val == val2.float_val;
+ case kUpb_CType_Double:
+ return val1.double_val == val2.double_val;
+ case kUpb_CType_String:
+ case kUpb_CType_Bytes:
+ return val1.str_val.size == val2.str_val.size &&
+ memcmp(val1.str_val.data, val2.str_val.data, val1.str_val.size) ==
+ 0;
+ case kUpb_CType_Message:
+ return upb_Message_IsEqual(val1.msg_val, val2.msg_val,
+ upb_FieldDef_MessageSubDef(f));
+ default:
+ return false;
+ }
+}
+
+bool PyUpb_Map_IsEqual(const upb_Map* map1, const upb_Map* map2,
+ const upb_FieldDef* f) {
+ assert(upb_FieldDef_IsMap(f));
+ if (map1 == map2) return true;
+
+ size_t size1 = map1 ? upb_Map_Size(map1) : 0;
+ size_t size2 = map2 ? upb_Map_Size(map2) : 0;
+ if (size1 != size2) return false;
+ if (size1 == 0) return true;
+
+ const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
+ const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1);
+ size_t iter = kUpb_Map_Begin;
+
+ upb_MessageValue key, val1;
+ while (upb_Map_Next(map1, &key, &val1, &iter)) {
+ upb_MessageValue val2;
+ if (!upb_Map_Get(map2, key, &val2)) return false;
+ if (!PyUpb_ValueEq(val1, val2, val_f)) return false;
+ }
+
+ return true;
+}
+
+static bool PyUpb_ArrayElem_IsEqual(const upb_Array* arr1,
+ const upb_Array* arr2, size_t i,
+ const upb_FieldDef* f) {
+ assert(i < upb_Array_Size(arr1));
+ assert(i < upb_Array_Size(arr2));
+ upb_MessageValue val1 = upb_Array_Get(arr1, i);
+ upb_MessageValue val2 = upb_Array_Get(arr2, i);
+ return PyUpb_ValueEq(val1, val2, f);
+}
+
+bool PyUpb_Array_IsEqual(const upb_Array* arr1, const upb_Array* arr2,
+ const upb_FieldDef* f) {
+ assert(upb_FieldDef_IsRepeated(f) && !upb_FieldDef_IsMap(f));
+ if (arr1 == arr2) return true;
+
+ size_t n1 = arr1 ? upb_Array_Size(arr1) : 0;
+ size_t n2 = arr2 ? upb_Array_Size(arr2) : 0;
+ if (n1 != n2) return false;
+
+ // Half the length rounded down. Important: the empty list rounds to 0.
+ size_t half = n1 / 2;
+
+ // Search from the ends-in. We expect differences to more quickly manifest
+ // at the ends than in the middle. If the length is odd we will miss the
+ // middle element.
+ for (size_t i = 0; i < half; i++) {
+ if (!PyUpb_ArrayElem_IsEqual(arr1, arr2, i, f)) return false;
+ if (!PyUpb_ArrayElem_IsEqual(arr1, arr2, n1 - 1 - i, f)) return false;
+ }
+
+ // For an odd-lengthed list, pick up the middle element.
+ if (n1 & 1) {
+ if (!PyUpb_ArrayElem_IsEqual(arr1, arr2, half, f)) return false;
+ }
+
+ return true;
+}
+
+bool upb_Message_IsEqual(const upb_Message* msg1, const upb_Message* msg2,
+ const upb_MessageDef* m) {
+ if (msg1 == msg2) return true;
+ if (upb_Message_ExtensionCount(msg1) != upb_Message_ExtensionCount(msg2))
+ return false;
+
+ // Compare messages field-by-field. This is slightly tricky, because while
+ // we can iterate over normal fields in a predictable order, the extension
+ // order is unpredictable and may be different between msg1 and msg2.
+ // So we use the following strategy:
+ // 1. Iterate over all msg1 fields (including extensions).
+ // 2. For non-extension fields, we find the corresponding field by simply
+ // using upb_Message_Next(msg2). If the two messages have the same set
+ // of fields, this will yield the same field.
+ // 3. For extension fields, we have to actually search for the corresponding
+ // field, which we do with upb_Message_GetFieldByDef(msg2, ext_f1).
+ // 4. Once iteration over msg1 is complete, we call upb_Message_Next(msg2)
+ // one
+ // final time to verify that we have visited all of msg2's regular fields
+ // (we pass NULL for ext_dict so that iteration will *not* return
+ // extensions).
+ //
+ // We don't need to visit all of msg2's extensions, because we verified up
+ // front that both messages have the same number of extensions.
+ const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(m));
+ const upb_FieldDef *f1, *f2;
+ upb_MessageValue val1, val2;
+ size_t iter1 = kUpb_Message_Begin;
+ size_t iter2 = kUpb_Message_Begin;
+ while (upb_Message_Next(msg1, m, symtab, &f1, &val1, &iter1)) {
+ if (upb_FieldDef_IsExtension(f1)) {
+ val2 = upb_Message_GetFieldByDef(msg2, f1);
+ } else {
+ if (!upb_Message_Next(msg2, m, NULL, &f2, &val2, &iter2) || f1 != f2) {
+ return false;
+ }
+ }
+
+ if (upb_FieldDef_IsMap(f1)) {
+ if (!PyUpb_Map_IsEqual(val1.map_val, val2.map_val, f1)) return false;
+ } else if (upb_FieldDef_IsRepeated(f1)) {
+ if (!PyUpb_Array_IsEqual(val1.array_val, val2.array_val, f1)) {
+ return false;
+ }
+ } else {
+ if (!PyUpb_ValueEq(val1, val2, f1)) return false;
+ }
+ }
+
+ if (upb_Message_Next(msg2, m, NULL, &f2, &val2, &iter2)) return false;
+
+ size_t usize1, usize2;
+ const char* uf1 = upb_Message_GetUnknown(msg1, &usize1);
+ const char* uf2 = upb_Message_GetUnknown(msg2, &usize2);
+ // 100 is arbitrary, we're trying to prevent stack overflow but it's not
+ // obvious how deep we should allow here.
+ return upb_Message_UnknownFieldsAreEqual(uf1, usize1, uf2, usize2, 100) ==
+ kUpb_UnknownCompareResult_Equal;
+}
+
+#include "upb/port/undef.inc"
diff --git a/upb/python/convert.h b/upb/python/convert.h
new file mode 100644
index 0000000..1c594d3
--- /dev/null
+++ b/upb/python/convert.h
@@ -0,0 +1,66 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef PYUPB_CONVERT_H__
+#define PYUPB_CONVERT_H__
+
+#include "protobuf.h"
+#include "upb/reflection/def.h"
+#include "upb/reflection/message.h"
+
+// Converts `val` to a Python object according to the type information in `f`.
+// Any newly-created Python objects that reference non-primitive data from `val`
+// will take a reference on `arena`; the caller must ensure that `val` belongs
+// to `arena`. If the conversion cannot be performed, returns NULL and sets a
+// Python error.
+PyObject* PyUpb_UpbToPy(upb_MessageValue val, const upb_FieldDef* f,
+ PyObject* arena);
+
+// Converts `obj` to a upb_MessageValue `*val` according to the type information
+// in `f`. If `arena` is provided, any string data will be copied into `arena`,
+// otherwise the returned value will alias the Python-owned data (this can be
+// useful for an ephemeral upb_MessageValue). If the conversion cannot be
+// performed, returns false.
+bool PyUpb_PyToUpb(PyObject* obj, const upb_FieldDef* f, upb_MessageValue* val,
+ upb_Arena* arena);
+
+// Returns true if the given values (of type `f`) are equal.
+bool PyUpb_ValueEq(upb_MessageValue val1, upb_MessageValue val2,
+ const upb_FieldDef* f);
+
+// Returns true if the two arrays (with element type `f`) are equal.
+bool PyUpb_Array_IsEqual(const upb_Array* arr1, const upb_Array* arr2,
+ const upb_FieldDef* f);
+
+// Returns true if the given messages (of type `m`) are equal.
+bool upb_Message_IsEqual(const upb_Message* msg1, const upb_Message* msg2,
+ const upb_MessageDef* m);
+
+#endif // PYUPB_CONVERT_H__
diff --git a/upb/python/descriptor.c b/upb/python/descriptor.c
new file mode 100644
index 0000000..10f2cce
--- /dev/null
+++ b/upb/python/descriptor.c
@@ -0,0 +1,1701 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "python/descriptor.h"
+
+#include "python/convert.h"
+#include "python/descriptor_containers.h"
+#include "python/descriptor_pool.h"
+#include "python/message.h"
+#include "python/protobuf.h"
+#include "upb/reflection/def.h"
+#include "upb/util/def_to_proto.h"
+
+// -----------------------------------------------------------------------------
+// DescriptorBase
+// -----------------------------------------------------------------------------
+
+// This representation is used by all concrete descriptors.
+
+typedef struct {
+ PyObject_HEAD;
+ PyObject* pool; // We own a ref.
+ const void* def; // Type depends on the class. Kept alive by "pool".
+ PyObject* options; // NULL if not present or not cached.
+} PyUpb_DescriptorBase;
+
+PyObject* PyUpb_AnyDescriptor_GetPool(PyObject* desc) {
+ PyUpb_DescriptorBase* base = (void*)desc;
+ return base->pool;
+}
+
+const void* PyUpb_AnyDescriptor_GetDef(PyObject* desc) {
+ PyUpb_DescriptorBase* base = (void*)desc;
+ return base->def;
+}
+
+static PyUpb_DescriptorBase* PyUpb_DescriptorBase_DoCreate(
+ PyUpb_DescriptorType type, const void* def, const upb_FileDef* file) {
+ PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
+ PyTypeObject* type_obj = state->descriptor_types[type];
+ assert(def);
+
+ PyUpb_DescriptorBase* base = (void*)PyType_GenericAlloc(type_obj, 0);
+ base->pool = PyUpb_DescriptorPool_Get(upb_FileDef_Pool(file));
+ base->def = def;
+ base->options = NULL;
+
+ PyUpb_ObjCache_Add(def, &base->ob_base);
+ return base;
+}
+
+// Returns a Python object wrapping |def|, of descriptor type |type|. If a
+// wrapper was previously created for this def, returns it, otherwise creates a
+// new wrapper.
+static PyObject* PyUpb_DescriptorBase_Get(PyUpb_DescriptorType type,
+ const void* def,
+ const upb_FileDef* file) {
+ PyUpb_DescriptorBase* base = (PyUpb_DescriptorBase*)PyUpb_ObjCache_Get(def);
+
+ if (!base) {
+ base = PyUpb_DescriptorBase_DoCreate(type, def, file);
+ }
+
+ return &base->ob_base;
+}
+
+static PyUpb_DescriptorBase* PyUpb_DescriptorBase_Check(
+ PyObject* obj, PyUpb_DescriptorType type) {
+ PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
+ PyTypeObject* type_obj = state->descriptor_types[type];
+ if (!PyObject_TypeCheck(obj, type_obj)) {
+ PyErr_Format(PyExc_TypeError, "Expected object of type %S, but got %R",
+ type_obj, obj);
+ return NULL;
+ }
+ return (PyUpb_DescriptorBase*)obj;
+}
+
+static PyObject* PyUpb_DescriptorBase_GetOptions(PyUpb_DescriptorBase* self,
+ const upb_Message* opts,
+ const upb_MiniTable* layout,
+ const char* msg_name) {
+ if (!self->options) {
+ // Load descriptors protos if they are not loaded already. We have to do
+ // this lazily, otherwise, it would lead to circular imports.
+ PyObject* mod = PyImport_ImportModule(PYUPB_DESCRIPTOR_MODULE);
+ Py_DECREF(mod);
+
+ // Find the correct options message.
+ PyObject* default_pool = PyUpb_DescriptorPool_GetDefaultPool();
+ const upb_DefPool* symtab = PyUpb_DescriptorPool_GetSymtab(default_pool);
+ const upb_MessageDef* m = upb_DefPool_FindMessageByName(symtab, msg_name);
+ assert(m);
+
+ // Copy the options message from C to Python using serialize+parse.
+ // We don't wrap the C object directly because there is no guarantee that
+ // the descriptor_pb2 that was loaded at runtime has the same members or
+ // layout as the C types that were compiled in.
+ size_t size;
+ PyObject* py_arena = PyUpb_Arena_New();
+ upb_Arena* arena = PyUpb_Arena_Get(py_arena);
+ char* pb;
+ // TODO(b/235839510): Need to correctly handle failed return codes.
+ (void)upb_Encode(opts, layout, 0, arena, &pb, &size);
+ const upb_MiniTable* opts2_layout = upb_MessageDef_MiniTable(m);
+ upb_Message* opts2 = upb_Message_New(opts2_layout, arena);
+ assert(opts2);
+ upb_DecodeStatus ds =
+ upb_Decode(pb, size, opts2, opts2_layout,
+ upb_DefPool_ExtensionRegistry(symtab), 0, arena);
+ (void)ds;
+ assert(ds == kUpb_DecodeStatus_Ok);
+
+ self->options = PyUpb_Message_Get(opts2, m, py_arena);
+ Py_DECREF(py_arena);
+ }
+
+ Py_INCREF(self->options);
+ return self->options;
+}
+
+typedef void* PyUpb_ToProto_Func(const void* def, upb_Arena* arena);
+
+static PyObject* PyUpb_DescriptorBase_GetSerializedProto(
+ PyObject* _self, PyUpb_ToProto_Func* func, const upb_MiniTable* layout) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ upb_Arena* arena = upb_Arena_New();
+ if (!arena) PYUPB_RETURN_OOM;
+ upb_Message* proto = func(self->def, arena);
+ if (!proto) goto oom;
+ size_t size;
+ char* pb;
+ upb_EncodeStatus status = upb_Encode(proto, layout, 0, arena, &pb, &size);
+ if (status) goto oom; // TODO(b/235839510) non-oom errors are possible here
+ PyObject* str = PyBytes_FromStringAndSize(pb, size);
+ upb_Arena_Free(arena);
+ return str;
+
+oom:
+ upb_Arena_Free(arena);
+ PyErr_SetNone(PyExc_MemoryError);
+ return NULL;
+}
+
+static PyObject* PyUpb_DescriptorBase_CopyToProto(PyObject* _self,
+ PyUpb_ToProto_Func* func,
+ const upb_MiniTable* layout,
+ const char* expected_type,
+ PyObject* py_proto) {
+ if (!PyUpb_Message_Verify(py_proto)) return NULL;
+ const upb_MessageDef* m = PyUpb_Message_GetMsgdef(py_proto);
+ const char* type = upb_MessageDef_FullName(m);
+ if (strcmp(type, expected_type) != 0) {
+ PyErr_Format(
+ PyExc_TypeError,
+ "CopyToProto: message is of incorrect type '%s' (expected '%s'", type,
+ expected_type);
+ return NULL;
+ }
+ PyObject* serialized =
+ PyUpb_DescriptorBase_GetSerializedProto(_self, func, layout);
+ if (!serialized) return NULL;
+ PyObject* ret = PyUpb_Message_MergeFromString(py_proto, serialized);
+ Py_DECREF(serialized);
+ return ret;
+}
+
+static void PyUpb_DescriptorBase_Dealloc(PyUpb_DescriptorBase* base) {
+ PyUpb_ObjCache_Delete(base->def);
+ Py_DECREF(base->pool);
+ Py_XDECREF(base->options);
+ PyUpb_Dealloc(base);
+}
+
+#define DESCRIPTOR_BASE_SLOTS \
+ {Py_tp_new, (void*)&PyUpb_Forbidden_New}, { \
+ Py_tp_dealloc, (void*)&PyUpb_DescriptorBase_Dealloc \
+ }
+
+// -----------------------------------------------------------------------------
+// Descriptor
+// -----------------------------------------------------------------------------
+
+PyObject* PyUpb_Descriptor_Get(const upb_MessageDef* m) {
+ assert(m);
+ const upb_FileDef* file = upb_MessageDef_File(m);
+ return PyUpb_DescriptorBase_Get(kPyUpb_Descriptor, m, file);
+}
+
+PyObject* PyUpb_Descriptor_GetClass(const upb_MessageDef* m) {
+ PyObject* ret = PyUpb_ObjCache_Get(upb_MessageDef_MiniTable(m));
+ return ret;
+}
+
+// The LookupNested*() functions provide name lookup for entities nested inside
+// a message. This uses the symtab's table, which requires that the symtab is
+// not being mutated concurrently. We can guarantee this for Python-owned
+// symtabs, but upb cannot guarantee it in general for an arbitrary
+// `const upb_MessageDef*`.
+
+static const void* PyUpb_Descriptor_LookupNestedMessage(const upb_MessageDef* m,
+ const char* name) {
+ const upb_FileDef* filedef = upb_MessageDef_File(m);
+ const upb_DefPool* symtab = upb_FileDef_Pool(filedef);
+ PyObject* qname =
+ PyUnicode_FromFormat("%s.%s", upb_MessageDef_FullName(m), name);
+ const upb_MessageDef* ret = upb_DefPool_FindMessageByName(
+ symtab, PyUnicode_AsUTF8AndSize(qname, NULL));
+ Py_DECREF(qname);
+ return ret;
+}
+
+static const void* PyUpb_Descriptor_LookupNestedEnum(const upb_MessageDef* m,
+ const char* name) {
+ const upb_FileDef* filedef = upb_MessageDef_File(m);
+ const upb_DefPool* symtab = upb_FileDef_Pool(filedef);
+ PyObject* qname =
+ PyUnicode_FromFormat("%s.%s", upb_MessageDef_FullName(m), name);
+ const upb_EnumDef* ret =
+ upb_DefPool_FindEnumByName(symtab, PyUnicode_AsUTF8AndSize(qname, NULL));
+ Py_DECREF(qname);
+ return ret;
+}
+
+static const void* PyUpb_Descriptor_LookupNestedExtension(
+ const upb_MessageDef* m, const char* name) {
+ const upb_FileDef* filedef = upb_MessageDef_File(m);
+ const upb_DefPool* symtab = upb_FileDef_Pool(filedef);
+ PyObject* qname =
+ PyUnicode_FromFormat("%s.%s", upb_MessageDef_FullName(m), name);
+ const upb_FieldDef* ret = upb_DefPool_FindExtensionByName(
+ symtab, PyUnicode_AsUTF8AndSize(qname, NULL));
+ Py_DECREF(qname);
+ return ret;
+}
+
+static PyObject* PyUpb_Descriptor_GetExtensionRanges(PyObject* _self,
+ void* closure) {
+ PyUpb_DescriptorBase* self = (PyUpb_DescriptorBase*)_self;
+ int n = upb_MessageDef_ExtensionRangeCount(self->def);
+ PyObject* range_list = PyList_New(n);
+
+ for (int i = 0; i < n; i++) {
+ const upb_ExtensionRange* range =
+ upb_MessageDef_ExtensionRange(self->def, i);
+ PyObject* start = PyLong_FromLong(upb_ExtensionRange_Start(range));
+ PyObject* end = PyLong_FromLong(upb_ExtensionRange_End(range));
+ PyList_SetItem(range_list, i, PyTuple_Pack(2, start, end));
+ }
+
+ return range_list;
+}
+
+static PyObject* PyUpb_Descriptor_GetExtensions(PyObject* _self,
+ void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ static PyUpb_GenericSequence_Funcs funcs = {
+ (void*)&upb_MessageDef_NestedExtensionCount,
+ (void*)&upb_MessageDef_NestedExtension,
+ (void*)&PyUpb_FieldDescriptor_Get,
+ };
+ return PyUpb_GenericSequence_New(&funcs, self->def, self->pool);
+}
+
+static PyObject* PyUpb_Descriptor_GetExtensionsByName(PyObject* _self,
+ void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ static PyUpb_ByNameMap_Funcs funcs = {
+ {
+ (void*)&upb_MessageDef_NestedExtensionCount,
+ (void*)&upb_MessageDef_NestedExtension,
+ (void*)&PyUpb_FieldDescriptor_Get,
+ },
+ (void*)&PyUpb_Descriptor_LookupNestedExtension,
+ (void*)&upb_FieldDef_Name,
+ };
+ return PyUpb_ByNameMap_New(&funcs, self->def, self->pool);
+}
+
+static PyObject* PyUpb_Descriptor_GetEnumTypes(PyObject* _self, void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ static PyUpb_GenericSequence_Funcs funcs = {
+ (void*)&upb_MessageDef_NestedEnumCount,
+ (void*)&upb_MessageDef_NestedEnum,
+ (void*)&PyUpb_EnumDescriptor_Get,
+ };
+ return PyUpb_GenericSequence_New(&funcs, self->def, self->pool);
+}
+
+static PyObject* PyUpb_Descriptor_GetOneofs(PyObject* _self, void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ static PyUpb_GenericSequence_Funcs funcs = {
+ (void*)&upb_MessageDef_OneofCount,
+ (void*)&upb_MessageDef_Oneof,
+ (void*)&PyUpb_OneofDescriptor_Get,
+ };
+ return PyUpb_GenericSequence_New(&funcs, self->def, self->pool);
+}
+
+static PyObject* PyUpb_Descriptor_GetOptions(PyObject* _self, PyObject* args) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ return PyUpb_DescriptorBase_GetOptions(
+ self, upb_MessageDef_Options(self->def), &google_protobuf_MessageOptions_msg_init,
+ PYUPB_DESCRIPTOR_PROTO_PACKAGE ".MessageOptions");
+}
+
+static PyObject* PyUpb_Descriptor_CopyToProto(PyObject* _self,
+ PyObject* py_proto) {
+ return PyUpb_DescriptorBase_CopyToProto(
+ _self, (PyUpb_ToProto_Func*)&upb_MessageDef_ToProto,
+ &google_protobuf_DescriptorProto_msg_init,
+ PYUPB_DESCRIPTOR_PROTO_PACKAGE ".DescriptorProto", py_proto);
+}
+
+static PyObject* PyUpb_Descriptor_EnumValueName(PyObject* _self,
+ PyObject* args) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ const char* enum_name;
+ int number;
+ if (!PyArg_ParseTuple(args, "si", &enum_name, &number)) return NULL;
+ const upb_EnumDef* e =
+ PyUpb_Descriptor_LookupNestedEnum(self->def, enum_name);
+ if (!e) {
+ PyErr_SetString(PyExc_KeyError, enum_name);
+ return NULL;
+ }
+ const upb_EnumValueDef* ev = upb_EnumDef_FindValueByNumber(e, number);
+ if (!ev) {
+ PyErr_Format(PyExc_KeyError, "%d", number);
+ return NULL;
+ }
+ return PyUnicode_FromString(upb_EnumValueDef_Name(ev));
+}
+
+static PyObject* PyUpb_Descriptor_GetFieldsByName(PyObject* _self,
+ void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ static PyUpb_ByNameMap_Funcs funcs = {
+ {
+ (void*)&upb_MessageDef_FieldCount,
+ (void*)&upb_MessageDef_Field,
+ (void*)&PyUpb_FieldDescriptor_Get,
+ },
+ (void*)&upb_MessageDef_FindFieldByName,
+ (void*)&upb_FieldDef_Name,
+ };
+ return PyUpb_ByNameMap_New(&funcs, self->def, self->pool);
+}
+
+static PyObject* PyUpb_Descriptor_GetFieldsByCamelCaseName(PyObject* _self,
+ void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ static PyUpb_ByNameMap_Funcs funcs = {
+ {
+ (void*)&upb_MessageDef_FieldCount,
+ (void*)&upb_MessageDef_Field,
+ (void*)&PyUpb_FieldDescriptor_Get,
+ },
+ (void*)&upb_MessageDef_FindByJsonName,
+ (void*)&upb_FieldDef_JsonName,
+ };
+ return PyUpb_ByNameMap_New(&funcs, self->def, self->pool);
+}
+
+static PyObject* PyUpb_Descriptor_GetFieldsByNumber(PyObject* _self,
+ void* closure) {
+ static PyUpb_ByNumberMap_Funcs funcs = {
+ {
+ (void*)&upb_MessageDef_FieldCount,
+ (void*)&upb_MessageDef_Field,
+ (void*)&PyUpb_FieldDescriptor_Get,
+ },
+ (void*)&upb_MessageDef_FindFieldByNumber,
+ (void*)&upb_FieldDef_Number,
+ };
+ PyUpb_DescriptorBase* self = (void*)_self;
+ return PyUpb_ByNumberMap_New(&funcs, self->def, self->pool);
+}
+
+static PyObject* PyUpb_Descriptor_GetNestedTypes(PyObject* _self,
+ void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ static PyUpb_GenericSequence_Funcs funcs = {
+ (void*)&upb_MessageDef_NestedMessageCount,
+ (void*)&upb_MessageDef_NestedMessage,
+ (void*)&PyUpb_Descriptor_Get,
+ };
+ return PyUpb_GenericSequence_New(&funcs, self->def, self->pool);
+}
+
+static PyObject* PyUpb_Descriptor_GetNestedTypesByName(PyObject* _self,
+ void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ static PyUpb_ByNameMap_Funcs funcs = {
+ {
+ (void*)&upb_MessageDef_NestedMessageCount,
+ (void*)&upb_MessageDef_NestedMessage,
+ (void*)&PyUpb_Descriptor_Get,
+ },
+ (void*)&PyUpb_Descriptor_LookupNestedMessage,
+ (void*)&upb_MessageDef_Name,
+ };
+ return PyUpb_ByNameMap_New(&funcs, self->def, self->pool);
+}
+
+static PyObject* PyUpb_Descriptor_GetContainingType(PyObject* _self,
+ void* closure) {
+ // upb does not natively store the lexical parent of a message type, but we
+ // can derive it with some string manipulation and a lookup.
+ PyUpb_DescriptorBase* self = (void*)_self;
+ const upb_MessageDef* m = self->def;
+ const upb_FileDef* file = upb_MessageDef_File(m);
+ const upb_DefPool* symtab = upb_FileDef_Pool(file);
+ const char* full_name = upb_MessageDef_FullName(m);
+ const char* last_dot = strrchr(full_name, '.');
+ if (!last_dot) Py_RETURN_NONE;
+ const upb_MessageDef* parent = upb_DefPool_FindMessageByNameWithSize(
+ symtab, full_name, last_dot - full_name);
+ if (!parent) Py_RETURN_NONE;
+ return PyUpb_Descriptor_Get(parent);
+}
+
+static PyObject* PyUpb_Descriptor_GetEnumTypesByName(PyObject* _self,
+ void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ static PyUpb_ByNameMap_Funcs funcs = {
+ {
+ (void*)&upb_MessageDef_NestedEnumCount,
+ (void*)&upb_MessageDef_NestedEnum,
+ (void*)&PyUpb_EnumDescriptor_Get,
+ },
+ (void*)&PyUpb_Descriptor_LookupNestedEnum,
+ (void*)&upb_EnumDef_Name,
+ };
+ return PyUpb_ByNameMap_New(&funcs, self->def, self->pool);
+}
+
+static PyObject* PyUpb_Descriptor_GetIsExtendable(PyObject* _self,
+ void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ if (upb_MessageDef_ExtensionRangeCount(self->def) > 0) {
+ Py_RETURN_TRUE;
+ } else {
+ Py_RETURN_FALSE;
+ }
+}
+
+static PyObject* PyUpb_Descriptor_GetFullName(PyObject* self, void* closure) {
+ const upb_MessageDef* msgdef = PyUpb_Descriptor_GetDef(self);
+ return PyUnicode_FromString(upb_MessageDef_FullName(msgdef));
+}
+
+static PyObject* PyUpb_Descriptor_GetConcreteClass(PyObject* self,
+ void* closure) {
+ const upb_MessageDef* msgdef = PyUpb_Descriptor_GetDef(self);
+ return PyUpb_Descriptor_GetClass(msgdef);
+}
+
+static PyObject* PyUpb_Descriptor_GetFile(PyObject* self, void* closure) {
+ const upb_MessageDef* msgdef = PyUpb_Descriptor_GetDef(self);
+ return PyUpb_FileDescriptor_Get(upb_MessageDef_File(msgdef));
+}
+
+static PyObject* PyUpb_Descriptor_GetFields(PyObject* _self, void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ static PyUpb_GenericSequence_Funcs funcs = {
+ (void*)&upb_MessageDef_FieldCount,
+ (void*)&upb_MessageDef_Field,
+ (void*)&PyUpb_FieldDescriptor_Get,
+ };
+ return PyUpb_GenericSequence_New(&funcs, self->def, self->pool);
+}
+
+static PyObject* PyUpb_Descriptor_GetHasOptions(PyObject* _self,
+ void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ return PyBool_FromLong(upb_MessageDef_HasOptions(self->def));
+}
+
+static PyObject* PyUpb_Descriptor_GetName(PyObject* self, void* closure) {
+ const upb_MessageDef* msgdef = PyUpb_Descriptor_GetDef(self);
+ return PyUnicode_FromString(upb_MessageDef_Name(msgdef));
+}
+
+static PyObject* PyUpb_Descriptor_GetEnumValuesByName(PyObject* _self,
+ void* closure) {
+ // upb does not natively store any table containing all nested values.
+ // Consider:
+ // message M {
+ // enum E1 {
+ // A = 0;
+ // B = 1;
+ // }
+ // enum E2 {
+ // C = 0;
+ // D = 1;
+ // }
+ // }
+ //
+ // In this case, upb stores tables for E1 and E2, but it does not store a
+ // table for M that combines them (it is rarely needed and costs precious
+ // space and time to build).
+ //
+ // To work around this, we build an actual Python dict whenever a user
+ // actually asks for this.
+ PyUpb_DescriptorBase* self = (void*)_self;
+ PyObject* ret = PyDict_New();
+ if (!ret) return NULL;
+ int enum_count = upb_MessageDef_NestedEnumCount(self->def);
+ for (int i = 0; i < enum_count; i++) {
+ const upb_EnumDef* e = upb_MessageDef_NestedEnum(self->def, i);
+ int value_count = upb_EnumDef_ValueCount(e);
+ for (int j = 0; j < value_count; j++) {
+ // Collisions should be impossible here, as uniqueness is checked by
+ // protoc (this is an invariant of the protobuf language). However this
+ // uniqueness constraint is not currently checked by upb/def.c at load
+ // time, so if the user supplies a manually-constructed descriptor that
+ // does not respect this constraint, a collision could be possible and the
+ // last-defined enumerator would win. This could be seen as an argument
+ // for having upb actually build the table at load time, thus checking the
+ // constraint proactively, but upb is always checking a subset of the full
+ // validation performed by C++, and we have to pick and choose the biggest
+ // bang for the buck.
+ const upb_EnumValueDef* ev = upb_EnumDef_Value(e, j);
+ const char* name = upb_EnumValueDef_Name(ev);
+ PyObject* val = PyUpb_EnumValueDescriptor_Get(ev);
+ if (!val || PyDict_SetItemString(ret, name, val) < 0) {
+ Py_XDECREF(val);
+ Py_DECREF(ret);
+ return NULL;
+ }
+ Py_DECREF(val);
+ }
+ }
+ return ret;
+}
+
+static PyObject* PyUpb_Descriptor_GetOneofsByName(PyObject* _self,
+ void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ static PyUpb_ByNameMap_Funcs funcs = {
+ {
+ (void*)&upb_MessageDef_OneofCount,
+ (void*)&upb_MessageDef_Oneof,
+ (void*)&PyUpb_OneofDescriptor_Get,
+ },
+ (void*)&upb_MessageDef_FindOneofByName,
+ (void*)&upb_OneofDef_Name,
+ };
+ return PyUpb_ByNameMap_New(&funcs, self->def, self->pool);
+}
+
+static PyObject* PyUpb_Descriptor_GetSyntax(PyObject* self, void* closure) {
+ const upb_MessageDef* msgdef = PyUpb_Descriptor_GetDef(self);
+ const char* syntax =
+ upb_MessageDef_Syntax(msgdef) == kUpb_Syntax_Proto2 ? "proto2" : "proto3";
+ return PyUnicode_InternFromString(syntax);
+}
+
+static PyGetSetDef PyUpb_Descriptor_Getters[] = {
+ {"name", PyUpb_Descriptor_GetName, NULL, "Last name"},
+ {"full_name", PyUpb_Descriptor_GetFullName, NULL, "Full name"},
+ {"_concrete_class", PyUpb_Descriptor_GetConcreteClass, NULL,
+ "concrete class"},
+ {"file", PyUpb_Descriptor_GetFile, NULL, "File descriptor"},
+ {"fields", PyUpb_Descriptor_GetFields, NULL, "Fields sequence"},
+ {"fields_by_name", PyUpb_Descriptor_GetFieldsByName, NULL,
+ "Fields by name"},
+ {"fields_by_camelcase_name", PyUpb_Descriptor_GetFieldsByCamelCaseName,
+ NULL, "Fields by camelCase name"},
+ {"fields_by_number", PyUpb_Descriptor_GetFieldsByNumber, NULL,
+ "Fields by number"},
+ {"nested_types", PyUpb_Descriptor_GetNestedTypes, NULL,
+ "Nested types sequence"},
+ {"nested_types_by_name", PyUpb_Descriptor_GetNestedTypesByName, NULL,
+ "Nested types by name"},
+ {"extensions", PyUpb_Descriptor_GetExtensions, NULL, "Extensions Sequence"},
+ {"extensions_by_name", PyUpb_Descriptor_GetExtensionsByName, NULL,
+ "Extensions by name"},
+ {"extension_ranges", PyUpb_Descriptor_GetExtensionRanges, NULL,
+ "Extension ranges"},
+ {"enum_types", PyUpb_Descriptor_GetEnumTypes, NULL, "Enum sequence"},
+ {"enum_types_by_name", PyUpb_Descriptor_GetEnumTypesByName, NULL,
+ "Enum types by name"},
+ {"enum_values_by_name", PyUpb_Descriptor_GetEnumValuesByName, NULL,
+ "Enum values by name"},
+ {"oneofs_by_name", PyUpb_Descriptor_GetOneofsByName, NULL,
+ "Oneofs by name"},
+ {"oneofs", PyUpb_Descriptor_GetOneofs, NULL, "Oneofs Sequence"},
+ {"containing_type", PyUpb_Descriptor_GetContainingType, NULL,
+ "Containing type"},
+ {"is_extendable", PyUpb_Descriptor_GetIsExtendable, NULL},
+ {"has_options", PyUpb_Descriptor_GetHasOptions, NULL, "Has Options"},
+ {"syntax", &PyUpb_Descriptor_GetSyntax, NULL, "Syntax"},
+ {NULL}};
+
+static PyMethodDef PyUpb_Descriptor_Methods[] = {
+ {"GetOptions", PyUpb_Descriptor_GetOptions, METH_NOARGS},
+ {"CopyToProto", PyUpb_Descriptor_CopyToProto, METH_O},
+ {"EnumValueName", PyUpb_Descriptor_EnumValueName, METH_VARARGS},
+ {NULL}};
+
+static PyType_Slot PyUpb_Descriptor_Slots[] = {
+ DESCRIPTOR_BASE_SLOTS,
+ {Py_tp_methods, PyUpb_Descriptor_Methods},
+ {Py_tp_getset, PyUpb_Descriptor_Getters},
+ {0, NULL}};
+
+static PyType_Spec PyUpb_Descriptor_Spec = {
+ PYUPB_MODULE_NAME ".Descriptor", // tp_name
+ sizeof(PyUpb_DescriptorBase), // tp_basicsize
+ 0, // tp_itemsize
+ Py_TPFLAGS_DEFAULT, // tp_flags
+ PyUpb_Descriptor_Slots,
+};
+
+const upb_MessageDef* PyUpb_Descriptor_GetDef(PyObject* _self) {
+ PyUpb_DescriptorBase* self =
+ PyUpb_DescriptorBase_Check(_self, kPyUpb_Descriptor);
+ return self ? self->def : NULL;
+}
+
+// -----------------------------------------------------------------------------
+// EnumDescriptor
+// -----------------------------------------------------------------------------
+
+PyObject* PyUpb_EnumDescriptor_Get(const upb_EnumDef* enumdef) {
+ const upb_FileDef* file = upb_EnumDef_File(enumdef);
+ return PyUpb_DescriptorBase_Get(kPyUpb_EnumDescriptor, enumdef, file);
+}
+
+const upb_EnumDef* PyUpb_EnumDescriptor_GetDef(PyObject* _self) {
+ PyUpb_DescriptorBase* self =
+ PyUpb_DescriptorBase_Check(_self, kPyUpb_EnumDescriptor);
+ return self ? self->def : NULL;
+}
+
+static PyObject* PyUpb_EnumDescriptor_GetFullName(PyObject* self,
+ void* closure) {
+ const upb_EnumDef* enumdef = PyUpb_EnumDescriptor_GetDef(self);
+ return PyUnicode_FromString(upb_EnumDef_FullName(enumdef));
+}
+
+static PyObject* PyUpb_EnumDescriptor_GetName(PyObject* self, void* closure) {
+ const upb_EnumDef* enumdef = PyUpb_EnumDescriptor_GetDef(self);
+ return PyUnicode_FromString(upb_EnumDef_Name(enumdef));
+}
+
+static PyObject* PyUpb_EnumDescriptor_GetFile(PyObject* self, void* closure) {
+ const upb_EnumDef* enumdef = PyUpb_EnumDescriptor_GetDef(self);
+ return PyUpb_FileDescriptor_Get(upb_EnumDef_File(enumdef));
+}
+
+static PyObject* PyUpb_EnumDescriptor_GetValues(PyObject* _self,
+ void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ static PyUpb_GenericSequence_Funcs funcs = {
+ (void*)&upb_EnumDef_ValueCount,
+ (void*)&upb_EnumDef_Value,
+ (void*)&PyUpb_EnumValueDescriptor_Get,
+ };
+ return PyUpb_GenericSequence_New(&funcs, self->def, self->pool);
+}
+
+static PyObject* PyUpb_EnumDescriptor_GetValuesByName(PyObject* _self,
+ void* closure) {
+ static PyUpb_ByNameMap_Funcs funcs = {
+ {
+ (void*)&upb_EnumDef_ValueCount,
+ (void*)&upb_EnumDef_Value,
+ (void*)&PyUpb_EnumValueDescriptor_Get,
+ },
+ (void*)&upb_EnumDef_FindValueByName,
+ (void*)&upb_EnumValueDef_Name,
+ };
+ PyUpb_DescriptorBase* self = (void*)_self;
+ return PyUpb_ByNameMap_New(&funcs, self->def, self->pool);
+}
+
+static PyObject* PyUpb_EnumDescriptor_GetValuesByNumber(PyObject* _self,
+ void* closure) {
+ static PyUpb_ByNumberMap_Funcs funcs = {
+ {
+ (void*)&upb_EnumDef_ValueCount,
+ (void*)&upb_EnumDef_Value,
+ (void*)&PyUpb_EnumValueDescriptor_Get,
+ },
+ (void*)&upb_EnumDef_FindValueByNumber,
+ (void*)&upb_EnumValueDef_Number,
+ };
+ PyUpb_DescriptorBase* self = (void*)_self;
+ return PyUpb_ByNumberMap_New(&funcs, self->def, self->pool);
+}
+
+static PyObject* PyUpb_EnumDescriptor_GetContainingType(PyObject* _self,
+ void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ const upb_MessageDef* m = upb_EnumDef_ContainingType(self->def);
+ if (!m) Py_RETURN_NONE;
+ return PyUpb_Descriptor_Get(m);
+}
+
+static PyObject* PyUpb_EnumDescriptor_GetHasOptions(PyObject* _self,
+ void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ return PyBool_FromLong(upb_EnumDef_HasOptions(self->def));
+}
+
+static PyObject* PyUpb_EnumDescriptor_GetIsClosed(PyObject* _self,
+ void* closure) {
+ const upb_EnumDef* enumdef = PyUpb_EnumDescriptor_GetDef(_self);
+ return PyBool_FromLong(upb_EnumDef_IsClosed(enumdef));
+}
+
+static PyObject* PyUpb_EnumDescriptor_GetOptions(PyObject* _self,
+ PyObject* args) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ return PyUpb_DescriptorBase_GetOptions(
+ self, upb_EnumDef_Options(self->def), &google_protobuf_EnumOptions_msg_init,
+ PYUPB_DESCRIPTOR_PROTO_PACKAGE ".EnumOptions");
+}
+
+static PyObject* PyUpb_EnumDescriptor_CopyToProto(PyObject* _self,
+ PyObject* py_proto) {
+ return PyUpb_DescriptorBase_CopyToProto(
+ _self, (PyUpb_ToProto_Func*)&upb_EnumDef_ToProto,
+ &google_protobuf_EnumDescriptorProto_msg_init,
+ PYUPB_DESCRIPTOR_PROTO_PACKAGE ".EnumDescriptorProto", py_proto);
+}
+
+static PyGetSetDef PyUpb_EnumDescriptor_Getters[] = {
+ {"full_name", PyUpb_EnumDescriptor_GetFullName, NULL, "Full name"},
+ {"name", PyUpb_EnumDescriptor_GetName, NULL, "last name"},
+ {"file", PyUpb_EnumDescriptor_GetFile, NULL, "File descriptor"},
+ {"values", PyUpb_EnumDescriptor_GetValues, NULL, "values"},
+ {"values_by_name", PyUpb_EnumDescriptor_GetValuesByName, NULL,
+ "Enum values by name"},
+ {"values_by_number", PyUpb_EnumDescriptor_GetValuesByNumber, NULL,
+ "Enum values by number"},
+ {"containing_type", PyUpb_EnumDescriptor_GetContainingType, NULL,
+ "Containing type"},
+ {"has_options", PyUpb_EnumDescriptor_GetHasOptions, NULL, "Has Options"},
+ {"is_closed", PyUpb_EnumDescriptor_GetIsClosed, NULL,
+ "Checks if the enum is closed"},
+ {NULL}};
+
+static PyMethodDef PyUpb_EnumDescriptor_Methods[] = {
+ {"GetOptions", PyUpb_EnumDescriptor_GetOptions, METH_NOARGS},
+ {"CopyToProto", PyUpb_EnumDescriptor_CopyToProto, METH_O},
+ {NULL}};
+
+static PyType_Slot PyUpb_EnumDescriptor_Slots[] = {
+ DESCRIPTOR_BASE_SLOTS,
+ {Py_tp_methods, PyUpb_EnumDescriptor_Methods},
+ {Py_tp_getset, PyUpb_EnumDescriptor_Getters},
+ {0, NULL}};
+
+static PyType_Spec PyUpb_EnumDescriptor_Spec = {
+ PYUPB_MODULE_NAME ".EnumDescriptor", // tp_name
+ sizeof(PyUpb_DescriptorBase), // tp_basicsize
+ 0, // tp_itemsize
+ Py_TPFLAGS_DEFAULT, // tp_flags
+ PyUpb_EnumDescriptor_Slots,
+};
+
+// -----------------------------------------------------------------------------
+// EnumValueDescriptor
+// -----------------------------------------------------------------------------
+
+PyObject* PyUpb_EnumValueDescriptor_Get(const upb_EnumValueDef* ev) {
+ const upb_FileDef* file = upb_EnumDef_File(upb_EnumValueDef_Enum(ev));
+ return PyUpb_DescriptorBase_Get(kPyUpb_EnumValueDescriptor, ev, file);
+}
+
+static PyObject* PyUpb_EnumValueDescriptor_GetName(PyObject* self,
+ void* closure) {
+ PyUpb_DescriptorBase* base = (PyUpb_DescriptorBase*)self;
+ return PyUnicode_FromString(upb_EnumValueDef_Name(base->def));
+}
+
+static PyObject* PyUpb_EnumValueDescriptor_GetNumber(PyObject* self,
+ void* closure) {
+ PyUpb_DescriptorBase* base = (PyUpb_DescriptorBase*)self;
+ return PyLong_FromLong(upb_EnumValueDef_Number(base->def));
+}
+
+static PyObject* PyUpb_EnumValueDescriptor_GetIndex(PyObject* self,
+ void* closure) {
+ PyUpb_DescriptorBase* base = (PyUpb_DescriptorBase*)self;
+ return PyLong_FromLong(upb_EnumValueDef_Index(base->def));
+}
+
+static PyObject* PyUpb_EnumValueDescriptor_GetType(PyObject* self,
+ void* closure) {
+ PyUpb_DescriptorBase* base = (PyUpb_DescriptorBase*)self;
+ return PyUpb_EnumDescriptor_Get(upb_EnumValueDef_Enum(base->def));
+}
+
+static PyObject* PyUpb_EnumValueDescriptor_GetHasOptions(PyObject* _self,
+ void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ return PyBool_FromLong(upb_EnumValueDef_HasOptions(self->def));
+}
+
+static PyObject* PyUpb_EnumValueDescriptor_GetOptions(PyObject* _self,
+ PyObject* args) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ return PyUpb_DescriptorBase_GetOptions(
+ self, upb_EnumValueDef_Options(self->def),
+ &google_protobuf_EnumValueOptions_msg_init,
+ PYUPB_DESCRIPTOR_PROTO_PACKAGE ".EnumValueOptions");
+}
+
+static PyGetSetDef PyUpb_EnumValueDescriptor_Getters[] = {
+ {"name", PyUpb_EnumValueDescriptor_GetName, NULL, "name"},
+ {"number", PyUpb_EnumValueDescriptor_GetNumber, NULL, "number"},
+ {"index", PyUpb_EnumValueDescriptor_GetIndex, NULL, "index"},
+ {"type", PyUpb_EnumValueDescriptor_GetType, NULL, "index"},
+ {"has_options", PyUpb_EnumValueDescriptor_GetHasOptions, NULL,
+ "Has Options"},
+ {NULL}};
+
+static PyMethodDef PyUpb_EnumValueDescriptor_Methods[] = {
+ {
+ "GetOptions",
+ PyUpb_EnumValueDescriptor_GetOptions,
+ METH_NOARGS,
+ },
+ {NULL}};
+
+static PyType_Slot PyUpb_EnumValueDescriptor_Slots[] = {
+ DESCRIPTOR_BASE_SLOTS,
+ {Py_tp_methods, PyUpb_EnumValueDescriptor_Methods},
+ {Py_tp_getset, PyUpb_EnumValueDescriptor_Getters},
+ {0, NULL}};
+
+static PyType_Spec PyUpb_EnumValueDescriptor_Spec = {
+ PYUPB_MODULE_NAME ".EnumValueDescriptor", // tp_name
+ sizeof(PyUpb_DescriptorBase), // tp_basicsize
+ 0, // tp_itemsize
+ Py_TPFLAGS_DEFAULT, // tp_flags
+ PyUpb_EnumValueDescriptor_Slots,
+};
+
+// -----------------------------------------------------------------------------
+// FieldDescriptor
+// -----------------------------------------------------------------------------
+
+const upb_FieldDef* PyUpb_FieldDescriptor_GetDef(PyObject* _self) {
+ PyUpb_DescriptorBase* self =
+ PyUpb_DescriptorBase_Check(_self, kPyUpb_FieldDescriptor);
+ return self ? self->def : NULL;
+}
+
+PyObject* PyUpb_FieldDescriptor_Get(const upb_FieldDef* field) {
+ const upb_FileDef* file = upb_FieldDef_File(field);
+ return PyUpb_DescriptorBase_Get(kPyUpb_FieldDescriptor, field, file);
+}
+
+static PyObject* PyUpb_FieldDescriptor_GetFullName(PyUpb_DescriptorBase* self,
+ void* closure) {
+ return PyUnicode_FromString(upb_FieldDef_FullName(self->def));
+}
+
+static PyObject* PyUpb_FieldDescriptor_GetName(PyUpb_DescriptorBase* self,
+ void* closure) {
+ return PyUnicode_FromString(upb_FieldDef_Name(self->def));
+}
+
+static PyObject* PyUpb_FieldDescriptor_GetCamelCaseName(
+ PyUpb_DescriptorBase* self, void* closure) {
+ // TODO: Ok to use jsonname here?
+ return PyUnicode_FromString(upb_FieldDef_JsonName(self->def));
+}
+
+static PyObject* PyUpb_FieldDescriptor_GetJsonName(PyUpb_DescriptorBase* self,
+ void* closure) {
+ return PyUnicode_FromString(upb_FieldDef_JsonName(self->def));
+}
+
+static PyObject* PyUpb_FieldDescriptor_GetFile(PyUpb_DescriptorBase* self,
+ void* closure) {
+ const upb_FileDef* file = upb_FieldDef_File(self->def);
+ if (!file) Py_RETURN_NONE;
+ return PyUpb_FileDescriptor_Get(file);
+}
+
+static PyObject* PyUpb_FieldDescriptor_GetType(PyUpb_DescriptorBase* self,
+ void* closure) {
+ return PyLong_FromLong(upb_FieldDef_Type(self->def));
+}
+
+static PyObject* PyUpb_FieldDescriptor_GetCppType(PyUpb_DescriptorBase* self,
+ void* closure) {
+ // Enum values copied from descriptor.h in C++.
+ enum CppType {
+ CPPTYPE_INT32 = 1, // TYPE_INT32, TYPE_SINT32, TYPE_SFIXED32
+ CPPTYPE_INT64 = 2, // TYPE_INT64, TYPE_SINT64, TYPE_SFIXED64
+ CPPTYPE_UINT32 = 3, // TYPE_UINT32, TYPE_FIXED32
+ CPPTYPE_UINT64 = 4, // TYPE_UINT64, TYPE_FIXED64
+ CPPTYPE_DOUBLE = 5, // TYPE_DOUBLE
+ CPPTYPE_FLOAT = 6, // TYPE_FLOAT
+ CPPTYPE_BOOL = 7, // TYPE_BOOL
+ CPPTYPE_ENUM = 8, // TYPE_ENUM
+ CPPTYPE_STRING = 9, // TYPE_STRING, TYPE_BYTES
+ CPPTYPE_MESSAGE = 10, // TYPE_MESSAGE, TYPE_GROUP
+ };
+ static const uint8_t cpp_types[] = {
+ -1,
+ [kUpb_CType_Int32] = CPPTYPE_INT32,
+ [kUpb_CType_Int64] = CPPTYPE_INT64,
+ [kUpb_CType_UInt32] = CPPTYPE_UINT32,
+ [kUpb_CType_UInt64] = CPPTYPE_UINT64,
+ [kUpb_CType_Double] = CPPTYPE_DOUBLE,
+ [kUpb_CType_Float] = CPPTYPE_FLOAT,
+ [kUpb_CType_Bool] = CPPTYPE_BOOL,
+ [kUpb_CType_Enum] = CPPTYPE_ENUM,
+ [kUpb_CType_String] = CPPTYPE_STRING,
+ [kUpb_CType_Bytes] = CPPTYPE_STRING,
+ [kUpb_CType_Message] = CPPTYPE_MESSAGE,
+ };
+ return PyLong_FromLong(cpp_types[upb_FieldDef_CType(self->def)]);
+}
+
+static PyObject* PyUpb_FieldDescriptor_GetLabel(PyUpb_DescriptorBase* self,
+ void* closure) {
+ return PyLong_FromLong(upb_FieldDef_Label(self->def));
+}
+
+static PyObject* PyUpb_FieldDescriptor_GetIsExtension(
+ PyUpb_DescriptorBase* self, void* closure) {
+ return PyBool_FromLong(upb_FieldDef_IsExtension(self->def));
+}
+
+static PyObject* PyUpb_FieldDescriptor_GetNumber(PyUpb_DescriptorBase* self,
+ void* closure) {
+ return PyLong_FromLong(upb_FieldDef_Number(self->def));
+}
+
+static PyObject* PyUpb_FieldDescriptor_GetIndex(PyUpb_DescriptorBase* self,
+ void* closure) {
+ return PyLong_FromLong(upb_FieldDef_Index(self->def));
+}
+
+static PyObject* PyUpb_FieldDescriptor_GetMessageType(
+ PyUpb_DescriptorBase* self, void* closure) {
+ const upb_MessageDef* subdef = upb_FieldDef_MessageSubDef(self->def);
+ if (!subdef) Py_RETURN_NONE;
+ return PyUpb_Descriptor_Get(subdef);
+}
+
+static PyObject* PyUpb_FieldDescriptor_GetEnumType(PyUpb_DescriptorBase* self,
+ void* closure) {
+ const upb_EnumDef* enumdef = upb_FieldDef_EnumSubDef(self->def);
+ if (!enumdef) Py_RETURN_NONE;
+ return PyUpb_EnumDescriptor_Get(enumdef);
+}
+
+static PyObject* PyUpb_FieldDescriptor_GetContainingType(
+ PyUpb_DescriptorBase* self, void* closure) {
+ const upb_MessageDef* m = upb_FieldDef_ContainingType(self->def);
+ if (!m) Py_RETURN_NONE;
+ return PyUpb_Descriptor_Get(m);
+}
+
+static PyObject* PyUpb_FieldDescriptor_GetExtensionScope(
+ PyUpb_DescriptorBase* self, void* closure) {
+ const upb_MessageDef* m = upb_FieldDef_ExtensionScope(self->def);
+ if (!m) Py_RETURN_NONE;
+ return PyUpb_Descriptor_Get(m);
+}
+
+static PyObject* PyUpb_FieldDescriptor_HasDefaultValue(
+ PyUpb_DescriptorBase* self, void* closure) {
+ return PyBool_FromLong(upb_FieldDef_HasDefault(self->def));
+}
+
+static PyObject* PyUpb_FieldDescriptor_GetDefaultValue(
+ PyUpb_DescriptorBase* self, void* closure) {
+ const upb_FieldDef* f = self->def;
+ if (upb_FieldDef_IsRepeated(f)) return PyList_New(0);
+ if (upb_FieldDef_IsSubMessage(f)) Py_RETURN_NONE;
+ return PyUpb_UpbToPy(upb_FieldDef_Default(self->def), self->def, NULL);
+}
+
+static PyObject* PyUpb_FieldDescriptor_GetContainingOneof(
+ PyUpb_DescriptorBase* self, void* closure) {
+ const upb_OneofDef* oneof = upb_FieldDef_ContainingOneof(self->def);
+ if (!oneof) Py_RETURN_NONE;
+ return PyUpb_OneofDescriptor_Get(oneof);
+}
+
+static PyObject* PyUpb_FieldDescriptor_GetHasOptions(
+ PyUpb_DescriptorBase* _self, void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ return PyBool_FromLong(upb_FieldDef_HasOptions(self->def));
+}
+
+static PyObject* PyUpb_FieldDescriptor_GetHasPresence(
+ PyUpb_DescriptorBase* _self, void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ return PyBool_FromLong(upb_FieldDef_HasPresence(self->def));
+}
+
+static PyObject* PyUpb_FieldDescriptor_GetOptions(PyObject* _self,
+ PyObject* args) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ return PyUpb_DescriptorBase_GetOptions(
+ self, upb_FieldDef_Options(self->def), &google_protobuf_FieldOptions_msg_init,
+ PYUPB_DESCRIPTOR_PROTO_PACKAGE ".FieldOptions");
+}
+
+static PyGetSetDef PyUpb_FieldDescriptor_Getters[] = {
+ {"full_name", (getter)PyUpb_FieldDescriptor_GetFullName, NULL, "Full name"},
+ {"name", (getter)PyUpb_FieldDescriptor_GetName, NULL, "Unqualified name"},
+ {"camelcase_name", (getter)PyUpb_FieldDescriptor_GetCamelCaseName, NULL,
+ "CamelCase name"},
+ {"json_name", (getter)PyUpb_FieldDescriptor_GetJsonName, NULL, "Json name"},
+ {"file", (getter)PyUpb_FieldDescriptor_GetFile, NULL, "File Descriptor"},
+ {"type", (getter)PyUpb_FieldDescriptor_GetType, NULL, "Type"},
+ {"cpp_type", (getter)PyUpb_FieldDescriptor_GetCppType, NULL, "C++ Type"},
+ {"label", (getter)PyUpb_FieldDescriptor_GetLabel, NULL, "Label"},
+ {"number", (getter)PyUpb_FieldDescriptor_GetNumber, NULL, "Number"},
+ {"index", (getter)PyUpb_FieldDescriptor_GetIndex, NULL, "Index"},
+ {"default_value", (getter)PyUpb_FieldDescriptor_GetDefaultValue, NULL,
+ "Default Value"},
+ {"has_default_value", (getter)PyUpb_FieldDescriptor_HasDefaultValue},
+ {"is_extension", (getter)PyUpb_FieldDescriptor_GetIsExtension, NULL, "ID"},
+ // TODO(https://github.com/protocolbuffers/upb/issues/459)
+ //{ "id", (getter)GetID, NULL, "ID"},
+ {"message_type", (getter)PyUpb_FieldDescriptor_GetMessageType, NULL,
+ "Message type"},
+ {"enum_type", (getter)PyUpb_FieldDescriptor_GetEnumType, NULL, "Enum type"},
+ {"containing_type", (getter)PyUpb_FieldDescriptor_GetContainingType, NULL,
+ "Containing type"},
+ {"extension_scope", (getter)PyUpb_FieldDescriptor_GetExtensionScope, NULL,
+ "Extension scope"},
+ {"containing_oneof", (getter)PyUpb_FieldDescriptor_GetContainingOneof, NULL,
+ "Containing oneof"},
+ {"has_options", (getter)PyUpb_FieldDescriptor_GetHasOptions, NULL,
+ "Has Options"},
+ {"has_presence", (getter)PyUpb_FieldDescriptor_GetHasPresence, NULL,
+ "Has Presence"},
+ // TODO(https://github.com/protocolbuffers/upb/issues/459)
+ //{ "_options",
+ //(getter)NULL, (setter)SetOptions, "Options"}, { "_serialized_options",
+ //(getter)NULL, (setter)SetSerializedOptions, "Serialized Options"},
+ {NULL}};
+
+static PyMethodDef PyUpb_FieldDescriptor_Methods[] = {
+ {
+ "GetOptions",
+ PyUpb_FieldDescriptor_GetOptions,
+ METH_NOARGS,
+ },
+ {NULL}};
+
+static PyType_Slot PyUpb_FieldDescriptor_Slots[] = {
+ DESCRIPTOR_BASE_SLOTS,
+ {Py_tp_methods, PyUpb_FieldDescriptor_Methods},
+ {Py_tp_getset, PyUpb_FieldDescriptor_Getters},
+ {0, NULL}};
+
+static PyType_Spec PyUpb_FieldDescriptor_Spec = {
+ PYUPB_MODULE_NAME ".FieldDescriptor",
+ sizeof(PyUpb_DescriptorBase),
+ 0, // tp_itemsize
+ Py_TPFLAGS_DEFAULT,
+ PyUpb_FieldDescriptor_Slots,
+};
+
+// -----------------------------------------------------------------------------
+// FileDescriptor
+// -----------------------------------------------------------------------------
+
+PyObject* PyUpb_FileDescriptor_Get(const upb_FileDef* file) {
+ return PyUpb_DescriptorBase_Get(kPyUpb_FileDescriptor, file, file);
+}
+
+// These are not provided on upb_FileDef because they use the underlying
+// symtab's hash table. This works for Python because everything happens under
+// the GIL, but in general the caller has to guarantee that the symtab is not
+// being mutated concurrently.
+typedef const void* PyUpb_FileDescriptor_LookupFunc(const upb_DefPool*,
+ const char*);
+
+static const void* PyUpb_FileDescriptor_NestedLookup(
+ const upb_FileDef* filedef, const char* name,
+ PyUpb_FileDescriptor_LookupFunc* func) {
+ const upb_DefPool* symtab = upb_FileDef_Pool(filedef);
+ const char* package = upb_FileDef_Package(filedef);
+ if (strlen(package)) {
+ PyObject* qname = PyUnicode_FromFormat("%s.%s", package, name);
+ const void* ret = func(symtab, PyUnicode_AsUTF8AndSize(qname, NULL));
+ Py_DECREF(qname);
+ return ret;
+ } else {
+ return func(symtab, name);
+ }
+}
+
+static const void* PyUpb_FileDescriptor_LookupMessage(
+ const upb_FileDef* filedef, const char* name) {
+ return PyUpb_FileDescriptor_NestedLookup(
+ filedef, name, (void*)&upb_DefPool_FindMessageByName);
+}
+
+static const void* PyUpb_FileDescriptor_LookupEnum(const upb_FileDef* filedef,
+ const char* name) {
+ return PyUpb_FileDescriptor_NestedLookup(filedef, name,
+ (void*)&upb_DefPool_FindEnumByName);
+}
+
+static const void* PyUpb_FileDescriptor_LookupExtension(
+ const upb_FileDef* filedef, const char* name) {
+ return PyUpb_FileDescriptor_NestedLookup(
+ filedef, name, (void*)&upb_DefPool_FindExtensionByName);
+}
+
+static const void* PyUpb_FileDescriptor_LookupService(
+ const upb_FileDef* filedef, const char* name) {
+ return PyUpb_FileDescriptor_NestedLookup(
+ filedef, name, (void*)&upb_DefPool_FindServiceByName);
+}
+
+static PyObject* PyUpb_FileDescriptor_GetName(PyUpb_DescriptorBase* self,
+ void* closure) {
+ return PyUnicode_FromString(upb_FileDef_Name(self->def));
+}
+
+static PyObject* PyUpb_FileDescriptor_GetPool(PyObject* _self, void* closure) {
+ PyUpb_DescriptorBase* self = (PyUpb_DescriptorBase*)_self;
+ Py_INCREF(self->pool);
+ return self->pool;
+}
+
+static PyObject* PyUpb_FileDescriptor_GetPackage(PyObject* _self,
+ void* closure) {
+ PyUpb_DescriptorBase* self = (PyUpb_DescriptorBase*)_self;
+ return PyUnicode_FromString(upb_FileDef_Package(self->def));
+}
+
+static PyObject* PyUpb_FileDescriptor_GetSerializedPb(PyObject* self,
+ void* closure) {
+ return PyUpb_DescriptorBase_GetSerializedProto(
+ self, (PyUpb_ToProto_Func*)&upb_FileDef_ToProto,
+ &google_protobuf_FileDescriptorProto_msg_init);
+}
+
+static PyObject* PyUpb_FileDescriptor_GetMessageTypesByName(PyObject* _self,
+ void* closure) {
+ static PyUpb_ByNameMap_Funcs funcs = {
+ {
+ (void*)&upb_FileDef_TopLevelMessageCount,
+ (void*)&upb_FileDef_TopLevelMessage,
+ (void*)&PyUpb_Descriptor_Get,
+ },
+ (void*)&PyUpb_FileDescriptor_LookupMessage,
+ (void*)&upb_MessageDef_Name,
+ };
+ PyUpb_DescriptorBase* self = (void*)_self;
+ return PyUpb_ByNameMap_New(&funcs, self->def, self->pool);
+}
+
+static PyObject* PyUpb_FileDescriptor_GetEnumTypesByName(PyObject* _self,
+ void* closure) {
+ static PyUpb_ByNameMap_Funcs funcs = {
+ {
+ (void*)&upb_FileDef_TopLevelEnumCount,
+ (void*)&upb_FileDef_TopLevelEnum,
+ (void*)&PyUpb_EnumDescriptor_Get,
+ },
+ (void*)&PyUpb_FileDescriptor_LookupEnum,
+ (void*)&upb_EnumDef_Name,
+ };
+ PyUpb_DescriptorBase* self = (void*)_self;
+ return PyUpb_ByNameMap_New(&funcs, self->def, self->pool);
+}
+
+static PyObject* PyUpb_FileDescriptor_GetExtensionsByName(PyObject* _self,
+ void* closure) {
+ static PyUpb_ByNameMap_Funcs funcs = {
+ {
+ (void*)&upb_FileDef_TopLevelExtensionCount,
+ (void*)&upb_FileDef_TopLevelExtension,
+ (void*)&PyUpb_FieldDescriptor_Get,
+ },
+ (void*)&PyUpb_FileDescriptor_LookupExtension,
+ (void*)&upb_FieldDef_Name,
+ };
+ PyUpb_DescriptorBase* self = (void*)_self;
+ return PyUpb_ByNameMap_New(&funcs, self->def, self->pool);
+}
+
+static PyObject* PyUpb_FileDescriptor_GetServicesByName(PyObject* _self,
+ void* closure) {
+ static PyUpb_ByNameMap_Funcs funcs = {
+ {
+ (void*)&upb_FileDef_ServiceCount,
+ (void*)&upb_FileDef_Service,
+ (void*)&PyUpb_ServiceDescriptor_Get,
+ },
+ (void*)&PyUpb_FileDescriptor_LookupService,
+ (void*)&upb_ServiceDef_Name,
+ };
+ PyUpb_DescriptorBase* self = (void*)_self;
+ return PyUpb_ByNameMap_New(&funcs, self->def, self->pool);
+}
+
+static PyObject* PyUpb_FileDescriptor_GetDependencies(PyObject* _self,
+ void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ static PyUpb_GenericSequence_Funcs funcs = {
+ (void*)&upb_FileDef_DependencyCount,
+ (void*)&upb_FileDef_Dependency,
+ (void*)&PyUpb_FileDescriptor_Get,
+ };
+ return PyUpb_GenericSequence_New(&funcs, self->def, self->pool);
+}
+
+static PyObject* PyUpb_FileDescriptor_GetPublicDependencies(PyObject* _self,
+ void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ static PyUpb_GenericSequence_Funcs funcs = {
+ (void*)&upb_FileDef_PublicDependencyCount,
+ (void*)&upb_FileDef_PublicDependency,
+ (void*)&PyUpb_FileDescriptor_Get,
+ };
+ return PyUpb_GenericSequence_New(&funcs, self->def, self->pool);
+}
+
+static PyObject* PyUpb_FileDescriptor_GetSyntax(PyObject* _self,
+ void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ const char* syntax =
+ upb_FileDef_Syntax(self->def) == kUpb_Syntax_Proto2 ? "proto2" : "proto3";
+ return PyUnicode_FromString(syntax);
+}
+
+static PyObject* PyUpb_FileDescriptor_GetHasOptions(PyObject* _self,
+ void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ return PyBool_FromLong(upb_FileDef_HasOptions(self->def));
+}
+
+static PyObject* PyUpb_FileDescriptor_GetOptions(PyObject* _self,
+ PyObject* args) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ return PyUpb_DescriptorBase_GetOptions(
+ self, upb_FileDef_Options(self->def), &google_protobuf_FileOptions_msg_init,
+ PYUPB_DESCRIPTOR_PROTO_PACKAGE ".FileOptions");
+}
+
+static PyObject* PyUpb_FileDescriptor_CopyToProto(PyObject* _self,
+ PyObject* py_proto) {
+ return PyUpb_DescriptorBase_CopyToProto(
+ _self, (PyUpb_ToProto_Func*)&upb_FileDef_ToProto,
+ &google_protobuf_FileDescriptorProto_msg_init,
+ PYUPB_DESCRIPTOR_PROTO_PACKAGE ".FileDescriptorProto", py_proto);
+}
+
+static PyGetSetDef PyUpb_FileDescriptor_Getters[] = {
+ {"pool", PyUpb_FileDescriptor_GetPool, NULL, "pool"},
+ {"name", (getter)PyUpb_FileDescriptor_GetName, NULL, "name"},
+ {"package", PyUpb_FileDescriptor_GetPackage, NULL, "package"},
+ {"serialized_pb", PyUpb_FileDescriptor_GetSerializedPb},
+ {"message_types_by_name", PyUpb_FileDescriptor_GetMessageTypesByName, NULL,
+ "Messages by name"},
+ {"enum_types_by_name", PyUpb_FileDescriptor_GetEnumTypesByName, NULL,
+ "Enums by name"},
+ {"extensions_by_name", PyUpb_FileDescriptor_GetExtensionsByName, NULL,
+ "Extensions by name"},
+ {"services_by_name", PyUpb_FileDescriptor_GetServicesByName, NULL,
+ "Services by name"},
+ {"dependencies", PyUpb_FileDescriptor_GetDependencies, NULL,
+ "Dependencies"},
+ {"public_dependencies", PyUpb_FileDescriptor_GetPublicDependencies, NULL,
+ "Dependencies"},
+ {"has_options", PyUpb_FileDescriptor_GetHasOptions, NULL, "Has Options"},
+ {"syntax", PyUpb_FileDescriptor_GetSyntax, (setter)NULL, "Syntax"},
+ {NULL},
+};
+
+static PyMethodDef PyUpb_FileDescriptor_Methods[] = {
+ {"GetOptions", PyUpb_FileDescriptor_GetOptions, METH_NOARGS},
+ {"CopyToProto", PyUpb_FileDescriptor_CopyToProto, METH_O},
+ {NULL}};
+
+static PyType_Slot PyUpb_FileDescriptor_Slots[] = {
+ DESCRIPTOR_BASE_SLOTS,
+ {Py_tp_methods, PyUpb_FileDescriptor_Methods},
+ {Py_tp_getset, PyUpb_FileDescriptor_Getters},
+ {0, NULL}};
+
+static PyType_Spec PyUpb_FileDescriptor_Spec = {
+ PYUPB_MODULE_NAME ".FileDescriptor", // tp_name
+ sizeof(PyUpb_DescriptorBase), // tp_basicsize
+ 0, // tp_itemsize
+ Py_TPFLAGS_DEFAULT, // tp_flags
+ PyUpb_FileDescriptor_Slots,
+};
+
+const upb_FileDef* PyUpb_FileDescriptor_GetDef(PyObject* _self) {
+ PyUpb_DescriptorBase* self =
+ PyUpb_DescriptorBase_Check(_self, kPyUpb_FileDescriptor);
+ return self ? self->def : NULL;
+}
+
+// -----------------------------------------------------------------------------
+// MethodDescriptor
+// -----------------------------------------------------------------------------
+
+const upb_MethodDef* PyUpb_MethodDescriptor_GetDef(PyObject* _self) {
+ PyUpb_DescriptorBase* self =
+ PyUpb_DescriptorBase_Check(_self, kPyUpb_MethodDescriptor);
+ return self ? self->def : NULL;
+}
+
+PyObject* PyUpb_MethodDescriptor_Get(const upb_MethodDef* m) {
+ const upb_FileDef* file = upb_ServiceDef_File(upb_MethodDef_Service(m));
+ return PyUpb_DescriptorBase_Get(kPyUpb_MethodDescriptor, m, file);
+}
+
+static PyObject* PyUpb_MethodDescriptor_GetName(PyObject* self, void* closure) {
+ const upb_MethodDef* m = PyUpb_MethodDescriptor_GetDef(self);
+ return PyUnicode_FromString(upb_MethodDef_Name(m));
+}
+
+static PyObject* PyUpb_MethodDescriptor_GetFullName(PyObject* self,
+ void* closure) {
+ const upb_MethodDef* m = PyUpb_MethodDescriptor_GetDef(self);
+ return PyUnicode_FromString(upb_MethodDef_FullName(m));
+}
+
+static PyObject* PyUpb_MethodDescriptor_GetIndex(PyObject* self,
+ void* closure) {
+ const upb_MethodDef* oneof = PyUpb_MethodDescriptor_GetDef(self);
+ return PyLong_FromLong(upb_MethodDef_Index(oneof));
+}
+
+static PyObject* PyUpb_MethodDescriptor_GetContainingService(PyObject* self,
+ void* closure) {
+ const upb_MethodDef* m = PyUpb_MethodDescriptor_GetDef(self);
+ return PyUpb_ServiceDescriptor_Get(upb_MethodDef_Service(m));
+}
+
+static PyObject* PyUpb_MethodDescriptor_GetInputType(PyObject* self,
+ void* closure) {
+ const upb_MethodDef* m = PyUpb_MethodDescriptor_GetDef(self);
+ return PyUpb_Descriptor_Get(upb_MethodDef_InputType(m));
+}
+
+static PyObject* PyUpb_MethodDescriptor_GetOutputType(PyObject* self,
+ void* closure) {
+ const upb_MethodDef* m = PyUpb_MethodDescriptor_GetDef(self);
+ return PyUpb_Descriptor_Get(upb_MethodDef_OutputType(m));
+}
+
+static PyObject* PyUpb_MethodDescriptor_GetOptions(PyObject* _self,
+ PyObject* args) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ return PyUpb_DescriptorBase_GetOptions(
+ self, upb_MethodDef_Options(self->def), &google_protobuf_MethodOptions_msg_init,
+ PYUPB_DESCRIPTOR_PROTO_PACKAGE ".MethodOptions");
+}
+
+static PyObject* PyUpb_MethodDescriptor_CopyToProto(PyObject* _self,
+ PyObject* py_proto) {
+ return PyUpb_DescriptorBase_CopyToProto(
+ _self, (PyUpb_ToProto_Func*)&upb_MethodDef_ToProto,
+ &google_protobuf_MethodDescriptorProto_msg_init,
+ PYUPB_DESCRIPTOR_PROTO_PACKAGE ".MethodDescriptorProto", py_proto);
+}
+
+static PyGetSetDef PyUpb_MethodDescriptor_Getters[] = {
+ {"name", PyUpb_MethodDescriptor_GetName, NULL, "Name", NULL},
+ {"full_name", PyUpb_MethodDescriptor_GetFullName, NULL, "Full name", NULL},
+ {"index", PyUpb_MethodDescriptor_GetIndex, NULL, "Index", NULL},
+ {"containing_service", PyUpb_MethodDescriptor_GetContainingService, NULL,
+ "Containing service", NULL},
+ {"input_type", PyUpb_MethodDescriptor_GetInputType, NULL, "Input type",
+ NULL},
+ {"output_type", PyUpb_MethodDescriptor_GetOutputType, NULL, "Output type",
+ NULL},
+ {NULL}};
+
+static PyMethodDef PyUpb_MethodDescriptor_Methods[] = {
+ {"GetOptions", PyUpb_MethodDescriptor_GetOptions, METH_NOARGS},
+ {"CopyToProto", PyUpb_MethodDescriptor_CopyToProto, METH_O},
+ {NULL}};
+
+static PyType_Slot PyUpb_MethodDescriptor_Slots[] = {
+ DESCRIPTOR_BASE_SLOTS,
+ {Py_tp_methods, PyUpb_MethodDescriptor_Methods},
+ {Py_tp_getset, PyUpb_MethodDescriptor_Getters},
+ {0, NULL}};
+
+static PyType_Spec PyUpb_MethodDescriptor_Spec = {
+ PYUPB_MODULE_NAME ".MethodDescriptor", // tp_name
+ sizeof(PyUpb_DescriptorBase), // tp_basicsize
+ 0, // tp_itemsize
+ Py_TPFLAGS_DEFAULT, // tp_flags
+ PyUpb_MethodDescriptor_Slots,
+};
+
+// -----------------------------------------------------------------------------
+// OneofDescriptor
+// -----------------------------------------------------------------------------
+
+const upb_OneofDef* PyUpb_OneofDescriptor_GetDef(PyObject* _self) {
+ PyUpb_DescriptorBase* self =
+ PyUpb_DescriptorBase_Check(_self, kPyUpb_OneofDescriptor);
+ return self ? self->def : NULL;
+}
+
+PyObject* PyUpb_OneofDescriptor_Get(const upb_OneofDef* oneof) {
+ const upb_FileDef* file =
+ upb_MessageDef_File(upb_OneofDef_ContainingType(oneof));
+ return PyUpb_DescriptorBase_Get(kPyUpb_OneofDescriptor, oneof, file);
+}
+
+static PyObject* PyUpb_OneofDescriptor_GetName(PyObject* self, void* closure) {
+ const upb_OneofDef* oneof = PyUpb_OneofDescriptor_GetDef(self);
+ return PyUnicode_FromString(upb_OneofDef_Name(oneof));
+}
+
+static PyObject* PyUpb_OneofDescriptor_GetFullName(PyObject* self,
+ void* closure) {
+ const upb_OneofDef* oneof = PyUpb_OneofDescriptor_GetDef(self);
+ return PyUnicode_FromFormat(
+ "%s.%s", upb_MessageDef_FullName(upb_OneofDef_ContainingType(oneof)),
+ upb_OneofDef_Name(oneof));
+}
+
+static PyObject* PyUpb_OneofDescriptor_GetIndex(PyObject* self, void* closure) {
+ const upb_OneofDef* oneof = PyUpb_OneofDescriptor_GetDef(self);
+ return PyLong_FromLong(upb_OneofDef_Index(oneof));
+}
+
+static PyObject* PyUpb_OneofDescriptor_GetContainingType(PyObject* self,
+ void* closure) {
+ const upb_OneofDef* oneof = PyUpb_OneofDescriptor_GetDef(self);
+ return PyUpb_Descriptor_Get(upb_OneofDef_ContainingType(oneof));
+}
+
+static PyObject* PyUpb_OneofDescriptor_GetHasOptions(PyObject* _self,
+ void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ return PyBool_FromLong(upb_OneofDef_HasOptions(self->def));
+}
+
+static PyObject* PyUpb_OneofDescriptor_GetFields(PyObject* _self,
+ void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ static PyUpb_GenericSequence_Funcs funcs = {
+ (void*)&upb_OneofDef_FieldCount,
+ (void*)&upb_OneofDef_Field,
+ (void*)&PyUpb_FieldDescriptor_Get,
+ };
+ return PyUpb_GenericSequence_New(&funcs, self->def, self->pool);
+}
+
+static PyObject* PyUpb_OneofDescriptor_GetOptions(PyObject* _self,
+ PyObject* args) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ return PyUpb_DescriptorBase_GetOptions(
+ self, upb_OneofDef_Options(self->def), &google_protobuf_OneofOptions_msg_init,
+ PYUPB_DESCRIPTOR_PROTO_PACKAGE ".OneofOptions");
+}
+
+static PyGetSetDef PyUpb_OneofDescriptor_Getters[] = {
+ {"name", PyUpb_OneofDescriptor_GetName, NULL, "Name"},
+ {"full_name", PyUpb_OneofDescriptor_GetFullName, NULL, "Full name"},
+ {"index", PyUpb_OneofDescriptor_GetIndex, NULL, "Index"},
+ {"containing_type", PyUpb_OneofDescriptor_GetContainingType, NULL,
+ "Containing type"},
+ {"has_options", PyUpb_OneofDescriptor_GetHasOptions, NULL, "Has Options"},
+ {"fields", PyUpb_OneofDescriptor_GetFields, NULL, "Fields"},
+ {NULL}};
+
+static PyMethodDef PyUpb_OneofDescriptor_Methods[] = {
+ {"GetOptions", PyUpb_OneofDescriptor_GetOptions, METH_NOARGS}, {NULL}};
+
+static PyType_Slot PyUpb_OneofDescriptor_Slots[] = {
+ DESCRIPTOR_BASE_SLOTS,
+ {Py_tp_methods, PyUpb_OneofDescriptor_Methods},
+ {Py_tp_getset, PyUpb_OneofDescriptor_Getters},
+ {0, NULL}};
+
+static PyType_Spec PyUpb_OneofDescriptor_Spec = {
+ PYUPB_MODULE_NAME ".OneofDescriptor", // tp_name
+ sizeof(PyUpb_DescriptorBase), // tp_basicsize
+ 0, // tp_itemsize
+ Py_TPFLAGS_DEFAULT, // tp_flags
+ PyUpb_OneofDescriptor_Slots,
+};
+
+// -----------------------------------------------------------------------------
+// ServiceDescriptor
+// -----------------------------------------------------------------------------
+
+const upb_ServiceDef* PyUpb_ServiceDescriptor_GetDef(PyObject* _self) {
+ PyUpb_DescriptorBase* self =
+ PyUpb_DescriptorBase_Check(_self, kPyUpb_ServiceDescriptor);
+ return self ? self->def : NULL;
+}
+
+PyObject* PyUpb_ServiceDescriptor_Get(const upb_ServiceDef* s) {
+ const upb_FileDef* file = upb_ServiceDef_File(s);
+ return PyUpb_DescriptorBase_Get(kPyUpb_ServiceDescriptor, s, file);
+}
+
+static PyObject* PyUpb_ServiceDescriptor_GetFullName(PyObject* self,
+ void* closure) {
+ const upb_ServiceDef* s = PyUpb_ServiceDescriptor_GetDef(self);
+ return PyUnicode_FromString(upb_ServiceDef_FullName(s));
+}
+
+static PyObject* PyUpb_ServiceDescriptor_GetName(PyObject* self,
+ void* closure) {
+ const upb_ServiceDef* s = PyUpb_ServiceDescriptor_GetDef(self);
+ return PyUnicode_FromString(upb_ServiceDef_Name(s));
+}
+
+static PyObject* PyUpb_ServiceDescriptor_GetFile(PyObject* self,
+ void* closure) {
+ const upb_ServiceDef* s = PyUpb_ServiceDescriptor_GetDef(self);
+ return PyUpb_FileDescriptor_Get(upb_ServiceDef_File(s));
+}
+
+static PyObject* PyUpb_ServiceDescriptor_GetIndex(PyObject* self,
+ void* closure) {
+ const upb_ServiceDef* s = PyUpb_ServiceDescriptor_GetDef(self);
+ return PyLong_FromLong(upb_ServiceDef_Index(s));
+}
+
+static PyObject* PyUpb_ServiceDescriptor_GetMethods(PyObject* _self,
+ void* closure) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ static PyUpb_GenericSequence_Funcs funcs = {
+ (void*)&upb_ServiceDef_MethodCount,
+ (void*)&upb_ServiceDef_Method,
+ (void*)&PyUpb_MethodDescriptor_Get,
+ };
+ return PyUpb_GenericSequence_New(&funcs, self->def, self->pool);
+}
+
+static PyObject* PyUpb_ServiceDescriptor_GetMethodsByName(PyObject* _self,
+ void* closure) {
+ static PyUpb_ByNameMap_Funcs funcs = {
+ {
+ (void*)&upb_ServiceDef_MethodCount,
+ (void*)&upb_ServiceDef_Method,
+ (void*)&PyUpb_MethodDescriptor_Get,
+ },
+ (void*)&upb_ServiceDef_FindMethodByName,
+ (void*)&upb_MethodDef_Name,
+ };
+ PyUpb_DescriptorBase* self = (void*)_self;
+ return PyUpb_ByNameMap_New(&funcs, self->def, self->pool);
+}
+
+static PyObject* PyUpb_ServiceDescriptor_GetOptions(PyObject* _self,
+ PyObject* args) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ return PyUpb_DescriptorBase_GetOptions(
+ self, upb_ServiceDef_Options(self->def), &google_protobuf_ServiceOptions_msg_init,
+ PYUPB_DESCRIPTOR_PROTO_PACKAGE ".ServiceOptions");
+}
+
+static PyObject* PyUpb_ServiceDescriptor_CopyToProto(PyObject* _self,
+ PyObject* py_proto) {
+ return PyUpb_DescriptorBase_CopyToProto(
+ _self, (PyUpb_ToProto_Func*)&upb_ServiceDef_ToProto,
+ &google_protobuf_ServiceDescriptorProto_msg_init,
+ PYUPB_DESCRIPTOR_PROTO_PACKAGE ".ServiceDescriptorProto", py_proto);
+}
+
+static PyObject* PyUpb_ServiceDescriptor_FindMethodByName(PyObject* _self,
+ PyObject* py_name) {
+ PyUpb_DescriptorBase* self = (void*)_self;
+ const char* name = PyUnicode_AsUTF8AndSize(py_name, NULL);
+ if (!name) return NULL;
+ const upb_MethodDef* method =
+ upb_ServiceDef_FindMethodByName(self->def, name);
+ if (method == NULL) {
+ return PyErr_Format(PyExc_KeyError, "Couldn't find method %.200s", name);
+ }
+ return PyUpb_MethodDescriptor_Get(method);
+}
+
+static PyGetSetDef PyUpb_ServiceDescriptor_Getters[] = {
+ {"name", PyUpb_ServiceDescriptor_GetName, NULL, "Name", NULL},
+ {"full_name", PyUpb_ServiceDescriptor_GetFullName, NULL, "Full name", NULL},
+ {"file", PyUpb_ServiceDescriptor_GetFile, NULL, "File descriptor"},
+ {"index", PyUpb_ServiceDescriptor_GetIndex, NULL, "Index", NULL},
+ {"methods", PyUpb_ServiceDescriptor_GetMethods, NULL, "Methods", NULL},
+ {"methods_by_name", PyUpb_ServiceDescriptor_GetMethodsByName, NULL,
+ "Methods by name", NULL},
+ {NULL}};
+
+static PyMethodDef PyUpb_ServiceDescriptor_Methods[] = {
+ {"GetOptions", PyUpb_ServiceDescriptor_GetOptions, METH_NOARGS},
+ {"CopyToProto", PyUpb_ServiceDescriptor_CopyToProto, METH_O},
+ {"FindMethodByName", PyUpb_ServiceDescriptor_FindMethodByName, METH_O},
+ {NULL}};
+
+static PyType_Slot PyUpb_ServiceDescriptor_Slots[] = {
+ DESCRIPTOR_BASE_SLOTS,
+ {Py_tp_methods, PyUpb_ServiceDescriptor_Methods},
+ {Py_tp_getset, PyUpb_ServiceDescriptor_Getters},
+ {0, NULL}};
+
+static PyType_Spec PyUpb_ServiceDescriptor_Spec = {
+ PYUPB_MODULE_NAME ".ServiceDescriptor", // tp_name
+ sizeof(PyUpb_DescriptorBase), // tp_basicsize
+ 0, // tp_itemsize
+ Py_TPFLAGS_DEFAULT, // tp_flags
+ PyUpb_ServiceDescriptor_Slots,
+};
+
+// -----------------------------------------------------------------------------
+// Top Level
+// -----------------------------------------------------------------------------
+
+static bool PyUpb_SetIntAttr(PyObject* obj, const char* name, int val) {
+ PyObject* num = PyLong_FromLong(val);
+ if (!num) return false;
+ int status = PyObject_SetAttrString(obj, name, num);
+ Py_DECREF(num);
+ return status >= 0;
+}
+
+// These must be in the same order as PyUpb_DescriptorType in the header.
+static PyType_Spec* desc_specs[] = {
+ &PyUpb_Descriptor_Spec, &PyUpb_EnumDescriptor_Spec,
+ &PyUpb_EnumValueDescriptor_Spec, &PyUpb_FieldDescriptor_Spec,
+ &PyUpb_FileDescriptor_Spec, &PyUpb_MethodDescriptor_Spec,
+ &PyUpb_OneofDescriptor_Spec, &PyUpb_ServiceDescriptor_Spec,
+};
+
+bool PyUpb_InitDescriptor(PyObject* m) {
+ PyUpb_ModuleState* s = PyUpb_ModuleState_GetFromModule(m);
+
+ for (size_t i = 0; i < kPyUpb_Descriptor_Count; i++) {
+ s->descriptor_types[i] = PyUpb_AddClass(m, desc_specs[i]);
+ if (!s->descriptor_types[i]) {
+ return false;
+ }
+ }
+
+ PyObject* fd = (PyObject*)s->descriptor_types[kPyUpb_FieldDescriptor];
+ return PyUpb_SetIntAttr(fd, "LABEL_OPTIONAL", kUpb_Label_Optional) &&
+ PyUpb_SetIntAttr(fd, "LABEL_REPEATED", kUpb_Label_Repeated) &&
+ PyUpb_SetIntAttr(fd, "LABEL_REQUIRED", kUpb_Label_Required) &&
+ PyUpb_SetIntAttr(fd, "TYPE_BOOL", kUpb_FieldType_Bool) &&
+ PyUpb_SetIntAttr(fd, "TYPE_BYTES", kUpb_FieldType_Bytes) &&
+ PyUpb_SetIntAttr(fd, "TYPE_DOUBLE", kUpb_FieldType_Double) &&
+ PyUpb_SetIntAttr(fd, "TYPE_ENUM", kUpb_FieldType_Enum) &&
+ PyUpb_SetIntAttr(fd, "TYPE_FIXED32", kUpb_FieldType_Fixed32) &&
+ PyUpb_SetIntAttr(fd, "TYPE_FIXED64", kUpb_FieldType_Fixed64) &&
+ PyUpb_SetIntAttr(fd, "TYPE_FLOAT", kUpb_FieldType_Float) &&
+ PyUpb_SetIntAttr(fd, "TYPE_GROUP", kUpb_FieldType_Group) &&
+ PyUpb_SetIntAttr(fd, "TYPE_INT32", kUpb_FieldType_Int32) &&
+ PyUpb_SetIntAttr(fd, "TYPE_INT64", kUpb_FieldType_Int64) &&
+ PyUpb_SetIntAttr(fd, "TYPE_MESSAGE", kUpb_FieldType_Message) &&
+ PyUpb_SetIntAttr(fd, "TYPE_SFIXED32", kUpb_FieldType_SFixed32) &&
+ PyUpb_SetIntAttr(fd, "TYPE_SFIXED64", kUpb_FieldType_SFixed64) &&
+ PyUpb_SetIntAttr(fd, "TYPE_SINT32", kUpb_FieldType_SInt32) &&
+ PyUpb_SetIntAttr(fd, "TYPE_SINT64", kUpb_FieldType_SInt64) &&
+ PyUpb_SetIntAttr(fd, "TYPE_STRING", kUpb_FieldType_String) &&
+ PyUpb_SetIntAttr(fd, "TYPE_UINT32", kUpb_FieldType_UInt32) &&
+ PyUpb_SetIntAttr(fd, "TYPE_UINT64", kUpb_FieldType_UInt64);
+}
diff --git a/upb/python/descriptor.h b/upb/python/descriptor.h
new file mode 100644
index 0000000..63a94f8
--- /dev/null
+++ b/upb/python/descriptor.h
@@ -0,0 +1,82 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef PYUPB_DESCRIPTOR_H__
+#define PYUPB_DESCRIPTOR_H__
+
+#include <stdbool.h>
+
+#include "python/python_api.h"
+#include "upb/reflection/def.h"
+
+typedef enum {
+ kPyUpb_Descriptor = 0,
+ kPyUpb_EnumDescriptor = 1,
+ kPyUpb_EnumValueDescriptor = 2,
+ kPyUpb_FieldDescriptor = 3,
+ kPyUpb_FileDescriptor = 4,
+ kPyUpb_MethodDescriptor = 5,
+ kPyUpb_OneofDescriptor = 6,
+ kPyUpb_ServiceDescriptor = 7,
+ kPyUpb_Descriptor_Count = 8,
+} PyUpb_DescriptorType;
+
+// Given a descriptor object |desc|, returns a Python message class object for
+// the msgdef |m|, which must be from the same pool.
+PyObject* PyUpb_Descriptor_GetClass(const upb_MessageDef* m);
+
+// Returns a Python wrapper object for the given def. This will return an
+// existing object if one already exists, otherwise a new object will be
+// created. The caller always owns a ref on the returned object.
+PyObject* PyUpb_Descriptor_Get(const upb_MessageDef* msgdef);
+PyObject* PyUpb_EnumDescriptor_Get(const upb_EnumDef* enumdef);
+PyObject* PyUpb_FieldDescriptor_Get(const upb_FieldDef* field);
+PyObject* PyUpb_FileDescriptor_Get(const upb_FileDef* file);
+PyObject* PyUpb_OneofDescriptor_Get(const upb_OneofDef* oneof);
+PyObject* PyUpb_EnumValueDescriptor_Get(const upb_EnumValueDef* enumval);
+PyObject* PyUpb_Descriptor_GetOrCreateWrapper(const upb_MessageDef* msg);
+PyObject* PyUpb_ServiceDescriptor_Get(const upb_ServiceDef* s);
+PyObject* PyUpb_MethodDescriptor_Get(const upb_MethodDef* s);
+
+// Returns the underlying |def| for a given wrapper object. The caller must
+// have already verified that the given Python object is of the expected type.
+const upb_FileDef* PyUpb_FileDescriptor_GetDef(PyObject* file);
+const upb_FieldDef* PyUpb_FieldDescriptor_GetDef(PyObject* file);
+const upb_MessageDef* PyUpb_Descriptor_GetDef(PyObject* _self);
+const void* PyUpb_AnyDescriptor_GetDef(PyObject* _self);
+
+// Returns the underlying |def| for a given wrapper object. The caller must
+// have already verified that the given Python object is of the expected type.
+const upb_FileDef* PyUpb_FileDescriptor_GetDef(PyObject* file);
+
+// Module-level init.
+bool PyUpb_InitDescriptor(PyObject* m);
+
+#endif // PYUPB_DESCRIPTOR_H__
diff --git a/upb/python/descriptor_containers.c b/upb/python/descriptor_containers.c
new file mode 100644
index 0000000..3c959b4
--- /dev/null
+++ b/upb/python/descriptor_containers.c
@@ -0,0 +1,795 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "python/descriptor_containers.h"
+
+#include "python/descriptor.h"
+#include "python/protobuf.h"
+#include "upb/reflection/def.h"
+
+// Implements __repr__ as str(dict(self)).
+static PyObject* PyUpb_DescriptorMap_Repr(PyObject* _self) {
+ PyObject* dict = PyDict_New();
+ PyObject* ret = NULL;
+ if (!dict) goto err;
+ if (PyDict_Merge(dict, _self, 1) != 0) goto err;
+ ret = PyObject_Str(dict);
+
+err:
+ Py_XDECREF(dict);
+ return ret;
+}
+
+// -----------------------------------------------------------------------------
+// ByNameIterator
+// -----------------------------------------------------------------------------
+
+typedef struct {
+ PyObject_HEAD;
+ const PyUpb_ByNameMap_Funcs* funcs;
+ const void* parent; // upb_MessageDef*, upb_DefPool*, etc.
+ PyObject* parent_obj; // Python object that keeps parent alive, we own a ref.
+ int index; // Current iterator index.
+} PyUpb_ByNameIterator;
+
+static PyUpb_ByNameIterator* PyUpb_ByNameIterator_Self(PyObject* obj) {
+ assert(Py_TYPE(obj) == PyUpb_ModuleState_Get()->by_name_iterator_type);
+ return (PyUpb_ByNameIterator*)obj;
+}
+
+static void PyUpb_ByNameIterator_Dealloc(PyObject* _self) {
+ PyUpb_ByNameIterator* self = PyUpb_ByNameIterator_Self(_self);
+ Py_DECREF(self->parent_obj);
+ PyUpb_Dealloc(self);
+}
+
+static PyObject* PyUpb_ByNameIterator_New(const PyUpb_ByNameMap_Funcs* funcs,
+ const void* parent,
+ PyObject* parent_obj) {
+ PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
+ PyUpb_ByNameIterator* iter =
+ (void*)PyType_GenericAlloc(s->by_name_iterator_type, 0);
+ iter->funcs = funcs;
+ iter->parent = parent;
+ iter->parent_obj = parent_obj;
+ iter->index = 0;
+ Py_INCREF(iter->parent_obj);
+ return &iter->ob_base;
+}
+
+static PyObject* PyUpb_ByNameIterator_IterNext(PyObject* _self) {
+ PyUpb_ByNameIterator* self = PyUpb_ByNameIterator_Self(_self);
+ int size = self->funcs->base.get_elem_count(self->parent);
+ if (self->index >= size) return NULL;
+ const void* elem = self->funcs->base.index(self->parent, self->index);
+ self->index++;
+ return PyUnicode_FromString(self->funcs->get_elem_name(elem));
+}
+
+static PyType_Slot PyUpb_ByNameIterator_Slots[] = {
+ {Py_tp_dealloc, PyUpb_ByNameIterator_Dealloc},
+ {Py_tp_iter, PyObject_SelfIter},
+ {Py_tp_iternext, PyUpb_ByNameIterator_IterNext},
+ {0, NULL}};
+
+static PyType_Spec PyUpb_ByNameIterator_Spec = {
+ PYUPB_MODULE_NAME "._ByNameIterator", // tp_name
+ sizeof(PyUpb_ByNameIterator), // tp_basicsize
+ 0, // tp_itemsize
+ Py_TPFLAGS_DEFAULT, // tp_flags
+ PyUpb_ByNameIterator_Slots,
+};
+
+// -----------------------------------------------------------------------------
+// ByNumberIterator
+// -----------------------------------------------------------------------------
+
+typedef struct {
+ PyObject_HEAD;
+ const PyUpb_ByNumberMap_Funcs* funcs;
+ const void* parent; // upb_MessageDef*, upb_DefPool*, etc.
+ PyObject* parent_obj; // Python object that keeps parent alive, we own a ref.
+ int index; // Current iterator index.
+} PyUpb_ByNumberIterator;
+
+static PyUpb_ByNumberIterator* PyUpb_ByNumberIterator_Self(PyObject* obj) {
+ assert(Py_TYPE(obj) == PyUpb_ModuleState_Get()->by_number_iterator_type);
+ return (PyUpb_ByNumberIterator*)obj;
+}
+
+static void PyUpb_ByNumberIterator_Dealloc(PyObject* _self) {
+ PyUpb_ByNumberIterator* self = PyUpb_ByNumberIterator_Self(_self);
+ Py_DECREF(self->parent_obj);
+ PyUpb_Dealloc(self);
+}
+
+static PyObject* PyUpb_ByNumberIterator_New(
+ const PyUpb_ByNumberMap_Funcs* funcs, const void* parent,
+ PyObject* parent_obj) {
+ PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
+ PyUpb_ByNumberIterator* iter =
+ (void*)PyType_GenericAlloc(s->by_number_iterator_type, 0);
+ iter->funcs = funcs;
+ iter->parent = parent;
+ iter->parent_obj = parent_obj;
+ iter->index = 0;
+ Py_INCREF(iter->parent_obj);
+ return &iter->ob_base;
+}
+
+static PyObject* PyUpb_ByNumberIterator_IterNext(PyObject* _self) {
+ PyUpb_ByNumberIterator* self = PyUpb_ByNumberIterator_Self(_self);
+ int size = self->funcs->base.get_elem_count(self->parent);
+ if (self->index >= size) return NULL;
+ const void* elem = self->funcs->base.index(self->parent, self->index);
+ self->index++;
+ return PyLong_FromLong(self->funcs->get_elem_num(elem));
+}
+
+static PyType_Slot PyUpb_ByNumberIterator_Slots[] = {
+ {Py_tp_dealloc, PyUpb_ByNumberIterator_Dealloc},
+ {Py_tp_iter, PyObject_SelfIter},
+ {Py_tp_iternext, PyUpb_ByNumberIterator_IterNext},
+ {0, NULL}};
+
+static PyType_Spec PyUpb_ByNumberIterator_Spec = {
+ PYUPB_MODULE_NAME "._ByNumberIterator", // tp_name
+ sizeof(PyUpb_ByNumberIterator), // tp_basicsize
+ 0, // tp_itemsize
+ Py_TPFLAGS_DEFAULT, // tp_flags
+ PyUpb_ByNumberIterator_Slots,
+};
+
+// -----------------------------------------------------------------------------
+// GenericSequence
+// -----------------------------------------------------------------------------
+
+typedef struct {
+ PyObject_HEAD;
+ const PyUpb_GenericSequence_Funcs* funcs;
+ const void* parent; // upb_MessageDef*, upb_DefPool*, etc.
+ PyObject* parent_obj; // Python object that keeps parent alive, we own a ref.
+} PyUpb_GenericSequence;
+
+PyUpb_GenericSequence* PyUpb_GenericSequence_Self(PyObject* obj) {
+ assert(Py_TYPE(obj) == PyUpb_ModuleState_Get()->generic_sequence_type);
+ return (PyUpb_GenericSequence*)obj;
+}
+
+static void PyUpb_GenericSequence_Dealloc(PyObject* _self) {
+ PyUpb_GenericSequence* self = PyUpb_GenericSequence_Self(_self);
+ Py_CLEAR(self->parent_obj);
+ PyUpb_Dealloc(self);
+}
+
+PyObject* PyUpb_GenericSequence_New(const PyUpb_GenericSequence_Funcs* funcs,
+ const void* parent, PyObject* parent_obj) {
+ PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
+ PyUpb_GenericSequence* seq =
+ (PyUpb_GenericSequence*)PyType_GenericAlloc(s->generic_sequence_type, 0);
+ seq->funcs = funcs;
+ seq->parent = parent;
+ seq->parent_obj = parent_obj;
+ Py_INCREF(parent_obj);
+ return &seq->ob_base;
+}
+
+static Py_ssize_t PyUpb_GenericSequence_Length(PyObject* _self) {
+ PyUpb_GenericSequence* self = PyUpb_GenericSequence_Self(_self);
+ return self->funcs->get_elem_count(self->parent);
+}
+
+static PyObject* PyUpb_GenericSequence_GetItem(PyObject* _self,
+ Py_ssize_t index) {
+ PyUpb_GenericSequence* self = PyUpb_GenericSequence_Self(_self);
+ Py_ssize_t size = self->funcs->get_elem_count(self->parent);
+ if (index < 0 || index >= size) {
+ PyErr_Format(PyExc_IndexError, "list index (%zd) out of range", index);
+ return NULL;
+ }
+ const void* elem = self->funcs->index(self->parent, index);
+ return self->funcs->get_elem_wrapper(elem);
+}
+
+// A sequence container can only be equal to another sequence container, or (for
+// backward compatibility) to a list containing the same items.
+// Returns 1 if equal, 0 if unequal, -1 on error.
+static int PyUpb_GenericSequence_IsEqual(PyUpb_GenericSequence* self,
+ PyObject* other) {
+ // Check the identity of C++ pointers.
+ if (PyObject_TypeCheck(other, Py_TYPE(self))) {
+ PyUpb_GenericSequence* other_seq = (void*)other;
+ return self->parent == other_seq->parent && self->funcs == other_seq->funcs;
+ }
+
+ if (!PyList_Check(other)) return 0;
+
+ // return list(self) == other
+ // We can clamp `i` to int because GenericSequence uses int for size (this
+ // is useful when we do int iteration below).
+ int n = PyUpb_GenericSequence_Length((PyObject*)self);
+ if ((Py_ssize_t)n != PyList_Size(other)) {
+ return false;
+ }
+
+ PyObject* item1;
+ for (int i = 0; i < n; i++) {
+ item1 = PyUpb_GenericSequence_GetItem((PyObject*)self, i);
+ PyObject* item2 = PyList_GetItem(other, i);
+ if (!item1 || !item2) goto error;
+ int cmp = PyObject_RichCompareBool(item1, item2, Py_EQ);
+ Py_DECREF(item1);
+ if (cmp != 1) return cmp;
+ }
+ // All items were found and equal
+ return 1;
+
+error:
+ Py_XDECREF(item1);
+ return -1;
+}
+
+static PyObject* PyUpb_GenericSequence_RichCompare(PyObject* _self,
+ PyObject* other, int opid) {
+ PyUpb_GenericSequence* self = PyUpb_GenericSequence_Self(_self);
+ if (opid != Py_EQ && opid != Py_NE) {
+ Py_RETURN_NOTIMPLEMENTED;
+ }
+ bool ret = PyUpb_GenericSequence_IsEqual(self, other);
+ if (opid == Py_NE) ret = !ret;
+ return PyBool_FromLong(ret);
+}
+
+// Linear search. Could optimize this in some cases (defs that have index),
+// but not all (FileDescriptor.dependencies).
+static int PyUpb_GenericSequence_Find(PyObject* _self, PyObject* item) {
+ PyUpb_GenericSequence* self = PyUpb_GenericSequence_Self(_self);
+ const void* item_ptr = PyUpb_AnyDescriptor_GetDef(item);
+ int count = self->funcs->get_elem_count(self->parent);
+ for (int i = 0; i < count; i++) {
+ if (self->funcs->index(self->parent, i) == item_ptr) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+static PyObject* PyUpb_GenericSequence_Index(PyObject* self, PyObject* item) {
+ int position = PyUpb_GenericSequence_Find(self, item);
+ if (position < 0) {
+ PyErr_SetNone(PyExc_ValueError);
+ return NULL;
+ } else {
+ return PyLong_FromLong(position);
+ }
+}
+
+static PyObject* PyUpb_GenericSequence_Count(PyObject* _self, PyObject* item) {
+ PyUpb_GenericSequence* self = PyUpb_GenericSequence_Self(_self);
+ const void* item_ptr = PyUpb_AnyDescriptor_GetDef(item);
+ int n = self->funcs->get_elem_count(self->parent);
+ int count = 0;
+ for (int i = 0; i < n; i++) {
+ if (self->funcs->index(self->parent, i) == item_ptr) {
+ count++;
+ }
+ }
+ return PyLong_FromLong(count);
+}
+
+static PyObject* PyUpb_GenericSequence_Append(PyObject* self, PyObject* args) {
+ PyErr_Format(PyExc_TypeError, "'%R' is not a mutable sequence", self);
+ return NULL;
+}
+
+static PyMethodDef PyUpb_GenericSequence_Methods[] = {
+ {"index", PyUpb_GenericSequence_Index, METH_O},
+ {"count", PyUpb_GenericSequence_Count, METH_O},
+ {"append", PyUpb_GenericSequence_Append, METH_O},
+ // This was implemented for Python/C++ but so far has not been required.
+ //{ "__reversed__", (PyCFunction)Reversed, METH_NOARGS, },
+ {NULL}};
+
+static PyType_Slot PyUpb_GenericSequence_Slots[] = {
+ {Py_tp_dealloc, &PyUpb_GenericSequence_Dealloc},
+ {Py_tp_methods, &PyUpb_GenericSequence_Methods},
+ {Py_sq_length, PyUpb_GenericSequence_Length},
+ {Py_sq_item, PyUpb_GenericSequence_GetItem},
+ {Py_tp_richcompare, &PyUpb_GenericSequence_RichCompare},
+ // These were implemented for Python/C++ but so far have not been required.
+ // {Py_tp_repr, &PyUpb_GenericSequence_Repr},
+ // {Py_sq_contains, PyUpb_GenericSequence_Contains},
+ // {Py_mp_subscript, PyUpb_GenericSequence_Subscript},
+ // {Py_mp_ass_subscript, PyUpb_GenericSequence_AssignSubscript},
+ {0, NULL},
+};
+
+static PyType_Spec PyUpb_GenericSequence_Spec = {
+ PYUPB_MODULE_NAME "._GenericSequence", // tp_name
+ sizeof(PyUpb_GenericSequence), // tp_basicsize
+ 0, // tp_itemsize
+ Py_TPFLAGS_DEFAULT, // tp_flags
+ PyUpb_GenericSequence_Slots,
+};
+
+// -----------------------------------------------------------------------------
+// ByNameMap
+// -----------------------------------------------------------------------------
+
+typedef struct {
+ PyObject_HEAD;
+ const PyUpb_ByNameMap_Funcs* funcs;
+ const void* parent; // upb_MessageDef*, upb_DefPool*, etc.
+ PyObject* parent_obj; // Python object that keeps parent alive, we own a ref.
+} PyUpb_ByNameMap;
+
+PyUpb_ByNameMap* PyUpb_ByNameMap_Self(PyObject* obj) {
+ assert(Py_TYPE(obj) == PyUpb_ModuleState_Get()->by_name_map_type);
+ return (PyUpb_ByNameMap*)obj;
+}
+
+static void PyUpb_ByNameMap_Dealloc(PyObject* _self) {
+ PyUpb_ByNameMap* self = PyUpb_ByNameMap_Self(_self);
+ Py_DECREF(self->parent_obj);
+ PyUpb_Dealloc(self);
+}
+
+PyObject* PyUpb_ByNameMap_New(const PyUpb_ByNameMap_Funcs* funcs,
+ const void* parent, PyObject* parent_obj) {
+ PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
+ PyUpb_ByNameMap* map = (void*)PyType_GenericAlloc(s->by_name_map_type, 0);
+ map->funcs = funcs;
+ map->parent = parent;
+ map->parent_obj = parent_obj;
+ Py_INCREF(parent_obj);
+ return &map->ob_base;
+}
+
+static Py_ssize_t PyUpb_ByNameMap_Length(PyObject* _self) {
+ PyUpb_ByNameMap* self = PyUpb_ByNameMap_Self(_self);
+ return self->funcs->base.get_elem_count(self->parent);
+}
+
+static PyObject* PyUpb_ByNameMap_Subscript(PyObject* _self, PyObject* key) {
+ PyUpb_ByNameMap* self = PyUpb_ByNameMap_Self(_self);
+ const char* name = PyUpb_GetStrData(key);
+ const void* elem = name ? self->funcs->lookup(self->parent, name) : NULL;
+
+ if (!name && PyObject_Hash(key) == -1) return NULL;
+
+ if (elem) {
+ return self->funcs->base.get_elem_wrapper(elem);
+ } else {
+ PyErr_SetObject(PyExc_KeyError, key);
+ return NULL;
+ }
+}
+
+static int PyUpb_ByNameMap_AssignSubscript(PyObject* self, PyObject* key,
+ PyObject* value) {
+ PyErr_Format(PyExc_TypeError, PYUPB_MODULE_NAME
+ ".ByNameMap' object does not support item assignment");
+ return -1;
+}
+
+static int PyUpb_ByNameMap_Contains(PyObject* _self, PyObject* key) {
+ PyUpb_ByNameMap* self = PyUpb_ByNameMap_Self(_self);
+ const char* name = PyUpb_GetStrData(key);
+ const void* elem = name ? self->funcs->lookup(self->parent, name) : NULL;
+ if (!name && PyObject_Hash(key) == -1) return -1;
+ return elem ? 1 : 0;
+}
+
+static PyObject* PyUpb_ByNameMap_Get(PyObject* _self, PyObject* args) {
+ PyUpb_ByNameMap* self = PyUpb_ByNameMap_Self(_self);
+ PyObject* key;
+ PyObject* default_value = Py_None;
+ if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &default_value)) {
+ return NULL;
+ }
+
+ const char* name = PyUpb_GetStrData(key);
+ const void* elem = name ? self->funcs->lookup(self->parent, name) : NULL;
+
+ if (!name && PyObject_Hash(key) == -1) return NULL;
+
+ if (elem) {
+ return self->funcs->base.get_elem_wrapper(elem);
+ } else {
+ Py_INCREF(default_value);
+ return default_value;
+ }
+}
+
+static PyObject* PyUpb_ByNameMap_GetIter(PyObject* _self) {
+ PyUpb_ByNameMap* self = PyUpb_ByNameMap_Self(_self);
+ return PyUpb_ByNameIterator_New(self->funcs, self->parent, self->parent_obj);
+}
+
+static PyObject* PyUpb_ByNameMap_Keys(PyObject* _self, PyObject* args) {
+ PyUpb_ByNameMap* self = PyUpb_ByNameMap_Self(_self);
+ int n = self->funcs->base.get_elem_count(self->parent);
+ PyObject* ret = PyList_New(n);
+ if (!ret) return NULL;
+ for (int i = 0; i < n; i++) {
+ const void* elem = self->funcs->base.index(self->parent, i);
+ PyObject* key = PyUnicode_FromString(self->funcs->get_elem_name(elem));
+ if (!key) goto error;
+ PyList_SetItem(ret, i, key);
+ }
+ return ret;
+
+error:
+ Py_XDECREF(ret);
+ return NULL;
+}
+
+static PyObject* PyUpb_ByNameMap_Values(PyObject* _self, PyObject* args) {
+ PyUpb_ByNameMap* self = PyUpb_ByNameMap_Self(_self);
+ int n = self->funcs->base.get_elem_count(self->parent);
+ PyObject* ret = PyList_New(n);
+ if (!ret) return NULL;
+ for (int i = 0; i < n; i++) {
+ const void* elem = self->funcs->base.index(self->parent, i);
+ PyObject* py_elem = self->funcs->base.get_elem_wrapper(elem);
+ if (!py_elem) goto error;
+ PyList_SetItem(ret, i, py_elem);
+ }
+ return ret;
+
+error:
+ Py_XDECREF(ret);
+ return NULL;
+}
+
+static PyObject* PyUpb_ByNameMap_Items(PyObject* _self, PyObject* args) {
+ PyUpb_ByNameMap* self = (PyUpb_ByNameMap*)_self;
+ int n = self->funcs->base.get_elem_count(self->parent);
+ PyObject* ret = PyList_New(n);
+ PyObject* item;
+ PyObject* py_elem;
+ if (!ret) return NULL;
+ for (int i = 0; i < n; i++) {
+ const void* elem = self->funcs->base.index(self->parent, i);
+ item = PyTuple_New(2);
+ py_elem = self->funcs->base.get_elem_wrapper(elem);
+ if (!item || !py_elem) goto error;
+ PyTuple_SetItem(item, 0,
+ PyUnicode_FromString(self->funcs->get_elem_name(elem)));
+ PyTuple_SetItem(item, 1, py_elem);
+ PyList_SetItem(ret, i, item);
+ }
+ return ret;
+
+error:
+ Py_XDECREF(py_elem);
+ Py_XDECREF(item);
+ Py_XDECREF(ret);
+ return NULL;
+}
+
+// A mapping container can only be equal to another mapping container, or (for
+// backward compatibility) to a dict containing the same items.
+// Returns 1 if equal, 0 if unequal, -1 on error.
+static int PyUpb_ByNameMap_IsEqual(PyUpb_ByNameMap* self, PyObject* other) {
+ // Check the identity of C++ pointers.
+ if (PyObject_TypeCheck(other, Py_TYPE(self))) {
+ PyUpb_ByNameMap* other_map = (void*)other;
+ return self->parent == other_map->parent && self->funcs == other_map->funcs;
+ }
+
+ if (!PyDict_Check(other)) return 0;
+
+ PyObject* self_dict = PyDict_New();
+ PyDict_Merge(self_dict, (PyObject*)self, 0);
+ int eq = PyObject_RichCompareBool(self_dict, other, Py_EQ);
+ Py_DECREF(self_dict);
+ return eq;
+}
+
+static PyObject* PyUpb_ByNameMap_RichCompare(PyObject* _self, PyObject* other,
+ int opid) {
+ PyUpb_ByNameMap* self = PyUpb_ByNameMap_Self(_self);
+ if (opid != Py_EQ && opid != Py_NE) {
+ Py_RETURN_NOTIMPLEMENTED;
+ }
+ bool ret = PyUpb_ByNameMap_IsEqual(self, other);
+ if (opid == Py_NE) ret = !ret;
+ return PyBool_FromLong(ret);
+}
+
+static PyMethodDef PyUpb_ByNameMap_Methods[] = {
+ {"get", (PyCFunction)&PyUpb_ByNameMap_Get, METH_VARARGS},
+ {"keys", PyUpb_ByNameMap_Keys, METH_NOARGS},
+ {"values", PyUpb_ByNameMap_Values, METH_NOARGS},
+ {"items", PyUpb_ByNameMap_Items, METH_NOARGS},
+ {NULL}};
+
+static PyType_Slot PyUpb_ByNameMap_Slots[] = {
+ {Py_mp_ass_subscript, PyUpb_ByNameMap_AssignSubscript},
+ {Py_mp_length, PyUpb_ByNameMap_Length},
+ {Py_mp_subscript, PyUpb_ByNameMap_Subscript},
+ {Py_sq_contains, &PyUpb_ByNameMap_Contains},
+ {Py_tp_dealloc, &PyUpb_ByNameMap_Dealloc},
+ {Py_tp_iter, PyUpb_ByNameMap_GetIter},
+ {Py_tp_methods, &PyUpb_ByNameMap_Methods},
+ {Py_tp_repr, &PyUpb_DescriptorMap_Repr},
+ {Py_tp_richcompare, &PyUpb_ByNameMap_RichCompare},
+ {0, NULL},
+};
+
+static PyType_Spec PyUpb_ByNameMap_Spec = {
+ PYUPB_MODULE_NAME "._ByNameMap", // tp_name
+ sizeof(PyUpb_ByNameMap), // tp_basicsize
+ 0, // tp_itemsize
+ Py_TPFLAGS_DEFAULT, // tp_flags
+ PyUpb_ByNameMap_Slots,
+};
+
+// -----------------------------------------------------------------------------
+// ByNumberMap
+// -----------------------------------------------------------------------------
+
+typedef struct {
+ PyObject_HEAD;
+ const PyUpb_ByNumberMap_Funcs* funcs;
+ const void* parent; // upb_MessageDef*, upb_DefPool*, etc.
+ PyObject* parent_obj; // Python object that keeps parent alive, we own a ref.
+} PyUpb_ByNumberMap;
+
+PyUpb_ByNumberMap* PyUpb_ByNumberMap_Self(PyObject* obj) {
+ assert(Py_TYPE(obj) == PyUpb_ModuleState_Get()->by_number_map_type);
+ return (PyUpb_ByNumberMap*)obj;
+}
+
+PyObject* PyUpb_ByNumberMap_New(const PyUpb_ByNumberMap_Funcs* funcs,
+ const void* parent, PyObject* parent_obj) {
+ PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
+ PyUpb_ByNumberMap* map = (void*)PyType_GenericAlloc(s->by_number_map_type, 0);
+ map->funcs = funcs;
+ map->parent = parent;
+ map->parent_obj = parent_obj;
+ Py_INCREF(parent_obj);
+ return &map->ob_base;
+}
+
+static void PyUpb_ByNumberMap_Dealloc(PyObject* _self) {
+ PyUpb_ByNumberMap* self = PyUpb_ByNumberMap_Self(_self);
+ Py_DECREF(self->parent_obj);
+ PyUpb_Dealloc(self);
+}
+
+static Py_ssize_t PyUpb_ByNumberMap_Length(PyObject* _self) {
+ PyUpb_ByNumberMap* self = PyUpb_ByNumberMap_Self(_self);
+ return self->funcs->base.get_elem_count(self->parent);
+}
+
+static const void* PyUpb_ByNumberMap_LookupHelper(PyUpb_ByNumberMap* self,
+ PyObject* key) {
+ long num = PyLong_AsLong(key);
+ if (num == -1 && PyErr_Occurred()) {
+ PyErr_Clear();
+ // Ensure that the key is hashable (this will raise an error if not).
+ PyObject_Hash(key);
+ return NULL;
+ } else {
+ return self->funcs->lookup(self->parent, num);
+ }
+}
+
+static PyObject* PyUpb_ByNumberMap_Subscript(PyObject* _self, PyObject* key) {
+ PyUpb_ByNumberMap* self = PyUpb_ByNumberMap_Self(_self);
+ const void* elem = PyUpb_ByNumberMap_LookupHelper(self, key);
+ if (elem) {
+ return self->funcs->base.get_elem_wrapper(elem);
+ } else {
+ if (!PyErr_Occurred()) {
+ PyErr_SetObject(PyExc_KeyError, key);
+ }
+ return NULL;
+ }
+}
+
+static int PyUpb_ByNumberMap_AssignSubscript(PyObject* self, PyObject* key,
+ PyObject* value) {
+ PyErr_Format(PyExc_TypeError, PYUPB_MODULE_NAME
+ ".ByNumberMap' object does not support item assignment");
+ return -1;
+}
+
+static PyObject* PyUpb_ByNumberMap_Get(PyObject* _self, PyObject* args) {
+ PyUpb_ByNumberMap* self = PyUpb_ByNumberMap_Self(_self);
+ PyObject* key;
+ PyObject* default_value = Py_None;
+ if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &default_value)) {
+ return NULL;
+ }
+
+ const void* elem = PyUpb_ByNumberMap_LookupHelper(self, key);
+ if (elem) {
+ return self->funcs->base.get_elem_wrapper(elem);
+ } else if (PyErr_Occurred()) {
+ return NULL;
+ } else {
+ return PyUpb_NewRef(default_value);
+ }
+}
+
+static PyObject* PyUpb_ByNumberMap_GetIter(PyObject* _self) {
+ PyUpb_ByNumberMap* self = PyUpb_ByNumberMap_Self(_self);
+ return PyUpb_ByNumberIterator_New(self->funcs, self->parent,
+ self->parent_obj);
+}
+
+static PyObject* PyUpb_ByNumberMap_Keys(PyObject* _self, PyObject* args) {
+ PyUpb_ByNumberMap* self = PyUpb_ByNumberMap_Self(_self);
+ int n = self->funcs->base.get_elem_count(self->parent);
+ PyObject* ret = PyList_New(n);
+ if (!ret) return NULL;
+ for (int i = 0; i < n; i++) {
+ const void* elem = self->funcs->base.index(self->parent, i);
+ PyObject* key = PyLong_FromLong(self->funcs->get_elem_num(elem));
+ if (!key) goto error;
+ PyList_SetItem(ret, i, key);
+ }
+ return ret;
+
+error:
+ Py_XDECREF(ret);
+ return NULL;
+}
+
+static PyObject* PyUpb_ByNumberMap_Values(PyObject* _self, PyObject* args) {
+ PyUpb_ByNumberMap* self = PyUpb_ByNumberMap_Self(_self);
+ int n = self->funcs->base.get_elem_count(self->parent);
+ PyObject* ret = PyList_New(n);
+ if (!ret) return NULL;
+ for (int i = 0; i < n; i++) {
+ const void* elem = self->funcs->base.index(self->parent, i);
+ PyObject* py_elem = self->funcs->base.get_elem_wrapper(elem);
+ if (!py_elem) goto error;
+ PyList_SetItem(ret, i, py_elem);
+ }
+ return ret;
+
+error:
+ Py_XDECREF(ret);
+ return NULL;
+}
+
+static PyObject* PyUpb_ByNumberMap_Items(PyObject* _self, PyObject* args) {
+ PyUpb_ByNumberMap* self = PyUpb_ByNumberMap_Self(_self);
+ int n = self->funcs->base.get_elem_count(self->parent);
+ PyObject* ret = PyList_New(n);
+ PyObject* item;
+ PyObject* py_elem;
+ if (!ret) return NULL;
+ for (int i = 0; i < n; i++) {
+ const void* elem = self->funcs->base.index(self->parent, i);
+ int number = self->funcs->get_elem_num(elem);
+ item = PyTuple_New(2);
+ py_elem = self->funcs->base.get_elem_wrapper(elem);
+ if (!item || !py_elem) goto error;
+ PyTuple_SetItem(item, 0, PyLong_FromLong(number));
+ PyTuple_SetItem(item, 1, py_elem);
+ PyList_SetItem(ret, i, item);
+ }
+ return ret;
+
+error:
+ Py_XDECREF(py_elem);
+ Py_XDECREF(item);
+ Py_XDECREF(ret);
+ return NULL;
+}
+
+static int PyUpb_ByNumberMap_Contains(PyObject* _self, PyObject* key) {
+ PyUpb_ByNumberMap* self = PyUpb_ByNumberMap_Self(_self);
+ const void* elem = PyUpb_ByNumberMap_LookupHelper(self, key);
+ if (elem) return 1;
+ if (PyErr_Occurred()) return -1;
+ return 0;
+}
+
+// A mapping container can only be equal to another mapping container, or (for
+// backward compatibility) to a dict containing the same items.
+// Returns 1 if equal, 0 if unequal, -1 on error.
+static int PyUpb_ByNumberMap_IsEqual(PyUpb_ByNumberMap* self, PyObject* other) {
+ // Check the identity of C++ pointers.
+ if (PyObject_TypeCheck(other, Py_TYPE(self))) {
+ PyUpb_ByNumberMap* other_map = (void*)other;
+ return self->parent == other_map->parent && self->funcs == other_map->funcs;
+ }
+
+ if (!PyDict_Check(other)) return 0;
+
+ PyObject* self_dict = PyDict_New();
+ PyDict_Merge(self_dict, (PyObject*)self, 0);
+ int eq = PyObject_RichCompareBool(self_dict, other, Py_EQ);
+ Py_DECREF(self_dict);
+ return eq;
+}
+
+static PyObject* PyUpb_ByNumberMap_RichCompare(PyObject* _self, PyObject* other,
+ int opid) {
+ PyUpb_ByNumberMap* self = PyUpb_ByNumberMap_Self(_self);
+ if (opid != Py_EQ && opid != Py_NE) {
+ Py_RETURN_NOTIMPLEMENTED;
+ }
+ bool ret = PyUpb_ByNumberMap_IsEqual(self, other);
+ if (opid == Py_NE) ret = !ret;
+ return PyBool_FromLong(ret);
+}
+
+static PyMethodDef PyUpb_ByNumberMap_Methods[] = {
+ {"get", (PyCFunction)&PyUpb_ByNumberMap_Get, METH_VARARGS},
+ {"keys", PyUpb_ByNumberMap_Keys, METH_NOARGS},
+ {"values", PyUpb_ByNumberMap_Values, METH_NOARGS},
+ {"items", PyUpb_ByNumberMap_Items, METH_NOARGS},
+ {NULL}};
+
+static PyType_Slot PyUpb_ByNumberMap_Slots[] = {
+ {Py_mp_ass_subscript, PyUpb_ByNumberMap_AssignSubscript},
+ {Py_mp_length, PyUpb_ByNumberMap_Length},
+ {Py_mp_subscript, PyUpb_ByNumberMap_Subscript},
+ {Py_sq_contains, &PyUpb_ByNumberMap_Contains},
+ {Py_tp_dealloc, &PyUpb_ByNumberMap_Dealloc},
+ {Py_tp_iter, PyUpb_ByNumberMap_GetIter},
+ {Py_tp_methods, &PyUpb_ByNumberMap_Methods},
+ {Py_tp_repr, &PyUpb_DescriptorMap_Repr},
+ {Py_tp_richcompare, &PyUpb_ByNumberMap_RichCompare},
+ {0, NULL},
+};
+
+static PyType_Spec PyUpb_ByNumberMap_Spec = {
+ PYUPB_MODULE_NAME "._ByNumberMap", // tp_name
+ sizeof(PyUpb_ByNumberMap), // tp_basicsize
+ 0, // tp_itemsize
+ Py_TPFLAGS_DEFAULT, // tp_flags
+ PyUpb_ByNumberMap_Slots,
+};
+
+// -----------------------------------------------------------------------------
+// Top Level
+// -----------------------------------------------------------------------------
+
+bool PyUpb_InitDescriptorContainers(PyObject* m) {
+ PyUpb_ModuleState* s = PyUpb_ModuleState_GetFromModule(m);
+
+ s->by_name_map_type = PyUpb_AddClass(m, &PyUpb_ByNameMap_Spec);
+ s->by_number_map_type = PyUpb_AddClass(m, &PyUpb_ByNumberMap_Spec);
+ s->by_name_iterator_type = PyUpb_AddClass(m, &PyUpb_ByNameIterator_Spec);
+ s->by_number_iterator_type = PyUpb_AddClass(m, &PyUpb_ByNumberIterator_Spec);
+ s->generic_sequence_type = PyUpb_AddClass(m, &PyUpb_GenericSequence_Spec);
+
+ return s->by_name_map_type && s->by_number_map_type &&
+ s->by_name_iterator_type && s->by_number_iterator_type &&
+ s->generic_sequence_type;
+}
diff --git a/upb/python/descriptor_containers.h b/upb/python/descriptor_containers.h
new file mode 100644
index 0000000..5b2b1fa
--- /dev/null
+++ b/upb/python/descriptor_containers.h
@@ -0,0 +1,117 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef PYUPB_DESCRIPTOR_CONTAINERS_H__
+#define PYUPB_DESCRIPTOR_CONTAINERS_H__
+
+// This file defines immutable Python containiner types whose data comes from
+// an underlying descriptor (def).
+//
+// Because there are many instances of these types that vend different kinds of
+// data (fields, oneofs, enums, etc) these types accept a "vtable" of function
+// pointers. This saves us from having to define numerous distinct Python types
+// for each kind of data we want to vend.
+//
+// The underlying upb APIs follow a consistent pattern that allows us to use
+// those functions directly inside these vtables, greatly reducing the amount of
+// "adaptor" code we need to write.
+
+#include <stdbool.h>
+
+#include "protobuf.h"
+#include "upb/reflection/def.h"
+
+// -----------------------------------------------------------------------------
+// PyUpb_GenericSequence
+// -----------------------------------------------------------------------------
+
+// A Python object that vends a sequence of descriptors.
+
+typedef struct {
+ // Returns the number of elements in the map.
+ int (*get_elem_count)(const void* parent);
+ // Returns an element by index.
+ const void* (*index)(const void* parent, int idx);
+ // Returns a Python object wrapping this element, caller owns a ref.
+ PyObject* (*get_elem_wrapper)(const void* elem);
+} PyUpb_GenericSequence_Funcs;
+
+// Returns a new GenericSequence. The vtable `funcs` must outlive this object
+// (generally it should be static). The GenericSequence will take a ref on
+// `parent_obj`, which must be sufficient to keep `parent` alive. The object
+// `parent` will be passed as an argument to the functions in `funcs`.
+PyObject* PyUpb_GenericSequence_New(const PyUpb_GenericSequence_Funcs* funcs,
+ const void* parent, PyObject* parent_obj);
+
+// -----------------------------------------------------------------------------
+// PyUpb_ByNameMap
+// -----------------------------------------------------------------------------
+
+// A Python object that vends a name->descriptor map.
+
+typedef struct {
+ PyUpb_GenericSequence_Funcs base;
+ // Looks up by name and returns either a pointer to the element or NULL.
+ const void* (*lookup)(const void* parent, const char* key);
+ // Returns the name associated with this element.
+ const char* (*get_elem_name)(const void* elem);
+} PyUpb_ByNameMap_Funcs;
+
+// Returns a new ByNameMap. The vtable `funcs` must outlive this object
+// (generally it should be static). The ByNameMap will take a ref on
+// `parent_obj`, which must be sufficient to keep `parent` alive. The object
+// `parent` will be passed as an argument to the functions in `funcs`.
+PyObject* PyUpb_ByNameMap_New(const PyUpb_ByNameMap_Funcs* funcs,
+ const void* parent, PyObject* parent_obj);
+
+// -----------------------------------------------------------------------------
+// PyUpb_ByNumberMap
+// -----------------------------------------------------------------------------
+
+// A Python object that vends a number->descriptor map.
+
+typedef struct {
+ PyUpb_GenericSequence_Funcs base;
+ // Looks up by name and returns either a pointer to the element or NULL.
+ const void* (*lookup)(const void* parent, int num);
+ // Returns the name associated with this element.
+ int (*get_elem_num)(const void* elem);
+} PyUpb_ByNumberMap_Funcs;
+
+// Returns a new ByNumberMap. The vtable `funcs` must outlive this object
+// (generally it should be static). The ByNumberMap will take a ref on
+// `parent_obj`, which must be sufficient to keep `parent` alive. The object
+// `parent` will be passed as an argument to the functions in `funcs`.
+PyObject* PyUpb_ByNumberMap_New(const PyUpb_ByNumberMap_Funcs* funcs,
+ const void* parent, PyObject* parent_obj);
+
+bool PyUpb_InitDescriptorContainers(PyObject* m);
+
+#endif // PYUPB_DESCRIPTOR_CONTAINERS_H__
diff --git a/upb/python/descriptor_pool.c b/upb/python/descriptor_pool.c
new file mode 100644
index 0000000..ee41677
--- /dev/null
+++ b/upb/python/descriptor_pool.c
@@ -0,0 +1,652 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "python/descriptor_pool.h"
+
+#include "google/protobuf/descriptor.upbdefs.h"
+#include "python/convert.h"
+#include "python/descriptor.h"
+#include "python/message.h"
+#include "python/protobuf.h"
+#include "upb/reflection/def.h"
+#include "upb/util/def_to_proto.h"
+
+// -----------------------------------------------------------------------------
+// DescriptorPool
+// -----------------------------------------------------------------------------
+
+typedef struct {
+ PyObject_HEAD;
+ upb_DefPool* symtab;
+ PyObject* db; // The DescriptorDatabase underlying this pool. May be NULL.
+} PyUpb_DescriptorPool;
+
+PyObject* PyUpb_DescriptorPool_GetDefaultPool(void) {
+ PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
+ return s->default_pool;
+}
+
+const upb_MessageDef* PyUpb_DescriptorPool_GetFileProtoDef(void) {
+ PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
+ if (!s->c_descriptor_symtab) {
+ s->c_descriptor_symtab = upb_DefPool_New();
+ }
+ return google_protobuf_FileDescriptorProto_getmsgdef(s->c_descriptor_symtab);
+}
+
+static PyObject* PyUpb_DescriptorPool_DoCreateWithCache(
+ PyTypeObject* type, PyObject* db, PyUpb_WeakMap* obj_cache) {
+ PyUpb_DescriptorPool* pool = (void*)PyType_GenericAlloc(type, 0);
+ pool->symtab = upb_DefPool_New();
+ pool->db = db;
+ Py_XINCREF(pool->db);
+ PyUpb_WeakMap_Add(obj_cache, pool->symtab, &pool->ob_base);
+ return &pool->ob_base;
+}
+
+static PyObject* PyUpb_DescriptorPool_DoCreate(PyTypeObject* type,
+ PyObject* db) {
+ return PyUpb_DescriptorPool_DoCreateWithCache(type, db,
+ PyUpb_ObjCache_Instance());
+}
+
+upb_DefPool* PyUpb_DescriptorPool_GetSymtab(PyObject* pool) {
+ return ((PyUpb_DescriptorPool*)pool)->symtab;
+}
+
+static int PyUpb_DescriptorPool_Traverse(PyUpb_DescriptorPool* self,
+ visitproc visit, void* arg) {
+ Py_VISIT(self->db);
+ return 0;
+}
+
+static int PyUpb_DescriptorPool_Clear(PyUpb_DescriptorPool* self) {
+ Py_CLEAR(self->db);
+ return 0;
+}
+
+PyObject* PyUpb_DescriptorPool_Get(const upb_DefPool* symtab) {
+ PyObject* pool = PyUpb_ObjCache_Get(symtab);
+ assert(pool);
+ return pool;
+}
+
+static void PyUpb_DescriptorPool_Dealloc(PyUpb_DescriptorPool* self) {
+ PyUpb_DescriptorPool_Clear(self);
+ upb_DefPool_Free(self->symtab);
+ PyUpb_ObjCache_Delete(self->symtab);
+ PyUpb_Dealloc(self);
+}
+
+/*
+ * DescriptorPool.__new__()
+ *
+ * Implements:
+ * DescriptorPool(descriptor_db=None)
+ */
+static PyObject* PyUpb_DescriptorPool_New(PyTypeObject* type, PyObject* args,
+ PyObject* kwargs) {
+ char* kwlist[] = {"descriptor_db", 0};
+ PyObject* db = NULL;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O", kwlist, &db)) {
+ return NULL;
+ }
+
+ if (db == Py_None) db = NULL;
+ return PyUpb_DescriptorPool_DoCreate(type, db);
+}
+
+static PyObject* PyUpb_DescriptorPool_DoAdd(PyObject* _self,
+ PyObject* file_desc);
+
+static bool PyUpb_DescriptorPool_TryLoadFileProto(PyUpb_DescriptorPool* self,
+ PyObject* proto) {
+ if (proto == NULL) {
+ if (PyErr_ExceptionMatches(PyExc_KeyError)) {
+ // Expected error: item was simply not found.
+ PyErr_Clear();
+ return true; // We didn't accomplish our goal, but we didn't error out.
+ }
+ return false;
+ }
+ if (proto == Py_None) return true;
+ PyObject* ret = PyUpb_DescriptorPool_DoAdd((PyObject*)self, proto);
+ bool ok = ret != NULL;
+ Py_XDECREF(ret);
+ return ok;
+}
+
+static bool PyUpb_DescriptorPool_TryLoadSymbol(PyUpb_DescriptorPool* self,
+ PyObject* sym) {
+ if (!self->db) return false;
+ PyObject* file_proto =
+ PyObject_CallMethod(self->db, "FindFileContainingSymbol", "O", sym);
+ bool ret = PyUpb_DescriptorPool_TryLoadFileProto(self, file_proto);
+ Py_XDECREF(file_proto);
+ return ret;
+}
+
+static bool PyUpb_DescriptorPool_TryLoadFilename(PyUpb_DescriptorPool* self,
+ PyObject* filename) {
+ if (!self->db) return false;
+ PyObject* file_proto =
+ PyObject_CallMethod(self->db, "FindFileByName", "O", filename);
+ bool ret = PyUpb_DescriptorPool_TryLoadFileProto(self, file_proto);
+ Py_XDECREF(file_proto);
+ return ret;
+}
+
+bool PyUpb_DescriptorPool_CheckNoDatabase(PyObject* _self) { return true; }
+
+static bool PyUpb_DescriptorPool_LoadDependentFiles(
+ PyUpb_DescriptorPool* self, google_protobuf_FileDescriptorProto* proto) {
+ size_t n;
+ const upb_StringView* deps =
+ google_protobuf_FileDescriptorProto_dependency(proto, &n);
+ for (size_t i = 0; i < n; i++) {
+ const upb_FileDef* dep = upb_DefPool_FindFileByNameWithSize(
+ self->symtab, deps[i].data, deps[i].size);
+ if (!dep) {
+ PyObject* filename =
+ PyUnicode_FromStringAndSize(deps[i].data, deps[i].size);
+ if (!filename) return false;
+ bool ok = PyUpb_DescriptorPool_TryLoadFilename(self, filename);
+ Py_DECREF(filename);
+ if (!ok) return false;
+ }
+ }
+ return true;
+}
+
+static PyObject* PyUpb_DescriptorPool_DoAddSerializedFile(
+ PyObject* _self, PyObject* serialized_pb) {
+ PyUpb_DescriptorPool* self = (PyUpb_DescriptorPool*)_self;
+ upb_Arena* arena = upb_Arena_New();
+ if (!arena) PYUPB_RETURN_OOM;
+ PyObject* result = NULL;
+
+ char* buf;
+ Py_ssize_t size;
+ if (PyBytes_AsStringAndSize(serialized_pb, &buf, &size) < 0) {
+ goto done;
+ }
+
+ google_protobuf_FileDescriptorProto* proto =
+ google_protobuf_FileDescriptorProto_parse(buf, size, arena);
+ if (!proto) {
+ PyErr_SetString(PyExc_TypeError, "Couldn't parse file content!");
+ goto done;
+ }
+
+ upb_StringView name = google_protobuf_FileDescriptorProto_name(proto);
+ const upb_FileDef* file =
+ upb_DefPool_FindFileByNameWithSize(self->symtab, name.data, name.size);
+
+ if (file) {
+ // If the existing file is equal to the new file, then silently ignore the
+ // duplicate add.
+ google_protobuf_FileDescriptorProto* existing =
+ upb_FileDef_ToProto(file, arena);
+ if (!existing) {
+ PyErr_SetNone(PyExc_MemoryError);
+ goto done;
+ }
+ const upb_MessageDef* m = PyUpb_DescriptorPool_GetFileProtoDef();
+ if (upb_Message_IsEqual(proto, existing, m)) {
+ result = PyUpb_FileDescriptor_Get(file);
+ goto done;
+ }
+ }
+
+ if (self->db) {
+ if (!PyUpb_DescriptorPool_LoadDependentFiles(self, proto)) goto done;
+ }
+
+ upb_Status status;
+ upb_Status_Clear(&status);
+
+ const upb_FileDef* filedef =
+ upb_DefPool_AddFile(self->symtab, proto, &status);
+ if (!filedef) {
+ PyErr_Format(PyExc_TypeError,
+ "Couldn't build proto file into descriptor pool: %s",
+ upb_Status_ErrorMessage(&status));
+ goto done;
+ }
+
+ result = PyUpb_FileDescriptor_Get(filedef);
+
+done:
+ upb_Arena_Free(arena);
+ return result;
+}
+
+static PyObject* PyUpb_DescriptorPool_DoAdd(PyObject* _self,
+ PyObject* file_desc) {
+ if (!PyUpb_Message_Verify(file_desc)) return NULL;
+ const upb_MessageDef* m = PyUpb_Message_GetMsgdef(file_desc);
+ const char* file_proto_name =
+ PYUPB_DESCRIPTOR_PROTO_PACKAGE ".FileDescriptorProto";
+ if (strcmp(upb_MessageDef_FullName(m), file_proto_name) != 0) {
+ return PyErr_Format(PyExc_TypeError, "Can only add FileDescriptorProto");
+ }
+ PyObject* subargs = PyTuple_New(0);
+ if (!subargs) return NULL;
+ PyObject* serialized =
+ PyUpb_Message_SerializeToString(file_desc, subargs, NULL);
+ Py_DECREF(subargs);
+ if (!serialized) return NULL;
+ PyObject* ret = PyUpb_DescriptorPool_DoAddSerializedFile(_self, serialized);
+ Py_DECREF(serialized);
+ return ret;
+}
+
+/*
+ * PyUpb_DescriptorPool_AddSerializedFile()
+ *
+ * Implements:
+ * DescriptorPool.AddSerializedFile(self, serialized_file_descriptor)
+ *
+ * Adds the given serialized FileDescriptorProto to the pool.
+ */
+static PyObject* PyUpb_DescriptorPool_AddSerializedFile(
+ PyObject* _self, PyObject* serialized_pb) {
+ PyUpb_DescriptorPool* self = (PyUpb_DescriptorPool*)_self;
+ if (self->db) {
+ PyErr_SetString(
+ PyExc_ValueError,
+ "Cannot call AddSerializedFile on a DescriptorPool that uses a "
+ "DescriptorDatabase. Add your file to the underlying database.");
+ return false;
+ }
+ return PyUpb_DescriptorPool_DoAddSerializedFile(_self, serialized_pb);
+}
+
+static PyObject* PyUpb_DescriptorPool_Add(PyObject* _self,
+ PyObject* file_desc) {
+ PyUpb_DescriptorPool* self = (PyUpb_DescriptorPool*)_self;
+ if (self->db) {
+ PyErr_SetString(
+ PyExc_ValueError,
+ "Cannot call Add on a DescriptorPool that uses a DescriptorDatabase. "
+ "Add your file to the underlying database.");
+ return false;
+ }
+ return PyUpb_DescriptorPool_DoAdd(_self, file_desc);
+}
+
+/*
+ * PyUpb_DescriptorPool_FindFileByName()
+ *
+ * Implements:
+ * DescriptorPool.FindFileByName(self, name)
+ */
+static PyObject* PyUpb_DescriptorPool_FindFileByName(PyObject* _self,
+ PyObject* arg) {
+ PyUpb_DescriptorPool* self = (PyUpb_DescriptorPool*)_self;
+
+ const char* name = PyUpb_VerifyStrData(arg);
+ if (!name) return NULL;
+
+ const upb_FileDef* file = upb_DefPool_FindFileByName(self->symtab, name);
+ if (file == NULL && self->db) {
+ if (!PyUpb_DescriptorPool_TryLoadFilename(self, arg)) return NULL;
+ file = upb_DefPool_FindFileByName(self->symtab, name);
+ }
+ if (file == NULL) {
+ return PyErr_Format(PyExc_KeyError, "Couldn't find file %.200s", name);
+ }
+
+ return PyUpb_FileDescriptor_Get(file);
+}
+
+/*
+ * PyUpb_DescriptorPool_FindExtensionByName()
+ *
+ * Implements:
+ * DescriptorPool.FindExtensionByName(self, name)
+ */
+static PyObject* PyUpb_DescriptorPool_FindExtensionByName(PyObject* _self,
+ PyObject* arg) {
+ PyUpb_DescriptorPool* self = (PyUpb_DescriptorPool*)_self;
+
+ const char* name = PyUpb_VerifyStrData(arg);
+ if (!name) return NULL;
+
+ const upb_FieldDef* field =
+ upb_DefPool_FindExtensionByName(self->symtab, name);
+ if (field == NULL && self->db) {
+ if (!PyUpb_DescriptorPool_TryLoadSymbol(self, arg)) return NULL;
+ field = upb_DefPool_FindExtensionByName(self->symtab, name);
+ }
+ if (field == NULL) {
+ return PyErr_Format(PyExc_KeyError, "Couldn't find extension %.200s", name);
+ }
+
+ return PyUpb_FieldDescriptor_Get(field);
+}
+
+/*
+ * PyUpb_DescriptorPool_FindMessageTypeByName()
+ *
+ * Implements:
+ * DescriptorPool.FindMessageTypeByName(self, name)
+ */
+static PyObject* PyUpb_DescriptorPool_FindMessageTypeByName(PyObject* _self,
+ PyObject* arg) {
+ PyUpb_DescriptorPool* self = (PyUpb_DescriptorPool*)_self;
+
+ const char* name = PyUpb_VerifyStrData(arg);
+ if (!name) return NULL;
+
+ const upb_MessageDef* m = upb_DefPool_FindMessageByName(self->symtab, name);
+ if (m == NULL && self->db) {
+ if (!PyUpb_DescriptorPool_TryLoadSymbol(self, arg)) return NULL;
+ m = upb_DefPool_FindMessageByName(self->symtab, name);
+ }
+ if (m == NULL) {
+ return PyErr_Format(PyExc_KeyError, "Couldn't find message %.200s", name);
+ }
+
+ return PyUpb_Descriptor_Get(m);
+}
+
+// Splits a dotted symbol like foo.bar.baz on the last dot. Returns the portion
+// after the last dot (baz) and updates `*parent_size` to the length of the
+// parent (foo.bar). Returns NULL if no dots were present.
+static const char* PyUpb_DescriptorPool_SplitSymbolName(const char* sym,
+ size_t* parent_size) {
+ const char* last_dot = strrchr(sym, '.');
+ if (!last_dot) return NULL;
+ *parent_size = last_dot - sym;
+ return last_dot + 1;
+}
+
+/*
+ * PyUpb_DescriptorPool_FindFieldByName()
+ *
+ * Implements:
+ * DescriptorPool.FindFieldByName(self, name)
+ */
+static PyObject* PyUpb_DescriptorPool_FindFieldByName(PyObject* _self,
+ PyObject* arg) {
+ PyUpb_DescriptorPool* self = (PyUpb_DescriptorPool*)_self;
+
+ const char* name = PyUpb_VerifyStrData(arg);
+ if (!name) return NULL;
+
+ size_t parent_size;
+ const char* child = PyUpb_DescriptorPool_SplitSymbolName(name, &parent_size);
+ const upb_FieldDef* f = NULL;
+ if (child) {
+ const upb_MessageDef* parent =
+ upb_DefPool_FindMessageByNameWithSize(self->symtab, name, parent_size);
+ if (parent == NULL && self->db) {
+ if (!PyUpb_DescriptorPool_TryLoadSymbol(self, arg)) return NULL;
+ parent = upb_DefPool_FindMessageByNameWithSize(self->symtab, name,
+ parent_size);
+ }
+ if (parent) {
+ f = upb_MessageDef_FindFieldByName(parent, child);
+ }
+ }
+
+ if (!f) {
+ return PyErr_Format(PyExc_KeyError, "Couldn't find message %.200s", name);
+ }
+
+ return PyUpb_FieldDescriptor_Get(f);
+}
+
+/*
+ * PyUpb_DescriptorPool_FindEnumTypeByName()
+ *
+ * Implements:
+ * DescriptorPool.FindEnumTypeByName(self, name)
+ */
+static PyObject* PyUpb_DescriptorPool_FindEnumTypeByName(PyObject* _self,
+ PyObject* arg) {
+ PyUpb_DescriptorPool* self = (PyUpb_DescriptorPool*)_self;
+
+ const char* name = PyUpb_VerifyStrData(arg);
+ if (!name) return NULL;
+
+ const upb_EnumDef* e = upb_DefPool_FindEnumByName(self->symtab, name);
+ if (e == NULL && self->db) {
+ if (!PyUpb_DescriptorPool_TryLoadSymbol(self, arg)) return NULL;
+ e = upb_DefPool_FindEnumByName(self->symtab, name);
+ }
+ if (e == NULL) {
+ return PyErr_Format(PyExc_KeyError, "Couldn't find enum %.200s", name);
+ }
+
+ return PyUpb_EnumDescriptor_Get(e);
+}
+
+/*
+ * PyUpb_DescriptorPool_FindOneofByName()
+ *
+ * Implements:
+ * DescriptorPool.FindOneofByName(self, name)
+ */
+static PyObject* PyUpb_DescriptorPool_FindOneofByName(PyObject* _self,
+ PyObject* arg) {
+ PyUpb_DescriptorPool* self = (PyUpb_DescriptorPool*)_self;
+
+ const char* name = PyUpb_VerifyStrData(arg);
+ if (!name) return NULL;
+
+ size_t parent_size;
+ const char* child = PyUpb_DescriptorPool_SplitSymbolName(name, &parent_size);
+
+ if (child) {
+ const upb_MessageDef* parent =
+ upb_DefPool_FindMessageByNameWithSize(self->symtab, name, parent_size);
+ if (parent == NULL && self->db) {
+ if (!PyUpb_DescriptorPool_TryLoadSymbol(self, arg)) return NULL;
+ parent = upb_DefPool_FindMessageByNameWithSize(self->symtab, name,
+ parent_size);
+ }
+ if (parent) {
+ const upb_OneofDef* o = upb_MessageDef_FindOneofByName(parent, child);
+ return PyUpb_OneofDescriptor_Get(o);
+ }
+ }
+
+ return PyErr_Format(PyExc_KeyError, "Couldn't find oneof %.200s", name);
+}
+
+static PyObject* PyUpb_DescriptorPool_FindServiceByName(PyObject* _self,
+ PyObject* arg) {
+ PyUpb_DescriptorPool* self = (PyUpb_DescriptorPool*)_self;
+
+ const char* name = PyUpb_VerifyStrData(arg);
+ if (!name) return NULL;
+
+ const upb_ServiceDef* s = upb_DefPool_FindServiceByName(self->symtab, name);
+ if (s == NULL && self->db) {
+ if (!PyUpb_DescriptorPool_TryLoadSymbol(self, arg)) return NULL;
+ s = upb_DefPool_FindServiceByName(self->symtab, name);
+ }
+ if (s == NULL) {
+ return PyErr_Format(PyExc_KeyError, "Couldn't find service %.200s", name);
+ }
+
+ return PyUpb_ServiceDescriptor_Get(s);
+}
+
+static PyObject* PyUpb_DescriptorPool_FindMethodByName(PyObject* _self,
+ PyObject* arg) {
+ PyUpb_DescriptorPool* self = (PyUpb_DescriptorPool*)_self;
+
+ const char* name = PyUpb_VerifyStrData(arg);
+ if (!name) return NULL;
+ size_t parent_size;
+ const char* child = PyUpb_DescriptorPool_SplitSymbolName(name, &parent_size);
+
+ if (!child) goto err;
+ const upb_ServiceDef* parent =
+ upb_DefPool_FindServiceByNameWithSize(self->symtab, name, parent_size);
+ if (parent == NULL && self->db) {
+ if (!PyUpb_DescriptorPool_TryLoadSymbol(self, arg)) return NULL;
+ parent =
+ upb_DefPool_FindServiceByNameWithSize(self->symtab, name, parent_size);
+ }
+ if (!parent) goto err;
+ const upb_MethodDef* m = upb_ServiceDef_FindMethodByName(parent, child);
+ if (!m) goto err;
+ return PyUpb_MethodDescriptor_Get(m);
+
+err:
+ return PyErr_Format(PyExc_KeyError, "Couldn't find method %.200s", name);
+}
+
+static PyObject* PyUpb_DescriptorPool_FindFileContainingSymbol(PyObject* _self,
+ PyObject* arg) {
+ PyUpb_DescriptorPool* self = (PyUpb_DescriptorPool*)_self;
+
+ const char* name = PyUpb_VerifyStrData(arg);
+ if (!name) return NULL;
+
+ const upb_FileDef* f =
+ upb_DefPool_FindFileContainingSymbol(self->symtab, name);
+ if (f == NULL && self->db) {
+ if (!PyUpb_DescriptorPool_TryLoadSymbol(self, arg)) return NULL;
+ f = upb_DefPool_FindFileContainingSymbol(self->symtab, name);
+ }
+ if (f == NULL) {
+ return PyErr_Format(PyExc_KeyError, "Couldn't find symbol %.200s", name);
+ }
+
+ return PyUpb_FileDescriptor_Get(f);
+}
+
+static PyObject* PyUpb_DescriptorPool_FindExtensionByNumber(PyObject* _self,
+ PyObject* args) {
+ PyUpb_DescriptorPool* self = (PyUpb_DescriptorPool*)_self;
+ PyObject* message_descriptor;
+ int number;
+ if (!PyArg_ParseTuple(args, "Oi", &message_descriptor, &number)) {
+ return NULL;
+ }
+
+ const upb_FieldDef* f = upb_DefPool_FindExtensionByNumber(
+ self->symtab, PyUpb_Descriptor_GetDef(message_descriptor), number);
+ if (f == NULL) {
+ return PyErr_Format(PyExc_KeyError, "Couldn't find Extension %d", number);
+ }
+
+ return PyUpb_FieldDescriptor_Get(f);
+}
+
+static PyObject* PyUpb_DescriptorPool_FindAllExtensions(PyObject* _self,
+ PyObject* msg_desc) {
+ PyUpb_DescriptorPool* self = (PyUpb_DescriptorPool*)_self;
+ const upb_MessageDef* m = PyUpb_Descriptor_GetDef(msg_desc);
+ size_t n;
+ const upb_FieldDef** ext = upb_DefPool_GetAllExtensions(self->symtab, m, &n);
+ PyObject* ret = PyList_New(n);
+ if (!ret) goto done;
+ for (size_t i = 0; i < n; i++) {
+ PyObject* field = PyUpb_FieldDescriptor_Get(ext[i]);
+ if (!field) {
+ Py_DECREF(ret);
+ ret = NULL;
+ goto done;
+ }
+ PyList_SetItem(ret, i, field);
+ }
+done:
+ free(ext);
+ return ret;
+}
+
+static PyMethodDef PyUpb_DescriptorPool_Methods[] = {
+ {"Add", PyUpb_DescriptorPool_Add, METH_O,
+ "Adds the FileDescriptorProto and its types to this pool."},
+ {"AddSerializedFile", PyUpb_DescriptorPool_AddSerializedFile, METH_O,
+ "Adds a serialized FileDescriptorProto to this pool."},
+ {"FindFileByName", PyUpb_DescriptorPool_FindFileByName, METH_O,
+ "Searches for a file descriptor by its .proto name."},
+ {"FindMessageTypeByName", PyUpb_DescriptorPool_FindMessageTypeByName,
+ METH_O, "Searches for a message descriptor by full name."},
+ {"FindFieldByName", PyUpb_DescriptorPool_FindFieldByName, METH_O,
+ "Searches for a field descriptor by full name."},
+ {"FindExtensionByName", PyUpb_DescriptorPool_FindExtensionByName, METH_O,
+ "Searches for extension descriptor by full name."},
+ {"FindEnumTypeByName", PyUpb_DescriptorPool_FindEnumTypeByName, METH_O,
+ "Searches for enum type descriptor by full name."},
+ {"FindOneofByName", PyUpb_DescriptorPool_FindOneofByName, METH_O,
+ "Searches for oneof descriptor by full name."},
+ {"FindServiceByName", PyUpb_DescriptorPool_FindServiceByName, METH_O,
+ "Searches for service descriptor by full name."},
+ {"FindMethodByName", PyUpb_DescriptorPool_FindMethodByName, METH_O,
+ "Searches for method descriptor by full name."},
+ {"FindFileContainingSymbol", PyUpb_DescriptorPool_FindFileContainingSymbol,
+ METH_O, "Gets the FileDescriptor containing the specified symbol."},
+ {"FindExtensionByNumber", PyUpb_DescriptorPool_FindExtensionByNumber,
+ METH_VARARGS, "Gets the extension descriptor for the given number."},
+ {"FindAllExtensions", PyUpb_DescriptorPool_FindAllExtensions, METH_O,
+ "Gets all known extensions of the given message descriptor."},
+ {NULL}};
+
+static PyType_Slot PyUpb_DescriptorPool_Slots[] = {
+ {Py_tp_clear, PyUpb_DescriptorPool_Clear},
+ {Py_tp_dealloc, PyUpb_DescriptorPool_Dealloc},
+ {Py_tp_methods, PyUpb_DescriptorPool_Methods},
+ {Py_tp_new, PyUpb_DescriptorPool_New},
+ {Py_tp_traverse, PyUpb_DescriptorPool_Traverse},
+ {0, NULL}};
+
+static PyType_Spec PyUpb_DescriptorPool_Spec = {
+ PYUPB_MODULE_NAME ".DescriptorPool",
+ sizeof(PyUpb_DescriptorPool),
+ 0, // tp_itemsize
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
+ PyUpb_DescriptorPool_Slots,
+};
+
+// -----------------------------------------------------------------------------
+// Top Level
+// -----------------------------------------------------------------------------
+
+bool PyUpb_InitDescriptorPool(PyObject* m) {
+ PyUpb_ModuleState* state = PyUpb_ModuleState_GetFromModule(m);
+ PyTypeObject* descriptor_pool_type =
+ PyUpb_AddClass(m, &PyUpb_DescriptorPool_Spec);
+
+ if (!descriptor_pool_type) return false;
+
+ state->default_pool = PyUpb_DescriptorPool_DoCreateWithCache(
+ descriptor_pool_type, NULL, state->obj_cache);
+ return state->default_pool &&
+ PyModule_AddObject(m, "default_pool", state->default_pool) == 0;
+}
diff --git a/upb/python/descriptor_pool.h b/upb/python/descriptor_pool.h
new file mode 100644
index 0000000..ae50ef0
--- /dev/null
+++ b/upb/python/descriptor_pool.h
@@ -0,0 +1,51 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef PYUPB_DESCRIPTOR_POOL_H__
+#define PYUPB_DESCRIPTOR_POOL_H__
+
+#include <stdbool.h>
+
+#include "protobuf.h"
+
+// Returns a Python wrapper object for the given symtab. The symtab must have
+// been created from a Python DescriptorPool originally.
+PyObject* PyUpb_DescriptorPool_Get(const upb_DefPool* symtab);
+
+// Given a Python DescriptorPool, returns the underlying symtab.
+upb_DefPool* PyUpb_DescriptorPool_GetSymtab(PyObject* pool);
+
+// Returns the default DescriptorPool (a global singleton).
+PyObject* PyUpb_DescriptorPool_GetDefaultPool(void);
+
+// Module-level init.
+bool PyUpb_InitDescriptorPool(PyObject* m);
+
+#endif // PYUPB_DESCRIPTOR_POOL_H__
diff --git a/upb/python/dist/BUILD.bazel b/upb/python/dist/BUILD.bazel
new file mode 100644
index 0000000..4a0075e
--- /dev/null
+++ b/upb/python/dist/BUILD.bazel
@@ -0,0 +1,470 @@
+# Copyright (c) 2009-2022, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load("//bazel:py_proto_library.bzl", "py_proto_library")
+load(":dist.bzl", "py_dist", "py_dist_module")
+load("@bazel_skylib//lib:selects.bzl", "selects")
+load("@com_google_protobuf//:protobuf_version.bzl", "PROTOBUF_PYTHON_VERSION")
+load("@rules_pkg//:mappings.bzl", "pkg_files", "strip_prefix")
+load("@rules_pkg//:pkg.bzl", "pkg_tar")
+load("@rules_python//python:packaging.bzl", "py_wheel")
+load("@system_python//:version.bzl", "SYSTEM_PYTHON_VERSION")
+
+licenses(["notice"])
+
+py_dist_module(
+ name = "message_mod",
+ extension = "//python:_message_binary",
+ module_name = "google._upb._message",
+)
+
+py_proto_library(
+ name = "well_known_proto_py_pb2",
+ deps = [
+ "@com_google_protobuf//:any_proto",
+ "@com_google_protobuf//:api_proto",
+ "@com_google_protobuf//:descriptor_proto",
+ "@com_google_protobuf//:duration_proto",
+ "@com_google_protobuf//:empty_proto",
+ "@com_google_protobuf//:field_mask_proto",
+ "@com_google_protobuf//:source_context_proto",
+ "@com_google_protobuf//:struct_proto",
+ "@com_google_protobuf//:timestamp_proto",
+ "@com_google_protobuf//:type_proto",
+ "@com_google_protobuf//:wrappers_proto",
+ ],
+)
+
+py_proto_library(
+ name = "plugin_py_pb2",
+ deps = ["@com_google_protobuf//:compiler_plugin_proto"],
+)
+
+config_setting(
+ name = "linux_aarch64_release",
+ flag_values = {
+ "@com_google_protobuf//toolchain:release": "True",
+ },
+ values = {"cpu": "linux-aarch_64"},
+)
+
+config_setting(
+ name = "linux_aarch64_local",
+ constraint_values = [
+ "@platforms//os:linux",
+ "@platforms//cpu:aarch64",
+ ],
+ flag_values = {
+ "@com_google_protobuf//toolchain:release": "False",
+ },
+)
+
+config_setting(
+ name = "linux_x86_64_release",
+ flag_values = {
+ "@com_google_protobuf//toolchain:release": "True",
+ },
+ values = {"cpu": "linux-x86_64"},
+)
+
+config_setting(
+ name = "linux_x86_64_local",
+ constraint_values = [
+ "@platforms//os:linux",
+ "@platforms//cpu:x86_64",
+ ],
+ flag_values = {
+ "@com_google_protobuf//toolchain:release": "False",
+ },
+)
+
+config_setting(
+ name = "osx_x86_64_release",
+ flag_values = {
+ "@com_google_protobuf//toolchain:release": "True",
+ },
+ values = {"cpu": "osx-x86_64"},
+)
+
+config_setting(
+ name = "osx_x86_64_local",
+ constraint_values = [
+ "@platforms//os:osx",
+ "@platforms//cpu:x86_64",
+ ],
+ flag_values = {
+ "@com_google_protobuf//toolchain:release": "False",
+ },
+)
+
+selects.config_setting_group(
+ name = "osx_x86_64",
+ match_any = [
+ ":osx_x86_64_release",
+ ":osx_x86_64_local",
+ ],
+)
+
+config_setting(
+ name = "osx_aarch64_release",
+ flag_values = {
+ "@com_google_protobuf//toolchain:release": "True",
+ },
+ values = {"cpu": "osx-aarch_64"},
+)
+
+config_setting(
+ name = "osx_aarch64_local",
+ constraint_values = [
+ "@platforms//os:osx",
+ "@platforms//cpu:aarch64",
+ ],
+ flag_values = {
+ "@com_google_protobuf//toolchain:release": "False",
+ },
+)
+
+selects.config_setting_group(
+ name = "osx_aarch64",
+ match_any = [
+ ":osx_aarch64_release",
+ ":osx_aarch64_local",
+ ],
+)
+
+config_setting(
+ name = "osx_universal2",
+ values = {"cpu": "osx-universal2"},
+)
+
+config_setting(
+ name = "windows_x86_32_release",
+ flag_values = {
+ "@com_google_protobuf//toolchain:release": "True",
+ },
+ values = {"cpu": "win32"},
+)
+
+config_setting(
+ name = "windows_x86_32_local",
+ constraint_values = [
+ "@platforms//os:windows",
+ "@platforms//cpu:x86_32",
+ ],
+ flag_values = {
+ "@com_google_protobuf//toolchain:release": "False",
+ },
+)
+
+selects.config_setting_group(
+ name = "windows_x86_32",
+ match_any = [
+ ":windows_x86_32_release",
+ ":windows_x86_32_local",
+ ],
+)
+
+config_setting(
+ name = "windows_x86_64_release",
+ flag_values = {
+ "@com_google_protobuf//toolchain:release": "True",
+ },
+ values = {"cpu": "win64"},
+)
+
+config_setting(
+ name = "windows_x86_64_local",
+ constraint_values = [
+ "@platforms//os:windows",
+ "@platforms//cpu:x86_64",
+ ],
+ flag_values = {
+ "@com_google_protobuf//toolchain:release": "False",
+ },
+)
+
+selects.config_setting_group(
+ name = "windows_x86_64",
+ match_any = [
+ ":windows_x86_64_release",
+ ":windows_x86_64_local",
+ ],
+)
+
+pkg_files(
+ name = "generated_wkt",
+ srcs = [
+ ":well_known_proto_py_pb2",
+ "//:descriptor_upb_proto",
+ "//:descriptor_upb_proto_reflection",
+ ],
+ prefix = "google/protobuf",
+)
+
+pkg_files(
+ name = "generated_wkt_compiler",
+ srcs = [
+ ":plugin_py_pb2",
+ ],
+ prefix = "google/protobuf/compiler",
+)
+
+pkg_files(
+ name = "utf8_range_source_files",
+ srcs = ["@utf8_range//:utf8_range_srcs"],
+ prefix = "utf8_range",
+)
+
+pkg_files(
+ name = "dist_source_files",
+ srcs = [
+ "MANIFEST.in",
+ "setup.py",
+ ],
+)
+
+# Passing filegroups to pkg_tar directly results in incorrect
+# `protobuf/external/upb/` directory structure when built from the protobuf
+# repo. This can be removed once repositories are merged.
+pkg_files(
+ name = "filegroup_source_files",
+ srcs = [
+ "//:LICENSE",
+ "//:source_files",
+ "//python:message_srcs",
+ "//upb/base:source_files",
+ "//upb/collections:source_files",
+ "//upb/hash:source_files",
+ "//upb/lex:source_files",
+ "//upb/mem:source_files",
+ "//upb/message:source_files",
+ "//upb/mini_descriptor:source_files",
+ "//upb/mini_table:source_files",
+ "//upb/port:source_files",
+ "//upb/text:source_files",
+ "//upb/util:source_files",
+ "//upb/wire:source_files",
+ ],
+ strip_prefix = strip_prefix.from_root(""),
+)
+
+# NOTE: This package currently only works for macos and ubuntu, MSVC users
+# should use a binary wheel.
+pkg_tar(
+ name = "source_tarball",
+ srcs = [
+ ":dist_source_files",
+ ":filegroup_source_files",
+ ":generated_wkt",
+ ":generated_wkt_compiler",
+ ":utf8_range_source_files",
+ "@com_google_protobuf//python:python_source_files",
+ ],
+ extension = "tar.gz",
+ package_dir = "protobuf",
+ package_file_name = "protobuf.tar.gz",
+ strip_prefix = ".",
+ target_compatible_with = select({
+ "@system_python//:none": ["@platforms//:incompatible"],
+ "//conditions:default": [],
+ }),
+)
+
+genrule(
+ name = "source_wheel",
+ srcs = [":source_tarball"],
+ outs = ["protobuf-%s.tar.gz" % PROTOBUF_PYTHON_VERSION],
+ cmd = """
+ set -eux
+ tar -xzvf $(location :source_tarball)
+ cd protobuf/
+ python3 setup.py sdist
+ cd ..
+ mv protobuf/dist/*.tar.gz $@
+ """,
+ target_compatible_with = select({
+ "@system_python//:none": ["@platforms//:incompatible"],
+ "//conditions:default": [],
+ }),
+)
+
+py_wheel(
+ name = "binary_wheel",
+ abi = select({
+ "//python:full_api_3.7": "cp37m",
+ "//python:full_api_3.8": "cp38",
+ "//python:full_api_3.9": "cp39",
+ "//conditions:default": "abi3",
+ }),
+ author = "protobuf@googlegroups.com",
+ author_email = "protobuf@googlegroups.com",
+ classifiers = [
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ ],
+ distribution = "protobuf",
+ extra_distinfo_files = {
+ "//:LICENSE": "LICENSE",
+ },
+ homepage = "https://developers.google.com/protocol-buffers/",
+ license = "3-Clause BSD License",
+ platform = select({
+ ":linux_x86_64_local": "linux_x86_64",
+ ":linux_x86_64_release": "manylinux2014_x86_64",
+ ":linux_aarch64_local": "linux_aarch64",
+ ":linux_aarch64_release": "manylinux2014_aarch64",
+ ":osx_universal2": "macosx_10_9_universal2",
+ ":osx_aarch64": "macosx_11_0_arm64",
+ ":windows_x86_32": "win32",
+ ":windows_x86_64": "win_amd64",
+ "//conditions:default": "any",
+ }),
+ python_requires = ">=3.7",
+ python_tag = selects.with_or({
+ ("//python:limited_api_3.7", "//python:full_api_3.7"): "cp37",
+ "//python:full_api_3.8": "cp38",
+ "//python:full_api_3.9": "cp39",
+ "//python:limited_api_3.10": "cp310",
+ "//conditions:default": "cp" + SYSTEM_PYTHON_VERSION,
+ }),
+ strip_path_prefixes = [
+ "python/dist/",
+ "python/",
+ "src/",
+ ],
+ target_compatible_with = select({
+ "@system_python//:none": ["@platforms//:incompatible"],
+ "//conditions:default": [],
+ }),
+ version = PROTOBUF_PYTHON_VERSION,
+ deps = [
+ ":message_mod",
+ ":plugin_py_pb2",
+ ":well_known_proto_py_pb2",
+ "@com_google_protobuf//:python_srcs",
+ ],
+)
+
+py_wheel(
+ name = "pure_python_wheel",
+ abi = "none",
+ author = "protobuf@googlegroups.com",
+ author_email = "protobuf@googlegroups.com",
+ classifiers = [
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ ],
+ distribution = "protobuf",
+ extra_distinfo_files = {
+ "//:LICENSE": "LICENSE",
+ },
+ homepage = "https://developers.google.com/protocol-buffers/",
+ license = "3-Clause BSD License",
+ platform = "any",
+ python_requires = ">=3.7",
+ python_tag = "py3",
+ strip_path_prefixes = [
+ "python/",
+ "src/",
+ ],
+ target_compatible_with = select({
+ "@system_python//:none": ["@platforms//:incompatible"],
+ "//conditions:default": [],
+ }),
+ version = PROTOBUF_PYTHON_VERSION,
+ deps = [
+ ":plugin_py_pb2",
+ ":well_known_proto_py_pb2",
+ "@com_google_protobuf//:python_srcs",
+ ],
+)
+
+py_wheel(
+ name = "test_wheel",
+ testonly = True,
+ abi = "none",
+ distribution = "protobuftests",
+ extra_distinfo_files = {
+ "//:LICENSE": "LICENSE",
+ },
+ platform = "any",
+ python_tag = "py3",
+ strip_path_prefixes = [
+ "python/",
+ "src/",
+ ],
+ target_compatible_with = select({
+ "@system_python//:none": ["@platforms//:incompatible"],
+ "//conditions:default": [],
+ }),
+ version = PROTOBUF_PYTHON_VERSION,
+ deps = [
+ "//python/pb_unit_tests:test_files",
+ "@com_google_protobuf//:python_common_test_protos",
+ "@com_google_protobuf//:python_specific_test_protos",
+ "@com_google_protobuf//:python_test_srcs",
+ "@com_google_protobuf//src/google/protobuf:testdata",
+ ],
+)
+
+py_dist(
+ name = "dist",
+ binary_wheel = ":binary_wheel",
+ full_api_cpus = [
+ # TODO(b/231485326): fix win32 build
+ "win32",
+ "win64",
+ ],
+ # Windows needs version-specific wheels until 3.10.
+ full_api_versions = [
+ "37",
+ "38",
+ "39",
+ ],
+ # Limited API: these wheels will satisfy any Python version >= the
+ # given version.
+ #
+ # Technically the limited API doesn't have the functions we need until
+ # 3.10, but on Linux we can get away with using 3.7 (see ../python_api.h for
+ # details).
+ limited_api_wheels = {
+ # TODO(b/231485326): fix win32 build
+ "win32": "310",
+ "win64": "310",
+ "linux-x86_64": "37",
+ "linux-aarch_64": "37",
+ "osx-universal2": "37",
+ },
+ pure_python_wheel = ":pure_python_wheel",
+ tags = ["manual"],
+)
diff --git a/upb/python/dist/MANIFEST.in b/upb/python/dist/MANIFEST.in
new file mode 100644
index 0000000..1b61936
--- /dev/null
+++ b/upb/python/dist/MANIFEST.in
@@ -0,0 +1,2 @@
+global-include *.h
+global-include *.inc
\ No newline at end of file
diff --git a/upb/python/dist/dist.bzl b/upb/python/dist/dist.bzl
new file mode 100644
index 0000000..7ce38e3
--- /dev/null
+++ b/upb/python/dist/dist.bzl
@@ -0,0 +1,192 @@
+"""Rules to create python distribution files and properly name them"""
+
+load("@bazel_skylib//rules:common_settings.bzl", "BuildSettingInfo")
+load("@system_python//:version.bzl", "SYSTEM_PYTHON_VERSION")
+
+def _get_suffix(limited_api, python_version, cpu):
+ """Computes an ABI version tag for an extension module per PEP 3149."""
+ if "win32" in cpu or "win64" in cpu:
+ if limited_api:
+ return ".pyd"
+ if "win32" in cpu:
+ abi = "win32"
+ elif "win64" in cpu:
+ abi = "win_amd64"
+ else:
+ fail("Unsupported CPU: " + cpu)
+ return ".cp{}-{}.{}".format(python_version, abi, "pyd")
+
+ if python_version == "system":
+ python_version = SYSTEM_PYTHON_VERSION
+ if int(python_version) < 38:
+ python_version += "m"
+ abis = {
+ "darwin_arm64": "darwin",
+ "darwin": "darwin",
+ "osx-x86_64": "darwin",
+ "osx-aarch_64": "darwin",
+ "linux-aarch_64": "aarch64-linux-gnu",
+ "linux-x86_64": "x86_64-linux-gnu",
+ "k8": "x86_64-linux-gnu",
+ }
+
+ return ".cpython-{}-{}.{}".format(
+ python_version,
+ abis[cpu],
+ "so" if limited_api else "abi3.so",
+ )
+ elif limited_api:
+ return ".abi3.so"
+
+ fail("Unsupported combination of flags")
+
+def _declare_module_file(ctx, module_name, python_version, limited_api):
+ """Declares an output file for a Python module with this name, version, and limited api."""
+ base_filename = module_name.replace(".", "/")
+ suffix = _get_suffix(
+ python_version = python_version,
+ limited_api = limited_api,
+ cpu = ctx.var["TARGET_CPU"],
+ )
+ filename = base_filename + suffix
+ return ctx.actions.declare_file(filename)
+
+# --------------------------------------------------------------------------------------------------
+# py_dist_module()
+#
+# Creates a Python binary extension module that is ready for distribution.
+#
+# py_dist_module(
+# name = "message_mod",
+# extension = "//python:_message_binary",
+# module_name = "google._upb._message",
+# )
+#
+# In the simple case, this simply involves copying the input file to the proper filename for
+# our current configuration (module_name, cpu, python_version, limited_abi).
+#
+# For multiarch platforms (osx-universal2), we must combine binaries for multiple architectures
+# into a single output binary using the "llvm-lipo" tool. A config transition depends on multiple
+# architectures to get us the input files we need.
+
+def _py_multiarch_transition_impl(settings, attr):
+ if settings["//command_line_option:cpu"] == "osx-universal2":
+ return [{"//command_line_option:cpu": cpu} for cpu in ["osx-aarch_64", "osx-x86_64"]]
+ else:
+ return settings
+
+_py_multiarch_transition = transition(
+ implementation = _py_multiarch_transition_impl,
+ inputs = ["//command_line_option:cpu"],
+ outputs = ["//command_line_option:cpu"],
+)
+
+def _py_dist_module_impl(ctx):
+ output_file = _declare_module_file(
+ ctx = ctx,
+ module_name = ctx.attr.module_name,
+ python_version = ctx.attr._python_version[BuildSettingInfo].value,
+ limited_api = ctx.attr._limited_api[BuildSettingInfo].value,
+ )
+ if len(ctx.attr.extension) == 1:
+ src = ctx.attr.extension[0][DefaultInfo].files.to_list()[0]
+ ctx.actions.run(
+ executable = "cp",
+ arguments = [src.path, output_file.path],
+ inputs = [src],
+ outputs = [output_file],
+ )
+ return [
+ DefaultInfo(files = depset([output_file])),
+ ]
+ else:
+ srcs = [mod[DefaultInfo].files.to_list()[0] for mod in ctx.attr.extension]
+ ctx.actions.run(
+ executable = "/usr/local/bin/llvm-lipo",
+ arguments = ["-create", "-output", output_file.path] + [src.path for src in srcs],
+ inputs = srcs,
+ outputs = [output_file],
+ )
+ return [
+ DefaultInfo(files = depset([output_file])),
+ ]
+
+py_dist_module = rule(
+ output_to_genfiles = True,
+ implementation = _py_dist_module_impl,
+ attrs = {
+ "module_name": attr.string(mandatory = True),
+ "extension": attr.label(
+ mandatory = True,
+ cfg = _py_multiarch_transition,
+ ),
+ "_limited_api": attr.label(default = "//python:limited_api"),
+ "_python_version": attr.label(default = "//python:python_version"),
+ "_allowlist_function_transition": attr.label(
+ default = "@bazel_tools//tools/allowlists/function_transition_allowlist",
+ ),
+ },
+)
+
+# --------------------------------------------------------------------------------------------------
+# py_dist()
+#
+# A rule that builds a collection of binary wheels, using transitions to depend on many different
+# python versions and cpus.
+
+def _py_dist_transition_impl(settings, attr):
+ _ignore = (settings) # @unused
+ transitions = []
+
+ for cpu, version in attr.limited_api_wheels.items():
+ transitions.append({
+ "//command_line_option:cpu": cpu,
+ "//python:python_version": version,
+ "//python:limited_api": True,
+ })
+
+ for version in attr.full_api_versions:
+ for cpu in attr.full_api_cpus:
+ transitions.append({
+ "//command_line_option:cpu": cpu,
+ "//python:python_version": version,
+ "//python:limited_api": False,
+ })
+
+ return transitions
+
+_py_dist_transition = transition(
+ implementation = _py_dist_transition_impl,
+ inputs = [],
+ outputs = [
+ "//command_line_option:cpu",
+ "//python:python_version",
+ "//python:limited_api",
+ ],
+)
+
+def _py_dist_impl(ctx):
+ binary_files = [dep[DefaultInfo].files for dep in ctx.attr.binary_wheel]
+ pure_python_files = [ctx.attr.pure_python_wheel[DefaultInfo].files]
+ return [
+ DefaultInfo(files = depset(
+ transitive = binary_files + pure_python_files,
+ )),
+ ]
+
+py_dist = rule(
+ implementation = _py_dist_impl,
+ attrs = {
+ "binary_wheel": attr.label(
+ mandatory = True,
+ cfg = _py_dist_transition,
+ ),
+ "pure_python_wheel": attr.label(mandatory = True),
+ "limited_api_wheels": attr.string_dict(),
+ "full_api_versions": attr.string_list(),
+ "full_api_cpus": attr.string_list(),
+ "_allowlist_function_transition": attr.label(
+ default = "@bazel_tools//tools/allowlists/function_transition_allowlist",
+ ),
+ },
+)
diff --git a/upb/python/dist/setup.py b/upb/python/dist/setup.py
new file mode 100755
index 0000000..df93e9a
--- /dev/null
+++ b/upb/python/dist/setup.py
@@ -0,0 +1,103 @@
+#! /usr/bin/env python
+# Protocol Buffers - Google's data interchange format
+# Copyright 2008 Google Inc. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# See README for usage instructions.
+
+import glob
+import os
+import sys
+import sysconfig
+
+# We must use setuptools, not distutils, because we need to use the
+# namespace_packages option for the "google" package.
+from setuptools import setup, Extension, find_packages
+
+
+def GetVersion():
+ """Reads and returns the version from google/protobuf/__init__.py.
+
+ Do not import google.protobuf.__init__ directly, because an installed
+ protobuf library may be loaded instead.
+
+ Returns:
+ The version.
+ """
+
+ with open(os.path.join('google', 'protobuf', '__init__.py')) as version_file:
+ file_globals = {}
+ exec(version_file.read(), file_globals) # pylint:disable=exec-used
+ return file_globals["__version__"]
+
+
+current_dir = os.path.dirname(os.path.abspath(__file__))
+extra_link_args = []
+
+if sys.platform.startswith('win'):
+ extra_link_args = ['-static']
+
+setup(
+ name='protobuf',
+ version=GetVersion(),
+ description='Protocol Buffers',
+ download_url='https://github.com/protocolbuffers/protobuf/releases',
+ long_description="Protocol Buffers are Google's data interchange format",
+ url='https://developers.google.com/protocol-buffers/',
+ project_urls={
+ 'Source': 'https://github.com/protocolbuffers/protobuf',
+ },
+ maintainer='protobuf@googlegroups.com',
+ maintainer_email='protobuf@googlegroups.com',
+ license='BSD-3-Clause',
+ classifiers=[
+ 'Programming Language :: Python',
+ 'Programming Language :: Python :: 3',
+ 'Programming Language :: Python :: 3.7',
+ 'Programming Language :: Python :: 3.8',
+ 'Programming Language :: Python :: 3.9',
+ 'Programming Language :: Python :: 3.10',
+ ],
+ namespace_packages=['google'],
+ packages=find_packages(),
+ install_requires=[],
+ ext_modules=[
+ Extension(
+ 'google._upb._message',
+ glob.glob('google/protobuf/*.c')
+ + glob.glob('python/*.c')
+ + glob.glob('upb/**/*.c', recursive=True)
+ + glob.glob('utf8_range/*.c'),
+ include_dirs=[current_dir, os.path.join(current_dir, 'utf8_range')],
+ language='c',
+ extra_link_args=extra_link_args,
+ )
+ ],
+ python_requires='>=3.7',
+)
diff --git a/upb/python/extension_dict.c b/upb/python/extension_dict.c
new file mode 100644
index 0000000..d4b4dda
--- /dev/null
+++ b/upb/python/extension_dict.c
@@ -0,0 +1,256 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "python/extension_dict.h"
+
+#include "python/message.h"
+#include "python/protobuf.h"
+#include "upb/reflection/def.h"
+
+// -----------------------------------------------------------------------------
+// ExtensionDict
+// -----------------------------------------------------------------------------
+
+typedef struct {
+ PyObject_HEAD;
+ PyObject* msg; // Owning ref to our parent pessage.
+} PyUpb_ExtensionDict;
+
+PyObject* PyUpb_ExtensionDict_New(PyObject* msg) {
+ PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
+ PyUpb_ExtensionDict* ext_dict =
+ (void*)PyType_GenericAlloc(state->extension_dict_type, 0);
+ ext_dict->msg = msg;
+ Py_INCREF(ext_dict->msg);
+ return &ext_dict->ob_base;
+}
+
+static PyObject* PyUpb_ExtensionDict_FindExtensionByName(PyObject* _self,
+ PyObject* key) {
+ PyUpb_ExtensionDict* self = (PyUpb_ExtensionDict*)_self;
+ const char* name = PyUpb_GetStrData(key);
+ if (!name) {
+ PyErr_Format(PyExc_TypeError, "_FindExtensionByName expect a str");
+ return NULL;
+ }
+ const upb_MessageDef* m = PyUpb_Message_GetMsgdef(self->msg);
+ const upb_FileDef* file = upb_MessageDef_File(m);
+ const upb_DefPool* symtab = upb_FileDef_Pool(file);
+ const upb_FieldDef* ext = upb_DefPool_FindExtensionByName(symtab, name);
+ if (ext) {
+ return PyUpb_FieldDescriptor_Get(ext);
+ } else {
+ Py_RETURN_NONE;
+ }
+}
+
+static PyObject* PyUpb_ExtensionDict_FindExtensionByNumber(PyObject* _self,
+ PyObject* arg) {
+ PyUpb_ExtensionDict* self = (PyUpb_ExtensionDict*)_self;
+ const upb_MessageDef* m = PyUpb_Message_GetMsgdef(self->msg);
+ const upb_MiniTable* l = upb_MessageDef_MiniTable(m);
+ const upb_FileDef* file = upb_MessageDef_File(m);
+ const upb_DefPool* symtab = upb_FileDef_Pool(file);
+ const upb_ExtensionRegistry* reg = upb_DefPool_ExtensionRegistry(symtab);
+ int64_t number = PyLong_AsLong(arg);
+ if (number == -1 && PyErr_Occurred()) return NULL;
+ const upb_MiniTableExtension* ext =
+ (upb_MiniTableExtension*)upb_ExtensionRegistry_Lookup(reg, l, number);
+ if (ext) {
+ const upb_FieldDef* f = upb_DefPool_FindExtensionByMiniTable(symtab, ext);
+ return PyUpb_FieldDescriptor_Get(f);
+ } else {
+ Py_RETURN_NONE;
+ }
+}
+
+static void PyUpb_ExtensionDict_Dealloc(PyUpb_ExtensionDict* self) {
+ PyUpb_Message_ClearExtensionDict(self->msg);
+ Py_DECREF(self->msg);
+ PyUpb_Dealloc(self);
+}
+
+static PyObject* PyUpb_ExtensionDict_RichCompare(PyObject* _self,
+ PyObject* _other, int opid) {
+ // Only equality comparisons are implemented.
+ if (opid != Py_EQ && opid != Py_NE) {
+ Py_INCREF(Py_NotImplemented);
+ return Py_NotImplemented;
+ }
+ PyUpb_ExtensionDict* self = (PyUpb_ExtensionDict*)_self;
+ bool equals = false;
+ if (PyObject_TypeCheck(_other, Py_TYPE(_self))) {
+ PyUpb_ExtensionDict* other = (PyUpb_ExtensionDict*)_other;
+ equals = self->msg == other->msg;
+ }
+ bool ret = opid == Py_EQ ? equals : !equals;
+ return PyBool_FromLong(ret);
+}
+
+static int PyUpb_ExtensionDict_Contains(PyObject* _self, PyObject* key) {
+ PyUpb_ExtensionDict* self = (PyUpb_ExtensionDict*)_self;
+ const upb_FieldDef* f = PyUpb_Message_GetExtensionDef(self->msg, key);
+ if (!f) return -1;
+ upb_Message* msg = PyUpb_Message_GetIfReified(self->msg);
+ if (!msg) return 0;
+ if (upb_FieldDef_IsRepeated(f)) {
+ upb_MessageValue val = upb_Message_GetFieldByDef(msg, f);
+ return upb_Array_Size(val.array_val) > 0;
+ } else {
+ return upb_Message_HasFieldByDef(msg, f);
+ }
+}
+
+static Py_ssize_t PyUpb_ExtensionDict_Length(PyObject* _self) {
+ PyUpb_ExtensionDict* self = (PyUpb_ExtensionDict*)_self;
+ upb_Message* msg = PyUpb_Message_GetIfReified(self->msg);
+ return msg ? upb_Message_ExtensionCount(msg) : 0;
+}
+
+static PyObject* PyUpb_ExtensionDict_Subscript(PyObject* _self, PyObject* key) {
+ PyUpb_ExtensionDict* self = (PyUpb_ExtensionDict*)_self;
+ const upb_FieldDef* f = PyUpb_Message_GetExtensionDef(self->msg, key);
+ if (!f) return NULL;
+ return PyUpb_Message_GetFieldValue(self->msg, f);
+}
+
+static int PyUpb_ExtensionDict_AssignSubscript(PyObject* _self, PyObject* key,
+ PyObject* val) {
+ PyUpb_ExtensionDict* self = (PyUpb_ExtensionDict*)_self;
+ const upb_FieldDef* f = PyUpb_Message_GetExtensionDef(self->msg, key);
+ if (!f) return -1;
+ if (val) {
+ return PyUpb_Message_SetFieldValue(self->msg, f, val, PyExc_TypeError);
+ } else {
+ PyUpb_Message_DoClearField(self->msg, f);
+ return 0;
+ }
+}
+
+static PyObject* PyUpb_ExtensionIterator_New(PyObject* _ext_dict);
+
+static PyMethodDef PyUpb_ExtensionDict_Methods[] = {
+ {"_FindExtensionByName", PyUpb_ExtensionDict_FindExtensionByName, METH_O,
+ "Finds an extension by name."},
+ {"_FindExtensionByNumber", PyUpb_ExtensionDict_FindExtensionByNumber,
+ METH_O, "Finds an extension by number."},
+ {NULL, NULL},
+};
+
+static PyType_Slot PyUpb_ExtensionDict_Slots[] = {
+ {Py_tp_dealloc, PyUpb_ExtensionDict_Dealloc},
+ {Py_tp_methods, PyUpb_ExtensionDict_Methods},
+ //{Py_tp_getset, PyUpb_ExtensionDict_Getters},
+ //{Py_tp_hash, PyObject_HashNotImplemented},
+ {Py_tp_richcompare, PyUpb_ExtensionDict_RichCompare},
+ {Py_tp_iter, PyUpb_ExtensionIterator_New},
+ {Py_sq_contains, PyUpb_ExtensionDict_Contains},
+ {Py_sq_length, PyUpb_ExtensionDict_Length},
+ {Py_mp_length, PyUpb_ExtensionDict_Length},
+ {Py_mp_subscript, PyUpb_ExtensionDict_Subscript},
+ {Py_mp_ass_subscript, PyUpb_ExtensionDict_AssignSubscript},
+ {0, NULL}};
+
+static PyType_Spec PyUpb_ExtensionDict_Spec = {
+ PYUPB_MODULE_NAME ".ExtensionDict", // tp_name
+ sizeof(PyUpb_ExtensionDict), // tp_basicsize
+ 0, // tp_itemsize
+ Py_TPFLAGS_DEFAULT, // tp_flags
+ PyUpb_ExtensionDict_Slots,
+};
+
+// -----------------------------------------------------------------------------
+// ExtensionIterator
+// -----------------------------------------------------------------------------
+
+typedef struct {
+ PyObject_HEAD;
+ PyObject* msg;
+ size_t iter;
+} PyUpb_ExtensionIterator;
+
+static PyObject* PyUpb_ExtensionIterator_New(PyObject* _ext_dict) {
+ PyUpb_ExtensionDict* ext_dict = (PyUpb_ExtensionDict*)_ext_dict;
+ PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
+ PyUpb_ExtensionIterator* iter =
+ (void*)PyType_GenericAlloc(state->extension_iterator_type, 0);
+ if (!iter) return NULL;
+ iter->msg = ext_dict->msg;
+ iter->iter = kUpb_Message_Begin;
+ Py_INCREF(iter->msg);
+ return &iter->ob_base;
+}
+
+static void PyUpb_ExtensionIterator_Dealloc(void* _self) {
+ PyUpb_ExtensionIterator* self = (PyUpb_ExtensionIterator*)_self;
+ Py_DECREF(self->msg);
+ PyUpb_Dealloc(_self);
+}
+
+PyObject* PyUpb_ExtensionIterator_IterNext(PyObject* _self) {
+ PyUpb_ExtensionIterator* self = (PyUpb_ExtensionIterator*)_self;
+ upb_Message* msg = PyUpb_Message_GetIfReified(self->msg);
+ if (!msg) return NULL;
+ const upb_MessageDef* m = PyUpb_Message_GetMsgdef(self->msg);
+ const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(m));
+ while (true) {
+ const upb_FieldDef* f;
+ upb_MessageValue val;
+ if (!upb_Message_Next(msg, m, symtab, &f, &val, &self->iter)) return NULL;
+ if (upb_FieldDef_IsExtension(f)) return PyUpb_FieldDescriptor_Get(f);
+ }
+}
+
+static PyType_Slot PyUpb_ExtensionIterator_Slots[] = {
+ {Py_tp_dealloc, PyUpb_ExtensionIterator_Dealloc},
+ {Py_tp_iter, PyObject_SelfIter},
+ {Py_tp_iternext, PyUpb_ExtensionIterator_IterNext},
+ {0, NULL}};
+
+static PyType_Spec PyUpb_ExtensionIterator_Spec = {
+ PYUPB_MODULE_NAME ".ExtensionIterator", // tp_name
+ sizeof(PyUpb_ExtensionIterator), // tp_basicsize
+ 0, // tp_itemsize
+ Py_TPFLAGS_DEFAULT, // tp_flags
+ PyUpb_ExtensionIterator_Slots,
+};
+
+// -----------------------------------------------------------------------------
+// Top Level
+// -----------------------------------------------------------------------------
+
+bool PyUpb_InitExtensionDict(PyObject* m) {
+ PyUpb_ModuleState* s = PyUpb_ModuleState_GetFromModule(m);
+
+ s->extension_dict_type = PyUpb_AddClass(m, &PyUpb_ExtensionDict_Spec);
+ s->extension_iterator_type = PyUpb_AddClass(m, &PyUpb_ExtensionIterator_Spec);
+
+ return s->extension_dict_type && s->extension_iterator_type;
+}
diff --git a/upb/python/extension_dict.h b/upb/python/extension_dict.h
new file mode 100644
index 0000000..99d2add
--- /dev/null
+++ b/upb/python/extension_dict.h
@@ -0,0 +1,42 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef PYUPB_EXTENSION_DICT_H__
+#define PYUPB_EXTENSION_DICT_H__
+
+#include <stdbool.h>
+
+#include "python/python_api.h"
+
+PyObject* PyUpb_ExtensionDict_New(PyObject* msg);
+
+bool PyUpb_InitExtensionDict(PyObject* m);
+
+#endif // PYUPB_EXTENSION_DICT_H__
diff --git a/upb/python/map.c b/upb/python/map.c
new file mode 100644
index 0000000..bd9022d
--- /dev/null
+++ b/upb/python/map.c
@@ -0,0 +1,529 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "python/map.h"
+
+#include "python/convert.h"
+#include "python/message.h"
+#include "python/protobuf.h"
+#include "upb/collections/map.h"
+#include "upb/reflection/def.h"
+
+// -----------------------------------------------------------------------------
+// MapContainer
+// -----------------------------------------------------------------------------
+
+typedef struct {
+ PyObject_HEAD;
+ PyObject* arena;
+ // The field descriptor (upb_FieldDef*).
+ // The low bit indicates whether the container is reified (see ptr below).
+ // - low bit set: repeated field is a stub (empty map, no underlying data).
+ // - low bit clear: repeated field is reified (points to upb_Array).
+ uintptr_t field;
+ union {
+ PyObject* parent; // stub: owning pointer to parent message.
+ upb_Map* map; // reified: the data for this array.
+ } ptr;
+ int version;
+} PyUpb_MapContainer;
+
+static PyObject* PyUpb_MapIterator_New(PyUpb_MapContainer* map);
+
+static bool PyUpb_MapContainer_IsStub(PyUpb_MapContainer* self) {
+ return self->field & 1;
+}
+
+// If the map is reified, returns it. Otherwise, returns NULL.
+// If NULL is returned, the object is empty and has no underlying data.
+static upb_Map* PyUpb_MapContainer_GetIfReified(PyUpb_MapContainer* self) {
+ return PyUpb_MapContainer_IsStub(self) ? NULL : self->ptr.map;
+}
+
+static const upb_FieldDef* PyUpb_MapContainer_GetField(
+ PyUpb_MapContainer* self) {
+ return (const upb_FieldDef*)(self->field & ~(uintptr_t)1);
+}
+
+static void PyUpb_MapContainer_Dealloc(void* _self) {
+ PyUpb_MapContainer* self = _self;
+ Py_DECREF(self->arena);
+ if (PyUpb_MapContainer_IsStub(self)) {
+ PyUpb_Message_CacheDelete(self->ptr.parent,
+ PyUpb_MapContainer_GetField(self));
+ Py_DECREF(self->ptr.parent);
+ } else {
+ PyUpb_ObjCache_Delete(self->ptr.map);
+ }
+ PyUpb_Dealloc(_self);
+}
+
+PyTypeObject* PyUpb_MapContainer_GetClass(const upb_FieldDef* f) {
+ assert(upb_FieldDef_IsMap(f));
+ PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
+ const upb_FieldDef* val =
+ upb_MessageDef_Field(upb_FieldDef_MessageSubDef(f), 1);
+ assert(upb_FieldDef_Number(val) == 2);
+ return upb_FieldDef_IsSubMessage(val) ? state->message_map_container_type
+ : state->scalar_map_container_type;
+}
+
+PyObject* PyUpb_MapContainer_NewStub(PyObject* parent, const upb_FieldDef* f,
+ PyObject* arena) {
+ // We only create stubs when the parent is reified, by convention. However
+ // this is not an invariant: the parent could become reified at any time.
+ assert(PyUpb_Message_GetIfReified(parent) == NULL);
+ PyTypeObject* cls = PyUpb_MapContainer_GetClass(f);
+ PyUpb_MapContainer* map = (void*)PyType_GenericAlloc(cls, 0);
+ map->arena = arena;
+ map->field = (uintptr_t)f | 1;
+ map->ptr.parent = parent;
+ map->version = 0;
+ Py_INCREF(arena);
+ Py_INCREF(parent);
+ return &map->ob_base;
+}
+
+void PyUpb_MapContainer_Reify(PyObject* _self, upb_Map* map) {
+ PyUpb_MapContainer* self = (PyUpb_MapContainer*)_self;
+ if (!map) {
+ const upb_FieldDef* f = PyUpb_MapContainer_GetField(self);
+ upb_Arena* arena = PyUpb_Arena_Get(self->arena);
+ const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
+ const upb_FieldDef* key_f = upb_MessageDef_Field(entry_m, 0);
+ const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1);
+ map = upb_Map_New(arena, upb_FieldDef_CType(key_f),
+ upb_FieldDef_CType(val_f));
+ }
+ PyUpb_ObjCache_Add(map, &self->ob_base);
+ Py_DECREF(self->ptr.parent);
+ self->ptr.map = map; // Overwrites self->ptr.parent.
+ self->field &= ~(uintptr_t)1;
+ assert(!PyUpb_MapContainer_IsStub(self));
+}
+
+void PyUpb_MapContainer_Invalidate(PyObject* obj) {
+ PyUpb_MapContainer* self = (PyUpb_MapContainer*)obj;
+ self->version++;
+}
+
+upb_Map* PyUpb_MapContainer_EnsureReified(PyObject* _self) {
+ PyUpb_MapContainer* self = (PyUpb_MapContainer*)_self;
+ self->version++;
+ upb_Map* map = PyUpb_MapContainer_GetIfReified(self);
+ if (map) return map; // Already writable.
+
+ const upb_FieldDef* f = PyUpb_MapContainer_GetField(self);
+ upb_Arena* arena = PyUpb_Arena_Get(self->arena);
+ const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
+ const upb_FieldDef* key_f = upb_MessageDef_Field(entry_m, 0);
+ const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1);
+ map =
+ upb_Map_New(arena, upb_FieldDef_CType(key_f), upb_FieldDef_CType(val_f));
+ upb_MessageValue msgval = {.map_val = map};
+ PyUpb_Message_SetConcreteSubobj(self->ptr.parent, f, msgval);
+ PyUpb_MapContainer_Reify((PyObject*)self, map);
+ return map;
+}
+
+bool PyUpb_MapContainer_Set(PyUpb_MapContainer* self, upb_Map* map,
+ upb_MessageValue key, upb_MessageValue val,
+ upb_Arena* arena) {
+ switch (upb_Map_Insert(map, key, val, arena)) {
+ case kUpb_MapInsertStatus_Inserted:
+ return true;
+ case kUpb_MapInsertStatus_Replaced:
+ // We did not insert a new key, undo the previous invalidate.
+ self->version--;
+ return true;
+ case kUpb_MapInsertStatus_OutOfMemory:
+ return false;
+ }
+ return false; // Unreachable, silence compiler warning.
+}
+
+int PyUpb_MapContainer_AssignSubscript(PyObject* _self, PyObject* key,
+ PyObject* val) {
+ PyUpb_MapContainer* self = (PyUpb_MapContainer*)_self;
+ upb_Map* map = PyUpb_MapContainer_EnsureReified(_self);
+ const upb_FieldDef* f = PyUpb_MapContainer_GetField(self);
+ const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
+ const upb_FieldDef* key_f = upb_MessageDef_Field(entry_m, 0);
+ const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1);
+ upb_Arena* arena = PyUpb_Arena_Get(self->arena);
+ upb_MessageValue u_key, u_val;
+ if (!PyUpb_PyToUpb(key, key_f, &u_key, arena)) return -1;
+
+ if (val) {
+ if (!PyUpb_PyToUpb(val, val_f, &u_val, arena)) return -1;
+ if (!PyUpb_MapContainer_Set(self, map, u_key, u_val, arena)) return -1;
+ } else {
+ if (!upb_Map_Delete(map, u_key, NULL)) {
+ PyErr_Format(PyExc_KeyError, "Key not present in map");
+ return -1;
+ }
+ }
+ return 0;
+}
+
+PyObject* PyUpb_MapContainer_Subscript(PyObject* _self, PyObject* key) {
+ PyUpb_MapContainer* self = (PyUpb_MapContainer*)_self;
+ upb_Map* map = PyUpb_MapContainer_GetIfReified(self);
+ const upb_FieldDef* f = PyUpb_MapContainer_GetField(self);
+ const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
+ const upb_FieldDef* key_f = upb_MessageDef_Field(entry_m, 0);
+ const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1);
+ upb_Arena* arena = PyUpb_Arena_Get(self->arena);
+ upb_MessageValue u_key, u_val;
+ if (!PyUpb_PyToUpb(key, key_f, &u_key, arena)) return NULL;
+ if (!map || !upb_Map_Get(map, u_key, &u_val)) {
+ map = PyUpb_MapContainer_EnsureReified(_self);
+ upb_Arena* arena = PyUpb_Arena_Get(self->arena);
+ if (upb_FieldDef_IsSubMessage(val_f)) {
+ const upb_Message* m = upb_FieldDef_MessageSubDef(val_f);
+ const upb_MiniTable* layout = upb_MessageDef_MiniTable(m);
+ u_val.msg_val = upb_Message_New(layout, arena);
+ } else {
+ memset(&u_val, 0, sizeof(u_val));
+ }
+ if (!PyUpb_MapContainer_Set(self, map, u_key, u_val, arena)) return false;
+ }
+ return PyUpb_UpbToPy(u_val, val_f, self->arena);
+}
+
+PyObject* PyUpb_MapContainer_Contains(PyObject* _self, PyObject* key) {
+ PyUpb_MapContainer* self = (PyUpb_MapContainer*)_self;
+ upb_Map* map = PyUpb_MapContainer_GetIfReified(self);
+ if (!map) Py_RETURN_FALSE;
+ const upb_FieldDef* f = PyUpb_MapContainer_GetField(self);
+ const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
+ const upb_FieldDef* key_f = upb_MessageDef_Field(entry_m, 0);
+ upb_MessageValue u_key;
+ if (!PyUpb_PyToUpb(key, key_f, &u_key, NULL)) return NULL;
+ if (upb_Map_Get(map, u_key, NULL)) {
+ Py_RETURN_TRUE;
+ } else {
+ Py_RETURN_FALSE;
+ }
+}
+
+PyObject* PyUpb_MapContainer_Clear(PyObject* _self, PyObject* key) {
+ upb_Map* map = PyUpb_MapContainer_EnsureReified(_self);
+ upb_Map_Clear(map);
+ Py_RETURN_NONE;
+}
+
+static PyObject* PyUpb_MapContainer_Get(PyObject* _self, PyObject* args,
+ PyObject* kwargs) {
+ PyUpb_MapContainer* self = (PyUpb_MapContainer*)_self;
+ static const char* kwlist[] = {"key", "default", NULL};
+ PyObject* key;
+ PyObject* default_value = NULL;
+ upb_Map* map = PyUpb_MapContainer_GetIfReified(self);
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", (char**)kwlist, &key,
+ &default_value)) {
+ return NULL;
+ }
+
+ const upb_FieldDef* f = PyUpb_MapContainer_GetField(self);
+ const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
+ const upb_FieldDef* key_f = upb_MessageDef_Field(entry_m, 0);
+ const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1);
+ upb_Arena* arena = PyUpb_Arena_Get(self->arena);
+ upb_MessageValue u_key, u_val;
+ if (!PyUpb_PyToUpb(key, key_f, &u_key, arena)) return NULL;
+ if (map && upb_Map_Get(map, u_key, &u_val)) {
+ return PyUpb_UpbToPy(u_val, val_f, self->arena);
+ }
+ if (default_value) {
+ Py_INCREF(default_value);
+ return default_value;
+ }
+ Py_RETURN_NONE;
+}
+
+static PyObject* PyUpb_MapContainer_GetEntryClass(PyObject* _self,
+ PyObject* arg) {
+ PyUpb_MapContainer* self = (PyUpb_MapContainer*)_self;
+ const upb_FieldDef* f = PyUpb_MapContainer_GetField(self);
+ const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
+ return PyUpb_Descriptor_GetClass(entry_m);
+}
+
+Py_ssize_t PyUpb_MapContainer_Length(PyObject* _self) {
+ PyUpb_MapContainer* self = (PyUpb_MapContainer*)_self;
+ upb_Map* map = PyUpb_MapContainer_GetIfReified(self);
+ return map ? upb_Map_Size(map) : 0;
+}
+
+PyUpb_MapContainer* PyUpb_MapContainer_Check(PyObject* _self) {
+ PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
+ if (!PyObject_TypeCheck(_self, state->message_map_container_type) &&
+ !PyObject_TypeCheck(_self, state->scalar_map_container_type)) {
+ PyErr_Format(PyExc_TypeError, "Expected protobuf map, but got %R", _self);
+ return NULL;
+ }
+ return (PyUpb_MapContainer*)_self;
+}
+
+int PyUpb_Message_InitMapAttributes(PyObject* map, PyObject* value,
+ const upb_FieldDef* f);
+
+static PyObject* PyUpb_MapContainer_MergeFrom(PyObject* _self, PyObject* _arg) {
+ PyUpb_MapContainer* self = (PyUpb_MapContainer*)_self;
+ const upb_FieldDef* f = PyUpb_MapContainer_GetField(self);
+
+ if (PyDict_Check(_arg)) {
+ return PyErr_Format(PyExc_AttributeError, "Merging of dict is not allowed");
+ }
+
+ if (PyUpb_Message_InitMapAttributes(_self, _arg, f) < 0) {
+ return NULL;
+ }
+
+ Py_RETURN_NONE;
+}
+
+static PyObject* PyUpb_MapContainer_Repr(PyObject* _self) {
+ PyUpb_MapContainer* self = (PyUpb_MapContainer*)_self;
+ upb_Map* map = PyUpb_MapContainer_GetIfReified(self);
+ PyObject* dict = PyDict_New();
+ if (map) {
+ const upb_FieldDef* f = PyUpb_MapContainer_GetField(self);
+ const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
+ const upb_FieldDef* key_f = upb_MessageDef_Field(entry_m, 0);
+ const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1);
+ size_t iter = kUpb_Map_Begin;
+ upb_MessageValue map_key, map_val;
+ while (upb_Map_Next(map, &map_key, &map_val, &iter)) {
+ PyObject* key = PyUpb_UpbToPy(map_key, key_f, self->arena);
+ PyObject* val = PyUpb_UpbToPy(map_val, val_f, self->arena);
+ if (!key || !val) {
+ Py_XDECREF(key);
+ Py_XDECREF(val);
+ Py_DECREF(dict);
+ return NULL;
+ }
+ PyDict_SetItem(dict, key, val);
+ Py_DECREF(key);
+ Py_DECREF(val);
+ }
+ }
+ PyObject* repr = PyObject_Repr(dict);
+ Py_DECREF(dict);
+ return repr;
+}
+
+PyObject* PyUpb_MapContainer_GetOrCreateWrapper(upb_Map* map,
+ const upb_FieldDef* f,
+ PyObject* arena) {
+ PyUpb_MapContainer* ret = (void*)PyUpb_ObjCache_Get(map);
+ if (ret) return &ret->ob_base;
+
+ PyTypeObject* cls = PyUpb_MapContainer_GetClass(f);
+ ret = (void*)PyType_GenericAlloc(cls, 0);
+ ret->arena = arena;
+ ret->field = (uintptr_t)f;
+ ret->ptr.map = map;
+ ret->version = 0;
+ Py_INCREF(arena);
+ PyUpb_ObjCache_Add(map, &ret->ob_base);
+ return &ret->ob_base;
+}
+
+// -----------------------------------------------------------------------------
+// ScalarMapContainer
+// -----------------------------------------------------------------------------
+
+static PyMethodDef PyUpb_ScalarMapContainer_Methods[] = {
+ {"__contains__", PyUpb_MapContainer_Contains, METH_O,
+ "Tests whether a key is a member of the map."},
+ {"clear", PyUpb_MapContainer_Clear, METH_NOARGS,
+ "Removes all elements from the map."},
+ {"get", (PyCFunction)PyUpb_MapContainer_Get, METH_VARARGS | METH_KEYWORDS,
+ "Gets the value for the given key if present, or otherwise a default"},
+ {"GetEntryClass", PyUpb_MapContainer_GetEntryClass, METH_NOARGS,
+ "Return the class used to build Entries of (key, value) pairs."},
+ {"MergeFrom", PyUpb_MapContainer_MergeFrom, METH_O,
+ "Merges a map into the current map."},
+ /*
+ { "__deepcopy__", (PyCFunction)DeepCopy, METH_VARARGS,
+ "Makes a deep copy of the class." },
+ { "__reduce__", (PyCFunction)Reduce, METH_NOARGS,
+ "Outputs picklable representation of the repeated field." },
+ */
+ {NULL, NULL},
+};
+
+static PyType_Slot PyUpb_ScalarMapContainer_Slots[] = {
+ {Py_tp_dealloc, PyUpb_MapContainer_Dealloc},
+ {Py_mp_length, PyUpb_MapContainer_Length},
+ {Py_mp_subscript, PyUpb_MapContainer_Subscript},
+ {Py_mp_ass_subscript, PyUpb_MapContainer_AssignSubscript},
+ {Py_tp_methods, PyUpb_ScalarMapContainer_Methods},
+ {Py_tp_iter, PyUpb_MapIterator_New},
+ {Py_tp_repr, PyUpb_MapContainer_Repr},
+ {0, NULL},
+};
+
+static PyType_Spec PyUpb_ScalarMapContainer_Spec = {
+ PYUPB_MODULE_NAME ".ScalarMapContainer",
+ sizeof(PyUpb_MapContainer),
+ 0,
+ Py_TPFLAGS_DEFAULT,
+ PyUpb_ScalarMapContainer_Slots,
+};
+
+// -----------------------------------------------------------------------------
+// MessageMapContainer
+// -----------------------------------------------------------------------------
+
+static PyMethodDef PyUpb_MessageMapContainer_Methods[] = {
+ {"__contains__", PyUpb_MapContainer_Contains, METH_O,
+ "Tests whether the map contains this element."},
+ {"clear", PyUpb_MapContainer_Clear, METH_NOARGS,
+ "Removes all elements from the map."},
+ {"get", (PyCFunction)PyUpb_MapContainer_Get, METH_VARARGS | METH_KEYWORDS,
+ "Gets the value for the given key if present, or otherwise a default"},
+ {"get_or_create", PyUpb_MapContainer_Subscript, METH_O,
+ "Alias for getitem, useful to make explicit that the map is mutated."},
+ {"GetEntryClass", PyUpb_MapContainer_GetEntryClass, METH_NOARGS,
+ "Return the class used to build Entries of (key, value) pairs."},
+ {"MergeFrom", PyUpb_MapContainer_MergeFrom, METH_O,
+ "Merges a map into the current map."},
+ /*
+ { "__deepcopy__", (PyCFunction)DeepCopy, METH_VARARGS,
+ "Makes a deep copy of the class." },
+ { "__reduce__", (PyCFunction)Reduce, METH_NOARGS,
+ "Outputs picklable representation of the repeated field." },
+ */
+ {NULL, NULL},
+};
+
+static PyType_Slot PyUpb_MessageMapContainer_Slots[] = {
+ {Py_tp_dealloc, PyUpb_MapContainer_Dealloc},
+ {Py_mp_length, PyUpb_MapContainer_Length},
+ {Py_mp_subscript, PyUpb_MapContainer_Subscript},
+ {Py_mp_ass_subscript, PyUpb_MapContainer_AssignSubscript},
+ {Py_tp_methods, PyUpb_MessageMapContainer_Methods},
+ {Py_tp_iter, PyUpb_MapIterator_New},
+ {Py_tp_repr, PyUpb_MapContainer_Repr},
+ {0, NULL}};
+
+static PyType_Spec PyUpb_MessageMapContainer_Spec = {
+ PYUPB_MODULE_NAME ".MessageMapContainer", sizeof(PyUpb_MapContainer), 0,
+ Py_TPFLAGS_DEFAULT, PyUpb_MessageMapContainer_Slots};
+
+// -----------------------------------------------------------------------------
+// MapIterator
+// -----------------------------------------------------------------------------
+
+typedef struct {
+ PyObject_HEAD;
+ PyUpb_MapContainer* map; // We own a reference.
+ size_t iter;
+ int version;
+} PyUpb_MapIterator;
+
+static PyObject* PyUpb_MapIterator_New(PyUpb_MapContainer* map) {
+ PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
+ PyUpb_MapIterator* iter =
+ (void*)PyType_GenericAlloc(state->map_iterator_type, 0);
+ iter->map = map;
+ iter->iter = kUpb_Map_Begin;
+ iter->version = map->version;
+ Py_INCREF(map);
+ return &iter->ob_base;
+}
+
+static void PyUpb_MapIterator_Dealloc(void* _self) {
+ PyUpb_MapIterator* self = (PyUpb_MapIterator*)_self;
+ Py_DECREF(&self->map->ob_base);
+ PyUpb_Dealloc(_self);
+}
+
+PyObject* PyUpb_MapIterator_IterNext(PyObject* _self) {
+ PyUpb_MapIterator* self = (PyUpb_MapIterator*)_self;
+ if (self->version != self->map->version) {
+ return PyErr_Format(PyExc_RuntimeError, "Map modified during iteration.");
+ }
+ upb_Map* map = PyUpb_MapContainer_GetIfReified(self->map);
+ if (!map) return NULL;
+ upb_MessageValue key, val;
+ if (!upb_Map_Next(map, &key, &val, &self->iter)) return NULL;
+ const upb_FieldDef* f = PyUpb_MapContainer_GetField(self->map);
+ const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
+ const upb_FieldDef* key_f = upb_MessageDef_Field(entry_m, 0);
+ return PyUpb_UpbToPy(key, key_f, self->map->arena);
+}
+
+static PyType_Slot PyUpb_MapIterator_Slots[] = {
+ {Py_tp_dealloc, PyUpb_MapIterator_Dealloc},
+ {Py_tp_iter, PyObject_SelfIter},
+ {Py_tp_iternext, PyUpb_MapIterator_IterNext},
+ {0, NULL}};
+
+static PyType_Spec PyUpb_MapIterator_Spec = {
+ PYUPB_MODULE_NAME ".MapIterator", sizeof(PyUpb_MapIterator), 0,
+ Py_TPFLAGS_DEFAULT, PyUpb_MapIterator_Slots};
+
+// -----------------------------------------------------------------------------
+// Top Level
+// -----------------------------------------------------------------------------
+
+static PyObject* GetMutableMappingBase(void) {
+ PyObject* collections = NULL;
+ PyObject* mapping = NULL;
+ PyObject* bases = NULL;
+ if ((collections = PyImport_ImportModule("collections.abc")) &&
+ (mapping = PyObject_GetAttrString(collections, "MutableMapping"))) {
+ bases = Py_BuildValue("(O)", mapping);
+ }
+ Py_XDECREF(collections);
+ Py_XDECREF(mapping);
+ return bases;
+}
+
+bool PyUpb_Map_Init(PyObject* m) {
+ PyUpb_ModuleState* state = PyUpb_ModuleState_GetFromModule(m);
+ PyObject* bases = GetMutableMappingBase();
+ if (!bases) return false;
+
+ state->message_map_container_type =
+ PyUpb_AddClassWithBases(m, &PyUpb_MessageMapContainer_Spec, bases);
+ state->scalar_map_container_type =
+ PyUpb_AddClassWithBases(m, &PyUpb_ScalarMapContainer_Spec, bases);
+ state->map_iterator_type = PyUpb_AddClass(m, &PyUpb_MapIterator_Spec);
+
+ Py_DECREF(bases);
+
+ return state->message_map_container_type &&
+ state->scalar_map_container_type && state->map_iterator_type;
+}
diff --git a/upb/python/map.h b/upb/python/map.h
new file mode 100644
index 0000000..6c2c47d
--- /dev/null
+++ b/upb/python/map.h
@@ -0,0 +1,69 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef PYUPB_MAP_H__
+#define PYUPB_MAP_H__
+
+#include <stdbool.h>
+
+#include "python/python_api.h"
+#include "upb/reflection/def.h"
+
+// Creates a new repeated field stub for field `f` of message object `parent`.
+// Precondition: `parent` must be a stub.
+PyObject* PyUpb_MapContainer_NewStub(PyObject* parent, const upb_FieldDef* f,
+ PyObject* arena);
+
+// Returns a map object wrapping `map`, of field type `f`, which must be on
+// `arena`. If an existing wrapper object exists, it will be returned,
+// otherwise a new object will be created. The caller always owns a ref on the
+// returned value.
+PyObject* PyUpb_MapContainer_GetOrCreateWrapper(upb_Map* map,
+ const upb_FieldDef* f,
+ PyObject* arena);
+
+// Reifies a map stub to point to the concrete data in `map`.
+// If `map` is NULL, an appropriate empty map will be constructed.
+void PyUpb_MapContainer_Reify(PyObject* self, upb_Map* map);
+
+// Reifies this map object if it is not already reified.
+upb_Map* PyUpb_MapContainer_EnsureReified(PyObject* self);
+
+// Assigns `self[key] = val` for the map `self`.
+int PyUpb_MapContainer_AssignSubscript(PyObject* self, PyObject* key,
+ PyObject* val);
+
+// Invalidates any existing iterators for the map `obj`.
+void PyUpb_MapContainer_Invalidate(PyObject* obj);
+
+// Module-level init.
+bool PyUpb_Map_Init(PyObject* m);
+
+#endif // PYUPB_MAP_H__
diff --git a/upb/python/message.c b/upb/python/message.c
new file mode 100644
index 0000000..e61eebb
--- /dev/null
+++ b/upb/python/message.c
@@ -0,0 +1,2008 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "python/message.h"
+
+#include "python/convert.h"
+#include "python/descriptor.h"
+#include "python/extension_dict.h"
+#include "python/map.h"
+#include "python/repeated.h"
+#include "upb/message/copy.h"
+#include "upb/reflection/def.h"
+#include "upb/reflection/message.h"
+#include "upb/text/encode.h"
+#include "upb/util/required_fields.h"
+
+static const upb_MessageDef* PyUpb_MessageMeta_GetMsgdef(PyObject* cls);
+static PyObject* PyUpb_MessageMeta_GetAttr(PyObject* self, PyObject* name);
+
+// -----------------------------------------------------------------------------
+// CPythonBits
+// -----------------------------------------------------------------------------
+
+// This struct contains a few things that are not exposed directly through the
+// limited API, but that we can get at in somewhat more roundabout ways. The
+// roundabout ways are slower, so we cache the values here.
+//
+// These values are valid to cache in a global, even across sub-interpreters,
+// because they are not pointers to interpreter state. They are process
+// globals that will be the same for any interpreter in this process.
+typedef struct {
+ // For each member, we note the equivalent expression that we could use in the
+ // full (non-limited) API.
+ newfunc type_new; // PyTypeObject.tp_new
+ destructor type_dealloc; // PyTypeObject.tp_dealloc
+ getattrofunc type_getattro; // PyTypeObject.tp_getattro
+ setattrofunc type_setattro; // PyTypeObject.tp_setattro
+ size_t type_basicsize; // sizeof(PyHeapTypeObject)
+
+ // While we can refer to PY_VERSION_HEX in the limited API, this will give us
+ // the version of Python we were compiled against, which may be different
+ // than the version we are dynamically linked against. Here we want the
+ // version that is actually running in this process.
+ long python_version_hex; // PY_VERSION_HEX
+} PyUpb_CPythonBits;
+
+// A global containing the values for this process.
+PyUpb_CPythonBits cpython_bits;
+
+destructor upb_Pre310_PyType_GetDeallocSlot(PyTypeObject* type_subclass) {
+ // This is a bit desperate. We need type_dealloc(), but PyType_GetSlot(type,
+ // Py_tp_dealloc) will return subtype_dealloc(). There appears to be no way
+ // whatsoever to fetch type_dealloc() through the limited API until Python
+ // 3.10.
+ //
+ // To work around this so we attempt to find it by looking for the offset of
+ // tp_dealloc in PyTypeObject, then memcpy() it directly. This should always
+ // work in practice.
+ //
+ // Starting with Python 3.10 on you can call PyType_GetSlot() on non-heap
+ // types. We will be able to replace all this hack with just:
+ //
+ // PyType_GetSlot(&PyType_Type, Py_tp_dealloc)
+ //
+ destructor subtype_dealloc = PyType_GetSlot(type_subclass, Py_tp_dealloc);
+ for (size_t i = 0; i < 2000; i += sizeof(uintptr_t)) {
+ destructor maybe_subtype_dealloc;
+ memcpy(&maybe_subtype_dealloc, (char*)type_subclass + i,
+ sizeof(destructor));
+ if (maybe_subtype_dealloc == subtype_dealloc) {
+ destructor type_dealloc;
+ memcpy(&type_dealloc, (char*)&PyType_Type + i, sizeof(destructor));
+ return type_dealloc;
+ }
+ }
+ assert(false);
+ return NULL;
+}
+
+static bool PyUpb_CPythonBits_Init(PyUpb_CPythonBits* bits) {
+ PyObject* bases = NULL;
+ PyTypeObject* type = NULL;
+ PyObject* size = NULL;
+ PyObject* sys = NULL;
+ PyObject* hex_version = NULL;
+ bool ret = false;
+
+ // PyType_GetSlot() only works on heap types, so we cannot use it on
+ // &PyType_Type directly. Instead we create our own (temporary) type derived
+ // from PyType_Type: this will inherit all of the slots from PyType_Type, but
+ // as a heap type it can be queried with PyType_GetSlot().
+ static PyType_Slot dummy_slots[] = {{0, NULL}};
+
+ static PyType_Spec dummy_spec = {
+ "module.DummyClass", // tp_name
+ 0, // To be filled in by size of base // tp_basicsize
+ 0, // tp_itemsize
+ Py_TPFLAGS_DEFAULT, // tp_flags
+ dummy_slots,
+ };
+
+ bases = Py_BuildValue("(O)", &PyType_Type);
+ if (!bases) goto err;
+ type = (PyTypeObject*)PyType_FromSpecWithBases(&dummy_spec, bases);
+ if (!type) goto err;
+
+ bits->type_new = PyType_GetSlot(type, Py_tp_new);
+ bits->type_dealloc = upb_Pre310_PyType_GetDeallocSlot(type);
+ bits->type_getattro = PyType_GetSlot(type, Py_tp_getattro);
+ bits->type_setattro = PyType_GetSlot(type, Py_tp_setattro);
+
+ size = PyObject_GetAttrString((PyObject*)&PyType_Type, "__basicsize__");
+ if (!size) goto err;
+ bits->type_basicsize = PyLong_AsLong(size);
+ if (bits->type_basicsize == -1) goto err;
+
+ assert(bits->type_new);
+ assert(bits->type_dealloc);
+ assert(bits->type_getattro);
+ assert(bits->type_setattro);
+
+#ifndef Py_LIMITED_API
+ assert(bits->type_new == PyType_Type.tp_new);
+ assert(bits->type_dealloc == PyType_Type.tp_dealloc);
+ assert(bits->type_getattro == PyType_Type.tp_getattro);
+ assert(bits->type_setattro == PyType_Type.tp_setattro);
+ assert(bits->type_basicsize == sizeof(PyHeapTypeObject));
+#endif
+
+ sys = PyImport_ImportModule("sys");
+ hex_version = PyObject_GetAttrString(sys, "hexversion");
+ bits->python_version_hex = PyLong_AsLong(hex_version);
+ ret = true;
+
+err:
+ Py_XDECREF(bases);
+ Py_XDECREF(type);
+ Py_XDECREF(size);
+ Py_XDECREF(sys);
+ Py_XDECREF(hex_version);
+ return ret;
+}
+
+// -----------------------------------------------------------------------------
+// Message
+// -----------------------------------------------------------------------------
+
+// The main message object. The type of the object (PyUpb_Message.ob_type)
+// will be an instance of the PyUpb_MessageMeta type (defined below). So the
+// chain is:
+// FooMessage = MessageMeta(...)
+// foo = FooMessage()
+//
+// Which becomes:
+// Object C Struct Type Python type (ob_type)
+// ----------------- ----------------- ---------------------
+// foo PyUpb_Message FooMessage
+// FooMessage PyUpb_MessageMeta message_meta_type
+// message_meta_type PyTypeObject 'type' in Python
+//
+// A message object can be in one of two states: present or non-present. When
+// a message is non-present, it stores a reference to its parent, and a write
+// to any attribute will trigger the message to become present in its parent.
+// The parent may also be non-present, in which case a mutation will trigger a
+// chain reaction.
+typedef struct PyUpb_Message {
+ PyObject_HEAD;
+ PyObject* arena;
+ uintptr_t def; // Tagged, low bit 1 == upb_FieldDef*, else upb_MessageDef*
+ union {
+ // when def is msgdef, the data for this msg.
+ upb_Message* msg;
+ // when def is fielddef, owning pointer to parent
+ struct PyUpb_Message* parent;
+ } ptr;
+ PyObject* ext_dict; // Weak pointer to extension dict, if any.
+ // name->obj dict for non-present msg/map/repeated, NULL if none.
+ PyUpb_WeakMap* unset_subobj_map;
+ int version;
+} PyUpb_Message;
+
+static PyObject* PyUpb_Message_GetAttr(PyObject* _self, PyObject* attr);
+
+bool PyUpb_Message_IsStub(PyUpb_Message* msg) { return msg->def & 1; }
+
+const upb_FieldDef* PyUpb_Message_GetFieldDef(PyUpb_Message* msg) {
+ assert(PyUpb_Message_IsStub(msg));
+ return (void*)(msg->def & ~(uintptr_t)1);
+}
+
+static const upb_MessageDef* _PyUpb_Message_GetMsgdef(PyUpb_Message* msg) {
+ return PyUpb_Message_IsStub(msg)
+ ? upb_FieldDef_MessageSubDef(PyUpb_Message_GetFieldDef(msg))
+ : (void*)msg->def;
+}
+
+const upb_MessageDef* PyUpb_Message_GetMsgdef(PyObject* self) {
+ return _PyUpb_Message_GetMsgdef((PyUpb_Message*)self);
+}
+
+static upb_Message* PyUpb_Message_GetMsg(PyUpb_Message* self) {
+ assert(!PyUpb_Message_IsStub(self));
+ return self->ptr.msg;
+}
+
+bool PyUpb_Message_TryCheck(PyObject* self) {
+ PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
+ PyObject* type = (PyObject*)Py_TYPE(self);
+ return Py_TYPE(type) == state->message_meta_type;
+}
+
+bool PyUpb_Message_Verify(PyObject* self) {
+ if (!PyUpb_Message_TryCheck(self)) {
+ PyErr_Format(PyExc_TypeError, "Expected a message object, but got %R.",
+ self);
+ return false;
+ }
+ return true;
+}
+
+// If the message is reified, returns it. Otherwise, returns NULL.
+// If NULL is returned, the object is empty and has no underlying data.
+upb_Message* PyUpb_Message_GetIfReified(PyObject* _self) {
+ PyUpb_Message* self = (void*)_self;
+ return PyUpb_Message_IsStub(self) ? NULL : self->ptr.msg;
+}
+
+static PyObject* PyUpb_Message_New(PyObject* cls, PyObject* unused_args,
+ PyObject* unused_kwargs) {
+ const upb_MessageDef* msgdef = PyUpb_MessageMeta_GetMsgdef(cls);
+ const upb_MiniTable* layout = upb_MessageDef_MiniTable(msgdef);
+ PyUpb_Message* msg = (void*)PyType_GenericAlloc((PyTypeObject*)cls, 0);
+ msg->def = (uintptr_t)msgdef;
+ msg->arena = PyUpb_Arena_New();
+ msg->ptr.msg = upb_Message_New(layout, PyUpb_Arena_Get(msg->arena));
+ msg->unset_subobj_map = NULL;
+ msg->ext_dict = NULL;
+ msg->version = 0;
+
+ PyObject* ret = &msg->ob_base;
+ PyUpb_ObjCache_Add(msg->ptr.msg, ret);
+ return ret;
+}
+
+/*
+ * PyUpb_Message_LookupName()
+ *
+ * Tries to find a field or oneof named `py_name` in the message object `self`.
+ * The user must pass `f` and/or `o` to indicate whether a field or a oneof name
+ * is expected. If the name is found and it has an expected type, the function
+ * sets `*f` or `*o` respectively and returns true. Otherwise returns false
+ * and sets an exception of type `exc_type` if provided.
+ */
+static bool PyUpb_Message_LookupName(PyUpb_Message* self, PyObject* py_name,
+ const upb_FieldDef** f,
+ const upb_OneofDef** o,
+ PyObject* exc_type) {
+ assert(f || o);
+ Py_ssize_t size;
+ const char* name = NULL;
+ if (PyUnicode_Check(py_name)) {
+ name = PyUnicode_AsUTF8AndSize(py_name, &size);
+ } else if (PyBytes_Check(py_name)) {
+ PyBytes_AsStringAndSize(py_name, (char**)&name, &size);
+ }
+ if (!name) {
+ PyErr_Format(exc_type,
+ "Expected a field name, but got non-string argument %S.",
+ py_name);
+ return false;
+ }
+ const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
+
+ if (!upb_MessageDef_FindByNameWithSize(msgdef, name, size, f, o)) {
+ if (exc_type) {
+ PyErr_Format(exc_type, "Protocol message %s has no \"%s\" field.",
+ upb_MessageDef_Name(msgdef), name);
+ }
+ return false;
+ }
+
+ if (!o && !*f) {
+ if (exc_type) {
+ PyErr_Format(exc_type, "Expected a field name, but got oneof name %s.",
+ name);
+ }
+ return false;
+ }
+
+ if (!f && !*o) {
+ if (exc_type) {
+ PyErr_Format(exc_type, "Expected a oneof name, but got field name %s.",
+ name);
+ }
+ return false;
+ }
+
+ return true;
+}
+
+static bool PyUpb_Message_InitMessageMapEntry(PyObject* dst, PyObject* src) {
+ if (!src || !dst) return false;
+
+ PyObject* ok = PyObject_CallMethod(dst, "CopyFrom", "O", src);
+ if (!ok) return false;
+ Py_DECREF(ok);
+
+ return true;
+}
+
+int PyUpb_Message_InitMapAttributes(PyObject* map, PyObject* value,
+ const upb_FieldDef* f) {
+ const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
+ const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1);
+ PyObject* it = NULL;
+ PyObject* tmp = NULL;
+ int ret = -1;
+ if (upb_FieldDef_IsSubMessage(val_f)) {
+ it = PyObject_GetIter(value);
+ if (it == NULL) {
+ PyErr_Format(PyExc_TypeError, "Argument for field %s is not iterable",
+ upb_FieldDef_FullName(f));
+ goto err;
+ }
+ PyObject* e;
+ while ((e = PyIter_Next(it)) != NULL) {
+ PyObject* src = PyObject_GetItem(value, e);
+ PyObject* dst = PyObject_GetItem(map, e);
+ Py_DECREF(e);
+ bool ok = PyUpb_Message_InitMessageMapEntry(dst, src);
+ Py_XDECREF(src);
+ Py_XDECREF(dst);
+ if (!ok) goto err;
+ }
+ } else {
+ tmp = PyObject_CallMethod(map, "update", "O", value);
+ if (!tmp) goto err;
+ }
+ ret = 0;
+
+err:
+ Py_XDECREF(it);
+ Py_XDECREF(tmp);
+ return ret;
+}
+
+void PyUpb_Message_EnsureReified(PyUpb_Message* self);
+
+static bool PyUpb_Message_InitMapAttribute(PyObject* _self, PyObject* name,
+ const upb_FieldDef* f,
+ PyObject* value) {
+ PyObject* map = PyUpb_Message_GetAttr(_self, name);
+ int ok = PyUpb_Message_InitMapAttributes(map, value, f);
+ Py_DECREF(map);
+ return ok >= 0;
+}
+
+static bool PyUpb_Message_InitRepeatedMessageAttribute(PyObject* _self,
+ PyObject* repeated,
+ PyObject* value,
+ const upb_FieldDef* f) {
+ PyObject* it = PyObject_GetIter(value);
+ if (!it) {
+ PyErr_Format(PyExc_TypeError, "Argument for field %s is not iterable",
+ upb_FieldDef_FullName(f));
+ return false;
+ }
+ PyObject* e = NULL;
+ PyObject* m = NULL;
+ while ((e = PyIter_Next(it)) != NULL) {
+ if (PyDict_Check(e)) {
+ m = PyUpb_RepeatedCompositeContainer_Add(repeated, NULL, e);
+ if (!m) goto err;
+ } else {
+ m = PyUpb_RepeatedCompositeContainer_Add(repeated, NULL, NULL);
+ if (!m) goto err;
+ PyObject* merged = PyUpb_Message_MergeFrom(m, e);
+ if (!merged) goto err;
+ Py_DECREF(merged);
+ }
+ Py_DECREF(e);
+ Py_DECREF(m);
+ m = NULL;
+ }
+
+err:
+ Py_XDECREF(it);
+ Py_XDECREF(e);
+ Py_XDECREF(m);
+ return !PyErr_Occurred(); // Check PyIter_Next() exit.
+}
+
+static bool PyUpb_Message_InitRepeatedAttribute(PyObject* _self, PyObject* name,
+ PyObject* value) {
+ PyUpb_Message* self = (void*)_self;
+ const upb_FieldDef* field;
+ if (!PyUpb_Message_LookupName(self, name, &field, NULL,
+ PyExc_AttributeError)) {
+ return false;
+ }
+ bool ok = false;
+ PyObject* repeated = PyUpb_Message_GetFieldValue(_self, field);
+ PyObject* tmp = NULL;
+ if (!repeated) goto err;
+ if (upb_FieldDef_IsSubMessage(field)) {
+ if (!PyUpb_Message_InitRepeatedMessageAttribute(_self, repeated, value,
+ field)) {
+ goto err;
+ }
+ } else {
+ tmp = PyUpb_RepeatedContainer_Extend(repeated, value);
+ if (!tmp) goto err;
+ }
+ ok = true;
+
+err:
+ Py_XDECREF(repeated);
+ Py_XDECREF(tmp);
+ return ok;
+}
+
+static PyObject* PyUpb_Message_MergePartialFrom(PyObject*, PyObject*);
+
+static bool PyUpb_Message_InitMessageAttribute(PyObject* _self, PyObject* name,
+ PyObject* value) {
+ PyObject* submsg = PyUpb_Message_GetAttr(_self, name);
+ if (!submsg) return -1;
+ assert(!PyErr_Occurred());
+ bool ok;
+ if (PyUpb_Message_TryCheck(value)) {
+ PyObject* tmp = PyUpb_Message_MergePartialFrom(submsg, value);
+ ok = tmp != NULL;
+ Py_XDECREF(tmp);
+ } else if (PyDict_Check(value)) {
+ assert(!PyErr_Occurred());
+ ok = PyUpb_Message_InitAttributes(submsg, NULL, value) >= 0;
+ } else {
+ const upb_MessageDef* m = PyUpb_Message_GetMsgdef(_self);
+ PyErr_Format(PyExc_TypeError, "Message must be initialized with a dict: %s",
+ upb_MessageDef_FullName(m));
+ ok = false;
+ }
+ Py_DECREF(submsg);
+ return ok;
+}
+
+static bool PyUpb_Message_InitScalarAttribute(upb_Message* msg,
+ const upb_FieldDef* f,
+ PyObject* value,
+ upb_Arena* arena) {
+ upb_MessageValue msgval;
+ assert(!PyErr_Occurred());
+ if (!PyUpb_PyToUpb(value, f, &msgval, arena)) return false;
+ upb_Message_SetFieldByDef(msg, f, msgval, arena);
+ return true;
+}
+
+int PyUpb_Message_InitAttributes(PyObject* _self, PyObject* args,
+ PyObject* kwargs) {
+ assert(!PyErr_Occurred());
+
+ if (args != NULL && PyTuple_Size(args) != 0) {
+ PyErr_SetString(PyExc_TypeError, "No positional arguments allowed");
+ return -1;
+ }
+
+ if (kwargs == NULL) return 0;
+
+ PyUpb_Message* self = (void*)_self;
+ Py_ssize_t pos = 0;
+ PyObject* name;
+ PyObject* value;
+ PyUpb_Message_EnsureReified(self);
+ upb_Message* msg = PyUpb_Message_GetMsg(self);
+ upb_Arena* arena = PyUpb_Arena_Get(self->arena);
+
+ while (PyDict_Next(kwargs, &pos, &name, &value)) {
+ assert(!PyErr_Occurred());
+ const upb_FieldDef* f;
+ assert(!PyErr_Occurred());
+ if (!PyUpb_Message_LookupName(self, name, &f, NULL, PyExc_ValueError)) {
+ return -1;
+ }
+
+ if (value == Py_None) continue; // Ignored.
+
+ assert(!PyErr_Occurred());
+
+ if (upb_FieldDef_IsMap(f)) {
+ if (!PyUpb_Message_InitMapAttribute(_self, name, f, value)) return -1;
+ } else if (upb_FieldDef_IsRepeated(f)) {
+ if (!PyUpb_Message_InitRepeatedAttribute(_self, name, value)) return -1;
+ } else if (upb_FieldDef_IsSubMessage(f)) {
+ if (!PyUpb_Message_InitMessageAttribute(_self, name, value)) return -1;
+ } else {
+ if (!PyUpb_Message_InitScalarAttribute(msg, f, value, arena)) return -1;
+ }
+ if (PyErr_Occurred()) return -1;
+ }
+
+ if (PyErr_Occurred()) return -1;
+ return 0;
+}
+
+static int PyUpb_Message_Init(PyObject* _self, PyObject* args,
+ PyObject* kwargs) {
+ if (args != NULL && PyTuple_Size(args) != 0) {
+ PyErr_SetString(PyExc_TypeError, "No positional arguments allowed");
+ return -1;
+ }
+
+ return PyUpb_Message_InitAttributes(_self, args, kwargs);
+}
+
+static PyObject* PyUpb_Message_NewStub(PyObject* parent, const upb_FieldDef* f,
+ PyObject* arena) {
+ const upb_MessageDef* sub_m = upb_FieldDef_MessageSubDef(f);
+ PyObject* cls = PyUpb_Descriptor_GetClass(sub_m);
+
+ PyUpb_Message* msg = (void*)PyType_GenericAlloc((PyTypeObject*)cls, 0);
+ msg->def = (uintptr_t)f | 1;
+ msg->arena = arena;
+ msg->ptr.parent = (PyUpb_Message*)parent;
+ msg->unset_subobj_map = NULL;
+ msg->ext_dict = NULL;
+ msg->version = 0;
+
+ Py_DECREF(cls);
+ Py_INCREF(parent);
+ Py_INCREF(arena);
+ return &msg->ob_base;
+}
+
+static bool PyUpb_Message_IsEmpty(const upb_Message* msg,
+ const upb_MessageDef* m,
+ const upb_DefPool* ext_pool) {
+ if (!msg) return true;
+
+ size_t iter = kUpb_Message_Begin;
+ const upb_FieldDef* f;
+ upb_MessageValue val;
+ if (upb_Message_Next(msg, m, ext_pool, &f, &val, &iter)) return false;
+
+ size_t len;
+ (void)upb_Message_GetUnknown(msg, &len);
+ return len == 0;
+}
+
+static bool PyUpb_Message_IsEqual(PyUpb_Message* m1, PyObject* _m2) {
+ PyUpb_Message* m2 = (void*)_m2;
+ if (m1 == m2) return true;
+ if (!PyObject_TypeCheck(_m2, m1->ob_base.ob_type)) {
+ return false;
+ }
+ const upb_MessageDef* m1_msgdef = _PyUpb_Message_GetMsgdef(m1);
+#ifndef NDEBUG
+ const upb_MessageDef* m2_msgdef = _PyUpb_Message_GetMsgdef(m2);
+ assert(m1_msgdef == m2_msgdef);
+#endif
+ const upb_Message* m1_msg = PyUpb_Message_GetIfReified((PyObject*)m1);
+ const upb_Message* m2_msg = PyUpb_Message_GetIfReified(_m2);
+ const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(m1_msgdef));
+
+ const bool e1 = PyUpb_Message_IsEmpty(m1_msg, m1_msgdef, symtab);
+ const bool e2 = PyUpb_Message_IsEmpty(m2_msg, m1_msgdef, symtab);
+ if (e1 || e2) return e1 && e2;
+
+ return upb_Message_IsEqual(m1_msg, m2_msg, m1_msgdef);
+}
+
+static const upb_FieldDef* PyUpb_Message_InitAsMsg(PyUpb_Message* m,
+ upb_Arena* arena) {
+ const upb_FieldDef* f = PyUpb_Message_GetFieldDef(m);
+ const upb_MessageDef* m2 = upb_FieldDef_MessageSubDef(f);
+ m->ptr.msg = upb_Message_New(upb_MessageDef_MiniTable(m2), arena);
+ m->def = (uintptr_t)m2;
+ PyUpb_ObjCache_Add(m->ptr.msg, &m->ob_base);
+ return f;
+}
+
+static void PyUpb_Message_SetField(PyUpb_Message* parent, const upb_FieldDef* f,
+ PyUpb_Message* child, upb_Arena* arena) {
+ upb_MessageValue msgval = {.msg_val = PyUpb_Message_GetMsg(child)};
+ upb_Message_SetFieldByDef(PyUpb_Message_GetMsg(parent), f, msgval, arena);
+ PyUpb_WeakMap_Delete(parent->unset_subobj_map, f);
+ // Releases a ref previously owned by child->ptr.parent of our child.
+ Py_DECREF(child);
+}
+
+/*
+ * PyUpb_Message_EnsureReified()
+ *
+ * This implements the "expando" behavior of Python protos:
+ * foo = FooProto()
+ *
+ * # The intermediate messages don't really exist, and won't be serialized.
+ * x = foo.bar.bar.bar.bar.bar.baz
+ *
+ * # Now all the intermediate objects are created.
+ * foo.bar.bar.bar.bar.bar.baz = 5
+ *
+ * This function should be called before performing any mutation of a protobuf
+ * object.
+ *
+ * Post-condition:
+ * PyUpb_Message_IsStub(self) is false
+ */
+void PyUpb_Message_EnsureReified(PyUpb_Message* self) {
+ if (!PyUpb_Message_IsStub(self)) return;
+ upb_Arena* arena = PyUpb_Arena_Get(self->arena);
+
+ // This is a non-present message. We need to create a real upb_Message for
+ // this object and every parent until we reach a present message.
+ PyUpb_Message* child = self;
+ PyUpb_Message* parent = self->ptr.parent;
+ const upb_FieldDef* child_f = PyUpb_Message_InitAsMsg(child, arena);
+ Py_INCREF(child); // To avoid a special-case in PyUpb_Message_SetField().
+
+ do {
+ PyUpb_Message* next_parent = parent->ptr.parent;
+ const upb_FieldDef* parent_f = NULL;
+ if (PyUpb_Message_IsStub(parent)) {
+ parent_f = PyUpb_Message_InitAsMsg(parent, arena);
+ }
+ PyUpb_Message_SetField(parent, child_f, child, arena);
+ child = parent;
+ child_f = parent_f;
+ parent = next_parent;
+ } while (child_f);
+
+ // Releases ref previously owned by child->ptr.parent of our child.
+ Py_DECREF(child);
+ self->version++;
+}
+
+static void PyUpb_Message_SyncSubobjs(PyUpb_Message* self);
+
+/*
+ * PyUpb_Message_Reify()
+ *
+ * The message equivalent of PyUpb_*Container_Reify(), this transitions
+ * the wrapper from the unset state (owning a reference on self->ptr.parent) to
+ * the set state (having a non-owning pointer to self->ptr.msg).
+ */
+static void PyUpb_Message_Reify(PyUpb_Message* self, const upb_FieldDef* f,
+ upb_Message* msg) {
+ assert(f == PyUpb_Message_GetFieldDef(self));
+ if (!msg) {
+ const upb_MessageDef* msgdef = PyUpb_Message_GetMsgdef((PyObject*)self);
+ const upb_MiniTable* layout = upb_MessageDef_MiniTable(msgdef);
+ msg = upb_Message_New(layout, PyUpb_Arena_Get(self->arena));
+ }
+ PyUpb_ObjCache_Add(msg, &self->ob_base);
+ Py_DECREF(&self->ptr.parent->ob_base);
+ self->ptr.msg = msg; // Overwrites self->ptr.parent
+ self->def = (uintptr_t)upb_FieldDef_MessageSubDef(f);
+ PyUpb_Message_SyncSubobjs(self);
+}
+
+/*
+ * PyUpb_Message_SyncSubobjs()
+ *
+ * This operation must be invoked whenever the underlying upb_Message has been
+ * mutated directly in C. This will attach any newly-present field data
+ * to previously returned stub wrapper objects.
+ *
+ * For example:
+ * foo = FooMessage()
+ * sub = foo.submsg # Empty, unset sub-message
+ *
+ * # SyncSubobjs() is required to connect our existing 'sub' wrapper to the
+ * # newly created foo.submsg data in C.
+ * foo.MergeFrom(FooMessage(submsg={}))
+ *
+ * This requires that all of the new sub-objects that have appeared are owned
+ * by `self`'s arena.
+ */
+static void PyUpb_Message_SyncSubobjs(PyUpb_Message* self) {
+ PyUpb_WeakMap* subobj_map = self->unset_subobj_map;
+ if (!subobj_map) return;
+
+ upb_Message* msg = PyUpb_Message_GetMsg(self);
+ intptr_t iter = PYUPB_WEAKMAP_BEGIN;
+ const void* key;
+ PyObject* obj;
+
+ // The last ref to this message could disappear during iteration.
+ // When we call PyUpb_*Container_Reify() below, the container will drop
+ // its ref on `self`. If that was the last ref on self, the object will be
+ // deleted, and `subobj_map` along with it. We need it to live until we are
+ // done iterating.
+ Py_INCREF(&self->ob_base);
+
+ while (PyUpb_WeakMap_Next(subobj_map, &key, &obj, &iter)) {
+ const upb_FieldDef* f = key;
+ if (upb_FieldDef_HasPresence(f) && !upb_Message_HasFieldByDef(msg, f))
+ continue;
+ upb_MessageValue msgval = upb_Message_GetFieldByDef(msg, f);
+ PyUpb_WeakMap_DeleteIter(subobj_map, &iter);
+ if (upb_FieldDef_IsMap(f)) {
+ if (!msgval.map_val) continue;
+ PyUpb_MapContainer_Reify(obj, (upb_Map*)msgval.map_val);
+ } else if (upb_FieldDef_IsRepeated(f)) {
+ if (!msgval.array_val) continue;
+ PyUpb_RepeatedContainer_Reify(obj, (upb_Array*)msgval.array_val);
+ } else {
+ PyUpb_Message* sub = (void*)obj;
+ assert(self == sub->ptr.parent);
+ PyUpb_Message_Reify(sub, f, (upb_Message*)msgval.msg_val);
+ }
+ }
+
+ Py_DECREF(&self->ob_base);
+
+ // TODO(haberman): present fields need to be iterated too if they can reach
+ // a WeakMap.
+}
+
+static PyObject* PyUpb_Message_ToString(PyUpb_Message* self) {
+ if (PyUpb_Message_IsStub(self)) {
+ return PyUnicode_FromStringAndSize(NULL, 0);
+ }
+ upb_Message* msg = PyUpb_Message_GetMsg(self);
+ const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
+ const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(msgdef));
+ char buf[1024];
+ int options = UPB_TXTENC_SKIPUNKNOWN;
+ size_t size = upb_TextEncode(msg, msgdef, symtab, options, buf, sizeof(buf));
+ if (size < sizeof(buf)) {
+ return PyUnicode_FromStringAndSize(buf, size);
+ } else {
+ char* buf2 = malloc(size + 1);
+ size_t size2 = upb_TextEncode(msg, msgdef, symtab, options, buf2, size + 1);
+ assert(size == size2);
+ PyObject* ret = PyUnicode_FromStringAndSize(buf2, size2);
+ free(buf2);
+ return ret;
+ }
+}
+
+static PyObject* PyUpb_Message_RichCompare(PyObject* _self, PyObject* other,
+ int opid) {
+ PyUpb_Message* self = (void*)_self;
+ if (opid != Py_EQ && opid != Py_NE) {
+ Py_INCREF(Py_NotImplemented);
+ return Py_NotImplemented;
+ }
+ if (!PyObject_TypeCheck(other, Py_TYPE(self))) {
+ Py_INCREF(Py_NotImplemented);
+ return Py_NotImplemented;
+ }
+ bool ret = PyUpb_Message_IsEqual(self, other);
+ if (opid == Py_NE) ret = !ret;
+ return PyBool_FromLong(ret);
+}
+
+void PyUpb_Message_CacheDelete(PyObject* _self, const upb_FieldDef* f) {
+ PyUpb_Message* self = (void*)_self;
+ PyUpb_WeakMap_Delete(self->unset_subobj_map, f);
+}
+
+void PyUpb_Message_SetConcreteSubobj(PyObject* _self, const upb_FieldDef* f,
+ upb_MessageValue subobj) {
+ PyUpb_Message* self = (void*)_self;
+ PyUpb_Message_EnsureReified(self);
+ PyUpb_Message_CacheDelete(_self, f);
+ upb_Message_SetFieldByDef(self->ptr.msg, f, subobj,
+ PyUpb_Arena_Get(self->arena));
+}
+
+static void PyUpb_Message_Dealloc(PyObject* _self) {
+ PyUpb_Message* self = (void*)_self;
+
+ if (PyUpb_Message_IsStub(self)) {
+ PyUpb_Message_CacheDelete((PyObject*)self->ptr.parent,
+ PyUpb_Message_GetFieldDef(self));
+ Py_DECREF(self->ptr.parent);
+ } else {
+ PyUpb_ObjCache_Delete(self->ptr.msg);
+ }
+
+ if (self->unset_subobj_map) {
+ PyUpb_WeakMap_Free(self->unset_subobj_map);
+ }
+
+ Py_DECREF(self->arena);
+
+ // We do not use PyUpb_Dealloc() here because Message is a base type and for
+ // base types there is a bug we have to work around in this case (see below).
+ PyTypeObject* tp = Py_TYPE(self);
+ freefunc tp_free = PyType_GetSlot(tp, Py_tp_free);
+ tp_free(self);
+
+ if (cpython_bits.python_version_hex >= 0x03080000) {
+ // Prior to Python 3.8 there is a bug where deallocating the type here would
+ // lead to a double-decref: https://bugs.python.org/issue37879
+ Py_DECREF(tp);
+ }
+}
+
+PyObject* PyUpb_Message_Get(upb_Message* u_msg, const upb_MessageDef* m,
+ PyObject* arena) {
+ PyObject* ret = PyUpb_ObjCache_Get(u_msg);
+ if (ret) return ret;
+
+ PyObject* cls = PyUpb_Descriptor_GetClass(m);
+ // It is not safe to use PyObject_{,GC}_New() due to:
+ // https://bugs.python.org/issue35810
+ PyUpb_Message* py_msg = (void*)PyType_GenericAlloc((PyTypeObject*)cls, 0);
+ py_msg->arena = arena;
+ py_msg->def = (uintptr_t)m;
+ py_msg->ptr.msg = u_msg;
+ py_msg->unset_subobj_map = NULL;
+ py_msg->ext_dict = NULL;
+ py_msg->version = 0;
+ ret = &py_msg->ob_base;
+ Py_DECREF(cls);
+ Py_INCREF(arena);
+ PyUpb_ObjCache_Add(u_msg, ret);
+ return ret;
+}
+
+/* PyUpb_Message_GetStub()
+ *
+ * Non-present messages return "stub" objects that point to their parent, but
+ * will materialize into real upb objects if they are mutated.
+ *
+ * Note: we do *not* create stubs for repeated/map fields unless the parent
+ * is a stub:
+ *
+ * msg = TestMessage()
+ * msg.submessage # (A) Creates a stub
+ * msg.repeated_foo # (B) Does *not* create a stub
+ * msg.submessage.repeated_bar # (C) Creates a stub
+ *
+ * In case (B) we have some freedom: we could either create a stub, or create
+ * a reified object with underlying data. It appears that either could work
+ * equally well, with no observable change to users. There isn't a clear
+ * advantage to either choice. We choose to follow the behavior of the
+ * pre-existing C++ behavior for consistency, but if it becomes apparent that
+ * there would be some benefit to reversing this decision, it should be totally
+ * within the realm of possibility.
+ */
+PyObject* PyUpb_Message_GetStub(PyUpb_Message* self,
+ const upb_FieldDef* field) {
+ PyObject* _self = (void*)self;
+ if (!self->unset_subobj_map) {
+ self->unset_subobj_map = PyUpb_WeakMap_New();
+ }
+ PyObject* subobj = PyUpb_WeakMap_Get(self->unset_subobj_map, field);
+
+ if (subobj) return subobj;
+
+ if (upb_FieldDef_IsMap(field)) {
+ subobj = PyUpb_MapContainer_NewStub(_self, field, self->arena);
+ } else if (upb_FieldDef_IsRepeated(field)) {
+ subobj = PyUpb_RepeatedContainer_NewStub(_self, field, self->arena);
+ } else {
+ subobj = PyUpb_Message_NewStub(&self->ob_base, field, self->arena);
+ }
+ PyUpb_WeakMap_Add(self->unset_subobj_map, field, subobj);
+
+ assert(!PyErr_Occurred());
+ return subobj;
+}
+
+PyObject* PyUpb_Message_GetPresentWrapper(PyUpb_Message* self,
+ const upb_FieldDef* field) {
+ assert(!PyUpb_Message_IsStub(self));
+ upb_MutableMessageValue mutval =
+ upb_Message_Mutable(self->ptr.msg, field, PyUpb_Arena_Get(self->arena));
+ if (upb_FieldDef_IsMap(field)) {
+ return PyUpb_MapContainer_GetOrCreateWrapper(mutval.map, field,
+ self->arena);
+ } else {
+ return PyUpb_RepeatedContainer_GetOrCreateWrapper(mutval.array, field,
+ self->arena);
+ }
+}
+
+PyObject* PyUpb_Message_GetScalarValue(PyUpb_Message* self,
+ const upb_FieldDef* field) {
+ upb_MessageValue val;
+ if (PyUpb_Message_IsStub(self)) {
+ // Unset message always returns default values.
+ val = upb_FieldDef_Default(field);
+ } else {
+ val = upb_Message_GetFieldByDef(self->ptr.msg, field);
+ }
+ return PyUpb_UpbToPy(val, field, self->arena);
+}
+
+/*
+ * PyUpb_Message_GetFieldValue()
+ *
+ * Implements the equivalent of getattr(msg, field), once `field` has
+ * already been resolved to a `upb_FieldDef*`.
+ *
+ * This may involve constructing a wrapper object for the given field, or
+ * returning one that was previously constructed. If the field is not actually
+ * set, the wrapper object will be an "unset" object that is not actually
+ * connected to any C data.
+ */
+PyObject* PyUpb_Message_GetFieldValue(PyObject* _self,
+ const upb_FieldDef* field) {
+ PyUpb_Message* self = (void*)_self;
+ assert(upb_FieldDef_ContainingType(field) == PyUpb_Message_GetMsgdef(_self));
+ bool submsg = upb_FieldDef_IsSubMessage(field);
+ bool seq = upb_FieldDef_IsRepeated(field);
+
+ if ((PyUpb_Message_IsStub(self) && (submsg || seq)) ||
+ (submsg && !seq && !upb_Message_HasFieldByDef(self->ptr.msg, field))) {
+ return PyUpb_Message_GetStub(self, field);
+ } else if (seq) {
+ return PyUpb_Message_GetPresentWrapper(self, field);
+ } else {
+ return PyUpb_Message_GetScalarValue(self, field);
+ }
+}
+
+int PyUpb_Message_SetFieldValue(PyObject* _self, const upb_FieldDef* field,
+ PyObject* value, PyObject* exc) {
+ PyUpb_Message* self = (void*)_self;
+ assert(value);
+
+ if (upb_FieldDef_IsSubMessage(field) || upb_FieldDef_IsRepeated(field)) {
+ PyErr_Format(exc,
+ "Assignment not allowed to message, map, or repeated "
+ "field \"%s\" in protocol message object.",
+ upb_FieldDef_Name(field));
+ return -1;
+ }
+
+ PyUpb_Message_EnsureReified(self);
+
+ upb_MessageValue val;
+ upb_Arena* arena = PyUpb_Arena_Get(self->arena);
+ if (!PyUpb_PyToUpb(value, field, &val, arena)) {
+ return -1;
+ }
+
+ upb_Message_SetFieldByDef(self->ptr.msg, field, val, arena);
+ return 0;
+}
+
+int PyUpb_Message_GetVersion(PyObject* _self) {
+ PyUpb_Message* self = (void*)_self;
+ return self->version;
+}
+
+/*
+ * PyUpb_Message_GetAttr()
+ *
+ * Implements:
+ * foo = msg.foo
+ *
+ * Attribute lookup must find both message fields and base class methods like
+ * msg.SerializeToString().
+ */
+__attribute__((flatten)) static PyObject* PyUpb_Message_GetAttr(
+ PyObject* _self, PyObject* attr) {
+ PyUpb_Message* self = (void*)_self;
+
+ // Lookup field by name.
+ const upb_FieldDef* field;
+ if (PyUpb_Message_LookupName(self, attr, &field, NULL, NULL)) {
+ return PyUpb_Message_GetFieldValue(_self, field);
+ }
+
+ // Check base class attributes.
+ assert(!PyErr_Occurred());
+ PyObject* ret = PyObject_GenericGetAttr(_self, attr);
+ if (ret) return ret;
+
+ // Swallow AttributeError if it occurred and try again on the metaclass
+ // to pick up class attributes. But we have to special-case "Extensions"
+ // which affirmatively returns AttributeError when a message is not
+ // extendable.
+ const char* name;
+ if (PyErr_ExceptionMatches(PyExc_AttributeError) &&
+ (name = PyUpb_GetStrData(attr)) && strcmp(name, "Extensions") != 0) {
+ PyErr_Clear();
+ return PyUpb_MessageMeta_GetAttr((PyObject*)Py_TYPE(_self), attr);
+ }
+
+ return NULL;
+}
+
+/*
+ * PyUpb_Message_SetAttr()
+ *
+ * Implements:
+ * msg.foo = foo
+ */
+static int PyUpb_Message_SetAttr(PyObject* _self, PyObject* attr,
+ PyObject* value) {
+ PyUpb_Message* self = (void*)_self;
+ const upb_FieldDef* field;
+ if (!PyUpb_Message_LookupName(self, attr, &field, NULL,
+ PyExc_AttributeError)) {
+ return -1;
+ }
+
+ return PyUpb_Message_SetFieldValue(_self, field, value, PyExc_AttributeError);
+}
+
+static PyObject* PyUpb_Message_HasField(PyObject* _self, PyObject* arg) {
+ PyUpb_Message* self = (void*)_self;
+ const upb_FieldDef* field;
+ const upb_OneofDef* oneof;
+
+ if (!PyUpb_Message_LookupName(self, arg, &field, &oneof, PyExc_ValueError)) {
+ return NULL;
+ }
+
+ if (field && !upb_FieldDef_HasPresence(field)) {
+ PyErr_Format(PyExc_ValueError, "Field %s does not have presence.",
+ upb_FieldDef_FullName(field));
+ return NULL;
+ }
+
+ if (PyUpb_Message_IsStub(self)) Py_RETURN_FALSE;
+
+ return PyBool_FromLong(field ? upb_Message_HasFieldByDef(self->ptr.msg, field)
+ : upb_Message_WhichOneof(self->ptr.msg, oneof) !=
+ NULL);
+}
+
+static PyObject* PyUpb_Message_FindInitializationErrors(PyObject* _self,
+ PyObject* arg);
+
+static PyObject* PyUpb_Message_IsInitializedAppendErrors(PyObject* _self,
+ PyObject* errors) {
+ PyObject* list = PyUpb_Message_FindInitializationErrors(_self, NULL);
+ if (!list) return NULL;
+ bool ok = PyList_Size(list) == 0;
+ PyObject* ret = NULL;
+ PyObject* extend_result = NULL;
+ if (!ok) {
+ extend_result = PyObject_CallMethod(errors, "extend", "O", list);
+ if (!extend_result) goto done;
+ }
+ ret = PyBool_FromLong(ok);
+
+done:
+ Py_XDECREF(list);
+ Py_XDECREF(extend_result);
+ return ret;
+}
+
+static PyObject* PyUpb_Message_IsInitialized(PyObject* _self, PyObject* args) {
+ PyObject* errors = NULL;
+ if (!PyArg_ParseTuple(args, "|O", &errors)) {
+ return NULL;
+ }
+ if (errors) {
+ // We need to collect a list of unset required fields and append it to
+ // `errors`.
+ return PyUpb_Message_IsInitializedAppendErrors(_self, errors);
+ } else {
+ // We just need to return a boolean "true" or "false" for whether all
+ // required fields are set.
+ upb_Message* msg = PyUpb_Message_GetIfReified(_self);
+ const upb_MessageDef* m = PyUpb_Message_GetMsgdef(_self);
+ const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(m));
+ bool initialized = !upb_util_HasUnsetRequired(msg, m, symtab, NULL);
+ return PyBool_FromLong(initialized);
+ }
+}
+
+static PyObject* PyUpb_Message_ListFieldsItemKey(PyObject* self,
+ PyObject* val) {
+ assert(PyTuple_Check(val));
+ PyObject* field = PyTuple_GetItem(val, 0);
+ const upb_FieldDef* f = PyUpb_FieldDescriptor_GetDef(field);
+ return PyLong_FromLong(upb_FieldDef_Number(f));
+}
+
+static PyObject* PyUpb_Message_CheckCalledFromGeneratedFile(
+ PyObject* unused, PyObject* unused_arg) {
+ PyErr_SetString(
+ PyExc_TypeError,
+ "Descriptors cannot be created directly.\n"
+ "If this call came from a _pb2.py file, your generated code is out of "
+ "date and must be regenerated with protoc >= 3.19.0.\n"
+ "If you cannot immediately regenerate your protos, some other possible "
+ "workarounds are:\n"
+ " 1. Downgrade the protobuf package to 3.20.x or lower.\n"
+ " 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will "
+ "use pure-Python parsing and will be much slower).\n"
+ "\n"
+ "More information: "
+ "https://developers.google.com/protocol-buffers/docs/news/"
+ "2022-05-06#python-updates");
+ return NULL;
+}
+
+static bool PyUpb_Message_SortFieldList(PyObject* list) {
+ PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
+ bool ok = false;
+ PyObject* args = PyTuple_New(0);
+ PyObject* kwargs = PyDict_New();
+ PyObject* method = PyObject_GetAttrString(list, "sort");
+ PyObject* call_result = NULL;
+ if (!args || !kwargs || !method) goto err;
+ if (PyDict_SetItemString(kwargs, "key", state->listfields_item_key) < 0) {
+ goto err;
+ }
+ call_result = PyObject_Call(method, args, kwargs);
+ if (!call_result) goto err;
+ ok = true;
+
+err:
+ Py_XDECREF(method);
+ Py_XDECREF(args);
+ Py_XDECREF(kwargs);
+ Py_XDECREF(call_result);
+ return ok;
+}
+
+static PyObject* PyUpb_Message_ListFields(PyObject* _self, PyObject* arg) {
+ PyObject* list = PyList_New(0);
+ upb_Message* msg = PyUpb_Message_GetIfReified(_self);
+ if (!msg) return list;
+
+ size_t iter1 = kUpb_Message_Begin;
+ const upb_MessageDef* m = PyUpb_Message_GetMsgdef(_self);
+ const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(m));
+ const upb_FieldDef* f;
+ PyObject* field_desc = NULL;
+ PyObject* py_val = NULL;
+ PyObject* tuple = NULL;
+ upb_MessageValue val;
+ uint32_t last_field = 0;
+ bool in_order = true;
+ while (upb_Message_Next(msg, m, symtab, &f, &val, &iter1)) {
+ const uint32_t field_number = upb_FieldDef_Number(f);
+ if (field_number < last_field) in_order = false;
+ last_field = field_number;
+ PyObject* field_desc = PyUpb_FieldDescriptor_Get(f);
+ PyObject* py_val = PyUpb_Message_GetFieldValue(_self, f);
+ if (!field_desc || !py_val) goto err;
+ PyObject* tuple = Py_BuildValue("(NN)", field_desc, py_val);
+ field_desc = NULL;
+ py_val = NULL;
+ if (!tuple) goto err;
+ if (PyList_Append(list, tuple)) goto err;
+ Py_DECREF(tuple);
+ tuple = NULL;
+ }
+
+ // Users rely on fields being returned in field number order.
+ if (!in_order && !PyUpb_Message_SortFieldList(list)) goto err;
+
+ return list;
+
+err:
+ Py_XDECREF(field_desc);
+ Py_XDECREF(py_val);
+ Py_XDECREF(tuple);
+ Py_DECREF(list);
+ return NULL;
+}
+
+static PyObject* PyUpb_Message_MergeInternal(PyObject* self, PyObject* arg,
+ bool check_required) {
+ if (self->ob_type != arg->ob_type) {
+ PyErr_Format(PyExc_TypeError,
+ "Parameter to MergeFrom() must be instance of same class: "
+ "expected %S got %S.",
+ Py_TYPE(self), Py_TYPE(arg));
+ return NULL;
+ }
+ // OPT: exit if src is empty.
+ PyObject* subargs = PyTuple_New(0);
+ PyObject* serialized =
+ check_required
+ ? PyUpb_Message_SerializeToString(arg, subargs, NULL)
+ : PyUpb_Message_SerializePartialToString(arg, subargs, NULL);
+ Py_DECREF(subargs);
+ if (!serialized) return NULL;
+ PyObject* ret = PyUpb_Message_MergeFromString(self, serialized);
+ Py_DECREF(serialized);
+ Py_DECREF(ret);
+ Py_RETURN_NONE;
+}
+
+PyObject* PyUpb_Message_MergeFrom(PyObject* self, PyObject* arg) {
+ return PyUpb_Message_MergeInternal(self, arg, true);
+}
+
+static PyObject* PyUpb_Message_MergePartialFrom(PyObject* self, PyObject* arg) {
+ return PyUpb_Message_MergeInternal(self, arg, false);
+}
+
+static PyObject* PyUpb_Message_Clear(PyUpb_Message* self);
+
+static PyObject* PyUpb_Message_CopyFrom(PyObject* _self, PyObject* arg) {
+ if (_self->ob_type != arg->ob_type) {
+ PyErr_Format(PyExc_TypeError,
+ "Parameter to CopyFrom() must be instance of same class: "
+ "expected %S got %S.",
+ Py_TYPE(_self), Py_TYPE(arg));
+ return NULL;
+ }
+ if (_self == arg) {
+ Py_RETURN_NONE;
+ }
+ PyUpb_Message* self = (void*)_self;
+ PyUpb_Message* other = (void*)arg;
+ PyUpb_Message_EnsureReified(self);
+
+ const upb_Message* other_msg = PyUpb_Message_GetIfReified((PyObject*)other);
+ if (other_msg) {
+ upb_Message_DeepCopy(self->ptr.msg, other_msg,
+ upb_MessageDef_MiniTable(other->def),
+ PyUpb_Arena_Get(self->arena));
+ } else {
+ PyObject* tmp = PyUpb_Message_Clear(self);
+ Py_DECREF(tmp);
+ }
+ PyUpb_Message_SyncSubobjs(self);
+
+ Py_RETURN_NONE;
+}
+
+static PyObject* PyUpb_Message_SetInParent(PyObject* _self, PyObject* arg) {
+ PyUpb_Message* self = (void*)_self;
+ PyUpb_Message_EnsureReified(self);
+ Py_RETURN_NONE;
+}
+
+static PyObject* PyUpb_Message_UnknownFields(PyObject* _self, PyObject* arg) {
+ // TODO(haberman): re-enable when unknown fields are added.
+ // return PyUpb_UnknownFields_New(_self);
+ PyErr_SetString(PyExc_NotImplementedError, "unknown field accessor");
+ return NULL;
+}
+
+PyObject* PyUpb_Message_MergeFromString(PyObject* _self, PyObject* arg) {
+ PyUpb_Message* self = (void*)_self;
+ char* buf;
+ Py_ssize_t size;
+ PyObject* bytes = NULL;
+
+ if (PyMemoryView_Check(arg)) {
+ bytes = PyBytes_FromObject(arg);
+ // Cannot fail when passed something of the correct type.
+ int err = PyBytes_AsStringAndSize(bytes, &buf, &size);
+ (void)err;
+ assert(err >= 0);
+ } else if (PyBytes_AsStringAndSize(arg, &buf, &size) < 0) {
+ return NULL;
+ }
+
+ PyUpb_Message_EnsureReified(self);
+ const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
+ const upb_FileDef* file = upb_MessageDef_File(msgdef);
+ const upb_ExtensionRegistry* extreg =
+ upb_DefPool_ExtensionRegistry(upb_FileDef_Pool(file));
+ const upb_MiniTable* layout = upb_MessageDef_MiniTable(msgdef);
+ upb_Arena* arena = PyUpb_Arena_Get(self->arena);
+ PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
+ int options = upb_DecodeOptions_MaxDepth(
+ state->allow_oversize_protos ? UINT16_MAX
+ : kUpb_WireFormat_DefaultDepthLimit);
+ upb_DecodeStatus status =
+ upb_Decode(buf, size, self->ptr.msg, layout, extreg, options, arena);
+ Py_XDECREF(bytes);
+ if (status != kUpb_DecodeStatus_Ok) {
+ PyErr_Format(state->decode_error_class, "Error parsing message");
+ return NULL;
+ }
+ PyUpb_Message_SyncSubobjs(self);
+ return PyLong_FromSsize_t(size);
+}
+
+static PyObject* PyUpb_Message_ParseFromString(PyObject* self, PyObject* arg) {
+ PyObject* tmp = PyUpb_Message_Clear((PyUpb_Message*)self);
+ Py_DECREF(tmp);
+ return PyUpb_Message_MergeFromString(self, arg);
+}
+
+static PyObject* PyUpb_Message_ByteSize(PyObject* self, PyObject* args) {
+ // TODO(https://github.com/protocolbuffers/upb/issues/462): At the moment upb
+ // does not have a "byte size" function, so we just serialize to string and
+ // get the size of the string.
+ PyObject* subargs = PyTuple_New(0);
+ PyObject* serialized = PyUpb_Message_SerializeToString(self, subargs, NULL);
+ Py_DECREF(subargs);
+ if (!serialized) return NULL;
+ size_t size = PyBytes_Size(serialized);
+ Py_DECREF(serialized);
+ return PyLong_FromSize_t(size);
+}
+
+static PyObject* PyUpb_Message_Clear(PyUpb_Message* self) {
+ PyUpb_Message_EnsureReified(self);
+ const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
+ PyUpb_WeakMap* subobj_map = self->unset_subobj_map;
+
+ if (subobj_map) {
+ upb_Message* msg = PyUpb_Message_GetMsg(self);
+ (void)msg; // Suppress unused warning when asserts are disabled.
+ intptr_t iter = PYUPB_WEAKMAP_BEGIN;
+ const void* key;
+ PyObject* obj;
+
+ while (PyUpb_WeakMap_Next(subobj_map, &key, &obj, &iter)) {
+ const upb_FieldDef* f = key;
+ PyUpb_WeakMap_DeleteIter(subobj_map, &iter);
+ if (upb_FieldDef_IsMap(f)) {
+ assert(upb_Message_GetFieldByDef(msg, f).map_val == NULL);
+ PyUpb_MapContainer_Reify(obj, NULL);
+ } else if (upb_FieldDef_IsRepeated(f)) {
+ assert(upb_Message_GetFieldByDef(msg, f).array_val == NULL);
+ PyUpb_RepeatedContainer_Reify(obj, NULL);
+ } else {
+ assert(!upb_Message_HasFieldByDef(msg, f));
+ PyUpb_Message* sub = (void*)obj;
+ assert(self == sub->ptr.parent);
+ PyUpb_Message_Reify(sub, f, NULL);
+ }
+ }
+ }
+
+ upb_Message_ClearByDef(self->ptr.msg, msgdef);
+ Py_RETURN_NONE;
+}
+
+void PyUpb_Message_DoClearField(PyObject* _self, const upb_FieldDef* f) {
+ PyUpb_Message* self = (void*)_self;
+ PyUpb_Message_EnsureReified((PyUpb_Message*)self);
+
+ // We must ensure that any stub object is reified so its parent no longer
+ // points to us.
+ PyObject* sub = self->unset_subobj_map
+ ? PyUpb_WeakMap_Get(self->unset_subobj_map, f)
+ : NULL;
+
+ if (upb_FieldDef_IsMap(f)) {
+ // For maps we additionally have to invalidate any iterators. So we need
+ // to get an object even if it's reified.
+ if (!sub) {
+ sub = PyUpb_Message_GetFieldValue(_self, f);
+ }
+ PyUpb_MapContainer_EnsureReified(sub);
+ PyUpb_MapContainer_Invalidate(sub);
+ } else if (upb_FieldDef_IsRepeated(f)) {
+ if (sub) {
+ PyUpb_RepeatedContainer_EnsureReified(sub);
+ }
+ } else if (upb_FieldDef_IsSubMessage(f)) {
+ if (sub) {
+ PyUpb_Message_EnsureReified((PyUpb_Message*)sub);
+ }
+ }
+
+ Py_XDECREF(sub);
+ upb_Message_ClearFieldByDef(self->ptr.msg, f);
+}
+
+static PyObject* PyUpb_Message_ClearExtension(PyObject* _self, PyObject* arg) {
+ PyUpb_Message* self = (void*)_self;
+ PyUpb_Message_EnsureReified(self);
+ const upb_FieldDef* f = PyUpb_Message_GetExtensionDef(_self, arg);
+ if (!f) return NULL;
+ PyUpb_Message_DoClearField(_self, f);
+ Py_RETURN_NONE;
+}
+
+static PyObject* PyUpb_Message_ClearField(PyObject* _self, PyObject* arg) {
+ PyUpb_Message* self = (void*)_self;
+
+ // We always need EnsureReified() here (even for an unset message) to
+ // preserve behavior like:
+ // msg = FooMessage()
+ // msg.foo.Clear()
+ // assert msg.HasField("foo")
+ PyUpb_Message_EnsureReified(self);
+
+ const upb_FieldDef* f;
+ const upb_OneofDef* o;
+ if (!PyUpb_Message_LookupName(self, arg, &f, &o, PyExc_ValueError)) {
+ return NULL;
+ }
+
+ if (o) f = upb_Message_WhichOneof(self->ptr.msg, o);
+ if (f) PyUpb_Message_DoClearField(_self, f);
+ Py_RETURN_NONE;
+}
+
+static PyObject* PyUpb_Message_DiscardUnknownFields(PyUpb_Message* self,
+ PyObject* arg) {
+ PyUpb_Message_EnsureReified(self);
+ const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
+ upb_Message_DiscardUnknown(self->ptr.msg, msgdef, 64);
+ Py_RETURN_NONE;
+}
+
+static PyObject* PyUpb_Message_FindInitializationErrors(PyObject* _self,
+ PyObject* arg) {
+ PyUpb_Message* self = (void*)_self;
+ upb_Message* msg = PyUpb_Message_GetIfReified(_self);
+ const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
+ const upb_DefPool* ext_pool = upb_FileDef_Pool(upb_MessageDef_File(msgdef));
+ upb_FieldPathEntry* fields;
+ PyObject* ret = PyList_New(0);
+ if (upb_util_HasUnsetRequired(msg, msgdef, ext_pool, &fields)) {
+ char* buf = NULL;
+ size_t size = 0;
+ assert(fields->field);
+ while (fields->field) {
+ upb_FieldPathEntry* field = fields;
+ size_t need = upb_FieldPath_ToText(&fields, buf, size);
+ if (need >= size) {
+ fields = field;
+ size = size ? size * 2 : 16;
+ while (size <= need) size *= 2;
+ buf = realloc(buf, size);
+ need = upb_FieldPath_ToText(&fields, buf, size);
+ assert(size > need);
+ }
+ PyObject* str = PyUnicode_FromString(buf);
+ PyList_Append(ret, str);
+ Py_DECREF(str);
+ }
+ free(buf);
+ }
+ return ret;
+}
+
+static PyObject* PyUpb_Message_FromString(PyObject* cls, PyObject* serialized) {
+ PyObject* ret = NULL;
+ PyObject* length = NULL;
+
+ ret = PyObject_CallObject(cls, NULL);
+ if (ret == NULL) goto err;
+ length = PyUpb_Message_MergeFromString(ret, serialized);
+ if (length == NULL) goto err;
+
+done:
+ Py_XDECREF(length);
+ return ret;
+
+err:
+ Py_XDECREF(ret);
+ ret = NULL;
+ goto done;
+}
+
+const upb_FieldDef* PyUpb_Message_GetExtensionDef(PyObject* _self,
+ PyObject* key) {
+ const upb_FieldDef* f = PyUpb_FieldDescriptor_GetDef(key);
+ if (!f) {
+ PyErr_Clear();
+ PyErr_Format(PyExc_KeyError, "Object %R is not a field descriptor\n", key);
+ return NULL;
+ }
+ if (!upb_FieldDef_IsExtension(f)) {
+ PyErr_Format(PyExc_KeyError, "Field %s is not an extension\n",
+ upb_FieldDef_FullName(f));
+ return NULL;
+ }
+ const upb_MessageDef* msgdef = PyUpb_Message_GetMsgdef(_self);
+ if (upb_FieldDef_ContainingType(f) != msgdef) {
+ PyErr_Format(PyExc_KeyError, "Extension doesn't match (%s vs %s)",
+ upb_MessageDef_FullName(msgdef), upb_FieldDef_FullName(f));
+ return NULL;
+ }
+ return f;
+}
+
+static PyObject* PyUpb_Message_HasExtension(PyObject* _self,
+ PyObject* ext_desc) {
+ upb_Message* msg = PyUpb_Message_GetIfReified(_self);
+ const upb_FieldDef* f = PyUpb_Message_GetExtensionDef(_self, ext_desc);
+ if (!f) return NULL;
+ if (upb_FieldDef_IsRepeated(f)) {
+ PyErr_SetString(PyExc_KeyError,
+ "Field is repeated. A singular method is required.");
+ return NULL;
+ }
+ if (!msg) Py_RETURN_FALSE;
+ return PyBool_FromLong(upb_Message_HasFieldByDef(msg, f));
+}
+
+void PyUpb_Message_ReportInitializationErrors(const upb_MessageDef* msgdef,
+ PyObject* errors, PyObject* exc) {
+ PyObject* comma = PyUnicode_FromString(",");
+ PyObject* missing_fields = NULL;
+ if (!comma) goto done;
+ missing_fields = PyUnicode_Join(comma, errors);
+ if (!missing_fields) goto done;
+ PyErr_Format(exc, "Message %s is missing required fields: %U",
+ upb_MessageDef_FullName(msgdef), missing_fields);
+done:
+ Py_XDECREF(comma);
+ Py_XDECREF(missing_fields);
+ Py_DECREF(errors);
+}
+
+PyObject* PyUpb_Message_SerializeInternal(PyObject* _self, PyObject* args,
+ PyObject* kwargs,
+ bool check_required) {
+ PyUpb_Message* self = (void*)_self;
+ if (!PyUpb_Message_Verify((PyObject*)self)) return NULL;
+ static const char* kwlist[] = {"deterministic", NULL};
+ int deterministic = 0;
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|p", (char**)(kwlist),
+ &deterministic)) {
+ return NULL;
+ }
+
+ const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
+ if (PyUpb_Message_IsStub(self)) {
+ // Nothing to serialize, but we do have to check whether the message is
+ // initialized.
+ PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
+ PyObject* errors = PyUpb_Message_FindInitializationErrors(_self, NULL);
+ if (!errors) return NULL;
+ if (PyList_Size(errors) == 0) {
+ Py_DECREF(errors);
+ return PyBytes_FromStringAndSize(NULL, 0);
+ }
+ PyUpb_Message_ReportInitializationErrors(msgdef, errors,
+ state->encode_error_class);
+ return NULL;
+ }
+
+ upb_Arena* arena = upb_Arena_New();
+ const upb_MiniTable* layout = upb_MessageDef_MiniTable(msgdef);
+ size_t size = 0;
+ // Python does not currently have any effective limit on serialization depth.
+ int options = upb_EncodeOptions_MaxDepth(UINT16_MAX);
+ if (check_required) options |= kUpb_EncodeOption_CheckRequired;
+ if (deterministic) options |= kUpb_EncodeOption_Deterministic;
+ char* pb;
+ upb_EncodeStatus status =
+ upb_Encode(self->ptr.msg, layout, options, arena, &pb, &size);
+ PyObject* ret = NULL;
+
+ if (status != kUpb_EncodeStatus_Ok) {
+ PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
+ PyObject* errors = PyUpb_Message_FindInitializationErrors(_self, NULL);
+ if (PyList_Size(errors) != 0) {
+ PyUpb_Message_ReportInitializationErrors(msgdef, errors,
+ state->encode_error_class);
+ } else {
+ PyErr_Format(state->encode_error_class, "Failed to serialize proto");
+ }
+ goto done;
+ }
+
+ ret = PyBytes_FromStringAndSize(pb, size);
+
+done:
+ upb_Arena_Free(arena);
+ return ret;
+}
+
+PyObject* PyUpb_Message_SerializeToString(PyObject* _self, PyObject* args,
+ PyObject* kwargs) {
+ return PyUpb_Message_SerializeInternal(_self, args, kwargs, true);
+}
+
+PyObject* PyUpb_Message_SerializePartialToString(PyObject* _self,
+ PyObject* args,
+ PyObject* kwargs) {
+ return PyUpb_Message_SerializeInternal(_self, args, kwargs, false);
+}
+
+static PyObject* PyUpb_Message_WhichOneof(PyObject* _self, PyObject* name) {
+ PyUpb_Message* self = (void*)_self;
+ const upb_OneofDef* o;
+ if (!PyUpb_Message_LookupName(self, name, NULL, &o, PyExc_ValueError)) {
+ return NULL;
+ }
+ upb_Message* msg = PyUpb_Message_GetIfReified(_self);
+ if (!msg) Py_RETURN_NONE;
+ const upb_FieldDef* f = upb_Message_WhichOneof(msg, o);
+ if (!f) Py_RETURN_NONE;
+ return PyUnicode_FromString(upb_FieldDef_Name(f));
+}
+
+PyObject* DeepCopy(PyObject* _self, PyObject* arg) {
+ PyUpb_Message* self = (void*)_self;
+
+ PyObject* arena = PyUpb_Arena_New();
+ upb_Message* clone =
+ upb_Message_DeepClone(self->ptr.msg, upb_MessageDef_MiniTable(self->def),
+ PyUpb_Arena_Get(arena));
+ PyObject* ret = PyUpb_Message_Get(clone, self->def, arena);
+ Py_DECREF(arena);
+
+ return ret;
+}
+
+void PyUpb_Message_ClearExtensionDict(PyObject* _self) {
+ PyUpb_Message* self = (void*)_self;
+ assert(self->ext_dict);
+ self->ext_dict = NULL;
+}
+
+static PyObject* PyUpb_Message_GetExtensionDict(PyObject* _self,
+ void* closure) {
+ PyUpb_Message* self = (void*)_self;
+ if (self->ext_dict) {
+ Py_INCREF(self->ext_dict);
+ return self->ext_dict;
+ }
+
+ const upb_MessageDef* m = _PyUpb_Message_GetMsgdef(self);
+ if (upb_MessageDef_ExtensionRangeCount(m) == 0) {
+ PyErr_SetNone(PyExc_AttributeError);
+ return NULL;
+ }
+
+ self->ext_dict = PyUpb_ExtensionDict_New(_self);
+ return self->ext_dict;
+}
+
+static PyGetSetDef PyUpb_Message_Getters[] = {
+ {"Extensions", PyUpb_Message_GetExtensionDict, NULL, "Extension dict"},
+ {NULL}};
+
+static PyMethodDef PyUpb_Message_Methods[] = {
+ {"__deepcopy__", (PyCFunction)DeepCopy, METH_VARARGS,
+ "Makes a deep copy of the class."},
+ // TODO(https://github.com/protocolbuffers/upb/issues/459)
+ //{ "__unicode__", (PyCFunction)ToUnicode, METH_NOARGS,
+ // "Outputs a unicode representation of the message." },
+ {"ByteSize", (PyCFunction)PyUpb_Message_ByteSize, METH_NOARGS,
+ "Returns the size of the message in bytes."},
+ {"Clear", (PyCFunction)PyUpb_Message_Clear, METH_NOARGS,
+ "Clears the message."},
+ {"ClearExtension", PyUpb_Message_ClearExtension, METH_O,
+ "Clears a message field."},
+ {"ClearField", PyUpb_Message_ClearField, METH_O, "Clears a message field."},
+ {"CopyFrom", PyUpb_Message_CopyFrom, METH_O,
+ "Copies a protocol message into the current message."},
+ {"DiscardUnknownFields", (PyCFunction)PyUpb_Message_DiscardUnknownFields,
+ METH_NOARGS, "Discards the unknown fields."},
+ {"FindInitializationErrors", PyUpb_Message_FindInitializationErrors,
+ METH_NOARGS, "Finds unset required fields."},
+ {"FromString", PyUpb_Message_FromString, METH_O | METH_CLASS,
+ "Creates new method instance from given serialized data."},
+ {"HasExtension", PyUpb_Message_HasExtension, METH_O,
+ "Checks if a message field is set."},
+ {"HasField", PyUpb_Message_HasField, METH_O,
+ "Checks if a message field is set."},
+ {"IsInitialized", PyUpb_Message_IsInitialized, METH_VARARGS,
+ "Checks if all required fields of a protocol message are set."},
+ {"ListFields", PyUpb_Message_ListFields, METH_NOARGS,
+ "Lists all set fields of a message."},
+ {"MergeFrom", PyUpb_Message_MergeFrom, METH_O,
+ "Merges a protocol message into the current message."},
+ {"MergeFromString", PyUpb_Message_MergeFromString, METH_O,
+ "Merges a serialized message into the current message."},
+ {"ParseFromString", PyUpb_Message_ParseFromString, METH_O,
+ "Parses a serialized message into the current message."},
+ {"SerializePartialToString",
+ (PyCFunction)PyUpb_Message_SerializePartialToString,
+ METH_VARARGS | METH_KEYWORDS,
+ "Serializes the message to a string, even if it isn't initialized."},
+ {"SerializeToString", (PyCFunction)PyUpb_Message_SerializeToString,
+ METH_VARARGS | METH_KEYWORDS,
+ "Serializes the message to a string, only for initialized messages."},
+ {"SetInParent", (PyCFunction)PyUpb_Message_SetInParent, METH_NOARGS,
+ "Sets the has bit of the given field in its parent message."},
+ {"UnknownFields", (PyCFunction)PyUpb_Message_UnknownFields, METH_NOARGS,
+ "Parse unknown field set"},
+ {"WhichOneof", PyUpb_Message_WhichOneof, METH_O,
+ "Returns the name of the field set inside a oneof, "
+ "or None if no field is set."},
+ {"_ListFieldsItemKey", PyUpb_Message_ListFieldsItemKey,
+ METH_O | METH_STATIC,
+ "Compares ListFields() list entries by field number"},
+ {"_CheckCalledFromGeneratedFile",
+ PyUpb_Message_CheckCalledFromGeneratedFile, METH_NOARGS | METH_STATIC,
+ "Raises TypeError if the caller is not in a _pb2.py file."},
+ {NULL, NULL}};
+
+static PyType_Slot PyUpb_Message_Slots[] = {
+ {Py_tp_dealloc, PyUpb_Message_Dealloc},
+ {Py_tp_doc, "A ProtocolMessage"},
+ {Py_tp_getattro, PyUpb_Message_GetAttr},
+ {Py_tp_getset, PyUpb_Message_Getters},
+ {Py_tp_hash, PyObject_HashNotImplemented},
+ {Py_tp_methods, PyUpb_Message_Methods},
+ {Py_tp_new, PyUpb_Message_New},
+ {Py_tp_str, PyUpb_Message_ToString},
+ {Py_tp_repr, PyUpb_Message_ToString},
+ {Py_tp_richcompare, PyUpb_Message_RichCompare},
+ {Py_tp_setattro, PyUpb_Message_SetAttr},
+ {Py_tp_init, PyUpb_Message_Init},
+ {0, NULL}};
+
+PyType_Spec PyUpb_Message_Spec = {
+ PYUPB_MODULE_NAME ".Message", // tp_name
+ sizeof(PyUpb_Message), // tp_basicsize
+ 0, // tp_itemsize
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, // tp_flags
+ PyUpb_Message_Slots,
+};
+
+// -----------------------------------------------------------------------------
+// MessageMeta
+// -----------------------------------------------------------------------------
+
+// MessageMeta is the metaclass for message objects. The generated code uses it
+// to construct message classes, ie.
+//
+// FooMessage = _message.MessageMeta('FooMessage', (_message.Message), {...})
+//
+// (This is not quite true: at the moment the Python library subclasses
+// MessageMeta, and uses that subclass as the metaclass. There is a TODO below
+// to simplify this, so that the illustration above is indeed accurate).
+
+typedef struct {
+ const upb_MiniTable* layout;
+ PyObject* py_message_descriptor;
+} PyUpb_MessageMeta;
+
+// The PyUpb_MessageMeta struct is trailing data tacked onto the end of
+// MessageMeta instances. This means that we get our instances of this struct
+// by adding the appropriate number of bytes.
+static PyUpb_MessageMeta* PyUpb_GetMessageMeta(PyObject* cls) {
+#ifndef NDEBUG
+ PyUpb_ModuleState* state = PyUpb_ModuleState_MaybeGet();
+ assert(!state || cls->ob_type == state->message_meta_type);
+#endif
+ return (PyUpb_MessageMeta*)((char*)cls + cpython_bits.type_basicsize);
+}
+
+static const upb_MessageDef* PyUpb_MessageMeta_GetMsgdef(PyObject* cls) {
+ PyUpb_MessageMeta* self = PyUpb_GetMessageMeta(cls);
+ return PyUpb_Descriptor_GetDef(self->py_message_descriptor);
+}
+
+PyObject* PyUpb_MessageMeta_DoCreateClass(PyObject* py_descriptor,
+ const char* name, PyObject* dict) {
+ PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
+ PyTypeObject* descriptor_type = state->descriptor_types[kPyUpb_Descriptor];
+ if (!PyObject_TypeCheck(py_descriptor, descriptor_type)) {
+ return PyErr_Format(PyExc_TypeError, "Expected a message Descriptor");
+ }
+
+ const upb_MessageDef* msgdef = PyUpb_Descriptor_GetDef(py_descriptor);
+ assert(msgdef);
+ assert(!PyUpb_ObjCache_Get(upb_MessageDef_MiniTable(msgdef)));
+
+ PyObject* slots = PyTuple_New(0);
+ if (!slots) return NULL;
+ int status = PyDict_SetItemString(dict, "__slots__", slots);
+ Py_DECREF(slots);
+ if (status < 0) return NULL;
+
+ // Bases are either:
+ // (Message, Message) # for regular messages
+ // (Message, Message, WktBase) # For well-known types
+ PyObject* wkt_bases = PyUpb_GetWktBases(state);
+ PyObject* wkt_base =
+ PyDict_GetItemString(wkt_bases, upb_MessageDef_FullName(msgdef));
+ PyObject* args;
+ if (wkt_base == NULL) {
+ args = Py_BuildValue("s(OO)O", name, state->cmessage_type,
+ state->message_class, dict);
+ } else {
+ args = Py_BuildValue("s(OOO)O", name, state->cmessage_type,
+ state->message_class, wkt_base, dict);
+ }
+
+ PyObject* ret = cpython_bits.type_new(state->message_meta_type, args, NULL);
+ Py_DECREF(args);
+ if (!ret) return NULL;
+
+ PyUpb_MessageMeta* meta = PyUpb_GetMessageMeta(ret);
+ meta->py_message_descriptor = py_descriptor;
+ meta->layout = upb_MessageDef_MiniTable(msgdef);
+ Py_INCREF(meta->py_message_descriptor);
+
+ PyUpb_ObjCache_Add(meta->layout, ret);
+
+ return ret;
+}
+
+static PyObject* PyUpb_MessageMeta_New(PyTypeObject* type, PyObject* args,
+ PyObject* kwargs) {
+ PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
+ static const char* kwlist[] = {"name", "bases", "dict", 0};
+ PyObject *bases, *dict;
+ const char* name;
+
+ // Check arguments: (name, bases, dict)
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "sO!O!:type", (char**)kwlist,
+ &name, &PyTuple_Type, &bases, &PyDict_Type,
+ &dict)) {
+ return NULL;
+ }
+
+ // Check bases: only (), or (message.Message,) are allowed
+ Py_ssize_t size = PyTuple_Size(bases);
+ if (!(size == 0 ||
+ (size == 1 && PyTuple_GetItem(bases, 0) == state->message_class))) {
+ PyErr_Format(PyExc_TypeError,
+ "A Message class can only inherit from Message, not %S",
+ bases);
+ return NULL;
+ }
+
+ // Check dict['DESCRIPTOR']
+ PyObject* py_descriptor = PyDict_GetItemString(dict, "DESCRIPTOR");
+ if (py_descriptor == NULL) {
+ PyErr_SetString(PyExc_TypeError, "Message class has no DESCRIPTOR");
+ return NULL;
+ }
+
+ const upb_MessageDef* m = PyUpb_Descriptor_GetDef(py_descriptor);
+ PyObject* ret = PyUpb_ObjCache_Get(upb_MessageDef_MiniTable(m));
+ if (ret) return ret;
+ return PyUpb_MessageMeta_DoCreateClass(py_descriptor, name, dict);
+}
+
+static void PyUpb_MessageMeta_Dealloc(PyObject* self) {
+ PyUpb_MessageMeta* meta = PyUpb_GetMessageMeta(self);
+ PyUpb_ObjCache_Delete(meta->layout);
+ Py_DECREF(meta->py_message_descriptor);
+ PyTypeObject* tp = Py_TYPE(self);
+ cpython_bits.type_dealloc(self);
+ Py_DECREF(tp);
+}
+
+void PyUpb_MessageMeta_AddFieldNumber(PyObject* self, const upb_FieldDef* f) {
+ PyObject* name =
+ PyUnicode_FromFormat("%s_FIELD_NUMBER", upb_FieldDef_Name(f));
+ PyObject* upper = PyObject_CallMethod(name, "upper", "");
+ PyObject_SetAttr(self, upper, PyLong_FromLong(upb_FieldDef_Number(f)));
+ Py_DECREF(name);
+ Py_DECREF(upper);
+}
+
+static PyObject* PyUpb_MessageMeta_GetDynamicAttr(PyObject* self,
+ PyObject* name) {
+ const char* name_buf = PyUpb_GetStrData(name);
+ if (!name_buf) return NULL;
+ const upb_MessageDef* msgdef = PyUpb_MessageMeta_GetMsgdef(self);
+ const upb_FileDef* filedef = upb_MessageDef_File(msgdef);
+ const upb_DefPool* symtab = upb_FileDef_Pool(filedef);
+
+ PyObject* py_key =
+ PyBytes_FromFormat("%s.%s", upb_MessageDef_FullName(msgdef), name_buf);
+ const char* key = PyUpb_GetStrData(py_key);
+ PyObject* ret = NULL;
+ const upb_MessageDef* nested = upb_DefPool_FindMessageByName(symtab, key);
+ const upb_EnumDef* enumdef;
+ const upb_EnumValueDef* enumval;
+ const upb_FieldDef* ext;
+
+ if (nested) {
+ ret = PyUpb_Descriptor_GetClass(nested);
+ } else if ((enumdef = upb_DefPool_FindEnumByName(symtab, key))) {
+ PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
+ PyObject* klass = state->enum_type_wrapper_class;
+ ret = PyUpb_EnumDescriptor_Get(enumdef);
+ ret = PyObject_CallFunctionObjArgs(klass, ret, NULL);
+ } else if ((enumval = upb_DefPool_FindEnumByNameval(symtab, key))) {
+ ret = PyLong_FromLong(upb_EnumValueDef_Number(enumval));
+ } else if ((ext = upb_DefPool_FindExtensionByName(symtab, key))) {
+ ret = PyUpb_FieldDescriptor_Get(ext);
+ }
+
+ Py_DECREF(py_key);
+
+ const char* suffix = "_FIELD_NUMBER";
+ size_t n = strlen(name_buf);
+ size_t suffix_n = strlen(suffix);
+ if (n > suffix_n && memcmp(suffix, name_buf + n - suffix_n, suffix_n) == 0) {
+ // We can't look up field names dynamically, because the <NAME>_FIELD_NUMBER
+ // naming scheme upper-cases the field name and is therefore non-reversible.
+ // So we just add all field numbers.
+ int n = upb_MessageDef_FieldCount(msgdef);
+ for (int i = 0; i < n; i++) {
+ PyUpb_MessageMeta_AddFieldNumber(self, upb_MessageDef_Field(msgdef, i));
+ }
+ n = upb_MessageDef_NestedExtensionCount(msgdef);
+ for (int i = 0; i < n; i++) {
+ PyUpb_MessageMeta_AddFieldNumber(
+ self, upb_MessageDef_NestedExtension(msgdef, i));
+ }
+ ret = PyObject_GenericGetAttr(self, name);
+ }
+
+ return ret;
+}
+
+static PyObject* PyUpb_MessageMeta_GetAttr(PyObject* self, PyObject* name) {
+ // We want to first delegate to the type's tp_dict to retrieve any attributes
+ // that were previously calculated and cached in the type's dict.
+ PyObject* ret = cpython_bits.type_getattro(self, name);
+ if (ret) return ret;
+
+ // We did not find a cached attribute. Try to calculate the attribute
+ // dynamically, using the descriptor as an argument.
+ PyErr_Clear();
+ ret = PyUpb_MessageMeta_GetDynamicAttr(self, name);
+
+ if (ret) {
+ PyObject_SetAttr(self, name, ret);
+ PyErr_Clear();
+ return ret;
+ }
+
+ PyErr_SetObject(PyExc_AttributeError, name);
+ return NULL;
+}
+
+static PyType_Slot PyUpb_MessageMeta_Slots[] = {
+ {Py_tp_new, PyUpb_MessageMeta_New},
+ {Py_tp_dealloc, PyUpb_MessageMeta_Dealloc},
+ {Py_tp_getattro, PyUpb_MessageMeta_GetAttr},
+ {0, NULL}};
+
+static PyType_Spec PyUpb_MessageMeta_Spec = {
+ PYUPB_MODULE_NAME ".MessageMeta", // tp_name
+ 0, // To be filled in by size of base // tp_basicsize
+ 0, // tp_itemsize
+ // TODO(haberman): remove BASETYPE, Python should just use MessageMeta
+ // directly instead of subclassing it.
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, // tp_flags
+ PyUpb_MessageMeta_Slots,
+};
+
+static PyObject* PyUpb_MessageMeta_CreateType(void) {
+ PyObject* bases = Py_BuildValue("(O)", &PyType_Type);
+ if (!bases) return NULL;
+ PyUpb_MessageMeta_Spec.basicsize =
+ cpython_bits.type_basicsize + sizeof(PyUpb_MessageMeta);
+ PyObject* type = PyType_FromSpecWithBases(&PyUpb_MessageMeta_Spec, bases);
+ Py_DECREF(bases);
+ return type;
+}
+
+bool PyUpb_InitMessage(PyObject* m) {
+ if (!PyUpb_CPythonBits_Init(&cpython_bits)) return false;
+ PyObject* message_meta_type = PyUpb_MessageMeta_CreateType();
+
+ PyUpb_ModuleState* state = PyUpb_ModuleState_GetFromModule(m);
+ state->cmessage_type = PyUpb_AddClass(m, &PyUpb_Message_Spec);
+ state->message_meta_type = (PyTypeObject*)message_meta_type;
+
+ if (!state->cmessage_type || !state->message_meta_type) return false;
+ if (PyModule_AddObject(m, "MessageMeta", message_meta_type)) return false;
+ state->listfields_item_key = PyObject_GetAttrString(
+ (PyObject*)state->cmessage_type, "_ListFieldsItemKey");
+
+ PyObject* mod =
+ PyImport_ImportModule(PYUPB_PROTOBUF_PUBLIC_PACKAGE ".message");
+ if (mod == NULL) return false;
+
+ state->encode_error_class = PyObject_GetAttrString(mod, "EncodeError");
+ state->decode_error_class = PyObject_GetAttrString(mod, "DecodeError");
+ state->message_class = PyObject_GetAttrString(mod, "Message");
+ Py_DECREF(mod);
+
+ PyObject* enum_type_wrapper = PyImport_ImportModule(
+ PYUPB_PROTOBUF_INTERNAL_PACKAGE ".enum_type_wrapper");
+ if (enum_type_wrapper == NULL) return false;
+
+ state->enum_type_wrapper_class =
+ PyObject_GetAttrString(enum_type_wrapper, "EnumTypeWrapper");
+ Py_DECREF(enum_type_wrapper);
+
+ if (!state->encode_error_class || !state->decode_error_class ||
+ !state->message_class || !state->listfields_item_key ||
+ !state->enum_type_wrapper_class) {
+ return false;
+ }
+
+ return true;
+}
diff --git a/upb/python/message.h b/upb/python/message.h
new file mode 100644
index 0000000..885b5df
--- /dev/null
+++ b/upb/python/message.h
@@ -0,0 +1,106 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef PYPB_MESSAGE_H__
+#define PYPB_MESSAGE_H__
+
+#include <stdbool.h>
+
+#include "python/protobuf.h"
+#include "upb/reflection/message.h"
+
+// Removes the wrapper object for this field from the unset subobject cache.
+void PyUpb_Message_CacheDelete(PyObject* _self, const upb_FieldDef* f);
+
+// Sets the field value for `f` to `subobj`, evicting the wrapper object from
+// the "unset subobject" cache now that real data exists for it. The caller
+// must also update the wrapper associated with `f` to point to `subobj` also.
+void PyUpb_Message_SetConcreteSubobj(PyObject* _self, const upb_FieldDef* f,
+ upb_MessageValue subobj);
+
+// Gets a Python wrapper object for message `u_msg` of type `m`, returning a
+// cached wrapper if one was previously created. If a new object is created,
+// it will reference `arena`, which must own `u_msg`.
+PyObject* PyUpb_Message_Get(upb_Message* u_msg, const upb_MessageDef* m,
+ PyObject* arena);
+
+// Verifies that a Python object is a message. Sets a TypeError exception and
+// returns false on failure.
+bool PyUpb_Message_Verify(PyObject* self);
+
+// Gets the upb_Message* for this message object if the message is reified.
+// Otherwise returns NULL.
+upb_Message* PyUpb_Message_GetIfReified(PyObject* _self);
+
+// Returns the `upb_MessageDef` for a given Message.
+const upb_MessageDef* PyUpb_Message_GetMsgdef(PyObject* self);
+
+// Functions that match the corresponding methods on the message object.
+PyObject* PyUpb_Message_MergeFrom(PyObject* self, PyObject* arg);
+PyObject* PyUpb_Message_MergeFromString(PyObject* self, PyObject* arg);
+PyObject* PyUpb_Message_SerializeToString(PyObject* self, PyObject* args,
+ PyObject* kwargs);
+PyObject* PyUpb_Message_SerializePartialToString(PyObject* self, PyObject* args,
+ PyObject* kwargs);
+
+// Sets fields of the message according to the attribuges in `kwargs`.
+int PyUpb_Message_InitAttributes(PyObject* _self, PyObject* args,
+ PyObject* kwargs);
+
+// Checks that `key` is a field descriptor for an extension type, and that the
+// extendee is this message. Otherwise returns NULL and sets a KeyError.
+const upb_FieldDef* PyUpb_Message_GetExtensionDef(PyObject* _self,
+ PyObject* key);
+
+// Clears the given field in this message.
+void PyUpb_Message_DoClearField(PyObject* _self, const upb_FieldDef* f);
+
+// Clears the ExtensionDict from the message. The message must have an
+// ExtensionDict set.
+void PyUpb_Message_ClearExtensionDict(PyObject* _self);
+
+// Implements the equivalent of getattr(msg, field), once `field` has
+// already been resolved to a `upb_FieldDef*`.
+PyObject* PyUpb_Message_GetFieldValue(PyObject* _self,
+ const upb_FieldDef* field);
+
+// Implements the equivalent of setattr(msg, field, value), once `field` has
+// already been resolved to a `upb_FieldDef*`.
+int PyUpb_Message_SetFieldValue(PyObject* _self, const upb_FieldDef* field,
+ PyObject* value, PyObject* exc);
+
+// Returns the version associated with this message. The version will be
+// incremented when the message changes.
+int PyUpb_Message_GetVersion(PyObject* _self);
+
+// Module-level init.
+bool PyUpb_InitMessage(PyObject* m);
+
+#endif // PYPB_MESSAGE_H__
diff --git a/upb/python/minimal_test.py b/upb/python/minimal_test.py
new file mode 100644
index 0000000..e1690d1
--- /dev/null
+++ b/upb/python/minimal_test.py
@@ -0,0 +1,187 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""A bare-bones unit test that doesn't load any generated code."""
+
+
+import unittest
+from google.protobuf.pyext import _message
+from google3.net.proto2.python.internal import api_implementation
+from google.protobuf import unittest_pb2
+from google.protobuf import map_unittest_pb2
+from google.protobuf import descriptor_pool
+from google.protobuf import text_format
+from google.protobuf import message_factory
+from google.protobuf import message
+from google3.net.proto2.python.internal import factory_test1_pb2
+from google3.net.proto2.python.internal import factory_test2_pb2
+from google3.net.proto2.python.internal import more_extensions_pb2
+from google.protobuf import descriptor_pb2
+
+class TestMessageExtension(unittest.TestCase):
+
+ def test_descriptor_pool(self):
+ serialized_desc = b'\n\ntest.proto\"\x0e\n\x02M1*\x08\x08\x01\x10\x80\x80\x80\x80\x02:\x15\n\x08test_ext\x12\x03.M1\x18\x01 \x01(\x05'
+ pool = _message.DescriptorPool()
+ file_desc = pool.AddSerializedFile(serialized_desc)
+ self.assertEqual("test.proto", file_desc.name)
+ ext_desc = pool.FindExtensionByName("test_ext")
+ self.assertEqual(1, ext_desc.number)
+
+ # Test object cache: repeatedly retrieving the same descriptor
+ # should result in the same object
+ self.assertIs(ext_desc, pool.FindExtensionByName("test_ext"))
+
+
+ def test_lib_is_upb(self):
+ # Ensure we are not pulling in a different protobuf library on the
+ # system.
+ print(_message._IS_UPB)
+ self.assertTrue(_message._IS_UPB)
+ self.assertEqual(api_implementation.Type(), "cpp")
+
+ def test_repeated_field_slice_delete(self):
+ def test_slice(start, end, step):
+ vals = list(range(20))
+ message = unittest_pb2.TestAllTypes(repeated_int32=vals)
+ del vals[start:end:step]
+ del message.repeated_int32[start:end:step]
+ self.assertEqual(vals, list(message.repeated_int32))
+ test_slice(3, 11, 1)
+ test_slice(3, 11, 2)
+ test_slice(3, 11, 3)
+ test_slice(11, 3, -1)
+ test_slice(11, 3, -2)
+ test_slice(11, 3, -3)
+ test_slice(10, 25, 4)
+
+ def testExtensionsErrors(self):
+ msg = unittest_pb2.TestAllTypes()
+ self.assertRaises(AttributeError, getattr, msg, 'Extensions')
+
+ def testClearStubMapField(self):
+ msg = map_unittest_pb2.TestMapSubmessage()
+ int32_map = msg.test_map.map_int32_int32
+ msg.test_map.ClearField("map_int32_int32")
+ int32_map[123] = 456
+ self.assertEqual(0, msg.test_map.ByteSize())
+
+ def testClearReifiedMapField(self):
+ msg = map_unittest_pb2.TestMap()
+ int32_map = msg.map_int32_int32
+ int32_map[123] = 456
+ msg.ClearField("map_int32_int32")
+ int32_map[111] = 222
+ self.assertEqual(0, msg.ByteSize())
+
+ def testClearStubRepeatedField(self):
+ msg = unittest_pb2.NestedTestAllTypes()
+ int32_array = msg.payload.repeated_int32
+ msg.payload.ClearField("repeated_int32")
+ int32_array.append(123)
+ self.assertEqual(0, msg.payload.ByteSize())
+
+ def testClearReifiedRepeatdField(self):
+ msg = unittest_pb2.TestAllTypes()
+ int32_array = msg.repeated_int32
+ int32_array.append(123)
+ self.assertNotEqual(0, msg.ByteSize())
+ msg.ClearField("repeated_int32")
+ int32_array.append(123)
+ self.assertEqual(0, msg.ByteSize())
+
+ def testFloatPrinting(self):
+ message = unittest_pb2.TestAllTypes()
+ message.optional_float = -0.0
+ self.assertEqual(str(message), 'optional_float: -0\n')
+
+class OversizeProtosTest(unittest.TestCase):
+ def setUp(self):
+ msg = unittest_pb2.NestedTestAllTypes()
+ m = msg
+ for i in range(101):
+ m = m.child
+ m.Clear()
+ self.p_serialized = msg.SerializeToString()
+
+ def testAssertOversizeProto(self):
+ from google.protobuf.pyext._message import SetAllowOversizeProtos
+ SetAllowOversizeProtos(False)
+ q = unittest_pb2.NestedTestAllTypes()
+ with self.assertRaises(message.DecodeError):
+ q.ParseFromString(self.p_serialized)
+ print(q)
+
+ def testSucceedOversizeProto(self):
+ from google.protobuf.pyext._message import SetAllowOversizeProtos
+ SetAllowOversizeProtos(True)
+ q = unittest_pb2.NestedTestAllTypes()
+ q.ParseFromString(self.p_serialized)
+
+ def testExtensionIter(self):
+ extendee_proto = more_extensions_pb2.ExtendedMessage()
+
+ extension_int32 = more_extensions_pb2.optional_int_extension
+ extendee_proto.Extensions[extension_int32] = 23
+
+ extension_repeated = more_extensions_pb2.repeated_int_extension
+ extendee_proto.Extensions[extension_repeated].append(11)
+
+ extension_msg = more_extensions_pb2.optional_message_extension
+ extendee_proto.Extensions[extension_msg].foreign_message_int = 56
+
+ # Set some normal fields.
+ extendee_proto.optional_int32 = 1
+ extendee_proto.repeated_string.append('hi')
+
+ expected = {
+ extension_int32: True,
+ extension_msg: True,
+ extension_repeated: True
+ }
+ count = 0
+ for item in extendee_proto.Extensions:
+ del expected[item]
+ self.assertIn(item, extendee_proto.Extensions)
+ count += 1
+ self.assertEqual(count, 3)
+ self.assertEqual(len(expected), 0)
+
+ def testIsInitializedStub(self):
+ proto = unittest_pb2.TestRequiredForeign()
+ self.assertTrue(proto.IsInitialized())
+ self.assertFalse(proto.optional_message.IsInitialized())
+ errors = []
+ self.assertFalse(proto.optional_message.IsInitialized(errors))
+ self.assertEqual(['a', 'b', 'c'], errors)
+ self.assertRaises(message.EncodeError, proto.optional_message.SerializeToString)
+
+if __name__ == '__main__':
+ unittest.main(verbosity=2)
diff --git a/upb/python/pb_unit_tests/BUILD b/upb/python/pb_unit_tests/BUILD
new file mode 100644
index 0000000..d63a081
--- /dev/null
+++ b/upb/python/pb_unit_tests/BUILD
@@ -0,0 +1,102 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load(":pyproto_test_wrapper.bzl", "pyproto_test_wrapper")
+# begin:github_only
+load("@pip_deps//:requirements.bzl", "requirement")
+# end:github_only
+
+# begin:google_only
+# package(default_applicable_licenses = ["//:license"])
+# end:google_only
+
+licenses(["notice"])
+
+pyproto_test_wrapper(name = "descriptor_database_test")
+
+pyproto_test_wrapper(name = "descriptor_pool_test")
+
+pyproto_test_wrapper(name = "descriptor_test")
+
+# begin:github_only
+pyproto_test_wrapper(name = "generator_test")
+# end:github_only
+
+pyproto_test_wrapper(name = "json_format_test")
+
+pyproto_test_wrapper(name = "keywords_test")
+
+pyproto_test_wrapper(name = "message_factory_test")
+
+# begin:github_only
+# This target has different dependencies and fails when using the wrapper
+# TODO: Move this to using pyproto_test_wrapper
+py_test(
+ name = "numpy_test",
+ srcs = ["numpy_test_wrapper.py"],
+ main = "numpy_test_wrapper.py",
+ deps = [
+ requirement("numpy"),
+ "@com_google_protobuf//python/google/protobuf/internal/numpy:numpy_test",
+ "//python:_message",
+ ],
+ target_compatible_with = select({
+ "@system_python//:supported": [],
+ "//conditions:default": ["@platforms//:incompatible"],
+ }),
+)
+# end:github_only
+
+# begin:google_only
+# pyproto_test_wrapper(name = "numpy_test")
+# end:google_only
+
+pyproto_test_wrapper(name = "proto_builder_test")
+
+pyproto_test_wrapper(name = "service_reflection_test")
+
+pyproto_test_wrapper(name = "symbol_database_test")
+
+pyproto_test_wrapper(name = "text_encoding_test")
+
+pyproto_test_wrapper(name = "message_test")
+
+pyproto_test_wrapper(name = "reflection_test")
+
+pyproto_test_wrapper(name = "text_format_test")
+
+pyproto_test_wrapper(name = "unknown_fields_test")
+
+pyproto_test_wrapper(name = "well_known_types_test")
+
+pyproto_test_wrapper(name = "wire_format_test")
+
+filegroup(
+ name = "test_files",
+ srcs = glob(["*.py"]),
+ visibility = [
+ "//python/dist:__pkg__", # Scheuklappen: keep
+ ],
+)
diff --git a/upb/python/pb_unit_tests/README.md b/upb/python/pb_unit_tests/README.md
new file mode 100644
index 0000000..669f067
--- /dev/null
+++ b/upb/python/pb_unit_tests/README.md
@@ -0,0 +1,11 @@
+
+# Protobuf Unit Tests
+
+This directory contains wrappers around the Python unit tests defined in
+the protobuf repo. Python+upb is intended to be a drop-in replacement for
+protobuf Python, so we should be able to pass the same set of unit tests.
+
+Our wrappers contain exclusion lists for tests we know we are not currently
+passing. Ideally these exclusion lists will become empty once Python+upb is
+fully implemented. However there may be a few edge cases that we decide
+are not worth matching with perfect parity.
diff --git a/upb/python/pb_unit_tests/descriptor_database_test_wrapper.py b/upb/python/pb_unit_tests/descriptor_database_test_wrapper.py
new file mode 100644
index 0000000..2e6081f
--- /dev/null
+++ b/upb/python/pb_unit_tests/descriptor_database_test_wrapper.py
@@ -0,0 +1,35 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from google.protobuf.internal.descriptor_database_test import *
+import unittest
+
+if __name__ == '__main__':
+ unittest.main(verbosity=2)
diff --git a/upb/python/pb_unit_tests/descriptor_pool_test_wrapper.py b/upb/python/pb_unit_tests/descriptor_pool_test_wrapper.py
new file mode 100644
index 0000000..1c4f282
--- /dev/null
+++ b/upb/python/pb_unit_tests/descriptor_pool_test_wrapper.py
@@ -0,0 +1,45 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import unittest
+from google.protobuf.internal.descriptor_pool_test import *
+
+SecondaryDescriptorFromDescriptorDB.testErrorCollector.__unittest_expecting_failure__ = True
+
+# begin:github_only
+if __name__ == '__main__':
+ unittest.main(verbosity=2)
+# end:github_only
+
+# begin:google_only
+# from absl import app
+# if __name__ == '__main__':
+# app.run(lambda argv: unittest.main(verbosity=2))
+# end:google_only
diff --git a/upb/python/pb_unit_tests/descriptor_test_wrapper.py b/upb/python/pb_unit_tests/descriptor_test_wrapper.py
new file mode 100644
index 0000000..11f47ad
--- /dev/null
+++ b/upb/python/pb_unit_tests/descriptor_test_wrapper.py
@@ -0,0 +1,46 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from google.protobuf.internal.descriptor_test import *
+import unittest
+
+# These fail because they attempt to add fields with conflicting JSON names.
+# We don't want to support this going forward.
+MakeDescriptorTest.testCamelcaseName.__unittest_expecting_failure__ = True
+MakeDescriptorTest.testJsonName.__unittest_expecting_failure__ = True
+
+# We pass this test, but the error message is slightly different.
+# Our error message is better.
+NewDescriptorTest.testImmutableCppDescriptor.__unittest_expecting_failure__ = True
+
+DescriptorTest.testGetDebugString.__unittest_expecting_failure__ = True
+
+if __name__ == '__main__':
+ unittest.main(verbosity=2)
diff --git a/upb/python/pb_unit_tests/generator_test_wrapper.py b/upb/python/pb_unit_tests/generator_test_wrapper.py
new file mode 100644
index 0000000..9ffc27f
--- /dev/null
+++ b/upb/python/pb_unit_tests/generator_test_wrapper.py
@@ -0,0 +1,35 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from google.protobuf.internal.generator_test import *
+import unittest
+
+if __name__ == '__main__':
+ unittest.main(verbosity=2)
diff --git a/upb/python/pb_unit_tests/json_format_test_wrapper.py b/upb/python/pb_unit_tests/json_format_test_wrapper.py
new file mode 100644
index 0000000..27d855c
--- /dev/null
+++ b/upb/python/pb_unit_tests/json_format_test_wrapper.py
@@ -0,0 +1,35 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from google.protobuf.internal.json_format_test import *
+import unittest
+
+if __name__ == '__main__':
+ unittest.main(verbosity=2)
diff --git a/upb/python/pb_unit_tests/keywords_test_wrapper.py b/upb/python/pb_unit_tests/keywords_test_wrapper.py
new file mode 100644
index 0000000..d940178
--- /dev/null
+++ b/upb/python/pb_unit_tests/keywords_test_wrapper.py
@@ -0,0 +1,35 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from google.protobuf.internal.keywords_test import *
+import unittest
+
+if __name__ == '__main__':
+ unittest.main(verbosity=2)
diff --git a/upb/python/pb_unit_tests/message_factory_test_wrapper.py b/upb/python/pb_unit_tests/message_factory_test_wrapper.py
new file mode 100644
index 0000000..4e3a7ba
--- /dev/null
+++ b/upb/python/pb_unit_tests/message_factory_test_wrapper.py
@@ -0,0 +1,37 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from google.protobuf.internal.message_factory_test import *
+import unittest
+
+MessageFactoryTest.testDuplicateExtensionNumber.__unittest_expecting_failure__ = True
+
+if __name__ == '__main__':
+ unittest.main(verbosity=2)
diff --git a/upb/python/pb_unit_tests/message_test_wrapper.py b/upb/python/pb_unit_tests/message_test_wrapper.py
new file mode 100644
index 0000000..fcac3a3
--- /dev/null
+++ b/upb/python/pb_unit_tests/message_test_wrapper.py
@@ -0,0 +1,55 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from google.protobuf.internal.message_test import *
+import unittest
+
+MessageTest.testExtendFloatWithNothing_proto2.__unittest_skip__ = True
+MessageTest.testExtendFloatWithNothing_proto3.__unittest_skip__ = True
+MessageTest.testExtendInt32WithNothing_proto2.__unittest_skip__ = True
+MessageTest.testExtendInt32WithNothing_proto3.__unittest_skip__ = True
+MessageTest.testExtendStringWithNothing_proto2.__unittest_skip__ = True
+MessageTest.testExtendStringWithNothing_proto3.__unittest_skip__ = True
+
+# Python/C++ customizes the C++ TextFormat to always print trailing ".0" for
+# floats. upb doesn't do this, it matches C++ TextFormat.
+MessageTest.testFloatPrinting_proto2.__unittest_expecting_failure__ = True
+MessageTest.testFloatPrinting_proto3.__unittest_expecting_failure__ = True
+
+# For these tests we are throwing the correct error, only the text of the error
+# message is a mismatch. For technical reasons around the limited API, matching
+# the existing error message exactly is not feasible.
+Proto3Test.testCopyFromBadType.__unittest_expecting_failure__ = True
+Proto3Test.testMergeFromBadType.__unittest_expecting_failure__ = True
+
+Proto2Test.test_documentation.__unittest_expecting_failure__ = True
+
+if __name__ == '__main__':
+ unittest.main(verbosity=2)
diff --git a/upb/python/pb_unit_tests/numpy_test_wrapper.py b/upb/python/pb_unit_tests/numpy_test_wrapper.py
new file mode 100644
index 0000000..62089e9
--- /dev/null
+++ b/upb/python/pb_unit_tests/numpy_test_wrapper.py
@@ -0,0 +1,36 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import unittest
+
+from google.protobuf.internal.numpy.numpy_test import *
+
+if __name__ == '__main__':
+ unittest.main(verbosity=2)
diff --git a/upb/python/pb_unit_tests/proto_builder_test_wrapper.py b/upb/python/pb_unit_tests/proto_builder_test_wrapper.py
new file mode 100644
index 0000000..468d13e
--- /dev/null
+++ b/upb/python/pb_unit_tests/proto_builder_test_wrapper.py
@@ -0,0 +1,37 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from google.protobuf.internal.proto_builder_test import *
+import unittest
+
+ProtoBuilderTest.testMakeLargeProtoClass.__unittest_expecting_failure__ = True
+
+if __name__ == '__main__':
+ unittest.main(verbosity=2)
diff --git a/upb/python/pb_unit_tests/pyproto_test_wrapper.bzl b/upb/python/pb_unit_tests/pyproto_test_wrapper.bzl
new file mode 100644
index 0000000..d5df8fc
--- /dev/null
+++ b/upb/python/pb_unit_tests/pyproto_test_wrapper.bzl
@@ -0,0 +1,46 @@
+# begin:github_only
+
+def pyproto_test_wrapper(name, deps = []):
+ src = name + "_wrapper.py"
+ native.py_test(
+ name = name,
+ srcs = [src],
+ legacy_create_init = False,
+ main = src,
+ data = ["@com_google_protobuf//src/google/protobuf:testdata"],
+ deps = [
+ "//python:_message",
+ "@com_google_protobuf//:python_common_test_protos",
+ "@com_google_protobuf//:python_specific_test_protos",
+ "@com_google_protobuf//:python_test_srcs",
+ "@com_google_protobuf//:python_srcs",
+ ] + deps,
+ target_compatible_with = select({
+ "@system_python//:supported": [],
+ "//conditions:default": ["@platforms//:incompatible"],
+ }),
+ )
+
+# end:github_only
+
+# begin:google_only
+#
+# load("//third_party/bazel_rules/rules_python/python:py_test.bzl", "py_test")
+#
+# def pyproto_test_wrapper(name):
+# src = name + "_wrapper.py"
+# py_test(
+# name = name,
+# srcs = [src],
+# main = src,
+# deps = [
+# "//third_party/py/google/protobuf/internal:" + name + "_for_deps",
+# "//net/proto2/python/public:use_upb_protos",
+# ],
+# target_compatible_with = select({
+# "@platforms//os:windows": ["@platforms//:incompatible"],
+# "//conditions:default": [],
+# }),
+# )
+#
+# end:google_only
diff --git a/upb/python/pb_unit_tests/reflection_test_wrapper.py b/upb/python/pb_unit_tests/reflection_test_wrapper.py
new file mode 100644
index 0000000..9de7f3e
--- /dev/null
+++ b/upb/python/pb_unit_tests/reflection_test_wrapper.py
@@ -0,0 +1,53 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from google.protobuf.internal.reflection_test import *
+import unittest
+
+# These tests depend on a specific iteration order for extensions, which is not
+# reasonable to guarantee.
+Proto2ReflectionTest.testExtensionIter.__unittest_expecting_failure__ = True
+
+# These tests depend on a specific serialization order for extensions, which is
+# not reasonable to guarantee.
+SerializationTest.testCanonicalSerializationOrder.__unittest_expecting_failure__ = True
+SerializationTest.testCanonicalSerializationOrderSameAsCpp.__unittest_expecting_failure__ = True
+
+# This test relies on the internal implementation using Python descriptors.
+# This is an implementation detail that users should not depend on.
+SerializationTest.testFieldDataDescriptor.__unittest_expecting_failure__ = True
+
+SerializationTest.testFieldProperties.__unittest_expecting_failure__ = True
+
+# TODO(259423340) Python Docker image on MacOS failing.
+ClassAPITest.testParsingNestedClass.__unittest_skip__ = True
+
+if __name__ == '__main__':
+ unittest.main(verbosity=2)
diff --git a/upb/python/pb_unit_tests/service_reflection_test_wrapper.py b/upb/python/pb_unit_tests/service_reflection_test_wrapper.py
new file mode 100644
index 0000000..bc0345c
--- /dev/null
+++ b/upb/python/pb_unit_tests/service_reflection_test_wrapper.py
@@ -0,0 +1,35 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from google.protobuf.internal.service_reflection_test import *
+import unittest
+
+if __name__ == '__main__':
+ unittest.main(verbosity=2)
diff --git a/upb/python/pb_unit_tests/symbol_database_test_wrapper.py b/upb/python/pb_unit_tests/symbol_database_test_wrapper.py
new file mode 100644
index 0000000..16ea965
--- /dev/null
+++ b/upb/python/pb_unit_tests/symbol_database_test_wrapper.py
@@ -0,0 +1,35 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from google.protobuf.internal.symbol_database_test import *
+import unittest
+
+if __name__ == '__main__':
+ unittest.main(verbosity=2)
diff --git a/upb/python/pb_unit_tests/text_encoding_test_wrapper.py b/upb/python/pb_unit_tests/text_encoding_test_wrapper.py
new file mode 100644
index 0000000..3eb8153
--- /dev/null
+++ b/upb/python/pb_unit_tests/text_encoding_test_wrapper.py
@@ -0,0 +1,35 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from google.protobuf.internal.text_encoding_test import *
+import unittest
+
+if __name__ == '__main__':
+ unittest.main(verbosity=2)
diff --git a/upb/python/pb_unit_tests/text_format_test_wrapper.py b/upb/python/pb_unit_tests/text_format_test_wrapper.py
new file mode 100644
index 0000000..535561d
--- /dev/null
+++ b/upb/python/pb_unit_tests/text_format_test_wrapper.py
@@ -0,0 +1,35 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from google.protobuf.internal.text_format_test import *
+import unittest
+
+if __name__ == '__main__':
+ unittest.main(verbosity=2)
diff --git a/upb/python/pb_unit_tests/unknown_fields_test_wrapper.py b/upb/python/pb_unit_tests/unknown_fields_test_wrapper.py
new file mode 100644
index 0000000..1807f6d
--- /dev/null
+++ b/upb/python/pb_unit_tests/unknown_fields_test_wrapper.py
@@ -0,0 +1,35 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from google.protobuf.internal.unknown_fields_test import *
+import unittest
+
+if __name__ == '__main__':
+ unittest.main(verbosity=2)
diff --git a/upb/python/pb_unit_tests/well_known_types_test_wrapper.py b/upb/python/pb_unit_tests/well_known_types_test_wrapper.py
new file mode 100644
index 0000000..5006332
--- /dev/null
+++ b/upb/python/pb_unit_tests/well_known_types_test_wrapper.py
@@ -0,0 +1,36 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from google.protobuf.internal.well_known_types_test import *
+import os
+import unittest
+
+if __name__ == '__main__':
+ unittest.main(verbosity=2)
diff --git a/upb/python/pb_unit_tests/wire_format_test_wrapper.py b/upb/python/pb_unit_tests/wire_format_test_wrapper.py
new file mode 100644
index 0000000..3b13a2b
--- /dev/null
+++ b/upb/python/pb_unit_tests/wire_format_test_wrapper.py
@@ -0,0 +1,35 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2023 Google LLC. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google LLC nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from google.protobuf.internal.wire_format_test import *
+import unittest
+
+if __name__ == '__main__':
+ unittest.main(verbosity=2)
diff --git a/upb/python/protobuf.c b/upb/python/protobuf.c
new file mode 100644
index 0000000..eaba951
--- /dev/null
+++ b/upb/python/protobuf.c
@@ -0,0 +1,357 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "python/protobuf.h"
+
+#include "python/descriptor.h"
+#include "python/descriptor_containers.h"
+#include "python/descriptor_pool.h"
+#include "python/extension_dict.h"
+#include "python/map.h"
+#include "python/message.h"
+#include "python/repeated.h"
+#include "python/unknown_fields.h"
+
+static void PyUpb_ModuleDealloc(void* module) {
+ PyUpb_ModuleState* s = PyModule_GetState(module);
+ PyUpb_WeakMap_Free(s->obj_cache);
+ if (s->c_descriptor_symtab) {
+ upb_DefPool_Free(s->c_descriptor_symtab);
+ }
+}
+
+PyObject* PyUpb_SetAllowOversizeProtos(PyObject* m, PyObject* arg) {
+ if (!arg || !PyBool_Check(arg)) {
+ PyErr_SetString(PyExc_TypeError,
+ "Argument to SetAllowOversizeProtos must be boolean");
+ return NULL;
+ }
+ PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
+ state->allow_oversize_protos = PyObject_IsTrue(arg);
+ Py_INCREF(arg);
+ return arg;
+}
+
+static PyMethodDef PyUpb_ModuleMethods[] = {
+ {"SetAllowOversizeProtos", PyUpb_SetAllowOversizeProtos, METH_O,
+ "Enable/disable oversize proto parsing."},
+ {NULL, NULL}};
+
+static struct PyModuleDef module_def = {PyModuleDef_HEAD_INIT,
+ PYUPB_MODULE_NAME,
+ "Protobuf Module",
+ sizeof(PyUpb_ModuleState),
+ PyUpb_ModuleMethods, // m_methods
+ NULL, // m_slots
+ NULL, // m_traverse
+ NULL, // m_clear
+ PyUpb_ModuleDealloc};
+
+// -----------------------------------------------------------------------------
+// ModuleState
+// -----------------------------------------------------------------------------
+
+PyUpb_ModuleState* PyUpb_ModuleState_MaybeGet(void) {
+ PyObject* module = PyState_FindModule(&module_def);
+ return module ? PyModule_GetState(module) : NULL;
+}
+
+PyUpb_ModuleState* PyUpb_ModuleState_GetFromModule(PyObject* module) {
+ PyUpb_ModuleState* state = PyModule_GetState(module);
+ assert(state);
+ assert(PyModule_GetDef(module) == &module_def);
+ return state;
+}
+
+PyUpb_ModuleState* PyUpb_ModuleState_Get(void) {
+ PyObject* module = PyState_FindModule(&module_def);
+ assert(module);
+ return PyUpb_ModuleState_GetFromModule(module);
+}
+
+PyObject* PyUpb_GetWktBases(PyUpb_ModuleState* state) {
+ if (!state->wkt_bases) {
+ PyObject* wkt_module = PyImport_ImportModule(PYUPB_PROTOBUF_INTERNAL_PACKAGE
+ ".well_known_types");
+
+ if (wkt_module == NULL) {
+ return false;
+ }
+
+ state->wkt_bases = PyObject_GetAttrString(wkt_module, "WKTBASES");
+ PyObject* m = PyState_FindModule(&module_def);
+ // Reparent ownership to m.
+ PyModule_AddObject(m, "__internal_wktbases", state->wkt_bases);
+ Py_DECREF(wkt_module);
+ }
+
+ return state->wkt_bases;
+}
+
+// -----------------------------------------------------------------------------
+// WeakMap
+// -----------------------------------------------------------------------------
+
+struct PyUpb_WeakMap {
+ upb_inttable table;
+ upb_Arena* arena;
+};
+
+PyUpb_WeakMap* PyUpb_WeakMap_New(void) {
+ upb_Arena* arena = upb_Arena_New();
+ PyUpb_WeakMap* map = upb_Arena_Malloc(arena, sizeof(*map));
+ map->arena = arena;
+ upb_inttable_init(&map->table, map->arena);
+ return map;
+}
+
+void PyUpb_WeakMap_Free(PyUpb_WeakMap* map) { upb_Arena_Free(map->arena); }
+
+// To give better entropy in the table key, we shift away low bits that are
+// always zero.
+static const int PyUpb_PtrShift = (sizeof(void*) == 4) ? 2 : 3;
+
+uintptr_t PyUpb_WeakMap_GetKey(const void* key) {
+ uintptr_t n = (uintptr_t)key;
+ assert((n & ((1 << PyUpb_PtrShift) - 1)) == 0);
+ return n >> PyUpb_PtrShift;
+}
+
+void PyUpb_WeakMap_Add(PyUpb_WeakMap* map, const void* key, PyObject* py_obj) {
+ upb_inttable_insert(&map->table, PyUpb_WeakMap_GetKey(key),
+ upb_value_ptr(py_obj), map->arena);
+}
+
+void PyUpb_WeakMap_Delete(PyUpb_WeakMap* map, const void* key) {
+ upb_value val;
+ bool removed =
+ upb_inttable_remove(&map->table, PyUpb_WeakMap_GetKey(key), &val);
+ (void)removed;
+ assert(removed);
+}
+
+void PyUpb_WeakMap_TryDelete(PyUpb_WeakMap* map, const void* key) {
+ upb_inttable_remove(&map->table, PyUpb_WeakMap_GetKey(key), NULL);
+}
+
+PyObject* PyUpb_WeakMap_Get(PyUpb_WeakMap* map, const void* key) {
+ upb_value val;
+ if (upb_inttable_lookup(&map->table, PyUpb_WeakMap_GetKey(key), &val)) {
+ PyObject* ret = upb_value_getptr(val);
+ Py_INCREF(ret);
+ return ret;
+ } else {
+ return NULL;
+ }
+}
+
+bool PyUpb_WeakMap_Next(PyUpb_WeakMap* map, const void** key, PyObject** obj,
+ intptr_t* iter) {
+ uintptr_t u_key;
+ upb_value val;
+ if (!upb_inttable_next(&map->table, &u_key, &val, iter)) return false;
+ *key = (void*)(u_key << PyUpb_PtrShift);
+ *obj = upb_value_getptr(val);
+ return true;
+}
+
+void PyUpb_WeakMap_DeleteIter(PyUpb_WeakMap* map, intptr_t* iter) {
+ upb_inttable_removeiter(&map->table, iter);
+}
+
+// -----------------------------------------------------------------------------
+// ObjCache
+// -----------------------------------------------------------------------------
+
+PyUpb_WeakMap* PyUpb_ObjCache_Instance(void) {
+ PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
+ return state->obj_cache;
+}
+
+void PyUpb_ObjCache_Add(const void* key, PyObject* py_obj) {
+ PyUpb_WeakMap_Add(PyUpb_ObjCache_Instance(), key, py_obj);
+}
+
+void PyUpb_ObjCache_Delete(const void* key) {
+ PyUpb_ModuleState* state = PyUpb_ModuleState_MaybeGet();
+ if (!state) {
+ // During the shutdown sequence, our object's Dealloc() methods can be
+ // called *after* our module Dealloc() method has been called. At that
+ // point our state will be NULL and there is nothing to delete out of the
+ // map.
+ return;
+ }
+ PyUpb_WeakMap_Delete(state->obj_cache, key);
+}
+
+PyObject* PyUpb_ObjCache_Get(const void* key) {
+ return PyUpb_WeakMap_Get(PyUpb_ObjCache_Instance(), key);
+}
+
+// -----------------------------------------------------------------------------
+// Arena
+// -----------------------------------------------------------------------------
+
+typedef struct {
+ PyObject_HEAD;
+ upb_Arena* arena;
+} PyUpb_Arena;
+
+PyObject* PyUpb_Arena_New(void) {
+ PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
+ PyUpb_Arena* arena = (void*)PyType_GenericAlloc(state->arena_type, 0);
+ arena->arena = upb_Arena_New();
+ return &arena->ob_base;
+}
+
+static void PyUpb_Arena_Dealloc(PyObject* self) {
+ upb_Arena_Free(PyUpb_Arena_Get(self));
+ PyUpb_Dealloc(self);
+}
+
+upb_Arena* PyUpb_Arena_Get(PyObject* arena) {
+ return ((PyUpb_Arena*)arena)->arena;
+}
+
+static PyType_Slot PyUpb_Arena_Slots[] = {
+ {Py_tp_dealloc, PyUpb_Arena_Dealloc},
+ {0, NULL},
+};
+
+static PyType_Spec PyUpb_Arena_Spec = {
+ PYUPB_MODULE_NAME ".Arena",
+ sizeof(PyUpb_Arena),
+ 0, // itemsize
+ Py_TPFLAGS_DEFAULT,
+ PyUpb_Arena_Slots,
+};
+
+static bool PyUpb_InitArena(PyObject* m) {
+ PyUpb_ModuleState* state = PyUpb_ModuleState_GetFromModule(m);
+ state->arena_type = PyUpb_AddClass(m, &PyUpb_Arena_Spec);
+ return state->arena_type;
+}
+
+// -----------------------------------------------------------------------------
+// Utilities
+// -----------------------------------------------------------------------------
+
+PyTypeObject* AddObject(PyObject* m, const char* name, PyType_Spec* spec) {
+ PyObject* type = PyType_FromSpec(spec);
+ return type && PyModule_AddObject(m, name, type) == 0 ? (PyTypeObject*)type
+ : NULL;
+}
+
+static const char* PyUpb_GetClassName(PyType_Spec* spec) {
+ // spec->name contains a fully-qualified name, like:
+ // google.protobuf.pyext._message.FooBar
+ //
+ // Find the rightmost '.' to get "FooBar".
+ const char* name = strrchr(spec->name, '.');
+ assert(name);
+ return name + 1;
+}
+
+PyTypeObject* PyUpb_AddClass(PyObject* m, PyType_Spec* spec) {
+ PyObject* type = PyType_FromSpec(spec);
+ const char* name = PyUpb_GetClassName(spec);
+ if (PyModule_AddObject(m, name, type) < 0) {
+ Py_XDECREF(type);
+ return NULL;
+ }
+ return (PyTypeObject*)type;
+}
+
+PyTypeObject* PyUpb_AddClassWithBases(PyObject* m, PyType_Spec* spec,
+ PyObject* bases) {
+ PyObject* type = PyType_FromSpecWithBases(spec, bases);
+ const char* name = PyUpb_GetClassName(spec);
+ if (PyModule_AddObject(m, name, type) < 0) {
+ Py_XDECREF(type);
+ return NULL;
+ }
+ return (PyTypeObject*)type;
+}
+
+const char* PyUpb_GetStrData(PyObject* obj) {
+ if (PyUnicode_Check(obj)) {
+ return PyUnicode_AsUTF8AndSize(obj, NULL);
+ } else if (PyBytes_Check(obj)) {
+ return PyBytes_AsString(obj);
+ } else {
+ return NULL;
+ }
+}
+
+const char* PyUpb_VerifyStrData(PyObject* obj) {
+ const char* ret = PyUpb_GetStrData(obj);
+ if (ret) return ret;
+ PyErr_Format(PyExc_TypeError, "Expected string: %S", obj);
+ return NULL;
+}
+
+PyObject* PyUpb_Forbidden_New(PyObject* cls, PyObject* args, PyObject* kwds) {
+ PyObject* name = PyObject_GetAttrString(cls, "__name__");
+ PyErr_Format(PyExc_RuntimeError,
+ "Objects of type %U may not be created directly.", name);
+ Py_XDECREF(name);
+ return NULL;
+}
+
+// -----------------------------------------------------------------------------
+// Module Entry Point
+// -----------------------------------------------------------------------------
+
+__attribute__((visibility("default"))) PyMODINIT_FUNC PyInit__message(void) {
+ PyObject* m = PyModule_Create(&module_def);
+ if (!m) return NULL;
+
+ PyUpb_ModuleState* state = PyUpb_ModuleState_GetFromModule(m);
+
+ state->allow_oversize_protos = false;
+ state->wkt_bases = NULL;
+ state->obj_cache = PyUpb_WeakMap_New();
+ state->c_descriptor_symtab = NULL;
+
+ if (!PyUpb_InitDescriptorContainers(m) || !PyUpb_InitDescriptorPool(m) ||
+ !PyUpb_InitDescriptor(m) || !PyUpb_InitArena(m) ||
+ !PyUpb_InitExtensionDict(m) || !PyUpb_Map_Init(m) ||
+ !PyUpb_InitMessage(m) || !PyUpb_Repeated_Init(m) ||
+ !PyUpb_UnknownFields_Init(m)) {
+ Py_DECREF(m);
+ return NULL;
+ }
+
+ // Temporary: an cookie we can use in the tests to ensure we are testing upb
+ // and not another protobuf library on the system.
+ PyModule_AddIntConstant(m, "_IS_UPB", 1);
+
+ return m;
+}
diff --git a/upb/python/protobuf.h b/upb/python/protobuf.h
new file mode 100644
index 0000000..480ff13
--- /dev/null
+++ b/upb/python/protobuf.h
@@ -0,0 +1,234 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef PYUPB_PROTOBUF_H__
+#define PYUPB_PROTOBUF_H__
+
+#include <stdbool.h>
+
+#include "python/descriptor.h"
+#include "python/python_api.h"
+#include "upb/hash/int_table.h"
+
+// begin:github_only
+#define PYUPB_PROTOBUF_PUBLIC_PACKAGE "google.protobuf"
+#define PYUPB_PROTOBUF_INTERNAL_PACKAGE "google.protobuf.internal"
+#define PYUPB_DESCRIPTOR_PROTO_PACKAGE "google.protobuf"
+#define PYUPB_DESCRIPTOR_MODULE "google.protobuf.descriptor_pb2"
+#define PYUPB_MODULE_NAME "google._upb._message"
+// end:github_only
+
+// begin:google_only
+// #define PYUPB_PROTOBUF_PUBLIC_PACKAGE "google3.net.proto2.python.public"
+// #define PYUPB_PROTOBUF_INTERNAL_PACKAGE "google3.net.proto2.python.internal"
+// #define PYUPB_DESCRIPTOR_PROTO_PACKAGE "proto2"
+// #define PYUPB_DESCRIPTOR_MODULE "google3.net.proto2.proto.descriptor_pb2"
+// #define PYUPB_MODULE_NAME "google3.third_party.upb.python._message"
+// end:google_only
+
+#define PYUPB_RETURN_OOM return PyErr_SetNone(PyExc_MemoryError), NULL
+
+struct PyUpb_WeakMap;
+typedef struct PyUpb_WeakMap PyUpb_WeakMap;
+
+// -----------------------------------------------------------------------------
+// ModuleState
+// -----------------------------------------------------------------------------
+
+// We store all "global" state in this struct instead of using (C) global
+// variables. This makes this extension compatible with sub-interpreters.
+
+typedef struct {
+ // From descriptor.c
+ PyTypeObject* descriptor_types[kPyUpb_Descriptor_Count];
+
+ // From descriptor_containers.c
+ PyTypeObject* by_name_map_type;
+ PyTypeObject* by_name_iterator_type;
+ PyTypeObject* by_number_map_type;
+ PyTypeObject* by_number_iterator_type;
+ PyTypeObject* generic_sequence_type;
+
+ // From descriptor_pool.c
+ PyObject* default_pool;
+
+ // From descriptor_pool.c
+ PyTypeObject* descriptor_pool_type;
+ upb_DefPool* c_descriptor_symtab;
+
+ // From extension_dict.c
+ PyTypeObject* extension_dict_type;
+ PyTypeObject* extension_iterator_type;
+
+ // From map.c
+ PyTypeObject* map_iterator_type;
+ PyTypeObject* message_map_container_type;
+ PyTypeObject* scalar_map_container_type;
+
+ // From message.c
+ PyObject* decode_error_class;
+ PyObject* descriptor_string;
+ PyObject* encode_error_class;
+ PyObject* enum_type_wrapper_class;
+ PyObject* message_class;
+ PyTypeObject* cmessage_type;
+ PyTypeObject* message_meta_type;
+ PyObject* listfields_item_key;
+
+ // From protobuf.c
+ bool allow_oversize_protos;
+ PyObject* wkt_bases;
+ PyTypeObject* arena_type;
+ PyUpb_WeakMap* obj_cache;
+
+ // From repeated.c
+ PyTypeObject* repeated_composite_container_type;
+ PyTypeObject* repeated_scalar_container_type;
+
+ // From unknown_fields.c
+ PyTypeObject* unknown_fields_type;
+ PyObject* unknown_field_type;
+} PyUpb_ModuleState;
+
+// Returns the global state object from the current interpreter. The current
+// interpreter is looked up from thread-local state.
+PyUpb_ModuleState* PyUpb_ModuleState_Get(void);
+PyUpb_ModuleState* PyUpb_ModuleState_GetFromModule(PyObject* module);
+
+// Returns NULL if module state is not yet available (during startup).
+// Any use of the module state during startup needs to be passed explicitly.
+PyUpb_ModuleState* PyUpb_ModuleState_MaybeGet(void);
+
+// Returns:
+// from google.protobuf.internal.well_known_types import WKTBASES
+//
+// This has to be imported lazily rather than at module load time, because
+// otherwise it would cause a circular import.
+PyObject* PyUpb_GetWktBases(PyUpb_ModuleState* state);
+
+// -----------------------------------------------------------------------------
+// WeakMap
+// -----------------------------------------------------------------------------
+
+// A WeakMap maps C pointers to the corresponding Python wrapper object. We
+// want a consistent Python wrapper object for each C object, both to save
+// memory and to provide object stability (ie. x is x).
+//
+// Each wrapped object should add itself to the map when it is constructed and
+// remove itself from the map when it is destroyed. The map is weak so it does
+// not take references to the cached objects.
+
+PyUpb_WeakMap* PyUpb_WeakMap_New(void);
+void PyUpb_WeakMap_Free(PyUpb_WeakMap* map);
+
+// Adds the given object to the map, indexed by the given key.
+void PyUpb_WeakMap_Add(PyUpb_WeakMap* map, const void* key, PyObject* py_obj);
+
+// Removes the given key from the cache. It must exist in the cache currently.
+void PyUpb_WeakMap_Delete(PyUpb_WeakMap* map, const void* key);
+void PyUpb_WeakMap_TryDelete(PyUpb_WeakMap* map, const void* key);
+
+// Returns a new reference to an object if it exists, otherwise returns NULL.
+PyObject* PyUpb_WeakMap_Get(PyUpb_WeakMap* map, const void* key);
+
+#define PYUPB_WEAKMAP_BEGIN UPB_INTTABLE_BEGIN
+
+// Iteration over the weak map, eg.
+//
+// intptr_t it = PYUPB_WEAKMAP_BEGIN;
+// while (PyUpb_WeakMap_Next(map, &key, &obj, &it)) {
+// // ...
+// }
+//
+// Note that the callee does not own a ref on the returned `obj`.
+bool PyUpb_WeakMap_Next(PyUpb_WeakMap* map, const void** key, PyObject** obj,
+ intptr_t* iter);
+void PyUpb_WeakMap_DeleteIter(PyUpb_WeakMap* map, intptr_t* iter);
+
+// -----------------------------------------------------------------------------
+// ObjCache
+// -----------------------------------------------------------------------------
+
+// The object cache is a global WeakMap for mapping upb objects to the
+// corresponding wrapper.
+void PyUpb_ObjCache_Add(const void* key, PyObject* py_obj);
+void PyUpb_ObjCache_Delete(const void* key);
+PyObject* PyUpb_ObjCache_Get(const void* key); // returns NULL if not present.
+PyUpb_WeakMap* PyUpb_ObjCache_Instance(void);
+
+// -----------------------------------------------------------------------------
+// Arena
+// -----------------------------------------------------------------------------
+
+PyObject* PyUpb_Arena_New(void);
+upb_Arena* PyUpb_Arena_Get(PyObject* arena);
+
+// -----------------------------------------------------------------------------
+// Utilities
+// -----------------------------------------------------------------------------
+
+PyTypeObject* AddObject(PyObject* m, const char* name, PyType_Spec* spec);
+
+// Creates a Python type from `spec` and adds it to the given module `m`.
+PyTypeObject* PyUpb_AddClass(PyObject* m, PyType_Spec* spec);
+
+// Like PyUpb_AddClass(), but allows you to specify a tuple of base classes
+// in `bases`.
+PyTypeObject* PyUpb_AddClassWithBases(PyObject* m, PyType_Spec* spec,
+ PyObject* bases);
+
+// A function that implements the tp_new slot for types that we do not allow
+// users to create directly. This will immediately fail with an error message.
+PyObject* PyUpb_Forbidden_New(PyObject* cls, PyObject* args, PyObject* kwds);
+
+// Our standard dealloc func. It follows the guidance defined in:
+// https://docs.python.org/3/c-api/typeobj.html#c.PyTypeObject.tp_dealloc
+// However it tests Py_TPFLAGS_HEAPTYPE dynamically so that a single dealloc
+// function can work for any type.
+static inline void PyUpb_Dealloc(void* self) {
+ PyTypeObject* tp = Py_TYPE(self);
+ assert(PyType_GetFlags(tp) & Py_TPFLAGS_HEAPTYPE);
+ freefunc tp_free = (freefunc)PyType_GetSlot(tp, Py_tp_free);
+ tp_free(self);
+ Py_DECREF(tp);
+}
+
+// Equivalent to the Py_NewRef() function introduced in Python 3.10. If/when we
+// drop support for Python <3.10, we can remove this function and replace all
+// callers with Py_NewRef().
+static inline PyObject* PyUpb_NewRef(PyObject* obj) {
+ Py_INCREF(obj);
+ return obj;
+}
+
+const char* PyUpb_GetStrData(PyObject* obj);
+const char* PyUpb_VerifyStrData(PyObject* obj);
+
+#endif // PYUPB_PROTOBUF_H__
diff --git a/upb/python/py_extension.bzl b/upb/python/py_extension.bzl
new file mode 100644
index 0000000..7b918bc
--- /dev/null
+++ b/upb/python/py_extension.bzl
@@ -0,0 +1,60 @@
+"""Macro to support py_extension """
+
+load("@bazel_skylib//lib:selects.bzl", "selects")
+
+def py_extension(name, srcs, copts, deps = [], **kwargs):
+ """Creates a C++ library to extend python
+
+ Args:
+ name: Name of the target
+ srcs: List of source files to create the target
+ copts: List of C++ compile options to use
+ deps: Libraries that the target depends on
+ """
+
+ native.cc_binary(
+ name = name + "_binary",
+ srcs = srcs,
+ copts = copts + ["-fvisibility=hidden"],
+ linkopts = selects.with_or({
+ (
+ "//python/dist:osx_x86_64",
+ "//python/dist:osx_aarch64",
+ ): ["-undefined", "dynamic_lookup"],
+ "//python/dist:windows_x86_32": ["-static-libgcc"],
+ "//conditions:default": [],
+ }),
+ linkshared = True,
+ linkstatic = True,
+ deps = deps + select({
+ "//python:limited_api_3.7": ["@python-3.7.0//:python_headers"],
+ "//python:full_api_3.7_win32": ["@nuget_python_i686_3.7.0//:python_full_api"],
+ "//python:full_api_3.7_win64": ["@nuget_python_x86-64_3.7.0//:python_full_api"],
+ "//python:full_api_3.8_win32": ["@nuget_python_i686_3.8.0//:python_full_api"],
+ "//python:full_api_3.8_win64": ["@nuget_python_x86-64_3.8.0//:python_full_api"],
+ "//python:full_api_3.9_win32": ["@nuget_python_i686_3.9.0//:python_full_api"],
+ "//python:full_api_3.9_win64": ["@nuget_python_x86-64_3.9.0//:python_full_api"],
+ "//python:limited_api_3.10_win32": ["@nuget_python_i686_3.10.0//:python_limited_api"],
+ "//python:limited_api_3.10_win64": ["@nuget_python_x86-64_3.10.0//:python_limited_api"],
+ "//conditions:default": ["@system_python//:python_headers"],
+ }),
+ **kwargs
+ )
+
+ EXT_SUFFIX = ".abi3.so"
+ output_file = "google/_upb/" + name + EXT_SUFFIX
+
+ native.genrule(
+ name = "copy" + name,
+ srcs = [":" + name + "_binary"],
+ outs = [output_file],
+ cmd = "cp $< $@",
+ visibility = ["//python:__subpackages__"],
+ )
+
+ native.py_library(
+ name = name,
+ data = [output_file],
+ imports = ["."],
+ visibility = ["//python:__subpackages__"],
+ )
diff --git a/upb/python/python_api.h b/upb/python/python_api.h
new file mode 100644
index 0000000..fae7df2
--- /dev/null
+++ b/upb/python/python_api.h
@@ -0,0 +1,64 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef PYUPB_PYTHON_H__
+#define PYUPB_PYTHON_H__
+
+// We restrict ourselves to the limited API, so that a single build can be
+// ABI-compatible with a wide range of Python versions.
+//
+// The build system will define Py_LIMITED_API as appropriate (see BUILD). We
+// only want to define it for our distribution packages, since we can do some
+// extra assertions when Py_LIMITED_API is not defined. Also Py_LIMITED_API is
+// incompatible with Py_DEBUG.
+
+// #define Py_LIMITED_API <val> // Defined by build system when appropriate.
+
+#include "Python.h"
+
+// Ideally we could restrict ourselves to the limited API of 3.7, but this is
+// a very important function that was not officially added to the limited API
+// until 3.10. Without this function, there is no way of getting data from a
+// Python `str` object without a copy.
+//
+// While this function was not *officially* added to the limited API until
+// Python 3.10, In practice it has been stable since Python 3.1.
+// https://bugs.python.org/issue41784
+//
+// On Linux/ELF and macOS/Mach-O, we can get away with using this function with
+// the limited API prior to 3.10.
+
+#if (defined(__linux__) || defined(__APPLE__)) && defined(Py_LIMITED_API) && \
+ Py_LIMITED_API < 0x03100000
+PyAPI_FUNC(const char*)
+ PyUnicode_AsUTF8AndSize(PyObject* unicode, Py_ssize_t* size);
+#endif
+
+#endif // PYUPB_PYTHON_H__
diff --git a/upb/python/repeated.c b/upb/python/repeated.c
new file mode 100644
index 0000000..b526c88
--- /dev/null
+++ b/upb/python/repeated.c
@@ -0,0 +1,833 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "python/repeated.h"
+
+#include "python/convert.h"
+#include "python/message.h"
+#include "python/protobuf.h"
+
+static PyObject* PyUpb_RepeatedCompositeContainer_Append(PyObject* _self,
+ PyObject* value);
+static PyObject* PyUpb_RepeatedScalarContainer_Append(PyObject* _self,
+ PyObject* value);
+
+// For an expression like:
+// foo[index]
+//
+// Converts `index` to an effective i/count/step, for a repeated field
+// field of size `size`.
+static bool IndexToRange(PyObject* index, Py_ssize_t size, Py_ssize_t* i,
+ Py_ssize_t* count, Py_ssize_t* step) {
+ assert(i && count && step);
+ if (PySlice_Check(index)) {
+ Py_ssize_t start, stop;
+ if (PySlice_Unpack(index, &start, &stop, step) < 0) return false;
+ *count = PySlice_AdjustIndices(size, &start, &stop, *step);
+ *i = start;
+ } else {
+ *i = PyNumber_AsSsize_t(index, PyExc_IndexError);
+
+ if (*i == -1 && PyErr_Occurred()) {
+ PyErr_SetString(PyExc_TypeError, "list indices must be integers");
+ return false;
+ }
+
+ if (*i < 0) *i += size;
+ *step = 0;
+ *count = 1;
+
+ if (*i < 0 || size <= *i) {
+ PyErr_Format(PyExc_IndexError, "list index out of range");
+ return false;
+ }
+ }
+ return true;
+}
+
+// Wrapper for a repeated field.
+typedef struct {
+ PyObject_HEAD;
+ PyObject* arena;
+ // The field descriptor (PyObject*).
+ // The low bit indicates whether the container is reified (see ptr below).
+ // - low bit set: repeated field is a stub (no underlying data).
+ // - low bit clear: repeated field is reified (points to upb_Array).
+ uintptr_t field;
+ union {
+ PyObject* parent; // stub: owning pointer to parent message.
+ upb_Array* arr; // reified: the data for this array.
+ } ptr;
+} PyUpb_RepeatedContainer;
+
+static bool PyUpb_RepeatedContainer_IsStub(PyUpb_RepeatedContainer* self) {
+ return self->field & 1;
+}
+
+static PyObject* PyUpb_RepeatedContainer_GetFieldDescriptor(
+ PyUpb_RepeatedContainer* self) {
+ return (PyObject*)(self->field & ~(uintptr_t)1);
+}
+
+static const upb_FieldDef* PyUpb_RepeatedContainer_GetField(
+ PyUpb_RepeatedContainer* self) {
+ return PyUpb_FieldDescriptor_GetDef(
+ PyUpb_RepeatedContainer_GetFieldDescriptor(self));
+}
+
+// If the repeated field is reified, returns it. Otherwise, returns NULL.
+// If NULL is returned, the object is empty and has no underlying data.
+static upb_Array* PyUpb_RepeatedContainer_GetIfReified(
+ PyUpb_RepeatedContainer* self) {
+ return PyUpb_RepeatedContainer_IsStub(self) ? NULL : self->ptr.arr;
+}
+
+void PyUpb_RepeatedContainer_Reify(PyObject* _self, upb_Array* arr) {
+ PyUpb_RepeatedContainer* self = (PyUpb_RepeatedContainer*)_self;
+ assert(PyUpb_RepeatedContainer_IsStub(self));
+ if (!arr) {
+ const upb_FieldDef* f = PyUpb_RepeatedContainer_GetField(self);
+ upb_Arena* arena = PyUpb_Arena_Get(self->arena);
+ arr = upb_Array_New(arena, upb_FieldDef_CType(f));
+ }
+ PyUpb_ObjCache_Add(arr, &self->ob_base);
+ Py_DECREF(self->ptr.parent);
+ self->ptr.arr = arr; // Overwrites self->ptr.parent.
+ self->field &= ~(uintptr_t)1;
+ assert(!PyUpb_RepeatedContainer_IsStub(self));
+}
+
+upb_Array* PyUpb_RepeatedContainer_EnsureReified(PyObject* _self) {
+ PyUpb_RepeatedContainer* self = (PyUpb_RepeatedContainer*)_self;
+ upb_Array* arr = PyUpb_RepeatedContainer_GetIfReified(self);
+ if (arr) return arr; // Already writable.
+
+ const upb_FieldDef* f = PyUpb_RepeatedContainer_GetField(self);
+ upb_Arena* arena = PyUpb_Arena_Get(self->arena);
+ arr = upb_Array_New(arena, upb_FieldDef_CType(f));
+ PyUpb_Message_SetConcreteSubobj(self->ptr.parent, f,
+ (upb_MessageValue){.array_val = arr});
+ PyUpb_RepeatedContainer_Reify((PyObject*)self, arr);
+ return arr;
+}
+
+static void PyUpb_RepeatedContainer_Dealloc(PyObject* _self) {
+ PyUpb_RepeatedContainer* self = (PyUpb_RepeatedContainer*)_self;
+ Py_DECREF(self->arena);
+ if (PyUpb_RepeatedContainer_IsStub(self)) {
+ PyUpb_Message_CacheDelete(self->ptr.parent,
+ PyUpb_RepeatedContainer_GetField(self));
+ Py_DECREF(self->ptr.parent);
+ } else {
+ PyUpb_ObjCache_Delete(self->ptr.arr);
+ }
+ Py_DECREF(PyUpb_RepeatedContainer_GetFieldDescriptor(self));
+ PyUpb_Dealloc(self);
+}
+
+static PyTypeObject* PyUpb_RepeatedContainer_GetClass(const upb_FieldDef* f) {
+ assert(upb_FieldDef_IsRepeated(f) && !upb_FieldDef_IsMap(f));
+ PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
+ return upb_FieldDef_IsSubMessage(f) ? state->repeated_composite_container_type
+ : state->repeated_scalar_container_type;
+}
+
+static Py_ssize_t PyUpb_RepeatedContainer_Length(PyObject* self) {
+ upb_Array* arr =
+ PyUpb_RepeatedContainer_GetIfReified((PyUpb_RepeatedContainer*)self);
+ return arr ? upb_Array_Size(arr) : 0;
+}
+
+PyObject* PyUpb_RepeatedContainer_NewStub(PyObject* parent,
+ const upb_FieldDef* f,
+ PyObject* arena) {
+ // We only create stubs when the parent is reified, by convention. However
+ // this is not an invariant: the parent could become reified at any time.
+ assert(PyUpb_Message_GetIfReified(parent) == NULL);
+ PyTypeObject* cls = PyUpb_RepeatedContainer_GetClass(f);
+ PyUpb_RepeatedContainer* repeated = (void*)PyType_GenericAlloc(cls, 0);
+ repeated->arena = arena;
+ repeated->field = (uintptr_t)PyUpb_FieldDescriptor_Get(f) | 1;
+ repeated->ptr.parent = parent;
+ Py_INCREF(arena);
+ Py_INCREF(parent);
+ return &repeated->ob_base;
+}
+
+PyObject* PyUpb_RepeatedContainer_GetOrCreateWrapper(upb_Array* arr,
+ const upb_FieldDef* f,
+ PyObject* arena) {
+ PyObject* ret = PyUpb_ObjCache_Get(arr);
+ if (ret) return ret;
+
+ PyTypeObject* cls = PyUpb_RepeatedContainer_GetClass(f);
+ PyUpb_RepeatedContainer* repeated = (void*)PyType_GenericAlloc(cls, 0);
+ repeated->arena = arena;
+ repeated->field = (uintptr_t)PyUpb_FieldDescriptor_Get(f);
+ repeated->ptr.arr = arr;
+ ret = &repeated->ob_base;
+ Py_INCREF(arena);
+ PyUpb_ObjCache_Add(arr, ret);
+ return ret;
+}
+
+static PyObject* PyUpb_RepeatedContainer_MergeFrom(PyObject* _self,
+ PyObject* args);
+
+PyObject* PyUpb_RepeatedContainer_DeepCopy(PyObject* _self, PyObject* value) {
+ PyUpb_RepeatedContainer* self = (PyUpb_RepeatedContainer*)_self;
+ PyUpb_RepeatedContainer* clone =
+ (void*)PyType_GenericAlloc(Py_TYPE(_self), 0);
+ if (clone == NULL) return NULL;
+ const upb_FieldDef* f = PyUpb_RepeatedContainer_GetField(self);
+ clone->arena = PyUpb_Arena_New();
+ clone->field = (uintptr_t)PyUpb_FieldDescriptor_Get(f);
+ clone->ptr.arr =
+ upb_Array_New(PyUpb_Arena_Get(clone->arena), upb_FieldDef_CType(f));
+ PyUpb_ObjCache_Add(clone->ptr.arr, (PyObject*)clone);
+ PyObject* result = PyUpb_RepeatedContainer_MergeFrom((PyObject*)clone, _self);
+ if (!result) {
+ Py_DECREF(clone);
+ return NULL;
+ }
+ Py_DECREF(result);
+ return (PyObject*)clone;
+}
+
+PyObject* PyUpb_RepeatedContainer_Extend(PyObject* _self, PyObject* value) {
+ PyUpb_RepeatedContainer* self = (PyUpb_RepeatedContainer*)_self;
+ upb_Array* arr = PyUpb_RepeatedContainer_EnsureReified(_self);
+ size_t start_size = upb_Array_Size(arr);
+ PyObject* it = PyObject_GetIter(value);
+ if (!it) {
+ PyErr_SetString(PyExc_TypeError, "Value must be iterable");
+ return NULL;
+ }
+
+ const upb_FieldDef* f = PyUpb_RepeatedContainer_GetField(self);
+ bool submsg = upb_FieldDef_IsSubMessage(f);
+ PyObject* e;
+
+ while ((e = PyIter_Next(it))) {
+ PyObject* ret;
+ if (submsg) {
+ ret = PyUpb_RepeatedCompositeContainer_Append(_self, e);
+ } else {
+ ret = PyUpb_RepeatedScalarContainer_Append(_self, e);
+ }
+ Py_XDECREF(ret);
+ Py_DECREF(e);
+ }
+
+ Py_DECREF(it);
+
+ if (PyErr_Occurred()) {
+ upb_Array_Resize(arr, start_size, NULL);
+ return NULL;
+ }
+
+ Py_RETURN_NONE;
+}
+
+static PyObject* PyUpb_RepeatedContainer_Item(PyObject* _self,
+ Py_ssize_t index) {
+ PyUpb_RepeatedContainer* self = (PyUpb_RepeatedContainer*)_self;
+ upb_Array* arr = PyUpb_RepeatedContainer_GetIfReified(self);
+ Py_ssize_t size = arr ? upb_Array_Size(arr) : 0;
+ if (index < 0 || index >= size) {
+ PyErr_Format(PyExc_IndexError, "list index (%zd) out of range", index);
+ return NULL;
+ }
+ const upb_FieldDef* f = PyUpb_RepeatedContainer_GetField(self);
+ return PyUpb_UpbToPy(upb_Array_Get(arr, index), f, self->arena);
+}
+
+PyObject* PyUpb_RepeatedContainer_ToList(PyObject* _self) {
+ PyUpb_RepeatedContainer* self = (PyUpb_RepeatedContainer*)_self;
+ upb_Array* arr = PyUpb_RepeatedContainer_GetIfReified(self);
+ if (!arr) return PyList_New(0);
+
+ const upb_FieldDef* f = PyUpb_RepeatedContainer_GetField(self);
+ size_t n = upb_Array_Size(arr);
+ PyObject* list = PyList_New(n);
+ for (size_t i = 0; i < n; i++) {
+ PyObject* val = PyUpb_UpbToPy(upb_Array_Get(arr, i), f, self->arena);
+ if (!val) {
+ Py_DECREF(list);
+ return NULL;
+ }
+ PyList_SetItem(list, i, val);
+ }
+ return list;
+}
+
+static PyObject* PyUpb_RepeatedContainer_Repr(PyObject* _self) {
+ PyObject* list = PyUpb_RepeatedContainer_ToList(_self);
+ if (!list) return NULL;
+ assert(!PyErr_Occurred());
+ PyObject* repr = PyObject_Repr(list);
+ Py_DECREF(list);
+ return repr;
+}
+
+static PyObject* PyUpb_RepeatedContainer_RichCompare(PyObject* _self,
+ PyObject* _other,
+ int opid) {
+ if (opid != Py_EQ && opid != Py_NE) {
+ Py_INCREF(Py_NotImplemented);
+ return Py_NotImplemented;
+ }
+ PyObject* list1 = PyUpb_RepeatedContainer_ToList(_self);
+ PyObject* list2 = _other;
+ PyObject* del = NULL;
+ if (PyObject_TypeCheck(_other, _self->ob_type)) {
+ del = list2 = PyUpb_RepeatedContainer_ToList(_other);
+ }
+ PyObject* ret = PyObject_RichCompare(list1, list2, opid);
+ Py_DECREF(list1);
+ Py_XDECREF(del);
+ return ret;
+}
+
+static PyObject* PyUpb_RepeatedContainer_Subscript(PyObject* _self,
+ PyObject* key) {
+ PyUpb_RepeatedContainer* self = (PyUpb_RepeatedContainer*)_self;
+ upb_Array* arr = PyUpb_RepeatedContainer_GetIfReified(self);
+ Py_ssize_t size = arr ? upb_Array_Size(arr) : 0;
+ Py_ssize_t idx, count, step;
+ if (!IndexToRange(key, size, &idx, &count, &step)) return NULL;
+ const upb_FieldDef* f = PyUpb_RepeatedContainer_GetField(self);
+ if (step == 0) {
+ return PyUpb_UpbToPy(upb_Array_Get(arr, idx), f, self->arena);
+ } else {
+ PyObject* list = PyList_New(count);
+ for (Py_ssize_t i = 0; i < count; i++, idx += step) {
+ upb_MessageValue msgval = upb_Array_Get(self->ptr.arr, idx);
+ PyObject* item = PyUpb_UpbToPy(msgval, f, self->arena);
+ if (!item) {
+ Py_DECREF(list);
+ return NULL;
+ }
+ PyList_SetItem(list, i, item);
+ }
+ return list;
+ }
+}
+
+static int PyUpb_RepeatedContainer_SetSubscript(
+ PyUpb_RepeatedContainer* self, upb_Array* arr, const upb_FieldDef* f,
+ Py_ssize_t idx, Py_ssize_t count, Py_ssize_t step, PyObject* value) {
+ upb_Arena* arena = PyUpb_Arena_Get(self->arena);
+ if (upb_FieldDef_IsSubMessage(f)) {
+ PyErr_SetString(PyExc_TypeError, "does not support assignment");
+ return -1;
+ }
+
+ if (step == 0) {
+ // Set single value.
+ upb_MessageValue msgval;
+ if (!PyUpb_PyToUpb(value, f, &msgval, arena)) return -1;
+ upb_Array_Set(arr, idx, msgval);
+ return 0;
+ }
+
+ // Set range.
+ PyObject* seq =
+ PySequence_Fast(value, "must assign iterable to extended slice");
+ PyObject* item = NULL;
+ int ret = -1;
+ if (!seq) goto err;
+ Py_ssize_t seq_size = PySequence_Size(seq);
+ if (seq_size != count) {
+ if (step == 1) {
+ // We must shift the tail elements (either right or left).
+ size_t tail = upb_Array_Size(arr) - (idx + count);
+ upb_Array_Resize(arr, idx + seq_size + tail, arena);
+ upb_Array_Move(arr, idx + seq_size, idx + count, tail);
+ count = seq_size;
+ } else {
+ PyErr_Format(PyExc_ValueError,
+ "attempt to assign sequence of %zd to extended slice "
+ "of size %zd",
+ seq_size, count);
+ goto err;
+ }
+ }
+ for (Py_ssize_t i = 0; i < count; i++, idx += step) {
+ upb_MessageValue msgval;
+ item = PySequence_GetItem(seq, i);
+ if (!item) goto err;
+ // XXX: if this fails we can leave the list partially mutated.
+ if (!PyUpb_PyToUpb(item, f, &msgval, arena)) goto err;
+ Py_DECREF(item);
+ item = NULL;
+ upb_Array_Set(arr, idx, msgval);
+ }
+ ret = 0;
+
+err:
+ Py_XDECREF(seq);
+ Py_XDECREF(item);
+ return ret;
+}
+
+static int PyUpb_RepeatedContainer_DeleteSubscript(upb_Array* arr,
+ Py_ssize_t idx,
+ Py_ssize_t count,
+ Py_ssize_t step) {
+ // Normalize direction: deletion is order-independent.
+ Py_ssize_t start = idx;
+ if (step < 0) {
+ Py_ssize_t end = start + step * (count - 1);
+ start = end;
+ step = -step;
+ }
+
+ size_t dst = start;
+ size_t src;
+ if (step > 1) {
+ // Move elements between steps:
+ //
+ // src
+ // |
+ // |------X---X---X---X------------------------------|
+ // |
+ // dst <-------- tail -------------->
+ src = start + 1;
+ for (Py_ssize_t i = 1; i < count; i++, dst += step - 1, src += step) {
+ upb_Array_Move(arr, dst, src, step);
+ }
+ } else {
+ src = start + count;
+ }
+
+ // Move tail.
+ size_t tail = upb_Array_Size(arr) - src;
+ size_t new_size = dst + tail;
+ assert(new_size == upb_Array_Size(arr) - count);
+ upb_Array_Move(arr, dst, src, tail);
+ upb_Array_Resize(arr, new_size, NULL);
+ return 0;
+}
+
+static int PyUpb_RepeatedContainer_AssignSubscript(PyObject* _self,
+ PyObject* key,
+ PyObject* value) {
+ PyUpb_RepeatedContainer* self = (PyUpb_RepeatedContainer*)_self;
+ const upb_FieldDef* f = PyUpb_RepeatedContainer_GetField(self);
+ upb_Array* arr = PyUpb_RepeatedContainer_EnsureReified(_self);
+ Py_ssize_t size = arr ? upb_Array_Size(arr) : 0;
+ Py_ssize_t idx, count, step;
+ if (!IndexToRange(key, size, &idx, &count, &step)) return -1;
+ if (value) {
+ return PyUpb_RepeatedContainer_SetSubscript(self, arr, f, idx, count, step,
+ value);
+ } else {
+ return PyUpb_RepeatedContainer_DeleteSubscript(arr, idx, count, step);
+ }
+}
+
+static PyObject* PyUpb_RepeatedContainer_Pop(PyObject* _self, PyObject* args) {
+ PyUpb_RepeatedContainer* self = (PyUpb_RepeatedContainer*)_self;
+ Py_ssize_t index = -1;
+ if (!PyArg_ParseTuple(args, "|n", &index)) return NULL;
+ upb_Array* arr = PyUpb_RepeatedContainer_EnsureReified(_self);
+ size_t size = upb_Array_Size(arr);
+ if (index < 0) index += size;
+ if (index >= size) index = size - 1;
+ PyObject* ret = PyUpb_RepeatedContainer_Item(_self, index);
+ if (!ret) return NULL;
+ upb_Array_Delete(self->ptr.arr, index, 1);
+ return ret;
+}
+
+static PyObject* PyUpb_RepeatedContainer_Remove(PyObject* _self,
+ PyObject* value) {
+ upb_Array* arr = PyUpb_RepeatedContainer_EnsureReified(_self);
+ Py_ssize_t match_index = -1;
+ Py_ssize_t n = PyUpb_RepeatedContainer_Length(_self);
+ for (Py_ssize_t i = 0; i < n; ++i) {
+ PyObject* elem = PyUpb_RepeatedContainer_Item(_self, i);
+ if (!elem) return NULL;
+ int eq = PyObject_RichCompareBool(elem, value, Py_EQ);
+ Py_DECREF(elem);
+ if (eq) {
+ match_index = i;
+ break;
+ }
+ }
+ if (match_index == -1) {
+ PyErr_SetString(PyExc_ValueError, "remove(x): x not in container");
+ return NULL;
+ }
+ if (PyUpb_RepeatedContainer_DeleteSubscript(arr, match_index, 1, 1) < 0) {
+ return NULL;
+ }
+ Py_RETURN_NONE;
+}
+
+// A helper function used only for Sort().
+static bool PyUpb_RepeatedContainer_Assign(PyObject* _self, PyObject* list) {
+ PyUpb_RepeatedContainer* self = (PyUpb_RepeatedContainer*)_self;
+ const upb_FieldDef* f = PyUpb_RepeatedContainer_GetField(self);
+ upb_Array* arr = PyUpb_RepeatedContainer_EnsureReified(_self);
+ Py_ssize_t size = PyList_Size(list);
+ bool submsg = upb_FieldDef_IsSubMessage(f);
+ upb_Arena* arena = PyUpb_Arena_Get(self->arena);
+ for (Py_ssize_t i = 0; i < size; ++i) {
+ PyObject* obj = PyList_GetItem(list, i);
+ upb_MessageValue msgval;
+ if (submsg) {
+ msgval.msg_val = PyUpb_Message_GetIfReified(obj);
+ assert(msgval.msg_val);
+ } else {
+ if (!PyUpb_PyToUpb(obj, f, &msgval, arena)) return false;
+ }
+ upb_Array_Set(arr, i, msgval);
+ }
+ return true;
+}
+
+static PyObject* PyUpb_RepeatedContainer_Sort(PyObject* pself, PyObject* args,
+ PyObject* kwds) {
+ // Support the old sort_function argument for backwards
+ // compatibility.
+ if (kwds != NULL) {
+ PyObject* sort_func = PyDict_GetItemString(kwds, "sort_function");
+ if (sort_func != NULL) {
+ // Must set before deleting as sort_func is a borrowed reference
+ // and kwds might be the only thing keeping it alive.
+ if (PyDict_SetItemString(kwds, "cmp", sort_func) == -1) return NULL;
+ if (PyDict_DelItemString(kwds, "sort_function") == -1) return NULL;
+ }
+ }
+
+ PyObject* ret = NULL;
+ PyObject* full_slice = NULL;
+ PyObject* list = NULL;
+ PyObject* m = NULL;
+ PyObject* res = NULL;
+ if ((full_slice = PySlice_New(NULL, NULL, NULL)) &&
+ (list = PyUpb_RepeatedContainer_Subscript(pself, full_slice)) &&
+ (m = PyObject_GetAttrString(list, "sort")) &&
+ (res = PyObject_Call(m, args, kwds)) &&
+ PyUpb_RepeatedContainer_Assign(pself, list)) {
+ Py_INCREF(Py_None);
+ ret = Py_None;
+ }
+
+ Py_XDECREF(full_slice);
+ Py_XDECREF(list);
+ Py_XDECREF(m);
+ Py_XDECREF(res);
+ return ret;
+}
+
+static PyObject* PyUpb_RepeatedContainer_Reverse(PyObject* _self) {
+ upb_Array* arr = PyUpb_RepeatedContainer_EnsureReified(_self);
+ size_t n = upb_Array_Size(arr);
+ size_t half = n / 2; // Rounds down.
+ for (size_t i = 0; i < half; i++) {
+ size_t i2 = n - i - 1;
+ upb_MessageValue val1 = upb_Array_Get(arr, i);
+ upb_MessageValue val2 = upb_Array_Get(arr, i2);
+ upb_Array_Set(arr, i, val2);
+ upb_Array_Set(arr, i2, val1);
+ }
+ Py_RETURN_NONE;
+}
+
+static PyObject* PyUpb_RepeatedContainer_MergeFrom(PyObject* _self,
+ PyObject* args) {
+ return PyUpb_RepeatedContainer_Extend(_self, args);
+}
+
+// -----------------------------------------------------------------------------
+// RepeatedCompositeContainer
+// -----------------------------------------------------------------------------
+
+static PyObject* PyUpb_RepeatedCompositeContainer_AppendNew(PyObject* _self) {
+ PyUpb_RepeatedContainer* self = (PyUpb_RepeatedContainer*)_self;
+ upb_Array* arr = PyUpb_RepeatedContainer_EnsureReified(_self);
+ if (!arr) return NULL;
+ const upb_FieldDef* f = PyUpb_RepeatedContainer_GetField(self);
+ upb_Arena* arena = PyUpb_Arena_Get(self->arena);
+ const upb_MessageDef* m = upb_FieldDef_MessageSubDef(f);
+ const upb_MiniTable* layout = upb_MessageDef_MiniTable(m);
+ upb_Message* msg = upb_Message_New(layout, arena);
+ upb_MessageValue msgval = {.msg_val = msg};
+ upb_Array_Append(arr, msgval, arena);
+ return PyUpb_Message_Get(msg, m, self->arena);
+}
+
+PyObject* PyUpb_RepeatedCompositeContainer_Add(PyObject* _self, PyObject* args,
+ PyObject* kwargs) {
+ PyUpb_RepeatedContainer* self = (PyUpb_RepeatedContainer*)_self;
+ PyObject* py_msg = PyUpb_RepeatedCompositeContainer_AppendNew(_self);
+ if (!py_msg) return NULL;
+ if (PyUpb_Message_InitAttributes(py_msg, args, kwargs) < 0) {
+ Py_DECREF(py_msg);
+ upb_Array_Delete(self->ptr.arr, upb_Array_Size(self->ptr.arr) - 1, 1);
+ return NULL;
+ }
+ return py_msg;
+}
+
+static PyObject* PyUpb_RepeatedCompositeContainer_Append(PyObject* _self,
+ PyObject* value) {
+ if (!PyUpb_Message_Verify(value)) return NULL;
+ PyObject* py_msg = PyUpb_RepeatedCompositeContainer_AppendNew(_self);
+ if (!py_msg) return NULL;
+ PyObject* none = PyUpb_Message_MergeFrom(py_msg, value);
+ if (!none) {
+ Py_DECREF(py_msg);
+ return NULL;
+ }
+ Py_DECREF(none);
+ return py_msg;
+}
+
+static PyObject* PyUpb_RepeatedContainer_Insert(PyObject* _self,
+ PyObject* args) {
+ PyUpb_RepeatedContainer* self = (PyUpb_RepeatedContainer*)_self;
+ Py_ssize_t index;
+ PyObject* value;
+ if (!PyArg_ParseTuple(args, "nO", &index, &value)) return NULL;
+ upb_Array* arr = PyUpb_RepeatedContainer_EnsureReified(_self);
+ if (!arr) return NULL;
+
+ // Normalize index.
+ Py_ssize_t size = upb_Array_Size(arr);
+ if (index < 0) index += size;
+ if (index < 0) index = 0;
+ if (index > size) index = size;
+
+ const upb_FieldDef* f = PyUpb_RepeatedContainer_GetField(self);
+ upb_MessageValue msgval;
+ upb_Arena* arena = PyUpb_Arena_Get(self->arena);
+ if (upb_FieldDef_IsSubMessage(f)) {
+ // Create message.
+ const upb_MessageDef* m = upb_FieldDef_MessageSubDef(f);
+ const upb_MiniTable* layout = upb_MessageDef_MiniTable(m);
+ upb_Message* msg = upb_Message_New(layout, arena);
+ PyObject* py_msg = PyUpb_Message_Get(msg, m, self->arena);
+ PyObject* ret = PyUpb_Message_MergeFrom(py_msg, value);
+ Py_DECREF(py_msg);
+ if (!ret) return NULL;
+ Py_DECREF(ret);
+ msgval.msg_val = msg;
+ } else {
+ if (!PyUpb_PyToUpb(value, f, &msgval, arena)) return NULL;
+ }
+
+ upb_Array_Insert(arr, index, 1, arena);
+ upb_Array_Set(arr, index, msgval);
+
+ Py_RETURN_NONE;
+}
+
+static PyMethodDef PyUpb_RepeatedCompositeContainer_Methods[] = {
+ {"__deepcopy__", PyUpb_RepeatedContainer_DeepCopy, METH_VARARGS,
+ "Makes a deep copy of the class."},
+ {"add", (PyCFunction)PyUpb_RepeatedCompositeContainer_Add,
+ METH_VARARGS | METH_KEYWORDS, "Adds an object to the repeated container."},
+ {"append", PyUpb_RepeatedCompositeContainer_Append, METH_O,
+ "Appends a message to the end of the repeated container."},
+ {"insert", PyUpb_RepeatedContainer_Insert, METH_VARARGS,
+ "Inserts a message before the specified index."},
+ {"extend", PyUpb_RepeatedContainer_Extend, METH_O,
+ "Adds objects to the repeated container."},
+ {"pop", PyUpb_RepeatedContainer_Pop, METH_VARARGS,
+ "Removes an object from the repeated container and returns it."},
+ {"remove", PyUpb_RepeatedContainer_Remove, METH_O,
+ "Removes an object from the repeated container."},
+ {"sort", (PyCFunction)PyUpb_RepeatedContainer_Sort,
+ METH_VARARGS | METH_KEYWORDS, "Sorts the repeated container."},
+ {"reverse", (PyCFunction)PyUpb_RepeatedContainer_Reverse, METH_NOARGS,
+ "Reverses elements order of the repeated container."},
+ {"MergeFrom", PyUpb_RepeatedContainer_MergeFrom, METH_O,
+ "Adds objects to the repeated container."},
+ {NULL, NULL}};
+
+static PyType_Slot PyUpb_RepeatedCompositeContainer_Slots[] = {
+ {Py_tp_dealloc, PyUpb_RepeatedContainer_Dealloc},
+ {Py_tp_methods, PyUpb_RepeatedCompositeContainer_Methods},
+ {Py_sq_length, PyUpb_RepeatedContainer_Length},
+ {Py_sq_item, PyUpb_RepeatedContainer_Item},
+ {Py_mp_length, PyUpb_RepeatedContainer_Length},
+ {Py_tp_repr, PyUpb_RepeatedContainer_Repr},
+ {Py_mp_subscript, PyUpb_RepeatedContainer_Subscript},
+ {Py_mp_ass_subscript, PyUpb_RepeatedContainer_AssignSubscript},
+ {Py_tp_new, PyUpb_Forbidden_New},
+ {Py_tp_richcompare, PyUpb_RepeatedContainer_RichCompare},
+ {Py_tp_hash, PyObject_HashNotImplemented},
+ {0, NULL}};
+
+static PyType_Spec PyUpb_RepeatedCompositeContainer_Spec = {
+ PYUPB_MODULE_NAME ".RepeatedCompositeContainer",
+ sizeof(PyUpb_RepeatedContainer),
+ 0, // tp_itemsize
+ Py_TPFLAGS_DEFAULT,
+ PyUpb_RepeatedCompositeContainer_Slots,
+};
+
+// -----------------------------------------------------------------------------
+// RepeatedScalarContainer
+// -----------------------------------------------------------------------------
+
+static PyObject* PyUpb_RepeatedScalarContainer_Append(PyObject* _self,
+ PyObject* value) {
+ PyUpb_RepeatedContainer* self = (PyUpb_RepeatedContainer*)_self;
+ upb_Array* arr = PyUpb_RepeatedContainer_EnsureReified(_self);
+ upb_Arena* arena = PyUpb_Arena_Get(self->arena);
+ const upb_FieldDef* f = PyUpb_RepeatedContainer_GetField(self);
+ upb_MessageValue msgval;
+ if (!PyUpb_PyToUpb(value, f, &msgval, arena)) {
+ return NULL;
+ }
+ upb_Array_Append(arr, msgval, arena);
+ Py_RETURN_NONE;
+}
+
+static int PyUpb_RepeatedScalarContainer_AssignItem(PyObject* _self,
+ Py_ssize_t index,
+ PyObject* item) {
+ PyUpb_RepeatedContainer* self = (PyUpb_RepeatedContainer*)_self;
+ upb_Array* arr = PyUpb_RepeatedContainer_GetIfReified(self);
+ Py_ssize_t size = arr ? upb_Array_Size(arr) : 0;
+ if (index < 0 || index >= size) {
+ PyErr_Format(PyExc_IndexError, "list index (%zd) out of range", index);
+ return -1;
+ }
+ const upb_FieldDef* f = PyUpb_RepeatedContainer_GetField(self);
+ upb_MessageValue msgval;
+ upb_Arena* arena = PyUpb_Arena_Get(self->arena);
+ if (!PyUpb_PyToUpb(item, f, &msgval, arena)) {
+ return -1;
+ }
+ upb_Array_Set(self->ptr.arr, index, msgval);
+ return 0;
+}
+
+static PyObject* PyUpb_RepeatedScalarContainer_Reduce(PyObject* unused_self,
+ PyObject* unused_other) {
+ PyObject* pickle_module = PyImport_ImportModule("pickle");
+ if (!pickle_module) return NULL;
+ PyObject* pickle_error = PyObject_GetAttrString(pickle_module, "PickleError");
+ Py_DECREF(pickle_module);
+ if (!pickle_error) return NULL;
+ PyErr_Format(pickle_error,
+ "can't pickle repeated message fields, convert to list first");
+ Py_DECREF(pickle_error);
+ return NULL;
+}
+
+static PyMethodDef PyUpb_RepeatedScalarContainer_Methods[] = {
+ {"__deepcopy__", PyUpb_RepeatedContainer_DeepCopy, METH_VARARGS,
+ "Makes a deep copy of the class."},
+ {"__reduce__", PyUpb_RepeatedScalarContainer_Reduce, METH_NOARGS,
+ "Outputs picklable representation of the repeated field."},
+ {"append", PyUpb_RepeatedScalarContainer_Append, METH_O,
+ "Appends an object to the repeated container."},
+ {"extend", PyUpb_RepeatedContainer_Extend, METH_O,
+ "Appends objects to the repeated container."},
+ {"insert", PyUpb_RepeatedContainer_Insert, METH_VARARGS,
+ "Inserts an object at the specified position in the container."},
+ {"pop", PyUpb_RepeatedContainer_Pop, METH_VARARGS,
+ "Removes an object from the repeated container and returns it."},
+ {"remove", PyUpb_RepeatedContainer_Remove, METH_O,
+ "Removes an object from the repeated container."},
+ {"sort", (PyCFunction)PyUpb_RepeatedContainer_Sort,
+ METH_VARARGS | METH_KEYWORDS, "Sorts the repeated container."},
+ {"reverse", (PyCFunction)PyUpb_RepeatedContainer_Reverse, METH_NOARGS,
+ "Reverses elements order of the repeated container."},
+ {"MergeFrom", PyUpb_RepeatedContainer_MergeFrom, METH_O,
+ "Merges a repeated container into the current container."},
+ {NULL, NULL}};
+
+static PyType_Slot PyUpb_RepeatedScalarContainer_Slots[] = {
+ {Py_tp_dealloc, PyUpb_RepeatedContainer_Dealloc},
+ {Py_tp_methods, PyUpb_RepeatedScalarContainer_Methods},
+ {Py_tp_new, PyUpb_Forbidden_New},
+ {Py_tp_repr, PyUpb_RepeatedContainer_Repr},
+ {Py_sq_length, PyUpb_RepeatedContainer_Length},
+ {Py_sq_item, PyUpb_RepeatedContainer_Item},
+ {Py_sq_ass_item, PyUpb_RepeatedScalarContainer_AssignItem},
+ {Py_mp_length, PyUpb_RepeatedContainer_Length},
+ {Py_mp_subscript, PyUpb_RepeatedContainer_Subscript},
+ {Py_mp_ass_subscript, PyUpb_RepeatedContainer_AssignSubscript},
+ {Py_tp_richcompare, PyUpb_RepeatedContainer_RichCompare},
+ {Py_tp_hash, PyObject_HashNotImplemented},
+ {0, NULL}};
+
+static PyType_Spec PyUpb_RepeatedScalarContainer_Spec = {
+ PYUPB_MODULE_NAME ".RepeatedScalarContainer",
+ sizeof(PyUpb_RepeatedContainer),
+ 0, // tp_itemsize
+ Py_TPFLAGS_DEFAULT,
+ PyUpb_RepeatedScalarContainer_Slots,
+};
+
+// -----------------------------------------------------------------------------
+// Top Level
+// -----------------------------------------------------------------------------
+
+static bool PyUpb_Repeated_RegisterAsSequence(PyUpb_ModuleState* state) {
+ PyObject* collections = NULL;
+ PyObject* seq = NULL;
+ PyObject* ret1 = NULL;
+ PyObject* ret2 = NULL;
+ PyTypeObject* type1 = state->repeated_scalar_container_type;
+ PyTypeObject* type2 = state->repeated_composite_container_type;
+
+ bool ok = (collections = PyImport_ImportModule("collections.abc")) &&
+ (seq = PyObject_GetAttrString(collections, "MutableSequence")) &&
+ (ret1 = PyObject_CallMethod(seq, "register", "O", type1)) &&
+ (ret2 = PyObject_CallMethod(seq, "register", "O", type2));
+
+ Py_XDECREF(collections);
+ Py_XDECREF(seq);
+ Py_XDECREF(ret1);
+ Py_XDECREF(ret2);
+ return ok;
+}
+
+bool PyUpb_Repeated_Init(PyObject* m) {
+ PyUpb_ModuleState* state = PyUpb_ModuleState_GetFromModule(m);
+
+ state->repeated_composite_container_type =
+ PyUpb_AddClass(m, &PyUpb_RepeatedCompositeContainer_Spec);
+ state->repeated_scalar_container_type =
+ PyUpb_AddClass(m, &PyUpb_RepeatedScalarContainer_Spec);
+
+ return state->repeated_composite_container_type &&
+ state->repeated_scalar_container_type &&
+ PyUpb_Repeated_RegisterAsSequence(state);
+}
diff --git a/upb/python/repeated.h b/upb/python/repeated.h
new file mode 100644
index 0000000..54670e7
--- /dev/null
+++ b/upb/python/repeated.h
@@ -0,0 +1,72 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef PYUPB_REPEATED_H__
+#define PYUPB_REPEATED_H__
+
+#include <stdbool.h>
+
+#include "python/python_api.h"
+#include "upb/reflection/def.h"
+
+// Creates a new repeated field stub for field `f` of message object `parent`.
+// Precondition: `parent` must be a stub.
+PyObject* PyUpb_RepeatedContainer_NewStub(PyObject* parent,
+ const upb_FieldDef* f,
+ PyObject* arena);
+
+// Returns a repeated field object wrapping `arr`, of field type `f`, which
+// must be on `arena`. If an existing wrapper object exists, it will be
+// returned, otherwise a new object will be created. The caller always owns a
+// ref on the returned value.
+PyObject* PyUpb_RepeatedContainer_GetOrCreateWrapper(upb_Array* arr,
+ const upb_FieldDef* f,
+ PyObject* arena);
+
+// Reifies a repeated field stub to point to the concrete data in `arr`.
+// If `arr` is NULL, an appropriate empty array will be constructed.
+void PyUpb_RepeatedContainer_Reify(PyObject* self, upb_Array* arr);
+
+// Reifies this repeated object if it is not already reified.
+upb_Array* PyUpb_RepeatedContainer_EnsureReified(PyObject* self);
+
+// Implements repeated_field.extend(iterable). `_self` must be a repeated
+// field (either repeated composite or repeated scalar).
+PyObject* PyUpb_RepeatedContainer_Extend(PyObject* _self, PyObject* value);
+
+// Implements repeated_field.add(initial_values). `_self` must be a repeated
+// composite field.
+PyObject* PyUpb_RepeatedCompositeContainer_Add(PyObject* _self, PyObject* args,
+ PyObject* kwargs);
+
+// Module-level init.
+bool PyUpb_Repeated_Init(PyObject* m);
+
+#endif // PYUPB_REPEATED_H__
diff --git a/upb/python/requirements.txt b/upb/python/requirements.txt
new file mode 100644
index 0000000..ee6dfb4
--- /dev/null
+++ b/upb/python/requirements.txt
@@ -0,0 +1 @@
+numpy==1.21.6
diff --git a/upb/python/requirements_311.txt b/upb/python/requirements_311.txt
new file mode 100644
index 0000000..1f2c987
--- /dev/null
+++ b/upb/python/requirements_311.txt
@@ -0,0 +1 @@
+numpy==1.23.5
diff --git a/upb/python/unknown_fields.c b/upb/python/unknown_fields.c
new file mode 100644
index 0000000..f228f23
--- /dev/null
+++ b/upb/python/unknown_fields.c
@@ -0,0 +1,358 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "python/unknown_fields.h"
+
+#include "python/message.h"
+#include "python/protobuf.h"
+#include "upb/wire/eps_copy_input_stream.h"
+#include "upb/wire/reader.h"
+#include "upb/wire/types.h"
+
+// -----------------------------------------------------------------------------
+// UnknownFieldSet
+// -----------------------------------------------------------------------------
+
+typedef struct {
+ PyObject_HEAD;
+ PyObject* fields;
+} PyUpb_UnknownFieldSet;
+
+static void PyUpb_UnknownFieldSet_Dealloc(PyObject* _self) {
+ PyUpb_UnknownFieldSet* self = (PyUpb_UnknownFieldSet*)_self;
+ Py_XDECREF(self->fields);
+ PyUpb_Dealloc(self);
+}
+
+PyUpb_UnknownFieldSet* PyUpb_UnknownFieldSet_NewBare(void) {
+ PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
+ PyUpb_UnknownFieldSet* self =
+ (void*)PyType_GenericAlloc(s->unknown_fields_type, 0);
+ return self;
+}
+
+// For MessageSet the established behavior is for UnknownFieldSet to interpret
+// the MessageSet wire format:
+// message MessageSet {
+// repeated group Item = 1 {
+// required int32 type_id = 2;
+// required bytes message = 3;
+// }
+// }
+//
+// And create unknown fields like:
+// UnknownField(type_id, WIRE_TYPE_DELIMITED, message)
+//
+// For any unknown fields that are unexpected per the wire format defined above,
+// we drop them on the floor.
+
+enum {
+ kUpb_MessageSet_StartItemTag = (1 << 3) | kUpb_WireType_StartGroup,
+ kUpb_MessageSet_EndItemTag = (1 << 3) | kUpb_WireType_EndGroup,
+ kUpb_MessageSet_TypeIdTag = (2 << 3) | kUpb_WireType_Varint,
+ kUpb_MessageSet_MessageTag = (3 << 3) | kUpb_WireType_Delimited,
+};
+
+static const char* PyUpb_UnknownFieldSet_BuildMessageSetItem(
+ PyUpb_UnknownFieldSet* self, upb_EpsCopyInputStream* stream,
+ const char* ptr) {
+ PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
+ int type_id = 0;
+ PyObject* msg = NULL;
+ while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) {
+ uint32_t tag;
+ ptr = upb_WireReader_ReadTag(ptr, &tag);
+ if (!ptr) goto err;
+ switch (tag) {
+ case kUpb_MessageSet_EndItemTag:
+ goto done;
+ case kUpb_MessageSet_TypeIdTag: {
+ uint64_t tmp;
+ ptr = upb_WireReader_ReadVarint(ptr, &tmp);
+ if (!ptr) goto err;
+ if (!type_id) type_id = tmp;
+ break;
+ }
+ case kUpb_MessageSet_MessageTag: {
+ int size;
+ ptr = upb_WireReader_ReadSize(ptr, &size);
+ if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(stream, ptr, size)) {
+ goto err;
+ }
+ const char* str = ptr;
+ ptr = upb_EpsCopyInputStream_ReadStringAliased(stream, &str, size);
+ if (!msg) {
+ msg = PyBytes_FromStringAndSize(str, size);
+ if (!msg) goto err;
+ } else {
+ // already saw a message here so deliberately skipping the duplicate
+ }
+ break;
+ }
+ default:
+ ptr = upb_WireReader_SkipValue(ptr, tag, stream);
+ if (!ptr) goto err;
+ }
+ }
+
+done:
+ if (type_id && msg) {
+ PyObject* field = PyObject_CallFunction(
+ s->unknown_field_type, "iiO", type_id, kUpb_WireType_Delimited, msg);
+ if (!field) goto err;
+ PyList_Append(self->fields, field);
+ Py_DECREF(field);
+ }
+ Py_XDECREF(msg);
+ return ptr;
+
+err:
+ Py_XDECREF(msg);
+ return NULL;
+}
+
+static const char* PyUpb_UnknownFieldSet_BuildMessageSet(
+ PyUpb_UnknownFieldSet* self, upb_EpsCopyInputStream* stream,
+ const char* ptr) {
+ self->fields = PyList_New(0);
+ while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) {
+ uint32_t tag;
+ ptr = upb_WireReader_ReadTag(ptr, &tag);
+ if (!ptr) goto err;
+ if (tag == kUpb_MessageSet_StartItemTag) {
+ ptr = PyUpb_UnknownFieldSet_BuildMessageSetItem(self, stream, ptr);
+ } else {
+ ptr = upb_WireReader_SkipValue(ptr, tag, stream);
+ }
+ if (!ptr) goto err;
+ }
+ if (upb_EpsCopyInputStream_IsError(stream)) goto err;
+ return ptr;
+
+err:
+ Py_DECREF(self->fields);
+ self->fields = NULL;
+ return NULL;
+}
+
+static const char* PyUpb_UnknownFieldSet_Build(PyUpb_UnknownFieldSet* self,
+ upb_EpsCopyInputStream* stream,
+ const char* ptr,
+ int group_number);
+
+static const char* PyUpb_UnknownFieldSet_BuildValue(
+ PyUpb_UnknownFieldSet* self, upb_EpsCopyInputStream* stream,
+ const char* ptr, int field_number, int wire_type, int group_number,
+ PyObject** data) {
+ switch (wire_type) {
+ case kUpb_WireType_Varint: {
+ uint64_t val;
+ ptr = upb_WireReader_ReadVarint(ptr, &val);
+ if (!ptr) return NULL;
+ *data = PyLong_FromUnsignedLongLong(val);
+ return ptr;
+ }
+ case kUpb_WireType_64Bit: {
+ uint64_t val;
+ ptr = upb_WireReader_ReadFixed64(ptr, &val);
+ *data = PyLong_FromUnsignedLongLong(val);
+ return ptr;
+ }
+ case kUpb_WireType_32Bit: {
+ uint32_t val;
+ ptr = upb_WireReader_ReadFixed32(ptr, &val);
+ *data = PyLong_FromUnsignedLongLong(val);
+ return ptr;
+ }
+ case kUpb_WireType_Delimited: {
+ int size;
+ ptr = upb_WireReader_ReadSize(ptr, &size);
+ if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(stream, ptr, size)) {
+ return NULL;
+ }
+ const char* str = ptr;
+ ptr = upb_EpsCopyInputStream_ReadStringAliased(stream, &str, size);
+ *data = PyBytes_FromStringAndSize(str, size);
+ return ptr;
+ }
+ case kUpb_WireType_StartGroup: {
+ PyUpb_UnknownFieldSet* sub = PyUpb_UnknownFieldSet_NewBare();
+ if (!sub) return NULL;
+ *data = &sub->ob_base;
+ return PyUpb_UnknownFieldSet_Build(sub, stream, ptr, field_number);
+ }
+ default:
+ assert(0);
+ *data = NULL;
+ return NULL;
+ }
+}
+
+// For non-MessageSet we just build the unknown fields exactly as they exist on
+// the wire.
+static const char* PyUpb_UnknownFieldSet_Build(PyUpb_UnknownFieldSet* self,
+ upb_EpsCopyInputStream* stream,
+ const char* ptr,
+ int group_number) {
+ PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
+ self->fields = PyList_New(0);
+ while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) {
+ uint32_t tag;
+ ptr = upb_WireReader_ReadTag(ptr, &tag);
+ if (!ptr) goto err;
+ PyObject* data = NULL;
+ int field_number = upb_WireReader_GetFieldNumber(tag);
+ int wire_type = upb_WireReader_GetWireType(tag);
+ if (wire_type == kUpb_WireType_EndGroup) {
+ if (field_number != group_number) return NULL;
+ return ptr;
+ }
+ ptr = PyUpb_UnknownFieldSet_BuildValue(self, stream, ptr, field_number,
+ wire_type, group_number, &data);
+ if (!ptr) {
+ Py_XDECREF(data);
+ goto err;
+ }
+ assert(data);
+ PyObject* field = PyObject_CallFunction(s->unknown_field_type, "iiN",
+ field_number, wire_type, data);
+ PyList_Append(self->fields, field);
+ Py_DECREF(field);
+ }
+ if (upb_EpsCopyInputStream_IsError(stream)) goto err;
+ return ptr;
+
+err:
+ Py_DECREF(self->fields);
+ self->fields = NULL;
+ return NULL;
+}
+
+static PyObject* PyUpb_UnknownFieldSet_New(PyTypeObject* type, PyObject* args,
+ PyObject* kwargs) {
+ char* kwlist[] = {"message", 0};
+ PyObject* py_msg = NULL;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kwlist, &py_msg)) {
+ return NULL;
+ }
+
+ if (!PyUpb_Message_Verify(py_msg)) return NULL;
+ PyUpb_UnknownFieldSet* self = PyUpb_UnknownFieldSet_NewBare();
+ upb_Message* msg = PyUpb_Message_GetIfReified(py_msg);
+ if (!msg) return &self->ob_base;
+
+ size_t size;
+ const char* ptr = upb_Message_GetUnknown(msg, &size);
+ if (size == 0) return &self->ob_base;
+
+ upb_EpsCopyInputStream stream;
+ upb_EpsCopyInputStream_Init(&stream, &ptr, size, true);
+ const upb_MessageDef* msgdef = PyUpb_Message_GetMsgdef(py_msg);
+
+ bool ok;
+ if (upb_MessageDef_IsMessageSet(msgdef)) {
+ ok = PyUpb_UnknownFieldSet_BuildMessageSet(self, &stream, ptr) != NULL;
+ } else {
+ ok = PyUpb_UnknownFieldSet_Build(self, &stream, ptr, -1) != NULL;
+ }
+
+ if (!ok) {
+ Py_DECREF(&self->ob_base);
+ return NULL;
+ }
+
+ return &self->ob_base;
+}
+
+static Py_ssize_t PyUpb_UnknownFieldSet_Length(PyObject* _self) {
+ PyUpb_UnknownFieldSet* self = (PyUpb_UnknownFieldSet*)_self;
+ return self->fields ? PyObject_Length(self->fields) : 0;
+}
+
+static PyObject* PyUpb_UnknownFieldSet_GetItem(PyObject* _self,
+ Py_ssize_t index) {
+ PyUpb_UnknownFieldSet* self = (PyUpb_UnknownFieldSet*)_self;
+ if (!self->fields) {
+ PyErr_Format(PyExc_IndexError, "list index (%zd) out of range", index);
+ return NULL;
+ }
+ PyObject* ret = PyList_GetItem(self->fields, index);
+ if (ret) Py_INCREF(ret);
+ return ret;
+}
+
+static PyType_Slot PyUpb_UnknownFieldSet_Slots[] = {
+ {Py_tp_new, &PyUpb_UnknownFieldSet_New},
+ {Py_tp_dealloc, &PyUpb_UnknownFieldSet_Dealloc},
+ {Py_sq_length, PyUpb_UnknownFieldSet_Length},
+ {Py_sq_item, PyUpb_UnknownFieldSet_GetItem},
+ {Py_tp_hash, PyObject_HashNotImplemented},
+ {0, NULL},
+};
+
+static PyType_Spec PyUpb_UnknownFieldSet_Spec = {
+ PYUPB_MODULE_NAME ".UnknownFieldSet", // tp_name
+ sizeof(PyUpb_UnknownFieldSet), // tp_basicsize
+ 0, // tp_itemsize
+ Py_TPFLAGS_DEFAULT, // tp_flags
+ PyUpb_UnknownFieldSet_Slots,
+};
+
+// -----------------------------------------------------------------------------
+// Top Level
+// -----------------------------------------------------------------------------
+
+PyObject* PyUpb_UnknownFieldSet_CreateNamedTuple(void) {
+ PyObject* mod = NULL;
+ PyObject* namedtuple = NULL;
+ PyObject* ret = NULL;
+
+ mod = PyImport_ImportModule("collections");
+ if (!mod) goto done;
+ namedtuple = PyObject_GetAttrString(mod, "namedtuple");
+ if (!namedtuple) goto done;
+ ret = PyObject_CallFunction(namedtuple, "s[sss]", "PyUnknownField",
+ "field_number", "wire_type", "data");
+
+done:
+ Py_XDECREF(mod);
+ Py_XDECREF(namedtuple);
+ return ret;
+}
+
+bool PyUpb_UnknownFields_Init(PyObject* m) {
+ PyUpb_ModuleState* s = PyUpb_ModuleState_GetFromModule(m);
+
+ s->unknown_fields_type = PyUpb_AddClass(m, &PyUpb_UnknownFieldSet_Spec);
+ s->unknown_field_type = PyUpb_UnknownFieldSet_CreateNamedTuple();
+
+ return s->unknown_fields_type && s->unknown_field_type;
+}
diff --git a/upb/python/unknown_fields.h b/upb/python/unknown_fields.h
new file mode 100644
index 0000000..85ea40c
--- /dev/null
+++ b/upb/python/unknown_fields.h
@@ -0,0 +1,42 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef PYUPB_UNKNOWN_FIELDS_H__
+#define PYUPB_UNKNOWN_FIELDS_H__
+
+#include <stdbool.h>
+
+#include "python/python_api.h"
+
+PyObject* PyUpb_UnknownFields_New(PyObject* msg);
+
+bool PyUpb_UnknownFields_Init(PyObject* m);
+
+#endif // PYUPB_UNKNOWN_FIELDS_H__
diff --git a/upb/python/version_script.lds b/upb/python/version_script.lds
new file mode 100644
index 0000000..7cb8300
--- /dev/null
+++ b/upb/python/version_script.lds
@@ -0,0 +1,6 @@
+message {
+ global:
+ PyInit__message;
+ local:
+ *;
+};
diff --git a/upb/upb/base/BUILD b/upb/upb/base/BUILD
new file mode 100644
index 0000000..db6757c
--- /dev/null
+++ b/upb/upb/base/BUILD
@@ -0,0 +1,71 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load("//bazel:build_defs.bzl", "UPB_DEFAULT_COPTS")
+
+cc_library(
+ name = "base",
+ srcs = [
+ "status.c",
+ ],
+ hdrs = [
+ "descriptor_constants.h",
+ "status.h",
+ "status.hpp",
+ "string_view.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ "//:port",
+ ],
+)
+
+cc_library(
+ name = "internal",
+ hdrs = [
+ "internal/log2.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = ["//:port"],
+)
+
+# begin:github_only
+filegroup(
+ name = "source_files",
+ srcs = glob(
+ [
+ "**/*.c",
+ "**/*.h",
+ "**/*.hpp",
+ ],
+ ),
+ visibility = [
+ "//cmake:__pkg__",
+ "//python/dist:__pkg__",
+ ]
+)
+# end:github_only
diff --git a/upb/upb/base/descriptor_constants.h b/upb/upb/base/descriptor_constants.h
new file mode 100644
index 0000000..ee68d67
--- /dev/null
+++ b/upb/upb/base/descriptor_constants.h
@@ -0,0 +1,107 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_BASE_DESCRIPTOR_CONSTANTS_H_
+#define UPB_BASE_DESCRIPTOR_CONSTANTS_H_
+
+// Must be last.
+#include "upb/port/def.inc"
+
+// The types a field can have. Note that this list is not identical to the
+// types defined in descriptor.proto, which gives INT32 and SINT32 separate
+// types (we distinguish the two with the "integer encoding" enum below).
+// This enum is an internal convenience only and has no meaning outside of upb.
+typedef enum {
+ kUpb_CType_Bool = 1,
+ kUpb_CType_Float = 2,
+ kUpb_CType_Int32 = 3,
+ kUpb_CType_UInt32 = 4,
+ kUpb_CType_Enum = 5, // Enum values are int32. TODO(b/279178239): rename
+ kUpb_CType_Message = 6,
+ kUpb_CType_Double = 7,
+ kUpb_CType_Int64 = 8,
+ kUpb_CType_UInt64 = 9,
+ kUpb_CType_String = 10,
+ kUpb_CType_Bytes = 11
+} upb_CType;
+
+// The repeated-ness of each field; this matches descriptor.proto.
+typedef enum {
+ kUpb_Label_Optional = 1,
+ kUpb_Label_Required = 2,
+ kUpb_Label_Repeated = 3
+} upb_Label;
+
+// Descriptor types, as defined in descriptor.proto.
+typedef enum {
+ kUpb_FieldType_Double = 1,
+ kUpb_FieldType_Float = 2,
+ kUpb_FieldType_Int64 = 3,
+ kUpb_FieldType_UInt64 = 4,
+ kUpb_FieldType_Int32 = 5,
+ kUpb_FieldType_Fixed64 = 6,
+ kUpb_FieldType_Fixed32 = 7,
+ kUpb_FieldType_Bool = 8,
+ kUpb_FieldType_String = 9,
+ kUpb_FieldType_Group = 10,
+ kUpb_FieldType_Message = 11,
+ kUpb_FieldType_Bytes = 12,
+ kUpb_FieldType_UInt32 = 13,
+ kUpb_FieldType_Enum = 14,
+ kUpb_FieldType_SFixed32 = 15,
+ kUpb_FieldType_SFixed64 = 16,
+ kUpb_FieldType_SInt32 = 17,
+ kUpb_FieldType_SInt64 = 18,
+} upb_FieldType;
+
+#define kUpb_FieldType_SizeOf 19
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+UPB_INLINE bool upb_FieldType_IsPackable(upb_FieldType type) {
+ // clang-format off
+ const unsigned kUnpackableTypes =
+ (1 << kUpb_FieldType_String) |
+ (1 << kUpb_FieldType_Bytes) |
+ (1 << kUpb_FieldType_Message) |
+ (1 << kUpb_FieldType_Group);
+ // clang-format on
+ return (1 << type) & ~kUnpackableTypes;
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_BASE_DESCRIPTOR_CONSTANTS_H_ */
diff --git a/upb/upb/base/internal/log2.h b/upb/upb/base/internal/log2.h
new file mode 100644
index 0000000..2f3256e
--- /dev/null
+++ b/upb/upb/base/internal/log2.h
@@ -0,0 +1,60 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_BASE_LOG2_H_
+#define UPB_BASE_LOG2_H_
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+UPB_INLINE int upb_Log2Ceiling(int x) {
+ if (x <= 1) return 0;
+#ifdef __GNUC__
+ return 32 - __builtin_clz(x - 1);
+#else
+ int lg2 = 0;
+ while ((1 << lg2) < x) lg2++;
+ return lg2;
+#endif
+}
+
+UPB_INLINE int upb_Log2CeilingSize(int x) { return 1 << upb_Log2Ceiling(x); }
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_BASE_LOG2_H_ */
diff --git a/upb/upb/base/status.c b/upb/upb/base/status.c
new file mode 100644
index 0000000..576c957
--- /dev/null
+++ b/upb/upb/base/status.c
@@ -0,0 +1,84 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/base/status.h"
+
+#include <errno.h>
+#include <float.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Must be last.
+#include "upb/port/def.inc"
+
+void upb_Status_Clear(upb_Status* status) {
+ if (!status) return;
+ status->ok = true;
+ status->msg[0] = '\0';
+}
+
+bool upb_Status_IsOk(const upb_Status* status) { return status->ok; }
+
+const char* upb_Status_ErrorMessage(const upb_Status* status) {
+ return status->msg;
+}
+
+void upb_Status_SetErrorMessage(upb_Status* status, const char* msg) {
+ if (!status) return;
+ status->ok = false;
+ strncpy(status->msg, msg, _kUpb_Status_MaxMessage - 1);
+ status->msg[_kUpb_Status_MaxMessage - 1] = '\0';
+}
+
+void upb_Status_SetErrorFormat(upb_Status* status, const char* fmt, ...) {
+ va_list args;
+ va_start(args, fmt);
+ upb_Status_VSetErrorFormat(status, fmt, args);
+ va_end(args);
+}
+
+void upb_Status_VSetErrorFormat(upb_Status* status, const char* fmt,
+ va_list args) {
+ if (!status) return;
+ status->ok = false;
+ vsnprintf(status->msg, sizeof(status->msg), fmt, args);
+ status->msg[_kUpb_Status_MaxMessage - 1] = '\0';
+}
+
+void upb_Status_VAppendErrorFormat(upb_Status* status, const char* fmt,
+ va_list args) {
+ size_t len;
+ if (!status) return;
+ status->ok = false;
+ len = strlen(status->msg);
+ vsnprintf(status->msg + len, sizeof(status->msg) - len, fmt, args);
+ status->msg[_kUpb_Status_MaxMessage - 1] = '\0';
+}
diff --git a/upb/upb/base/status.h b/upb/upb/base/status.h
new file mode 100644
index 0000000..9f38f4c
--- /dev/null
+++ b/upb/upb/base/status.h
@@ -0,0 +1,69 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_BASE_STATUS_H_
+#define UPB_BASE_STATUS_H_
+
+#include <stdarg.h>
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#define _kUpb_Status_MaxMessage 127
+
+typedef struct {
+ bool ok;
+ char msg[_kUpb_Status_MaxMessage]; // Error message; NULL-terminated.
+} upb_Status;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+UPB_API const char* upb_Status_ErrorMessage(const upb_Status* status);
+UPB_API bool upb_Status_IsOk(const upb_Status* status);
+
+// These are no-op if |status| is NULL.
+UPB_API void upb_Status_Clear(upb_Status* status);
+void upb_Status_SetErrorMessage(upb_Status* status, const char* msg);
+void upb_Status_SetErrorFormat(upb_Status* status, const char* fmt, ...)
+ UPB_PRINTF(2, 3);
+void upb_Status_VSetErrorFormat(upb_Status* status, const char* fmt,
+ va_list args) UPB_PRINTF(2, 0);
+void upb_Status_VAppendErrorFormat(upb_Status* status, const char* fmt,
+ va_list args) UPB_PRINTF(2, 0);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_BASE_STATUS_H_ */
diff --git a/upb/upb/base/status.hpp b/upb/upb/base/status.hpp
new file mode 100644
index 0000000..2dc4c8a
--- /dev/null
+++ b/upb/upb/base/status.hpp
@@ -0,0 +1,73 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_BASE_STATUS_HPP_
+#define UPB_BASE_STATUS_HPP_
+
+#include "upb/base/status.h"
+
+namespace upb {
+
+class Status {
+ public:
+ Status() { upb_Status_Clear(&status_); }
+
+ upb_Status* ptr() { return &status_; }
+
+ // Returns true if there is no error.
+ bool ok() const { return upb_Status_IsOk(&status_); }
+
+ // Guaranteed to be NULL-terminated.
+ const char* error_message() const {
+ return upb_Status_ErrorMessage(&status_);
+ }
+
+ // The error message will be truncated if it is longer than
+ // _kUpb_Status_MaxMessage-4.
+ void SetErrorMessage(const char* msg) {
+ upb_Status_SetErrorMessage(&status_, msg);
+ }
+ void SetFormattedErrorMessage(const char* fmt, ...) {
+ va_list args;
+ va_start(args, fmt);
+ upb_Status_VSetErrorFormat(&status_, fmt, args);
+ va_end(args);
+ }
+
+ // Resets the status to a successful state with no message.
+ void Clear() { upb_Status_Clear(&status_); }
+
+ private:
+ upb_Status status_;
+};
+
+} // namespace upb
+
+#endif // UPB_BASE_STATUS_HPP_
diff --git a/upb/upb/base/string_view.h b/upb/upb/base/string_view.h
new file mode 100644
index 0000000..c9454b3
--- /dev/null
+++ b/upb/upb/base/string_view.h
@@ -0,0 +1,81 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_BASE_STRING_VIEW_H_
+#define UPB_BASE_STRING_VIEW_H_
+
+#include <string.h>
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#define UPB_STRINGVIEW_INIT(ptr, len) \
+ { ptr, len }
+
+#define UPB_STRINGVIEW_FORMAT "%.*s"
+#define UPB_STRINGVIEW_ARGS(view) (int)(view).size, (view).data
+
+// LINT.IfChange(struct_definition)
+typedef struct {
+ const char* data;
+ size_t size;
+} upb_StringView;
+// LINT.ThenChange(
+// GoogleInternalName0,
+// //depot/google3/third_party/upb/bits/golang/accessor.go:map_go_string
+// )
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+UPB_API_INLINE upb_StringView upb_StringView_FromDataAndSize(const char* data,
+ size_t size) {
+ upb_StringView ret;
+ ret.data = data;
+ ret.size = size;
+ return ret;
+}
+
+UPB_INLINE upb_StringView upb_StringView_FromString(const char* data) {
+ return upb_StringView_FromDataAndSize(data, strlen(data));
+}
+
+UPB_INLINE bool upb_StringView_IsEqual(upb_StringView a, upb_StringView b) {
+ return a.size == b.size && memcmp(a.data, b.data, a.size) == 0;
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_BASE_STRING_VIEW_H_ */
diff --git a/upb/upb/collections/BUILD b/upb/upb/collections/BUILD
new file mode 100644
index 0000000..cc46536
--- /dev/null
+++ b/upb/upb/collections/BUILD
@@ -0,0 +1,124 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load("//bazel:build_defs.bzl", "UPB_DEFAULT_COPTS")
+
+cc_library(
+ name = "collections",
+ hdrs = [
+ "array.h",
+ "map.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":internal",
+ "//:base",
+ "//:mem",
+ "//:port",
+ ],
+)
+
+cc_library(
+ name = "internal",
+ srcs = [
+ "array.c",
+ "array.h",
+ "map.c",
+ "map.h",
+ "map_sorter.c",
+ ],
+ hdrs = [
+ "internal/array.h",
+ "internal/map.h",
+ "internal/map_sorter.h",
+ "map_gencode_util.h",
+ "message_value.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ "//:base",
+ "//:base_internal",
+ "//:hash",
+ "//:mem",
+ "//:message_internal",
+ "//:message_rep_internal",
+ "//:message_tagged_ptr",
+ "//:mini_table",
+ "//:port",
+ ],
+)
+
+cc_library(
+ name = "split64",
+ hdrs = [
+ "array_split64.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":collections",
+ "//:port",
+ ],
+)
+
+cc_test(
+ name = "array_test",
+ srcs = ["array_test.cc"],
+ deps = [
+ ":collections",
+ "//:base",
+ "//:mem",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "map_test",
+ srcs = ["map_test.cc"],
+ deps = [
+ ":collections",
+ "//:base",
+ "//:mem",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+# begin:github_only
+filegroup(
+ name = "source_files",
+ srcs = glob(
+ [
+ "**/*.c",
+ "**/*.h",
+ ],
+ ),
+ visibility = [
+ "//cmake:__pkg__",
+ "//python/dist:__pkg__",
+ ]
+)
+# end:github_only
diff --git a/upb/upb/collections/array.c b/upb/upb/collections/array.c
new file mode 100644
index 0000000..f46fc3e
--- /dev/null
+++ b/upb/upb/collections/array.c
@@ -0,0 +1,154 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/collections/internal/array.h"
+
+#include <string.h>
+
+// Must be last.
+#include "upb/port/def.inc"
+
+const char _upb_Array_CTypeSizeLg2Table[] = {
+ [kUpb_CType_Bool] = 0,
+ [kUpb_CType_Float] = 2,
+ [kUpb_CType_Int32] = 2,
+ [kUpb_CType_UInt32] = 2,
+ [kUpb_CType_Enum] = 2,
+ [kUpb_CType_Message] = UPB_SIZE(2, 3),
+ [kUpb_CType_Double] = 3,
+ [kUpb_CType_Int64] = 3,
+ [kUpb_CType_UInt64] = 3,
+ [kUpb_CType_String] = UPB_SIZE(3, 4),
+ [kUpb_CType_Bytes] = UPB_SIZE(3, 4),
+};
+
+upb_Array* upb_Array_New(upb_Arena* a, upb_CType type) {
+ return _upb_Array_New(a, 4, _upb_Array_CTypeSizeLg2(type));
+}
+
+const void* upb_Array_DataPtr(const upb_Array* arr) {
+ return _upb_array_ptr((upb_Array*)arr);
+}
+
+void* upb_Array_MutableDataPtr(upb_Array* arr) { return _upb_array_ptr(arr); }
+
+size_t upb_Array_Size(const upb_Array* arr) { return arr->size; }
+
+upb_MessageValue upb_Array_Get(const upb_Array* arr, size_t i) {
+ upb_MessageValue ret;
+ const char* data = _upb_array_constptr(arr);
+ int lg2 = arr->data & 7;
+ UPB_ASSERT(i < arr->size);
+ memcpy(&ret, data + (i << lg2), 1 << lg2);
+ return ret;
+}
+
+void upb_Array_Set(upb_Array* arr, size_t i, upb_MessageValue val) {
+ char* data = _upb_array_ptr(arr);
+ int lg2 = arr->data & 7;
+ UPB_ASSERT(i < arr->size);
+ memcpy(data + (i << lg2), &val, 1 << lg2);
+}
+
+bool upb_Array_Append(upb_Array* arr, upb_MessageValue val, upb_Arena* arena) {
+ UPB_ASSERT(arena);
+ if (!upb_Array_Resize(arr, arr->size + 1, arena)) {
+ return false;
+ }
+ upb_Array_Set(arr, arr->size - 1, val);
+ return true;
+}
+
+void upb_Array_Move(upb_Array* arr, size_t dst_idx, size_t src_idx,
+ size_t count) {
+ const int lg2 = arr->data & 7;
+ char* data = _upb_array_ptr(arr);
+ memmove(&data[dst_idx << lg2], &data[src_idx << lg2], count << lg2);
+}
+
+bool upb_Array_Insert(upb_Array* arr, size_t i, size_t count,
+ upb_Arena* arena) {
+ UPB_ASSERT(arena);
+ UPB_ASSERT(i <= arr->size);
+ UPB_ASSERT(count + arr->size >= count);
+ const size_t oldsize = arr->size;
+ if (!upb_Array_Resize(arr, arr->size + count, arena)) {
+ return false;
+ }
+ upb_Array_Move(arr, i + count, i, oldsize - i);
+ return true;
+}
+
+/*
+ * i end arr->size
+ * |------------|XXXXXXXX|--------|
+ */
+void upb_Array_Delete(upb_Array* arr, size_t i, size_t count) {
+ const size_t end = i + count;
+ UPB_ASSERT(i <= end);
+ UPB_ASSERT(end <= arr->size);
+ upb_Array_Move(arr, i, end, arr->size - end);
+ arr->size -= count;
+}
+
+bool upb_Array_Resize(upb_Array* arr, size_t size, upb_Arena* arena) {
+ const size_t oldsize = arr->size;
+ if (UPB_UNLIKELY(!_upb_Array_ResizeUninitialized(arr, size, arena))) {
+ return false;
+ }
+ const size_t newsize = arr->size;
+ if (newsize > oldsize) {
+ const int lg2 = arr->data & 7;
+ char* data = _upb_array_ptr(arr);
+ memset(data + (oldsize << lg2), 0, (newsize - oldsize) << lg2);
+ }
+ return true;
+}
+
+// EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE /////////////////////////
+
+bool _upb_array_realloc(upb_Array* arr, size_t min_capacity, upb_Arena* arena) {
+ size_t new_capacity = UPB_MAX(arr->capacity, 4);
+ int elem_size_lg2 = arr->data & 7;
+ size_t old_bytes = arr->capacity << elem_size_lg2;
+ size_t new_bytes;
+ void* ptr = _upb_array_ptr(arr);
+
+ // Log2 ceiling of size.
+ while (new_capacity < min_capacity) new_capacity *= 2;
+
+ new_bytes = new_capacity << elem_size_lg2;
+ ptr = upb_Arena_Realloc(arena, ptr, old_bytes, new_bytes);
+ if (!ptr) return false;
+
+ arr->data = _upb_tag_arrptr(ptr, elem_size_lg2);
+ arr->capacity = new_capacity;
+ return true;
+}
diff --git a/upb/upb/collections/array.h b/upb/upb/collections/array.h
new file mode 100644
index 0000000..7df2f0a
--- /dev/null
+++ b/upb/upb/collections/array.h
@@ -0,0 +1,94 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_COLLECTIONS_ARRAY_H_
+#define UPB_COLLECTIONS_ARRAY_H_
+
+#include "upb/base/descriptor_constants.h"
+#include "upb/collections/message_value.h"
+#include "upb/mem/arena.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Creates a new array on the given arena that holds elements of this type.
+UPB_API upb_Array* upb_Array_New(upb_Arena* a, upb_CType type);
+
+// Returns the number of elements in the array.
+UPB_API size_t upb_Array_Size(const upb_Array* arr);
+
+// Returns the given element, which must be within the array's current size.
+UPB_API upb_MessageValue upb_Array_Get(const upb_Array* arr, size_t i);
+
+// Sets the given element, which must be within the array's current size.
+UPB_API void upb_Array_Set(upb_Array* arr, size_t i, upb_MessageValue val);
+
+// Appends an element to the array. Returns false on allocation failure.
+UPB_API bool upb_Array_Append(upb_Array* array, upb_MessageValue val,
+ upb_Arena* arena);
+
+// Moves elements within the array using memmove().
+// Like memmove(), the source and destination elements may be overlapping.
+UPB_API void upb_Array_Move(upb_Array* array, size_t dst_idx, size_t src_idx,
+ size_t count);
+
+// Inserts one or more empty elements into the array.
+// Existing elements are shifted right.
+// The new elements have undefined state and must be set with `upb_Array_Set()`.
+// REQUIRES: `i <= upb_Array_Size(arr)`
+UPB_API bool upb_Array_Insert(upb_Array* array, size_t i, size_t count,
+ upb_Arena* arena);
+
+// Deletes one or more elements from the array.
+// Existing elements are shifted left.
+// REQUIRES: `i + count <= upb_Array_Size(arr)`
+UPB_API void upb_Array_Delete(upb_Array* array, size_t i, size_t count);
+
+// Changes the size of a vector. New elements are initialized to NULL/0.
+// Returns false on allocation failure.
+UPB_API bool upb_Array_Resize(upb_Array* array, size_t size, upb_Arena* arena);
+
+// Returns pointer to array data.
+UPB_API const void* upb_Array_DataPtr(const upb_Array* arr);
+
+// Returns mutable pointer to array data.
+UPB_API void* upb_Array_MutableDataPtr(upb_Array* arr);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_COLLECTIONS_ARRAY_H_ */
diff --git a/upb/upb/collections/array_split64.h b/upb/upb/collections/array_split64.h
new file mode 100644
index 0000000..69ea579
--- /dev/null
+++ b/upb/upb/collections/array_split64.h
@@ -0,0 +1,93 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_COLLECTIONS_ARRAY_SPLIT64_H_
+#define UPB_COLLECTIONS_ARRAY_SPLIT64_H_
+
+#include "upb/collections/array.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// JavaScript doesn't directly support 64-bit ints so we must split them.
+
+UPB_API_INLINE uint32_t upb_Array_GetInt64Hi(const upb_Array* array, size_t i) {
+ return (uint32_t)(upb_Array_Get(array, i).int64_val >> 32);
+}
+
+UPB_API_INLINE uint32_t upb_Array_GetInt64Lo(const upb_Array* array, size_t i) {
+ return (uint32_t)upb_Array_Get(array, i).int64_val;
+}
+
+UPB_API_INLINE void upb_Array_SetInt64Split(upb_Array* array, size_t i,
+ uint32_t hi, uint32_t lo) {
+ const upb_MessageValue val = {.int64_val = ((int64_t)hi) << 32 | lo};
+ upb_Array_Set(array, i, val);
+}
+
+UPB_API_INLINE bool upb_Array_AppendInt64Split(upb_Array* array, uint32_t hi,
+ uint32_t lo, upb_Arena* arena) {
+ const upb_MessageValue val = {.int64_val = ((int64_t)hi) << 32 | lo};
+ return upb_Array_Append(array, val, arena);
+}
+
+UPB_API_INLINE uint32_t upb_Array_GetUInt64Hi(const upb_Array* array,
+ size_t i) {
+ return (uint32_t)(upb_Array_Get(array, i).uint64_val >> 32);
+}
+
+UPB_API_INLINE uint32_t upb_Array_GetUInt64Lo(const upb_Array* array,
+ size_t i) {
+ return (uint32_t)upb_Array_Get(array, i).uint64_val;
+}
+
+UPB_API_INLINE void upb_Array_SetUInt64Split(upb_Array* array, size_t i,
+ uint32_t hi, uint32_t lo) {
+ const upb_MessageValue val = {.uint64_val = ((uint64_t)hi) << 32 | lo};
+ upb_Array_Set(array, i, val);
+}
+
+UPB_API_INLINE bool upb_Array_AppendUInt64Split(upb_Array* array, uint32_t hi,
+ uint32_t lo, upb_Arena* arena) {
+ const upb_MessageValue val = {.uint64_val = ((uint64_t)hi) << 32 | lo};
+ return upb_Array_Append(array, val, arena);
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_COLLECTIONS_ARRAY_SPLIT64_H_ */
diff --git a/upb/upb/collections/array_test.cc b/upb/upb/collections/array_test.cc
new file mode 100644
index 0000000..b84464b
--- /dev/null
+++ b/upb/upb/collections/array_test.cc
@@ -0,0 +1,66 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/collections/array.h"
+
+#include "gtest/gtest.h"
+#include "upb/base/status.hpp"
+#include "upb/mem/arena.hpp"
+
+TEST(ArrayTest, Resize) {
+ upb::Arena arena;
+ upb::Status status;
+
+ upb_Array* array = upb_Array_New(arena.ptr(), kUpb_CType_Int32);
+ EXPECT_TRUE(array);
+
+ for (int i = 0; i < 10; i++) {
+ upb_MessageValue mv;
+ mv.int32_val = 3 * i;
+
+ upb_Array_Append(array, mv, arena.ptr());
+ EXPECT_EQ(upb_Array_Size(array), i + 1);
+ EXPECT_EQ(upb_Array_Get(array, i).int32_val, 3 * i);
+ }
+
+ upb_Array_Resize(array, 12, arena.ptr());
+ EXPECT_EQ(upb_Array_Get(array, 10).int32_val, 0);
+ EXPECT_EQ(upb_Array_Get(array, 11).int32_val, 0);
+
+ upb_Array_Resize(array, 4, arena.ptr());
+ EXPECT_EQ(upb_Array_Size(array), 4);
+
+ upb_Array_Resize(array, 6, arena.ptr());
+ EXPECT_EQ(upb_Array_Size(array), 6);
+
+ EXPECT_EQ(upb_Array_Get(array, 3).int32_val, 9);
+ EXPECT_EQ(upb_Array_Get(array, 4).int32_val, 0);
+ EXPECT_EQ(upb_Array_Get(array, 5).int32_val, 0);
+}
diff --git a/upb/upb/collections/internal/array.h b/upb/upb/collections/internal/array.h
new file mode 100644
index 0000000..1c2f3f5
--- /dev/null
+++ b/upb/upb/collections/internal/array.h
@@ -0,0 +1,138 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_COLLECTIONS_INTERNAL_ARRAY_H_
+#define UPB_COLLECTIONS_INTERNAL_ARRAY_H_
+
+#include <string.h>
+
+#include "upb/collections/array.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// LINT.IfChange(struct_definition)
+// Our internal representation for repeated fields.
+struct upb_Array {
+ uintptr_t data; /* Tagged ptr: low 3 bits of ptr are lg2(elem size). */
+ size_t size; /* The number of elements in the array. */
+ size_t capacity; /* Allocated storage. Measured in elements. */
+};
+// LINT.ThenChange(GoogleInternalName1)
+
+UPB_INLINE size_t _upb_Array_ElementSizeLg2(const upb_Array* arr) {
+ size_t ret = arr->data & 7;
+ UPB_ASSERT(ret <= 4);
+ return ret;
+}
+
+UPB_INLINE const void* _upb_array_constptr(const upb_Array* arr) {
+ _upb_Array_ElementSizeLg2(arr); // Check assertion.
+ return (void*)(arr->data & ~(uintptr_t)7);
+}
+
+UPB_INLINE uintptr_t _upb_array_tagptr(void* ptr, int elem_size_lg2) {
+ UPB_ASSERT(elem_size_lg2 <= 4);
+ return (uintptr_t)ptr | elem_size_lg2;
+}
+
+UPB_INLINE void* _upb_array_ptr(upb_Array* arr) {
+ return (void*)_upb_array_constptr(arr);
+}
+
+UPB_INLINE uintptr_t _upb_tag_arrptr(void* ptr, int elem_size_lg2) {
+ UPB_ASSERT(elem_size_lg2 <= 4);
+ UPB_ASSERT(((uintptr_t)ptr & 7) == 0);
+ return (uintptr_t)ptr | (unsigned)elem_size_lg2;
+}
+
+extern const char _upb_Array_CTypeSizeLg2Table[];
+
+UPB_INLINE size_t _upb_Array_CTypeSizeLg2(upb_CType ctype) {
+ return _upb_Array_CTypeSizeLg2Table[ctype];
+}
+
+UPB_INLINE upb_Array* _upb_Array_New(upb_Arena* a, size_t init_capacity,
+ int elem_size_lg2) {
+ UPB_ASSERT(elem_size_lg2 <= 4);
+ const size_t arr_size = UPB_ALIGN_UP(sizeof(upb_Array), UPB_MALLOC_ALIGN);
+ const size_t bytes = arr_size + (init_capacity << elem_size_lg2);
+ upb_Array* arr = (upb_Array*)upb_Arena_Malloc(a, bytes);
+ if (!arr) return NULL;
+ arr->data = _upb_tag_arrptr(UPB_PTR_AT(arr, arr_size, void), elem_size_lg2);
+ arr->size = 0;
+ arr->capacity = init_capacity;
+ return arr;
+}
+
+// Resizes the capacity of the array to be at least min_size.
+bool _upb_array_realloc(upb_Array* arr, size_t min_size, upb_Arena* arena);
+
+UPB_INLINE bool _upb_array_reserve(upb_Array* arr, size_t size,
+ upb_Arena* arena) {
+ if (arr->capacity < size) return _upb_array_realloc(arr, size, arena);
+ return true;
+}
+
+// Resize without initializing new elements.
+UPB_INLINE bool _upb_Array_ResizeUninitialized(upb_Array* arr, size_t size,
+ upb_Arena* arena) {
+ UPB_ASSERT(size <= arr->size || arena); // Allow NULL arena when shrinking.
+ if (!_upb_array_reserve(arr, size, arena)) return false;
+ arr->size = size;
+ return true;
+}
+
+// This function is intended for situations where elem_size is compile-time
+// constant or a known expression of the form (1 << lg2), so that the expression
+// i*elem_size does not result in an actual multiplication.
+UPB_INLINE void _upb_Array_Set(upb_Array* arr, size_t i, const void* data,
+ size_t elem_size) {
+ UPB_ASSERT(i < arr->size);
+ UPB_ASSERT(elem_size == 1U << _upb_Array_ElementSizeLg2(arr));
+ char* arr_data = (char*)_upb_array_ptr(arr);
+ memcpy(arr_data + (i * elem_size), data, elem_size);
+}
+
+UPB_INLINE void _upb_array_detach(const void* msg, size_t ofs) {
+ *UPB_PTR_AT(msg, ofs, upb_Array*) = NULL;
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_COLLECTIONS_INTERNAL_ARRAY_H_ */
diff --git a/upb/upb/collections/internal/map.h b/upb/upb/collections/internal/map.h
new file mode 100644
index 0000000..5900736
--- /dev/null
+++ b/upb/upb/collections/internal/map.h
@@ -0,0 +1,173 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE /////////////////////////
+
+#ifndef UPB_COLLECTIONS_INTERNAL_MAP_H_
+#define UPB_COLLECTIONS_INTERNAL_MAP_H_
+
+#include "upb/base/string_view.h"
+#include "upb/collections/map.h"
+#include "upb/hash/str_table.h"
+#include "upb/mem/arena.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+struct upb_Map {
+ // Size of key and val, based on the map type.
+ // Strings are represented as '0' because they must be handled specially.
+ char key_size;
+ char val_size;
+
+ upb_strtable table;
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Converting between internal table representation and user values.
+//
+// _upb_map_tokey() and _upb_map_fromkey() are inverses.
+// _upb_map_tovalue() and _upb_map_fromvalue() are inverses.
+//
+// These functions account for the fact that strings are treated differently
+// from other types when stored in a map.
+
+UPB_INLINE upb_StringView _upb_map_tokey(const void* key, size_t size) {
+ if (size == UPB_MAPTYPE_STRING) {
+ return *(upb_StringView*)key;
+ } else {
+ return upb_StringView_FromDataAndSize((const char*)key, size);
+ }
+}
+
+UPB_INLINE void _upb_map_fromkey(upb_StringView key, void* out, size_t size) {
+ if (size == UPB_MAPTYPE_STRING) {
+ memcpy(out, &key, sizeof(key));
+ } else {
+ memcpy(out, key.data, size);
+ }
+}
+
+UPB_INLINE bool _upb_map_tovalue(const void* val, size_t size,
+ upb_value* msgval, upb_Arena* a) {
+ if (size == UPB_MAPTYPE_STRING) {
+ upb_StringView* strp = (upb_StringView*)upb_Arena_Malloc(a, sizeof(*strp));
+ if (!strp) return false;
+ *strp = *(upb_StringView*)val;
+ *msgval = upb_value_ptr(strp);
+ } else {
+ memcpy(msgval, val, size);
+ }
+ return true;
+}
+
+UPB_INLINE void _upb_map_fromvalue(upb_value val, void* out, size_t size) {
+ if (size == UPB_MAPTYPE_STRING) {
+ const upb_StringView* strp = (const upb_StringView*)upb_value_getptr(val);
+ memcpy(out, strp, sizeof(upb_StringView));
+ } else {
+ memcpy(out, &val, size);
+ }
+}
+
+UPB_INLINE void* _upb_map_next(const upb_Map* map, size_t* iter) {
+ upb_strtable_iter it;
+ it.t = &map->table;
+ it.index = *iter;
+ upb_strtable_next(&it);
+ *iter = it.index;
+ if (upb_strtable_done(&it)) return NULL;
+ return (void*)str_tabent(&it);
+}
+
+UPB_INLINE void _upb_Map_Clear(upb_Map* map) {
+ upb_strtable_clear(&map->table);
+}
+
+UPB_INLINE bool _upb_Map_Delete(upb_Map* map, const void* key, size_t key_size,
+ upb_value* val) {
+ upb_StringView k = _upb_map_tokey(key, key_size);
+ return upb_strtable_remove2(&map->table, k.data, k.size, val);
+}
+
+UPB_INLINE bool _upb_Map_Get(const upb_Map* map, const void* key,
+ size_t key_size, void* val, size_t val_size) {
+ upb_value tabval;
+ upb_StringView k = _upb_map_tokey(key, key_size);
+ bool ret = upb_strtable_lookup2(&map->table, k.data, k.size, &tabval);
+ if (ret && val) {
+ _upb_map_fromvalue(tabval, val, val_size);
+ }
+ return ret;
+}
+
+UPB_INLINE upb_MapInsertStatus _upb_Map_Insert(upb_Map* map, const void* key,
+ size_t key_size, void* val,
+ size_t val_size, upb_Arena* a) {
+ upb_StringView strkey = _upb_map_tokey(key, key_size);
+ upb_value tabval = {0};
+ if (!_upb_map_tovalue(val, val_size, &tabval, a)) {
+ return kUpb_MapInsertStatus_OutOfMemory;
+ }
+
+ // TODO(haberman): add overwrite operation to minimize number of lookups.
+ bool removed =
+ upb_strtable_remove2(&map->table, strkey.data, strkey.size, NULL);
+ if (!upb_strtable_insert(&map->table, strkey.data, strkey.size, tabval, a)) {
+ return kUpb_MapInsertStatus_OutOfMemory;
+ }
+ return removed ? kUpb_MapInsertStatus_Replaced
+ : kUpb_MapInsertStatus_Inserted;
+}
+
+UPB_INLINE size_t _upb_Map_Size(const upb_Map* map) {
+ return map->table.t.count;
+}
+
+// Strings/bytes are special-cased in maps.
+extern char _upb_Map_CTypeSizeTable[12];
+
+UPB_INLINE size_t _upb_Map_CTypeSize(upb_CType ctype) {
+ return _upb_Map_CTypeSizeTable[ctype];
+}
+
+// Creates a new map on the given arena with this key/value type.
+upb_Map* _upb_Map_New(upb_Arena* a, size_t key_size, size_t value_size);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_COLLECTIONS_INTERNAL_MAP_H_ */
diff --git a/upb/upb/collections/internal/map_sorter.h b/upb/upb/collections/internal/map_sorter.h
new file mode 100644
index 0000000..fdc46f1
--- /dev/null
+++ b/upb/upb/collections/internal/map_sorter.h
@@ -0,0 +1,112 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE /////////////////////////
+
+#ifndef UPB_COLLECTIONS_INTERNAL_MAP_SORTER_H_
+#define UPB_COLLECTIONS_INTERNAL_MAP_SORTER_H_
+
+#include <stdlib.h>
+
+#include "upb/collections/internal/map.h"
+#include "upb/message/internal/extension.h"
+#include "upb/message/internal/map_entry.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// _upb_mapsorter sorts maps and provides ordered iteration over the entries.
+// Since maps can be recursive (map values can be messages which contain other
+// maps), _upb_mapsorter can contain a stack of maps.
+
+typedef struct {
+ void const** entries;
+ int size;
+ int cap;
+} _upb_mapsorter;
+
+typedef struct {
+ int start;
+ int pos;
+ int end;
+} _upb_sortedmap;
+
+UPB_INLINE void _upb_mapsorter_init(_upb_mapsorter* s) {
+ s->entries = NULL;
+ s->size = 0;
+ s->cap = 0;
+}
+
+UPB_INLINE void _upb_mapsorter_destroy(_upb_mapsorter* s) {
+ if (s->entries) free(s->entries);
+}
+
+UPB_INLINE bool _upb_sortedmap_next(_upb_mapsorter* s, const upb_Map* map,
+ _upb_sortedmap* sorted, upb_MapEntry* ent) {
+ if (sorted->pos == sorted->end) return false;
+ const upb_tabent* tabent = (const upb_tabent*)s->entries[sorted->pos++];
+ upb_StringView key = upb_tabstrview(tabent->key);
+ _upb_map_fromkey(key, &ent->data.k, map->key_size);
+ upb_value val = {tabent->val.val};
+ _upb_map_fromvalue(val, &ent->data.v, map->val_size);
+ return true;
+}
+
+UPB_INLINE bool _upb_sortedmap_nextext(_upb_mapsorter* s,
+ _upb_sortedmap* sorted,
+ const upb_Message_Extension** ext) {
+ if (sorted->pos == sorted->end) return false;
+ *ext = (const upb_Message_Extension*)s->entries[sorted->pos++];
+ return true;
+}
+
+UPB_INLINE void _upb_mapsorter_popmap(_upb_mapsorter* s,
+ _upb_sortedmap* sorted) {
+ s->size = sorted->start;
+}
+
+bool _upb_mapsorter_pushmap(_upb_mapsorter* s, upb_FieldType key_type,
+ const upb_Map* map, _upb_sortedmap* sorted);
+
+bool _upb_mapsorter_pushexts(_upb_mapsorter* s,
+ const upb_Message_Extension* exts, size_t count,
+ _upb_sortedmap* sorted);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_COLLECTIONS_INTERNAL_MAP_SORTER_H_ */
diff --git a/upb/upb/collections/map.c b/upb/upb/collections/map.c
new file mode 100644
index 0000000..a445e59
--- /dev/null
+++ b/upb/upb/collections/map.c
@@ -0,0 +1,145 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/collections/map.h"
+
+#include <string.h>
+
+#include "upb/collections/internal/map.h"
+#include "upb/mem/arena.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+// Strings/bytes are special-cased in maps.
+char _upb_Map_CTypeSizeTable[12] = {
+ [kUpb_CType_Bool] = 1,
+ [kUpb_CType_Float] = 4,
+ [kUpb_CType_Int32] = 4,
+ [kUpb_CType_UInt32] = 4,
+ [kUpb_CType_Enum] = 4,
+ [kUpb_CType_Message] = sizeof(void*),
+ [kUpb_CType_Double] = 8,
+ [kUpb_CType_Int64] = 8,
+ [kUpb_CType_UInt64] = 8,
+ [kUpb_CType_String] = UPB_MAPTYPE_STRING,
+ [kUpb_CType_Bytes] = UPB_MAPTYPE_STRING,
+};
+
+upb_Map* upb_Map_New(upb_Arena* a, upb_CType key_type, upb_CType value_type) {
+ return _upb_Map_New(a, _upb_Map_CTypeSize(key_type),
+ _upb_Map_CTypeSize(value_type));
+}
+
+size_t upb_Map_Size(const upb_Map* map) { return _upb_Map_Size(map); }
+
+bool upb_Map_Get(const upb_Map* map, upb_MessageValue key,
+ upb_MessageValue* val) {
+ return _upb_Map_Get(map, &key, map->key_size, val, map->val_size);
+}
+
+void upb_Map_Clear(upb_Map* map) { _upb_Map_Clear(map); }
+
+upb_MapInsertStatus upb_Map_Insert(upb_Map* map, upb_MessageValue key,
+ upb_MessageValue val, upb_Arena* arena) {
+ UPB_ASSERT(arena);
+ return (upb_MapInsertStatus)_upb_Map_Insert(map, &key, map->key_size, &val,
+ map->val_size, arena);
+}
+
+bool upb_Map_Delete(upb_Map* map, upb_MessageValue key, upb_MessageValue* val) {
+ upb_value v;
+ const bool removed = _upb_Map_Delete(map, &key, map->key_size, &v);
+ if (val) _upb_map_fromvalue(v, val, map->val_size);
+ return removed;
+}
+
+bool upb_Map_Next(const upb_Map* map, upb_MessageValue* key,
+ upb_MessageValue* val, size_t* iter) {
+ upb_StringView k;
+ upb_value v;
+ const bool ok = upb_strtable_next2(&map->table, &k, &v, (intptr_t*)iter);
+ if (ok) {
+ _upb_map_fromkey(k, key, map->key_size);
+ _upb_map_fromvalue(v, val, map->val_size);
+ }
+ return ok;
+}
+
+UPB_API void upb_Map_SetEntryValue(upb_Map* map, size_t iter,
+ upb_MessageValue val) {
+ upb_value v;
+ _upb_map_tovalue(&val, map->val_size, &v, NULL);
+ upb_strtable_setentryvalue(&map->table, iter, v);
+}
+
+bool upb_MapIterator_Next(const upb_Map* map, size_t* iter) {
+ return _upb_map_next(map, iter);
+}
+
+bool upb_MapIterator_Done(const upb_Map* map, size_t iter) {
+ upb_strtable_iter i;
+ UPB_ASSERT(iter != kUpb_Map_Begin);
+ i.t = &map->table;
+ i.index = iter;
+ return upb_strtable_done(&i);
+}
+
+// Returns the key and value for this entry of the map.
+upb_MessageValue upb_MapIterator_Key(const upb_Map* map, size_t iter) {
+ upb_strtable_iter i;
+ upb_MessageValue ret;
+ i.t = &map->table;
+ i.index = iter;
+ _upb_map_fromkey(upb_strtable_iter_key(&i), &ret, map->key_size);
+ return ret;
+}
+
+upb_MessageValue upb_MapIterator_Value(const upb_Map* map, size_t iter) {
+ upb_strtable_iter i;
+ upb_MessageValue ret;
+ i.t = &map->table;
+ i.index = iter;
+ _upb_map_fromvalue(upb_strtable_iter_value(&i), &ret, map->val_size);
+ return ret;
+}
+
+// EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE /////////////////////////
+
+upb_Map* _upb_Map_New(upb_Arena* a, size_t key_size, size_t value_size) {
+ upb_Map* map = upb_Arena_Malloc(a, sizeof(upb_Map));
+ if (!map) return NULL;
+
+ upb_strtable_init(&map->table, 4, a);
+ map->key_size = key_size;
+ map->val_size = value_size;
+
+ return map;
+}
diff --git a/upb/upb/collections/map.h b/upb/upb/collections/map.h
new file mode 100644
index 0000000..10b9c36
--- /dev/null
+++ b/upb/upb/collections/map.h
@@ -0,0 +1,143 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_COLLECTIONS_MAP_H_
+#define UPB_COLLECTIONS_MAP_H_
+
+#include "upb/base/descriptor_constants.h"
+#include "upb/collections/message_value.h"
+#include "upb/mem/arena.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Creates a new map on the given arena with the given key/value size.
+UPB_API upb_Map* upb_Map_New(upb_Arena* a, upb_CType key_type,
+ upb_CType value_type);
+
+// Returns the number of entries in the map.
+UPB_API size_t upb_Map_Size(const upb_Map* map);
+
+// Stores a value for the given key into |*val| (or the zero value if the key is
+// not present). Returns whether the key was present. The |val| pointer may be
+// NULL, in which case the function tests whether the given key is present.
+UPB_API bool upb_Map_Get(const upb_Map* map, upb_MessageValue key,
+ upb_MessageValue* val);
+
+// Removes all entries in the map.
+UPB_API void upb_Map_Clear(upb_Map* map);
+
+typedef enum {
+ kUpb_MapInsertStatus_Inserted = 0,
+ kUpb_MapInsertStatus_Replaced = 1,
+ kUpb_MapInsertStatus_OutOfMemory = 2,
+} upb_MapInsertStatus;
+
+// Sets the given key to the given value, returning whether the key was inserted
+// or replaced. If the key was inserted, then any existing iterators will be
+// invalidated.
+UPB_API upb_MapInsertStatus upb_Map_Insert(upb_Map* map, upb_MessageValue key,
+ upb_MessageValue val,
+ upb_Arena* arena);
+
+// Sets the given key to the given value. Returns false if memory allocation
+// failed. If the key is newly inserted, then any existing iterators will be
+// invalidated.
+UPB_API_INLINE bool upb_Map_Set(upb_Map* map, upb_MessageValue key,
+ upb_MessageValue val, upb_Arena* arena) {
+ return upb_Map_Insert(map, key, val, arena) !=
+ kUpb_MapInsertStatus_OutOfMemory;
+}
+
+// Deletes this key from the table. Returns true if the key was present.
+// If present and |val| is non-NULL, stores the deleted value.
+UPB_API bool upb_Map_Delete(upb_Map* map, upb_MessageValue key,
+ upb_MessageValue* val);
+
+// (DEPRECATED and going away soon. Do not use.)
+UPB_INLINE bool upb_Map_Delete2(upb_Map* map, upb_MessageValue key,
+ upb_MessageValue* val) {
+ return upb_Map_Delete(map, key, val);
+}
+
+// Map iteration:
+//
+// size_t iter = kUpb_Map_Begin;
+// upb_MessageValue key, val;
+// while (upb_Map_Next(map, &key, &val, &iter)) {
+// ...
+// }
+
+#define kUpb_Map_Begin ((size_t)-1)
+
+// Advances to the next entry. Returns false if no more entries are present.
+// Otherwise returns true and populates both *key and *value.
+UPB_API bool upb_Map_Next(const upb_Map* map, upb_MessageValue* key,
+ upb_MessageValue* val, size_t* iter);
+
+// Sets the value for the entry pointed to by iter.
+// WARNING: this does not currently work for string values!
+UPB_API void upb_Map_SetEntryValue(upb_Map* map, size_t iter,
+ upb_MessageValue val);
+
+// DEPRECATED iterator, slated for removal.
+
+/* Map iteration:
+ *
+ * size_t iter = kUpb_Map_Begin;
+ * while (upb_MapIterator_Next(map, &iter)) {
+ * upb_MessageValue key = upb_MapIterator_Key(map, iter);
+ * upb_MessageValue val = upb_MapIterator_Value(map, iter);
+ * }
+ */
+
+// Advances to the next entry. Returns false if no more entries are present.
+UPB_API bool upb_MapIterator_Next(const upb_Map* map, size_t* iter);
+
+// Returns true if the iterator still points to a valid entry, or false if the
+// iterator is past the last element. It is an error to call this function with
+// kUpb_Map_Begin (you must call next() at least once first).
+UPB_API bool upb_MapIterator_Done(const upb_Map* map, size_t iter);
+
+// Returns the key and value for this entry of the map.
+UPB_API upb_MessageValue upb_MapIterator_Key(const upb_Map* map, size_t iter);
+UPB_API upb_MessageValue upb_MapIterator_Value(const upb_Map* map, size_t iter);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_COLLECTIONS_MAP_H_ */
diff --git a/upb/upb/collections/map_gencode_util.h b/upb/upb/collections/map_gencode_util.h
new file mode 100644
index 0000000..c6df545
--- /dev/null
+++ b/upb/upb/collections/map_gencode_util.h
@@ -0,0 +1,81 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// These functions are only used by generated code.
+
+#ifndef UPB_COLLECTIONS_MAP_GENCODE_UTIL_H_
+#define UPB_COLLECTIONS_MAP_GENCODE_UTIL_H_
+
+#include "upb/collections/internal/map.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Message map operations, these get the map from the message first.
+
+UPB_INLINE void _upb_msg_map_key(const void* msg, void* key, size_t size) {
+ const upb_tabent* ent = (const upb_tabent*)msg;
+ uint32_t u32len;
+ upb_StringView k;
+ k.data = upb_tabstr(ent->key, &u32len);
+ k.size = u32len;
+ _upb_map_fromkey(k, key, size);
+}
+
+UPB_INLINE void _upb_msg_map_value(const void* msg, void* val, size_t size) {
+ const upb_tabent* ent = (const upb_tabent*)msg;
+ upb_value v = {ent->val.val};
+ _upb_map_fromvalue(v, val, size);
+}
+
+UPB_INLINE void _upb_msg_map_set_value(void* msg, const void* val,
+ size_t size) {
+ upb_tabent* ent = (upb_tabent*)msg;
+ // This is like _upb_map_tovalue() except the entry already exists
+ // so we can reuse the allocated upb_StringView for string fields.
+ if (size == UPB_MAPTYPE_STRING) {
+ upb_StringView* strp = (upb_StringView*)(uintptr_t)ent->val.val;
+ memcpy(strp, val, sizeof(*strp));
+ } else {
+ memcpy(&ent->val.val, val, size);
+ }
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_COLLECTIONS_MAP_GENCODE_UTIL_H_ */
diff --git a/upb/upb/collections/map_sorter.c b/upb/upb/collections/map_sorter.c
new file mode 100644
index 0000000..9ad2a66
--- /dev/null
+++ b/upb/upb/collections/map_sorter.c
@@ -0,0 +1,170 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/collections/internal/map_sorter.h"
+
+#include "upb/base/internal/log2.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+static void _upb_mapsorter_getkeys(const void* _a, const void* _b, void* a_key,
+ void* b_key, size_t size) {
+ const upb_tabent* const* a = _a;
+ const upb_tabent* const* b = _b;
+ upb_StringView a_tabkey = upb_tabstrview((*a)->key);
+ upb_StringView b_tabkey = upb_tabstrview((*b)->key);
+ _upb_map_fromkey(a_tabkey, a_key, size);
+ _upb_map_fromkey(b_tabkey, b_key, size);
+}
+
+static int _upb_mapsorter_cmpi64(const void* _a, const void* _b) {
+ int64_t a, b;
+ _upb_mapsorter_getkeys(_a, _b, &a, &b, 8);
+ return a < b ? -1 : a > b;
+}
+
+static int _upb_mapsorter_cmpu64(const void* _a, const void* _b) {
+ uint64_t a, b;
+ _upb_mapsorter_getkeys(_a, _b, &a, &b, 8);
+ return a < b ? -1 : a > b;
+}
+
+static int _upb_mapsorter_cmpi32(const void* _a, const void* _b) {
+ int32_t a, b;
+ _upb_mapsorter_getkeys(_a, _b, &a, &b, 4);
+ return a < b ? -1 : a > b;
+}
+
+static int _upb_mapsorter_cmpu32(const void* _a, const void* _b) {
+ uint32_t a, b;
+ _upb_mapsorter_getkeys(_a, _b, &a, &b, 4);
+ return a < b ? -1 : a > b;
+}
+
+static int _upb_mapsorter_cmpbool(const void* _a, const void* _b) {
+ bool a, b;
+ _upb_mapsorter_getkeys(_a, _b, &a, &b, 1);
+ return a < b ? -1 : a > b;
+}
+
+static int _upb_mapsorter_cmpstr(const void* _a, const void* _b) {
+ upb_StringView a, b;
+ _upb_mapsorter_getkeys(_a, _b, &a, &b, UPB_MAPTYPE_STRING);
+ size_t common_size = UPB_MIN(a.size, b.size);
+ int cmp = memcmp(a.data, b.data, common_size);
+ if (cmp) return -cmp;
+ return a.size < b.size ? -1 : a.size > b.size;
+}
+
+static int (*const compar[kUpb_FieldType_SizeOf])(const void*, const void*) = {
+ [kUpb_FieldType_Int64] = _upb_mapsorter_cmpi64,
+ [kUpb_FieldType_SFixed64] = _upb_mapsorter_cmpi64,
+ [kUpb_FieldType_SInt64] = _upb_mapsorter_cmpi64,
+
+ [kUpb_FieldType_UInt64] = _upb_mapsorter_cmpu64,
+ [kUpb_FieldType_Fixed64] = _upb_mapsorter_cmpu64,
+
+ [kUpb_FieldType_Int32] = _upb_mapsorter_cmpi32,
+ [kUpb_FieldType_SInt32] = _upb_mapsorter_cmpi32,
+ [kUpb_FieldType_SFixed32] = _upb_mapsorter_cmpi32,
+ [kUpb_FieldType_Enum] = _upb_mapsorter_cmpi32,
+
+ [kUpb_FieldType_UInt32] = _upb_mapsorter_cmpu32,
+ [kUpb_FieldType_Fixed32] = _upb_mapsorter_cmpu32,
+
+ [kUpb_FieldType_Bool] = _upb_mapsorter_cmpbool,
+
+ [kUpb_FieldType_String] = _upb_mapsorter_cmpstr,
+ [kUpb_FieldType_Bytes] = _upb_mapsorter_cmpstr,
+};
+
+static bool _upb_mapsorter_resize(_upb_mapsorter* s, _upb_sortedmap* sorted,
+ int size) {
+ sorted->start = s->size;
+ sorted->pos = sorted->start;
+ sorted->end = sorted->start + size;
+
+ if (sorted->end > s->cap) {
+ s->cap = upb_Log2CeilingSize(sorted->end);
+ s->entries = realloc(s->entries, s->cap * sizeof(*s->entries));
+ if (!s->entries) return false;
+ }
+
+ s->size = sorted->end;
+ return true;
+}
+
+bool _upb_mapsorter_pushmap(_upb_mapsorter* s, upb_FieldType key_type,
+ const upb_Map* map, _upb_sortedmap* sorted) {
+ int map_size = _upb_Map_Size(map);
+
+ if (!_upb_mapsorter_resize(s, sorted, map_size)) return false;
+
+ // Copy non-empty entries from the table to s->entries.
+ const void** dst = &s->entries[sorted->start];
+ const upb_tabent* src = map->table.t.entries;
+ const upb_tabent* end = src + upb_table_size(&map->table.t);
+ for (; src < end; src++) {
+ if (!upb_tabent_isempty(src)) {
+ *dst = src;
+ dst++;
+ }
+ }
+ UPB_ASSERT(dst == &s->entries[sorted->end]);
+
+ // Sort entries according to the key type.
+ qsort(&s->entries[sorted->start], map_size, sizeof(*s->entries),
+ compar[key_type]);
+ return true;
+}
+
+static int _upb_mapsorter_cmpext(const void* _a, const void* _b) {
+ const upb_Message_Extension* const* a = _a;
+ const upb_Message_Extension* const* b = _b;
+ uint32_t a_num = (*a)->ext->field.number;
+ uint32_t b_num = (*b)->ext->field.number;
+ assert(a_num != b_num);
+ return a_num < b_num ? -1 : 1;
+}
+
+bool _upb_mapsorter_pushexts(_upb_mapsorter* s,
+ const upb_Message_Extension* exts, size_t count,
+ _upb_sortedmap* sorted) {
+ if (!_upb_mapsorter_resize(s, sorted, count)) return false;
+
+ for (size_t i = 0; i < count; i++) {
+ s->entries[sorted->start + i] = &exts[i];
+ }
+
+ qsort(&s->entries[sorted->start], count, sizeof(*s->entries),
+ _upb_mapsorter_cmpext);
+ return true;
+}
diff --git a/upb/upb/collections/map_test.cc b/upb/upb/collections/map_test.cc
new file mode 100644
index 0000000..d584687
--- /dev/null
+++ b/upb/upb/collections/map_test.cc
@@ -0,0 +1,56 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/collections/map.h"
+
+#include "gtest/gtest.h"
+#include "upb/base/string_view.h"
+#include "upb/mem/arena.hpp"
+
+TEST(MapTest, DeleteRegression) {
+ upb::Arena arena;
+ upb_Map* map = upb_Map_New(arena.ptr(), kUpb_CType_Int32, kUpb_CType_String);
+
+ upb_MessageValue key;
+ key.int32_val = 0;
+
+ upb_MessageValue insert_value;
+ insert_value.str_val = upb_StringView_FromString("abcde");
+
+ upb_MapInsertStatus st = upb_Map_Insert(map, key, insert_value, arena.ptr());
+ EXPECT_EQ(kUpb_MapInsertStatus_Inserted, st);
+
+ upb_MessageValue delete_value;
+ bool removed = upb_Map_Delete(map, key, &delete_value);
+ EXPECT_TRUE(removed);
+
+ EXPECT_TRUE(
+ upb_StringView_IsEqual(insert_value.str_val, delete_value.str_val));
+}
diff --git a/upb/upb/collections/message_value.h b/upb/upb/collections/message_value.h
new file mode 100644
index 0000000..f015024a
--- /dev/null
+++ b/upb/upb/collections/message_value.h
@@ -0,0 +1,75 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Users should include array.h or map.h instead.
+// IWYU pragma: private, include "upb/collections/array.h"
+
+#ifndef UPB_MESSAGE_VALUE_H_
+#define UPB_MESSAGE_VALUE_H_
+
+#include "upb/base/string_view.h"
+#include "upb/message/tagged_ptr.h"
+#include "upb/mini_table/message.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+typedef struct upb_Array upb_Array;
+typedef struct upb_Map upb_Map;
+
+typedef union {
+ bool bool_val;
+ float float_val;
+ double double_val;
+ int32_t int32_val;
+ int64_t int64_val;
+ uint32_t uint32_val;
+ uint64_t uint64_val;
+ const upb_Array* array_val;
+ const upb_Map* map_val;
+ const upb_Message* msg_val;
+ upb_StringView str_val;
+
+ // EXPERIMENTAL: A tagged upb_Message*. Users must use this instead of
+ // msg_val if unlinked sub-messages may possibly be in use. See the
+ // documentation in kUpb_DecodeOption_ExperimentalAllowUnlinked for more
+ // information.
+ upb_TaggedMessagePtr tagged_msg_val;
+} upb_MessageValue;
+
+typedef union {
+ upb_Array* array;
+ upb_Map* map;
+ upb_Message* msg;
+} upb_MutableMessageValue;
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_MESSAGE_VALUE_H_ */
diff --git a/upb/upb/conformance/BUILD b/upb/upb/conformance/BUILD
new file mode 100644
index 0000000..843dfc9
--- /dev/null
+++ b/upb/upb/conformance/BUILD
@@ -0,0 +1,173 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load(
+ "//bazel:build_defs.bzl",
+ "UPB_DEFAULT_COPTS",
+ "make_shell_script",
+)
+load(
+ "//bazel:upb_proto_library.bzl",
+ "upb_proto_library",
+ "upb_proto_reflection_library",
+)
+
+# begin:google_only
+# package(default_applicable_licenses = ["//:license"])
+# end:google_only
+
+licenses(["notice"])
+
+config_setting(
+ name = "windows",
+ constraint_values = ["@platforms//os:windows"],
+ visibility = ["//visibility:public"],
+)
+
+upb_proto_library(
+ name = "conformance_upb_proto",
+ testonly = 1,
+ deps = ["@com_google_protobuf//conformance:conformance_proto"],
+)
+
+upb_proto_reflection_library(
+ name = "conformance_upb_proto_reflection",
+ testonly = 1,
+ deps = ["@com_google_protobuf//conformance:conformance_proto"],
+)
+
+upb_proto_reflection_library(
+ name = "test_messages_proto2_upbdefs",
+ testonly = 1,
+ visibility = [
+ "//visibility:private", # Only private by automation, not intent. Owner may accept CLs adding visibility. See go/scheuklappen#explicit-private.
+ ],
+ deps = ["@com_google_protobuf//src/google/protobuf:test_messages_proto2_proto"],
+)
+
+upb_proto_reflection_library(
+ name = "test_messages_proto3_upbdefs",
+ testonly = 1,
+ visibility = [
+ "//visibility:private", # Only private by automation, not intent. Owner may accept CLs adding visibility. See go/scheuklappen#explicit-private.
+ ],
+ deps = ["@com_google_protobuf//src/google/protobuf:test_messages_proto3_proto"],
+)
+
+cc_binary(
+ name = "conformance_upb",
+ testonly = 1,
+ srcs = ["conformance_upb.c"],
+ copts = UPB_DEFAULT_COPTS,
+ data = ["conformance_upb_failures.txt"],
+ target_compatible_with = select({
+ "@platforms//os:windows": ["@platforms//:incompatible"],
+ "//conditions:default": [],
+ }),
+ deps = [
+ ":conformance_upb_proto",
+ ":conformance_upb_proto_reflection",
+ ":test_messages_proto2_upbdefs",
+ ":test_messages_proto3_upbdefs",
+ "//:json",
+ "//:port",
+ "//:reflection",
+ "//:text",
+ "//:wire",
+ ],
+)
+
+make_shell_script(
+ name = "gen_test_conformance_upb",
+ out = "test_conformance_upb.sh",
+ contents = "$1 --enforce_recommended --failure_list $2 $3",
+)
+
+sh_test(
+ name = "test_conformance_upb",
+ srcs = ["test_conformance_upb.sh"],
+ args = [
+ "$(location @com_google_protobuf//conformance:conformance_test_runner)",
+ "$(location :conformance_upb_failures.txt)",
+ "$(location :conformance_upb)",
+ ],
+ data = [
+ "conformance_upb_failures.txt",
+ ":conformance_upb",
+ "@com_google_protobuf//conformance:conformance_test_runner",
+ ],
+ target_compatible_with = select({
+ "@platforms//os:windows": ["@platforms//:incompatible"],
+ "//conditions:default": [],
+ }),
+ deps = ["@bazel_tools//tools/bash/runfiles"],
+)
+
+cc_binary(
+ name = "conformance_upb_dynamic_minitable",
+ testonly = 1,
+ srcs = ["conformance_upb.c"],
+ copts = UPB_DEFAULT_COPTS + [
+ "-DREBUILD_MINITABLES",
+ ],
+ data = ["conformance_upb_failures.txt"],
+ target_compatible_with = select({
+ "@platforms//os:windows": ["@platforms//:incompatible"],
+ "//conditions:default": [],
+ }),
+ deps = [
+ ":conformance_upb_proto",
+ ":conformance_upb_proto_reflection",
+ ":test_messages_proto2_upbdefs",
+ ":test_messages_proto3_upbdefs",
+ "//:json",
+ "//:port",
+ "//:reflection",
+ "//:text",
+ "//:wire",
+ ],
+)
+
+make_shell_script(
+ name = "gen_test_conformance_upb_dynamic_minitable",
+ out = "test_conformance_upb_dynamic_minitable.sh",
+ contents = "$1 --enforce_recommended --failure_list $2 $3",
+)
+
+sh_test(
+ name = "test_conformance_upb_dynamic_minitable",
+ srcs = ["test_conformance_upb_dynamic_minitable.sh"],
+ args = [
+ "$(location @com_google_protobuf//conformance:conformance_test_runner)",
+ "$(location :conformance_upb_failures.txt)",
+ "$(location :conformance_upb_dynamic_minitable)",
+ ],
+ data = [
+ "conformance_upb_failures.txt",
+ ":conformance_upb_dynamic_minitable",
+ "@com_google_protobuf//conformance:conformance_test_runner",
+ ],
+ deps = ["@bazel_tools//tools/bash/runfiles"],
+)
diff --git a/upb/upb/conformance/conformance_upb.c b/upb/upb/conformance/conformance_upb.c
new file mode 100644
index 0000000..0a62201
--- /dev/null
+++ b/upb/upb/conformance/conformance_upb.c
@@ -0,0 +1,350 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+/* This is a upb implementation of the upb conformance tests, see:
+ * https://github.com/google/protobuf/tree/master/conformance
+ */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "conformance/conformance.upb.h"
+#include "conformance/conformance.upbdefs.h"
+#include "google/protobuf/test_messages_proto2.upbdefs.h"
+#include "google/protobuf/test_messages_proto3.upbdefs.h"
+#include "upb/json/decode.h"
+#include "upb/json/encode.h"
+#include "upb/reflection/message.h"
+#include "upb/text/encode.h"
+#include "upb/wire/decode.h"
+#include "upb/wire/encode.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+int test_count = 0;
+bool verbose = false; /* Set to true to get req/resp printed on stderr. */
+
+bool CheckedRead(int fd, void* buf, size_t len) {
+ size_t ofs = 0;
+ while (len > 0) {
+ ssize_t bytes_read = read(fd, (char*)buf + ofs, len);
+
+ if (bytes_read == 0) return false;
+
+ if (bytes_read < 0) {
+ perror("reading from test runner");
+ exit(1);
+ }
+
+ len -= bytes_read;
+ ofs += bytes_read;
+ }
+
+ return true;
+}
+
+void CheckedWrite(int fd, const void* buf, size_t len) {
+ if ((size_t)write(fd, buf, len) != len) {
+ perror("writing to test runner");
+ exit(1);
+ }
+}
+
+typedef struct {
+ const conformance_ConformanceRequest* request;
+ conformance_ConformanceResponse* response;
+ upb_Arena* arena;
+ const upb_DefPool* symtab;
+} ctx;
+
+bool parse_proto(upb_Message* msg, const upb_MessageDef* m, const ctx* c) {
+ upb_StringView proto =
+ conformance_ConformanceRequest_protobuf_payload(c->request);
+ if (upb_Decode(proto.data, proto.size, msg, upb_MessageDef_MiniTable(m), NULL,
+ 0, c->arena) == kUpb_DecodeStatus_Ok) {
+ return true;
+ } else {
+ static const char msg[] = "Parse error";
+ conformance_ConformanceResponse_set_parse_error(
+ c->response, upb_StringView_FromString(msg));
+ return false;
+ }
+}
+
+void serialize_proto(const upb_Message* msg, const upb_MessageDef* m,
+ const ctx* c) {
+ size_t len;
+ char* data;
+ upb_EncodeStatus status =
+ upb_Encode(msg, upb_MessageDef_MiniTable(m), 0, c->arena, &data, &len);
+ if (status == kUpb_EncodeStatus_Ok) {
+ conformance_ConformanceResponse_set_protobuf_payload(
+ c->response, upb_StringView_FromDataAndSize(data, len));
+ } else {
+ static const char msg[] = "Error serializing.";
+ conformance_ConformanceResponse_set_serialize_error(
+ c->response, upb_StringView_FromString(msg));
+ }
+}
+
+void serialize_text(const upb_Message* msg, const upb_MessageDef* m,
+ const ctx* c) {
+ size_t len;
+ size_t len2;
+ int opts = 0;
+ char* data;
+
+ if (!conformance_ConformanceRequest_print_unknown_fields(c->request)) {
+ opts |= UPB_TXTENC_SKIPUNKNOWN;
+ }
+
+ len = upb_TextEncode(msg, m, c->symtab, opts, NULL, 0);
+ data = upb_Arena_Malloc(c->arena, len + 1);
+ len2 = upb_TextEncode(msg, m, c->symtab, opts, data, len + 1);
+ UPB_ASSERT(len == len2);
+ conformance_ConformanceResponse_set_text_payload(
+ c->response, upb_StringView_FromDataAndSize(data, len));
+}
+
+bool parse_json(upb_Message* msg, const upb_MessageDef* m, const ctx* c) {
+ upb_StringView json = conformance_ConformanceRequest_json_payload(c->request);
+ upb_Status status;
+ int opts = 0;
+
+ if (conformance_ConformanceRequest_test_category(c->request) ==
+ conformance_JSON_IGNORE_UNKNOWN_PARSING_TEST) {
+ opts |= upb_JsonDecode_IgnoreUnknown;
+ }
+
+ upb_Status_Clear(&status);
+ if (upb_JsonDecode(json.data, json.size, msg, m, c->symtab, opts, c->arena,
+ &status)) {
+ return true;
+ } else {
+ const char* inerr = upb_Status_ErrorMessage(&status);
+ size_t len = strlen(inerr);
+ char* err = upb_Arena_Malloc(c->arena, len + 1);
+ memcpy(err, inerr, strlen(inerr));
+ err[len] = '\0';
+ conformance_ConformanceResponse_set_parse_error(
+ c->response, upb_StringView_FromString(err));
+ return false;
+ }
+}
+
+void serialize_json(const upb_Message* msg, const upb_MessageDef* m,
+ const ctx* c) {
+ size_t len;
+ size_t len2;
+ int opts = 0;
+ char* data;
+ upb_Status status;
+
+ upb_Status_Clear(&status);
+ len = upb_JsonEncode(msg, m, c->symtab, opts, NULL, 0, &status);
+
+ if (len == (size_t)-1) {
+ const char* inerr = upb_Status_ErrorMessage(&status);
+ size_t len = strlen(inerr);
+ char* err = upb_Arena_Malloc(c->arena, len + 1);
+ memcpy(err, inerr, strlen(inerr));
+ err[len] = '\0';
+ conformance_ConformanceResponse_set_serialize_error(
+ c->response, upb_StringView_FromString(err));
+ return;
+ }
+
+ data = upb_Arena_Malloc(c->arena, len + 1);
+ len2 = upb_JsonEncode(msg, m, c->symtab, opts, data, len + 1, &status);
+ UPB_ASSERT(len == len2);
+ conformance_ConformanceResponse_set_json_payload(
+ c->response, upb_StringView_FromDataAndSize(data, len));
+}
+
+bool parse_input(upb_Message* msg, const upb_MessageDef* m, const ctx* c) {
+ switch (conformance_ConformanceRequest_payload_case(c->request)) {
+ case conformance_ConformanceRequest_payload_protobuf_payload:
+ return parse_proto(msg, m, c);
+ case conformance_ConformanceRequest_payload_json_payload:
+ return parse_json(msg, m, c);
+ case conformance_ConformanceRequest_payload_NOT_SET:
+ fprintf(stderr, "conformance_upb: Request didn't have payload.\n");
+ return false;
+ default: {
+ static const char msg[] = "Unsupported input format.";
+ conformance_ConformanceResponse_set_skipped(
+ c->response, upb_StringView_FromString(msg));
+ return false;
+ }
+ }
+}
+
+void write_output(const upb_Message* msg, const upb_MessageDef* m,
+ const ctx* c) {
+ switch (conformance_ConformanceRequest_requested_output_format(c->request)) {
+ case conformance_UNSPECIFIED:
+ fprintf(stderr, "conformance_upb: Unspecified output format.\n");
+ exit(1);
+ case conformance_PROTOBUF:
+ serialize_proto(msg, m, c);
+ break;
+ case conformance_TEXT_FORMAT:
+ serialize_text(msg, m, c);
+ break;
+ case conformance_JSON:
+ serialize_json(msg, m, c);
+ break;
+ default: {
+ static const char msg[] = "Unsupported output format.";
+ conformance_ConformanceResponse_set_skipped(
+ c->response, upb_StringView_FromString(msg));
+ break;
+ }
+ }
+}
+
+void DoTest(const ctx* c) {
+ upb_Message* msg;
+ upb_StringView name = conformance_ConformanceRequest_message_type(c->request);
+ const upb_MessageDef* m =
+ upb_DefPool_FindMessageByNameWithSize(c->symtab, name.data, name.size);
+#if 0
+ // Handy code for limiting conformance tests to a single input payload.
+ // This is a hack since the conformance runner doesn't give an easy way to
+ // specify what test should be run.
+ const char skip[] = "\343>\010\301\002\344>\230?\001\230?\002\230?\003";
+ upb_StringView skip_str = upb_StringView_FromDataAndSize(skip, sizeof(skip) - 1);
+ upb_StringView pb_payload =
+ conformance_ConformanceRequest_protobuf_payload(c->request);
+ if (!upb_StringView_IsEqual(pb_payload, skip_str)) m = NULL;
+#endif
+
+ if (!m) {
+ static const char msg[] = "Unknown message type.";
+ conformance_ConformanceResponse_set_skipped(c->response,
+ upb_StringView_FromString(msg));
+ return;
+ }
+
+ msg = upb_Message_New(upb_MessageDef_MiniTable(m), c->arena);
+
+ if (parse_input(msg, m, c)) {
+ write_output(msg, m, c);
+ }
+}
+
+void debug_print(const char* label, const upb_Message* msg,
+ const upb_MessageDef* m, const ctx* c) {
+ char buf[512];
+ upb_TextEncode(msg, m, c->symtab, UPB_TXTENC_SINGLELINE, buf, sizeof(buf));
+ fprintf(stderr, "%s: %s\n", label, buf);
+}
+
+bool DoTestIo(upb_DefPool* symtab) {
+ upb_Status status;
+ char* input;
+ char* output;
+ uint32_t input_size;
+ size_t output_size;
+ ctx c;
+
+ if (!CheckedRead(STDIN_FILENO, &input_size, sizeof(uint32_t))) {
+ /* EOF. */
+ return false;
+ }
+
+ c.symtab = symtab;
+ c.arena = upb_Arena_New();
+ input = upb_Arena_Malloc(c.arena, input_size);
+
+ if (!CheckedRead(STDIN_FILENO, input, input_size)) {
+ fprintf(stderr, "conformance_upb: unexpected EOF on stdin.\n");
+ exit(1);
+ }
+
+ c.request = conformance_ConformanceRequest_parse(input, input_size, c.arena);
+ c.response = conformance_ConformanceResponse_new(c.arena);
+
+ if (c.request) {
+ DoTest(&c);
+ } else {
+ fprintf(stderr, "conformance_upb: parse of ConformanceRequest failed: %s\n",
+ upb_Status_ErrorMessage(&status));
+ }
+
+ output = conformance_ConformanceResponse_serialize(c.response, c.arena,
+ &output_size);
+
+ uint32_t network_out = (uint32_t)output_size;
+ CheckedWrite(STDOUT_FILENO, &network_out, sizeof(uint32_t));
+ CheckedWrite(STDOUT_FILENO, output, output_size);
+
+ test_count++;
+
+ if (verbose) {
+ debug_print("Request", c.request,
+ conformance_ConformanceRequest_getmsgdef(symtab), &c);
+ debug_print("Response", c.response,
+ conformance_ConformanceResponse_getmsgdef(symtab), &c);
+ fprintf(stderr, "\n");
+ }
+
+ upb_Arena_Free(c.arena);
+
+ return true;
+}
+
+int main(void) {
+ upb_DefPool* symtab = upb_DefPool_New();
+
+#ifdef REBUILD_MINITABLES
+ _upb_DefPool_LoadDefInitEx(
+ symtab, &google_protobuf_test_messages_proto2_proto_upbdefinit, true);
+ _upb_DefPool_LoadDefInitEx(
+ symtab, &google_protobuf_test_messages_proto3_proto_upbdefinit, true);
+#else
+ protobuf_test_messages_proto2_TestAllTypesProto2_getmsgdef(symtab);
+ protobuf_test_messages_proto3_TestAllTypesProto3_getmsgdef(symtab);
+#endif
+
+ while (1) {
+ if (!DoTestIo(symtab)) {
+ fprintf(stderr,
+ "conformance_upb: received EOF from test runner "
+ "after %d tests, exiting\n",
+ test_count);
+ upb_DefPool_Free(symtab);
+ return 0;
+ }
+ }
+}
diff --git a/upb/upb/conformance/conformance_upb_failures.txt b/upb/upb/conformance/conformance_upb_failures.txt
new file mode 100644
index 0000000..2fb4dc8
--- /dev/null
+++ b/upb/upb/conformance/conformance_upb_failures.txt
@@ -0,0 +1,2 @@
+Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput
+Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput
diff --git a/upb/upb/generated_code_support.h b/upb/upb/generated_code_support.h
new file mode 100644
index 0000000..d589640
--- /dev/null
+++ b/upb/upb/generated_code_support.h
@@ -0,0 +1,57 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_GENERATED_CODE_SUPPORT_H_
+#define UPB_GENERATED_CODE_SUPPORT_H_
+
+// IWYU pragma: begin_exports
+#include "upb/collections/array.h"
+#include "upb/collections/internal/array.h"
+#include "upb/collections/map_gencode_util.h"
+#include "upb/collections/message_value.h"
+#include "upb/message/accessors.h"
+#include "upb/message/internal/accessors.h"
+#include "upb/message/internal/extension.h"
+#include "upb/message/internal/message.h"
+#include "upb/message/message.h"
+#include "upb/mini_descriptor/decode.h"
+#include "upb/mini_table/enum.h"
+#include "upb/mini_table/extension.h"
+#include "upb/mini_table/extension_registry.h"
+#include "upb/mini_table/field.h"
+#include "upb/mini_table/file.h"
+#include "upb/mini_table/message.h"
+#include "upb/mini_table/sub.h"
+#include "upb/wire/decode.h"
+#include "upb/wire/decode_fast.h"
+#include "upb/wire/encode.h"
+// IWYU pragma: end_exports
+
+#endif // UPB_GENERATED_CODE_SUPPORT_H_
diff --git a/upb/upb/hash/BUILD b/upb/upb/hash/BUILD
new file mode 100644
index 0000000..93aa3cd
--- /dev/null
+++ b/upb/upb/hash/BUILD
@@ -0,0 +1,74 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load("//bazel:build_defs.bzl", "UPB_DEFAULT_COPTS")
+
+cc_library(
+ name = "hash",
+ srcs = [
+ "common.c",
+ ],
+ hdrs = [
+ "common.h",
+ "int_table.h",
+ "str_table.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ "//:base",
+ "//:base_internal",
+ "//:mem",
+ "//:port",
+ ],
+)
+
+cc_test(
+ name = "test",
+ srcs = ["test.cc"],
+ deps = [
+ ":hash",
+ "//:mem",
+ "//:port",
+ "@com_google_absl//absl/container:flat_hash_map",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+# begin:github_only
+filegroup(
+ name = "source_files",
+ srcs = glob(
+ [
+ "**/*.c",
+ "**/*.h",
+ ],
+ ),
+ visibility = [
+ "//cmake:__pkg__",
+ "//python/dist:__pkg__",
+ ]
+)
+# end:github_only
diff --git a/upb/upb/hash/common.c b/upb/upb/hash/common.c
new file mode 100644
index 0000000..d8addce
--- /dev/null
+++ b/upb/upb/hash/common.c
@@ -0,0 +1,876 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+/*
+ * upb_table Implementation
+ *
+ * Implementation is heavily inspired by Lua's ltable.c.
+ */
+
+#include <string.h>
+
+#include "upb/base/internal/log2.h"
+#include "upb/hash/int_table.h"
+#include "upb/hash/str_table.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#define UPB_MAXARRSIZE 16 // 2**16 = 64k.
+
+// From Chromium.
+#define ARRAY_SIZE(x) \
+ ((sizeof(x) / sizeof(0 [x])) / ((size_t)(!(sizeof(x) % sizeof(0 [x])))))
+
+static const double MAX_LOAD = 0.85;
+
+/* The minimum utilization of the array part of a mixed hash/array table. This
+ * is a speed/memory-usage tradeoff (though it's not straightforward because of
+ * cache effects). The lower this is, the more memory we'll use. */
+static const double MIN_DENSITY = 0.1;
+
+static bool is_pow2(uint64_t v) { return v == 0 || (v & (v - 1)) == 0; }
+
+static upb_value _upb_value_val(uint64_t val) {
+ upb_value ret;
+ _upb_value_setval(&ret, val);
+ return ret;
+}
+
+static int log2ceil(uint64_t v) {
+ int ret = 0;
+ bool pow2 = is_pow2(v);
+ while (v >>= 1) ret++;
+ ret = pow2 ? ret : ret + 1; // Ceiling.
+ return UPB_MIN(UPB_MAXARRSIZE, ret);
+}
+
+char* upb_strdup2(const char* s, size_t len, upb_Arena* a) {
+ size_t n;
+ char* p;
+
+ /* Prevent overflow errors. */
+ if (len == SIZE_MAX) return NULL;
+ /* Always null-terminate, even if binary data; but don't rely on the input to
+ * have a null-terminating byte since it may be a raw binary buffer. */
+ n = len + 1;
+ p = upb_Arena_Malloc(a, n);
+ if (p) {
+ if (len != 0) memcpy(p, s, len);
+ p[len] = 0;
+ }
+ return p;
+}
+
+/* A type to represent the lookup key of either a strtable or an inttable. */
+typedef union {
+ uintptr_t num;
+ struct {
+ const char* str;
+ size_t len;
+ } str;
+} lookupkey_t;
+
+static lookupkey_t strkey2(const char* str, size_t len) {
+ lookupkey_t k;
+ k.str.str = str;
+ k.str.len = len;
+ return k;
+}
+
+static lookupkey_t intkey(uintptr_t key) {
+ lookupkey_t k;
+ k.num = key;
+ return k;
+}
+
+typedef uint32_t hashfunc_t(upb_tabkey key);
+typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
+
+/* Base table (shared code) ***************************************************/
+
+static uint32_t upb_inthash(uintptr_t key) { return (uint32_t)key; }
+
+static const upb_tabent* upb_getentry(const upb_table* t, uint32_t hash) {
+ return t->entries + (hash & t->mask);
+}
+
+static bool upb_arrhas(upb_tabval key) { return key.val != (uint64_t)-1; }
+
+static bool isfull(upb_table* t) { return t->count == t->max_count; }
+
+static bool init(upb_table* t, uint8_t size_lg2, upb_Arena* a) {
+ size_t bytes;
+
+ t->count = 0;
+ t->size_lg2 = size_lg2;
+ t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
+ t->max_count = upb_table_size(t) * MAX_LOAD;
+ bytes = upb_table_size(t) * sizeof(upb_tabent);
+ if (bytes > 0) {
+ t->entries = upb_Arena_Malloc(a, bytes);
+ if (!t->entries) return false;
+ memset(t->entries, 0, bytes);
+ } else {
+ t->entries = NULL;
+ }
+ return true;
+}
+
+static upb_tabent* emptyent(upb_table* t, upb_tabent* e) {
+ upb_tabent* begin = t->entries;
+ upb_tabent* end = begin + upb_table_size(t);
+ for (e = e + 1; e < end; e++) {
+ if (upb_tabent_isempty(e)) return e;
+ }
+ for (e = begin; e < end; e++) {
+ if (upb_tabent_isempty(e)) return e;
+ }
+ UPB_ASSERT(false);
+ return NULL;
+}
+
+static upb_tabent* getentry_mutable(upb_table* t, uint32_t hash) {
+ return (upb_tabent*)upb_getentry(t, hash);
+}
+
+static const upb_tabent* findentry(const upb_table* t, lookupkey_t key,
+ uint32_t hash, eqlfunc_t* eql) {
+ const upb_tabent* e;
+
+ if (t->size_lg2 == 0) return NULL;
+ e = upb_getentry(t, hash);
+ if (upb_tabent_isempty(e)) return NULL;
+ while (1) {
+ if (eql(e->key, key)) return e;
+ if ((e = e->next) == NULL) return NULL;
+ }
+}
+
+static upb_tabent* findentry_mutable(upb_table* t, lookupkey_t key,
+ uint32_t hash, eqlfunc_t* eql) {
+ return (upb_tabent*)findentry(t, key, hash, eql);
+}
+
+static bool lookup(const upb_table* t, lookupkey_t key, upb_value* v,
+ uint32_t hash, eqlfunc_t* eql) {
+ const upb_tabent* e = findentry(t, key, hash, eql);
+ if (e) {
+ if (v) {
+ _upb_value_setval(v, e->val.val);
+ }
+ return true;
+ } else {
+ return false;
+ }
+}
+
+/* The given key must not already exist in the table. */
+static void insert(upb_table* t, lookupkey_t key, upb_tabkey tabkey,
+ upb_value val, uint32_t hash, hashfunc_t* hashfunc,
+ eqlfunc_t* eql) {
+ upb_tabent* mainpos_e;
+ upb_tabent* our_e;
+
+ UPB_ASSERT(findentry(t, key, hash, eql) == NULL);
+
+ t->count++;
+ mainpos_e = getentry_mutable(t, hash);
+ our_e = mainpos_e;
+
+ if (upb_tabent_isempty(mainpos_e)) {
+ /* Our main position is empty; use it. */
+ our_e->next = NULL;
+ } else {
+ /* Collision. */
+ upb_tabent* new_e = emptyent(t, mainpos_e);
+ /* Head of collider's chain. */
+ upb_tabent* chain = getentry_mutable(t, hashfunc(mainpos_e->key));
+ if (chain == mainpos_e) {
+ /* Existing ent is in its main position (it has the same hash as us, and
+ * is the head of our chain). Insert to new ent and append to this chain.
+ */
+ new_e->next = mainpos_e->next;
+ mainpos_e->next = new_e;
+ our_e = new_e;
+ } else {
+ /* Existing ent is not in its main position (it is a node in some other
+ * chain). This implies that no existing ent in the table has our hash.
+ * Evict it (updating its chain) and use its ent for head of our chain. */
+ *new_e = *mainpos_e; /* copies next. */
+ while (chain->next != mainpos_e) {
+ chain = (upb_tabent*)chain->next;
+ UPB_ASSERT(chain);
+ }
+ chain->next = new_e;
+ our_e = mainpos_e;
+ our_e->next = NULL;
+ }
+ }
+ our_e->key = tabkey;
+ our_e->val.val = val.val;
+ UPB_ASSERT(findentry(t, key, hash, eql) == our_e);
+}
+
+static bool rm(upb_table* t, lookupkey_t key, upb_value* val,
+ upb_tabkey* removed, uint32_t hash, eqlfunc_t* eql) {
+ upb_tabent* chain = getentry_mutable(t, hash);
+ if (upb_tabent_isempty(chain)) return false;
+ if (eql(chain->key, key)) {
+ /* Element to remove is at the head of its chain. */
+ t->count--;
+ if (val) _upb_value_setval(val, chain->val.val);
+ if (removed) *removed = chain->key;
+ if (chain->next) {
+ upb_tabent* move = (upb_tabent*)chain->next;
+ *chain = *move;
+ move->key = 0; /* Make the slot empty. */
+ } else {
+ chain->key = 0; /* Make the slot empty. */
+ }
+ return true;
+ } else {
+ /* Element to remove is either in a non-head position or not in the
+ * table. */
+ while (chain->next && !eql(chain->next->key, key)) {
+ chain = (upb_tabent*)chain->next;
+ }
+ if (chain->next) {
+ /* Found element to remove. */
+ upb_tabent* rm = (upb_tabent*)chain->next;
+ t->count--;
+ if (val) _upb_value_setval(val, chain->next->val.val);
+ if (removed) *removed = rm->key;
+ rm->key = 0; /* Make the slot empty. */
+ chain->next = rm->next;
+ return true;
+ } else {
+ /* Element to remove is not in the table. */
+ return false;
+ }
+ }
+}
+
+static size_t next(const upb_table* t, size_t i) {
+ do {
+ if (++i >= upb_table_size(t)) return SIZE_MAX - 1; /* Distinct from -1. */
+ } while (upb_tabent_isempty(&t->entries[i]));
+
+ return i;
+}
+
+static size_t begin(const upb_table* t) { return next(t, -1); }
+
+/* upb_strtable ***************************************************************/
+
+/* A simple "subclass" of upb_table that only adds a hash function for strings.
+ */
+
+static upb_tabkey strcopy(lookupkey_t k2, upb_Arena* a) {
+ uint32_t len = (uint32_t)k2.str.len;
+ char* str = upb_Arena_Malloc(a, k2.str.len + sizeof(uint32_t) + 1);
+ if (str == NULL) return 0;
+ memcpy(str, &len, sizeof(uint32_t));
+ if (k2.str.len) memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len);
+ str[sizeof(uint32_t) + k2.str.len] = '\0';
+ return (uintptr_t)str;
+}
+
+/* Adapted from ABSL's wyhash. */
+
+static uint64_t UnalignedLoad64(const void* p) {
+ uint64_t val;
+ memcpy(&val, p, 8);
+ return val;
+}
+
+static uint32_t UnalignedLoad32(const void* p) {
+ uint32_t val;
+ memcpy(&val, p, 4);
+ return val;
+}
+
+#if defined(_MSC_VER) && defined(_M_X64)
+#include <intrin.h>
+#endif
+
+/* Computes a * b, returning the low 64 bits of the result and storing the high
+ * 64 bits in |*high|. */
+static uint64_t upb_umul128(uint64_t v0, uint64_t v1, uint64_t* out_high) {
+#ifdef __SIZEOF_INT128__
+ __uint128_t p = v0;
+ p *= v1;
+ *out_high = (uint64_t)(p >> 64);
+ return (uint64_t)p;
+#elif defined(_MSC_VER) && defined(_M_X64)
+ return _umul128(v0, v1, out_high);
+#else
+ uint64_t a32 = v0 >> 32;
+ uint64_t a00 = v0 & 0xffffffff;
+ uint64_t b32 = v1 >> 32;
+ uint64_t b00 = v1 & 0xffffffff;
+ uint64_t high = a32 * b32;
+ uint64_t low = a00 * b00;
+ uint64_t mid1 = a32 * b00;
+ uint64_t mid2 = a00 * b32;
+ low += (mid1 << 32) + (mid2 << 32);
+ // Omit carry bit, for mixing we do not care about exact numerical precision.
+ high += (mid1 >> 32) + (mid2 >> 32);
+ *out_high = high;
+ return low;
+#endif
+}
+
+static uint64_t WyhashMix(uint64_t v0, uint64_t v1) {
+ uint64_t high;
+ uint64_t low = upb_umul128(v0, v1, &high);
+ return low ^ high;
+}
+
+static uint64_t Wyhash(const void* data, size_t len, uint64_t seed,
+ const uint64_t salt[]) {
+ const uint8_t* ptr = (const uint8_t*)data;
+ uint64_t starting_length = (uint64_t)len;
+ uint64_t current_state = seed ^ salt[0];
+
+ if (len > 64) {
+ // If we have more than 64 bytes, we're going to handle chunks of 64
+ // bytes at a time. We're going to build up two separate hash states
+ // which we will then hash together.
+ uint64_t duplicated_state = current_state;
+
+ do {
+ uint64_t a = UnalignedLoad64(ptr);
+ uint64_t b = UnalignedLoad64(ptr + 8);
+ uint64_t c = UnalignedLoad64(ptr + 16);
+ uint64_t d = UnalignedLoad64(ptr + 24);
+ uint64_t e = UnalignedLoad64(ptr + 32);
+ uint64_t f = UnalignedLoad64(ptr + 40);
+ uint64_t g = UnalignedLoad64(ptr + 48);
+ uint64_t h = UnalignedLoad64(ptr + 56);
+
+ uint64_t cs0 = WyhashMix(a ^ salt[1], b ^ current_state);
+ uint64_t cs1 = WyhashMix(c ^ salt[2], d ^ current_state);
+ current_state = (cs0 ^ cs1);
+
+ uint64_t ds0 = WyhashMix(e ^ salt[3], f ^ duplicated_state);
+ uint64_t ds1 = WyhashMix(g ^ salt[4], h ^ duplicated_state);
+ duplicated_state = (ds0 ^ ds1);
+
+ ptr += 64;
+ len -= 64;
+ } while (len > 64);
+
+ current_state = current_state ^ duplicated_state;
+ }
+
+ // We now have a data `ptr` with at most 64 bytes and the current state
+ // of the hashing state machine stored in current_state.
+ while (len > 16) {
+ uint64_t a = UnalignedLoad64(ptr);
+ uint64_t b = UnalignedLoad64(ptr + 8);
+
+ current_state = WyhashMix(a ^ salt[1], b ^ current_state);
+
+ ptr += 16;
+ len -= 16;
+ }
+
+ // We now have a data `ptr` with at most 16 bytes.
+ uint64_t a = 0;
+ uint64_t b = 0;
+ if (len > 8) {
+ // When we have at least 9 and at most 16 bytes, set A to the first 64
+ // bits of the input and B to the last 64 bits of the input. Yes, they will
+ // overlap in the middle if we are working with less than the full 16
+ // bytes.
+ a = UnalignedLoad64(ptr);
+ b = UnalignedLoad64(ptr + len - 8);
+ } else if (len > 3) {
+ // If we have at least 4 and at most 8 bytes, set A to the first 32
+ // bits and B to the last 32 bits.
+ a = UnalignedLoad32(ptr);
+ b = UnalignedLoad32(ptr + len - 4);
+ } else if (len > 0) {
+ // If we have at least 1 and at most 3 bytes, read all of the provided
+ // bits into A, with some adjustments.
+ a = ((ptr[0] << 16) | (ptr[len >> 1] << 8) | ptr[len - 1]);
+ b = 0;
+ } else {
+ a = 0;
+ b = 0;
+ }
+
+ uint64_t w = WyhashMix(a ^ salt[1], b ^ current_state);
+ uint64_t z = salt[1] ^ starting_length;
+ return WyhashMix(w, z);
+}
+
+const uint64_t kWyhashSalt[5] = {
+ 0x243F6A8885A308D3ULL, 0x13198A2E03707344ULL, 0xA4093822299F31D0ULL,
+ 0x082EFA98EC4E6C89ULL, 0x452821E638D01377ULL,
+};
+
+uint32_t _upb_Hash(const void* p, size_t n, uint64_t seed) {
+ return Wyhash(p, n, seed, kWyhashSalt);
+}
+
+static uint32_t _upb_Hash_NoSeed(const char* p, size_t n) {
+ return _upb_Hash(p, n, 0);
+}
+
+static uint32_t strhash(upb_tabkey key) {
+ uint32_t len;
+ char* str = upb_tabstr(key, &len);
+ return _upb_Hash_NoSeed(str, len);
+}
+
+static bool streql(upb_tabkey k1, lookupkey_t k2) {
+ uint32_t len;
+ char* str = upb_tabstr(k1, &len);
+ return len == k2.str.len && (len == 0 || memcmp(str, k2.str.str, len) == 0);
+}
+
+bool upb_strtable_init(upb_strtable* t, size_t expected_size, upb_Arena* a) {
+ // Multiply by approximate reciprocal of MAX_LOAD (0.85), with pow2
+ // denominator.
+ size_t need_entries = (expected_size + 1) * 1204 / 1024;
+ UPB_ASSERT(need_entries >= expected_size * 0.85);
+ int size_lg2 = upb_Log2Ceiling(need_entries);
+ return init(&t->t, size_lg2, a);
+}
+
+void upb_strtable_clear(upb_strtable* t) {
+ size_t bytes = upb_table_size(&t->t) * sizeof(upb_tabent);
+ t->t.count = 0;
+ memset((char*)t->t.entries, 0, bytes);
+}
+
+bool upb_strtable_resize(upb_strtable* t, size_t size_lg2, upb_Arena* a) {
+ upb_strtable new_table;
+ if (!init(&new_table.t, size_lg2, a)) return false;
+
+ intptr_t iter = UPB_STRTABLE_BEGIN;
+ upb_StringView key;
+ upb_value val;
+ while (upb_strtable_next2(t, &key, &val, &iter)) {
+ upb_strtable_insert(&new_table, key.data, key.size, val, a);
+ }
+ *t = new_table;
+ return true;
+}
+
+bool upb_strtable_insert(upb_strtable* t, const char* k, size_t len,
+ upb_value v, upb_Arena* a) {
+ lookupkey_t key;
+ upb_tabkey tabkey;
+ uint32_t hash;
+
+ if (isfull(&t->t)) {
+ /* Need to resize. New table of double the size, add old elements to it. */
+ if (!upb_strtable_resize(t, t->t.size_lg2 + 1, a)) {
+ return false;
+ }
+ }
+
+ key = strkey2(k, len);
+ tabkey = strcopy(key, a);
+ if (tabkey == 0) return false;
+
+ hash = _upb_Hash_NoSeed(key.str.str, key.str.len);
+ insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
+ return true;
+}
+
+bool upb_strtable_lookup2(const upb_strtable* t, const char* key, size_t len,
+ upb_value* v) {
+ uint32_t hash = _upb_Hash_NoSeed(key, len);
+ return lookup(&t->t, strkey2(key, len), v, hash, &streql);
+}
+
+bool upb_strtable_remove2(upb_strtable* t, const char* key, size_t len,
+ upb_value* val) {
+ uint32_t hash = _upb_Hash_NoSeed(key, len);
+ upb_tabkey tabkey;
+ return rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql);
+}
+
+/* Iteration */
+
+void upb_strtable_begin(upb_strtable_iter* i, const upb_strtable* t) {
+ i->t = t;
+ i->index = begin(&t->t);
+}
+
+void upb_strtable_next(upb_strtable_iter* i) {
+ i->index = next(&i->t->t, i->index);
+}
+
+bool upb_strtable_done(const upb_strtable_iter* i) {
+ if (!i->t) return true;
+ return i->index >= upb_table_size(&i->t->t) ||
+ upb_tabent_isempty(str_tabent(i));
+}
+
+upb_StringView upb_strtable_iter_key(const upb_strtable_iter* i) {
+ upb_StringView key;
+ uint32_t len;
+ UPB_ASSERT(!upb_strtable_done(i));
+ key.data = upb_tabstr(str_tabent(i)->key, &len);
+ key.size = len;
+ return key;
+}
+
+upb_value upb_strtable_iter_value(const upb_strtable_iter* i) {
+ UPB_ASSERT(!upb_strtable_done(i));
+ return _upb_value_val(str_tabent(i)->val.val);
+}
+
+void upb_strtable_iter_setdone(upb_strtable_iter* i) {
+ i->t = NULL;
+ i->index = SIZE_MAX;
+}
+
+bool upb_strtable_iter_isequal(const upb_strtable_iter* i1,
+ const upb_strtable_iter* i2) {
+ if (upb_strtable_done(i1) && upb_strtable_done(i2)) return true;
+ return i1->t == i2->t && i1->index == i2->index;
+}
+
+/* upb_inttable ***************************************************************/
+
+/* For inttables we use a hybrid structure where small keys are kept in an
+ * array and large keys are put in the hash table. */
+
+static uint32_t inthash(upb_tabkey key) { return upb_inthash(key); }
+
+static bool inteql(upb_tabkey k1, lookupkey_t k2) { return k1 == k2.num; }
+
+static upb_tabval* mutable_array(upb_inttable* t) {
+ return (upb_tabval*)t->array;
+}
+
+static upb_tabval* inttable_val(upb_inttable* t, uintptr_t key) {
+ if (key < t->array_size) {
+ return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
+ } else {
+ upb_tabent* e =
+ findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
+ return e ? &e->val : NULL;
+ }
+}
+
+static const upb_tabval* inttable_val_const(const upb_inttable* t,
+ uintptr_t key) {
+ return inttable_val((upb_inttable*)t, key);
+}
+
+size_t upb_inttable_count(const upb_inttable* t) {
+ return t->t.count + t->array_count;
+}
+
+static void check(upb_inttable* t) {
+ UPB_UNUSED(t);
+#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
+ {
+ // This check is very expensive (makes inserts/deletes O(N)).
+ size_t count = 0;
+ intptr_t iter = UPB_INTTABLE_BEGIN;
+ uintptr_t key;
+ upb_value val;
+ while (upb_inttable_next(t, &key, &val, &iter)) {
+ UPB_ASSERT(upb_inttable_lookup(t, key, NULL));
+ }
+ UPB_ASSERT(count == upb_inttable_count(t));
+ }
+#endif
+}
+
+bool upb_inttable_sizedinit(upb_inttable* t, size_t asize, int hsize_lg2,
+ upb_Arena* a) {
+ size_t array_bytes;
+
+ if (!init(&t->t, hsize_lg2, a)) return false;
+ /* Always make the array part at least 1 long, so that we know key 0
+ * won't be in the hash part, which simplifies things. */
+ t->array_size = UPB_MAX(1, asize);
+ t->array_count = 0;
+ array_bytes = t->array_size * sizeof(upb_value);
+ t->array = upb_Arena_Malloc(a, array_bytes);
+ if (!t->array) {
+ return false;
+ }
+ memset(mutable_array(t), 0xff, array_bytes);
+ check(t);
+ return true;
+}
+
+bool upb_inttable_init(upb_inttable* t, upb_Arena* a) {
+ return upb_inttable_sizedinit(t, 0, 4, a);
+}
+
+bool upb_inttable_insert(upb_inttable* t, uintptr_t key, upb_value val,
+ upb_Arena* a) {
+ upb_tabval tabval;
+ tabval.val = val.val;
+ UPB_ASSERT(
+ upb_arrhas(tabval)); /* This will reject (uint64_t)-1. Fix this. */
+
+ if (key < t->array_size) {
+ UPB_ASSERT(!upb_arrhas(t->array[key]));
+ t->array_count++;
+ mutable_array(t)[key].val = val.val;
+ } else {
+ if (isfull(&t->t)) {
+ /* Need to resize the hash part, but we re-use the array part. */
+ size_t i;
+ upb_table new_table;
+
+ if (!init(&new_table, t->t.size_lg2 + 1, a)) {
+ return false;
+ }
+
+ for (i = begin(&t->t); i < upb_table_size(&t->t); i = next(&t->t, i)) {
+ const upb_tabent* e = &t->t.entries[i];
+ uint32_t hash;
+ upb_value v;
+
+ _upb_value_setval(&v, e->val.val);
+ hash = upb_inthash(e->key);
+ insert(&new_table, intkey(e->key), e->key, v, hash, &inthash, &inteql);
+ }
+
+ UPB_ASSERT(t->t.count == new_table.count);
+
+ t->t = new_table;
+ }
+ insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
+ }
+ check(t);
+ return true;
+}
+
+bool upb_inttable_lookup(const upb_inttable* t, uintptr_t key, upb_value* v) {
+ const upb_tabval* table_v = inttable_val_const(t, key);
+ if (!table_v) return false;
+ if (v) _upb_value_setval(v, table_v->val);
+ return true;
+}
+
+bool upb_inttable_replace(upb_inttable* t, uintptr_t key, upb_value val) {
+ upb_tabval* table_v = inttable_val(t, key);
+ if (!table_v) return false;
+ table_v->val = val.val;
+ return true;
+}
+
+bool upb_inttable_remove(upb_inttable* t, uintptr_t key, upb_value* val) {
+ bool success;
+ if (key < t->array_size) {
+ if (upb_arrhas(t->array[key])) {
+ upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
+ t->array_count--;
+ if (val) {
+ _upb_value_setval(val, t->array[key].val);
+ }
+ mutable_array(t)[key] = empty;
+ success = true;
+ } else {
+ success = false;
+ }
+ } else {
+ success = rm(&t->t, intkey(key), val, NULL, upb_inthash(key), &inteql);
+ }
+ check(t);
+ return success;
+}
+
+void upb_inttable_compact(upb_inttable* t, upb_Arena* a) {
+ /* A power-of-two histogram of the table keys. */
+ size_t counts[UPB_MAXARRSIZE + 1] = {0};
+
+ /* The max key in each bucket. */
+ uintptr_t max[UPB_MAXARRSIZE + 1] = {0};
+
+ {
+ intptr_t iter = UPB_INTTABLE_BEGIN;
+ uintptr_t key;
+ upb_value val;
+ while (upb_inttable_next(t, &key, &val, &iter)) {
+ int bucket = log2ceil(key);
+ max[bucket] = UPB_MAX(max[bucket], key);
+ counts[bucket]++;
+ }
+ }
+
+ /* Find the largest power of two that satisfies the MIN_DENSITY
+ * definition (while actually having some keys). */
+ size_t arr_count = upb_inttable_count(t);
+ int size_lg2;
+ upb_inttable new_t;
+
+ for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 0; size_lg2--) {
+ if (counts[size_lg2] == 0) {
+ /* We can halve again without losing any entries. */
+ continue;
+ } else if (arr_count >= (1 << size_lg2) * MIN_DENSITY) {
+ break;
+ }
+
+ arr_count -= counts[size_lg2];
+ }
+
+ UPB_ASSERT(arr_count <= upb_inttable_count(t));
+
+ {
+ /* Insert all elements into new, perfectly-sized table. */
+ size_t arr_size = max[size_lg2] + 1; /* +1 so arr[max] will fit. */
+ size_t hash_count = upb_inttable_count(t) - arr_count;
+ size_t hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
+ int hashsize_lg2 = log2ceil(hash_size);
+
+ upb_inttable_sizedinit(&new_t, arr_size, hashsize_lg2, a);
+
+ {
+ intptr_t iter = UPB_INTTABLE_BEGIN;
+ uintptr_t key;
+ upb_value val;
+ while (upb_inttable_next(t, &key, &val, &iter)) {
+ upb_inttable_insert(&new_t, key, val, a);
+ }
+ }
+
+ UPB_ASSERT(new_t.array_size == arr_size);
+ UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2);
+ }
+ *t = new_t;
+}
+
+// Iteration.
+
+bool upb_inttable_next(const upb_inttable* t, uintptr_t* key, upb_value* val,
+ intptr_t* iter) {
+ intptr_t i = *iter;
+ if ((size_t)(i + 1) <= t->array_size) {
+ while ((size_t)++i < t->array_size) {
+ upb_tabval ent = t->array[i];
+ if (upb_arrhas(ent)) {
+ *key = i;
+ *val = _upb_value_val(ent.val);
+ *iter = i;
+ return true;
+ }
+ }
+ i--; // Back up to exactly one position before the start of the table.
+ }
+
+ size_t tab_idx = next(&t->t, i - t->array_size);
+ if (tab_idx < upb_table_size(&t->t)) {
+ upb_tabent* ent = &t->t.entries[tab_idx];
+ *key = ent->key;
+ *val = _upb_value_val(ent->val.val);
+ *iter = tab_idx + t->array_size;
+ return true;
+ }
+
+ return false;
+}
+
+void upb_inttable_removeiter(upb_inttable* t, intptr_t* iter) {
+ intptr_t i = *iter;
+ if ((size_t)i < t->array_size) {
+ t->array_count--;
+ mutable_array(t)[i].val = -1;
+ } else {
+ upb_tabent* ent = &t->t.entries[i - t->array_size];
+ upb_tabent* prev = NULL;
+
+ // Linear search, not great.
+ upb_tabent* end = &t->t.entries[upb_table_size(&t->t)];
+ for (upb_tabent* e = t->t.entries; e != end; e++) {
+ if (e->next == ent) {
+ prev = e;
+ break;
+ }
+ }
+
+ if (prev) {
+ prev->next = ent->next;
+ }
+
+ t->t.count--;
+ ent->key = 0;
+ ent->next = NULL;
+ }
+}
+
+bool upb_strtable_next2(const upb_strtable* t, upb_StringView* key,
+ upb_value* val, intptr_t* iter) {
+ size_t tab_idx = next(&t->t, *iter);
+ if (tab_idx < upb_table_size(&t->t)) {
+ upb_tabent* ent = &t->t.entries[tab_idx];
+ uint32_t len;
+ key->data = upb_tabstr(ent->key, &len);
+ key->size = len;
+ *val = _upb_value_val(ent->val.val);
+ *iter = tab_idx;
+ return true;
+ }
+
+ return false;
+}
+
+void upb_strtable_removeiter(upb_strtable* t, intptr_t* iter) {
+ intptr_t i = *iter;
+ upb_tabent* ent = &t->t.entries[i];
+ upb_tabent* prev = NULL;
+
+ // Linear search, not great.
+ upb_tabent* end = &t->t.entries[upb_table_size(&t->t)];
+ for (upb_tabent* e = t->t.entries; e != end; e++) {
+ if (e->next == ent) {
+ prev = e;
+ break;
+ }
+ }
+
+ if (prev) {
+ prev->next = ent->next;
+ }
+
+ t->t.count--;
+ ent->key = 0;
+ ent->next = NULL;
+}
+
+void upb_strtable_setentryvalue(upb_strtable* t, intptr_t iter, upb_value v) {
+ upb_tabent* ent = &t->t.entries[iter];
+ ent->val.val = v.val;
+}
diff --git a/upb/upb/hash/common.h b/upb/upb/hash/common.h
new file mode 100644
index 0000000..5280d34
--- /dev/null
+++ b/upb/upb/hash/common.h
@@ -0,0 +1,203 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+/*
+ * upb_table
+ *
+ * This header is INTERNAL-ONLY! Its interfaces are not public or stable!
+ * This file defines very fast int->upb_value (inttable) and string->upb_value
+ * (strtable) hash tables.
+ *
+ * The table uses chained scatter with Brent's variation (inspired by the Lua
+ * implementation of hash tables). The hash function for strings is Austin
+ * Appleby's "MurmurHash."
+ *
+ * The inttable uses uintptr_t as its key, which guarantees it can be used to
+ * store pointers or integers of at least 32 bits (upb isn't really useful on
+ * systems where sizeof(void*) < 4).
+ *
+ * The table must be homogeneous (all values of the same type). In debug
+ * mode, we check this on insert and lookup.
+ */
+
+#ifndef UPB_HASH_COMMON_H_
+#define UPB_HASH_COMMON_H_
+
+#include <string.h>
+
+#include "upb/base/string_view.h"
+#include "upb/mem/arena.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* upb_value ******************************************************************/
+
+typedef struct {
+ uint64_t val;
+} upb_value;
+
+/* Variant that works with a length-delimited rather than NULL-delimited string,
+ * as supported by strtable. */
+char* upb_strdup2(const char* s, size_t len, upb_Arena* a);
+
+UPB_INLINE void _upb_value_setval(upb_value* v, uint64_t val) { v->val = val; }
+
+/* For each value ctype, define the following set of functions:
+ *
+ * // Get/set an int32 from a upb_value.
+ * int32_t upb_value_getint32(upb_value val);
+ * void upb_value_setint32(upb_value *val, int32_t cval);
+ *
+ * // Construct a new upb_value from an int32.
+ * upb_value upb_value_int32(int32_t val); */
+#define FUNCS(name, membername, type_t, converter, proto_type) \
+ UPB_INLINE void upb_value_set##name(upb_value* val, type_t cval) { \
+ val->val = (converter)cval; \
+ } \
+ UPB_INLINE upb_value upb_value_##name(type_t val) { \
+ upb_value ret; \
+ upb_value_set##name(&ret, val); \
+ return ret; \
+ } \
+ UPB_INLINE type_t upb_value_get##name(upb_value val) { \
+ return (type_t)(converter)val.val; \
+ }
+
+FUNCS(int32, int32, int32_t, int32_t, UPB_CTYPE_INT32)
+FUNCS(int64, int64, int64_t, int64_t, UPB_CTYPE_INT64)
+FUNCS(uint32, uint32, uint32_t, uint32_t, UPB_CTYPE_UINT32)
+FUNCS(uint64, uint64, uint64_t, uint64_t, UPB_CTYPE_UINT64)
+FUNCS(bool, _bool, bool, bool, UPB_CTYPE_BOOL)
+FUNCS(cstr, cstr, char*, uintptr_t, UPB_CTYPE_CSTR)
+FUNCS(uintptr, uptr, uintptr_t, uintptr_t, UPB_CTYPE_UPTR)
+FUNCS(ptr, ptr, void*, uintptr_t, UPB_CTYPE_PTR)
+FUNCS(constptr, constptr, const void*, uintptr_t, UPB_CTYPE_CONSTPTR)
+
+#undef FUNCS
+
+UPB_INLINE void upb_value_setfloat(upb_value* val, float cval) {
+ memcpy(&val->val, &cval, sizeof(cval));
+}
+
+UPB_INLINE void upb_value_setdouble(upb_value* val, double cval) {
+ memcpy(&val->val, &cval, sizeof(cval));
+}
+
+UPB_INLINE upb_value upb_value_float(float cval) {
+ upb_value ret;
+ upb_value_setfloat(&ret, cval);
+ return ret;
+}
+
+UPB_INLINE upb_value upb_value_double(double cval) {
+ upb_value ret;
+ upb_value_setdouble(&ret, cval);
+ return ret;
+}
+
+#undef SET_TYPE
+
+/* upb_tabkey *****************************************************************/
+
+/* Either:
+ * 1. an actual integer key, or
+ * 2. a pointer to a string prefixed by its uint32_t length, owned by us.
+ *
+ * ...depending on whether this is a string table or an int table. We would
+ * make this a union of those two types, but C89 doesn't support statically
+ * initializing a non-first union member. */
+typedef uintptr_t upb_tabkey;
+
+UPB_INLINE char* upb_tabstr(upb_tabkey key, uint32_t* len) {
+ char* mem = (char*)key;
+ if (len) memcpy(len, mem, sizeof(*len));
+ return mem + sizeof(*len);
+}
+
+UPB_INLINE upb_StringView upb_tabstrview(upb_tabkey key) {
+ upb_StringView ret;
+ uint32_t len;
+ ret.data = upb_tabstr(key, &len);
+ ret.size = len;
+ return ret;
+}
+
+/* upb_tabval *****************************************************************/
+
+typedef struct upb_tabval {
+ uint64_t val;
+} upb_tabval;
+
+#define UPB_TABVALUE_EMPTY_INIT \
+ { -1 }
+
+/* upb_table ******************************************************************/
+
+typedef struct _upb_tabent {
+ upb_tabkey key;
+ upb_tabval val;
+
+ /* Internal chaining. This is const so we can create static initializers for
+ * tables. We cast away const sometimes, but *only* when the containing
+ * upb_table is known to be non-const. This requires a bit of care, but
+ * the subtlety is confined to table.c. */
+ const struct _upb_tabent* next;
+} upb_tabent;
+
+typedef struct {
+ size_t count; /* Number of entries in the hash part. */
+ uint32_t mask; /* Mask to turn hash value -> bucket. */
+ uint32_t max_count; /* Max count before we hit our load limit. */
+ uint8_t size_lg2; /* Size of the hashtable part is 2^size_lg2 entries. */
+ upb_tabent* entries;
+} upb_table;
+
+UPB_INLINE size_t upb_table_size(const upb_table* t) {
+ return t->size_lg2 ? 1 << t->size_lg2 : 0;
+}
+
+// Internal-only functions, in .h file only out of necessity.
+
+UPB_INLINE bool upb_tabent_isempty(const upb_tabent* e) { return e->key == 0; }
+
+uint32_t _upb_Hash(const void* p, size_t n, uint64_t seed);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_HASH_COMMON_H_ */
diff --git a/upb/upb/hash/int_table.h b/upb/upb/hash/int_table.h
new file mode 100644
index 0000000..367f1ac
--- /dev/null
+++ b/upb/upb/hash/int_table.h
@@ -0,0 +1,105 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_HASH_INT_TABLE_H_
+#define UPB_HASH_INT_TABLE_H_
+
+#include "upb/hash/common.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+typedef struct {
+ upb_table t; // For entries that don't fit in the array part.
+ const upb_tabval* array; // Array part of the table. See const note above.
+ size_t array_size; // Array part size.
+ size_t array_count; // Array part number of elements.
+} upb_inttable;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Initialize a table. If memory allocation failed, false is returned and
+// the table is uninitialized.
+bool upb_inttable_init(upb_inttable* table, upb_Arena* a);
+
+// Returns the number of values in the table.
+size_t upb_inttable_count(const upb_inttable* t);
+
+// Inserts the given key into the hashtable with the given value.
+// The key must not already exist in the hash table.
+// The value must not be UINTPTR_MAX.
+//
+// If a table resize was required but memory allocation failed, false is
+// returned and the table is unchanged.
+bool upb_inttable_insert(upb_inttable* t, uintptr_t key, upb_value val,
+ upb_Arena* a);
+
+// Looks up key in this table, returning "true" if the key was found.
+// If v is non-NULL, copies the value for this key into *v.
+bool upb_inttable_lookup(const upb_inttable* t, uintptr_t key, upb_value* v);
+
+// Removes an item from the table. Returns true if the remove was successful,
+// and stores the removed item in *val if non-NULL.
+bool upb_inttable_remove(upb_inttable* t, uintptr_t key, upb_value* val);
+
+// Updates an existing entry in an inttable.
+// If the entry does not exist, returns false and does nothing.
+// Unlike insert/remove, this does not invalidate iterators.
+bool upb_inttable_replace(upb_inttable* t, uintptr_t key, upb_value val);
+
+// Optimizes the table for the current set of entries, for both memory use and
+// lookup time. Client should call this after all entries have been inserted;
+// inserting more entries is legal, but will likely require a table resize.
+void upb_inttable_compact(upb_inttable* t, upb_Arena* a);
+
+// Iteration over inttable:
+//
+// intptr_t iter = UPB_INTTABLE_BEGIN;
+// uintptr_t key;
+// upb_value val;
+// while (upb_inttable_next(t, &key, &val, &iter)) {
+// // ...
+// }
+
+#define UPB_INTTABLE_BEGIN -1
+
+bool upb_inttable_next(const upb_inttable* t, uintptr_t* key, upb_value* val,
+ intptr_t* iter);
+void upb_inttable_removeiter(upb_inttable* t, intptr_t* iter);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_HASH_INT_TABLE_H_ */
diff --git a/upb/upb/hash/str_table.h b/upb/upb/hash/str_table.h
new file mode 100644
index 0000000..8522be9
--- /dev/null
+++ b/upb/upb/hash/str_table.h
@@ -0,0 +1,165 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_HASH_STR_TABLE_H_
+#define UPB_HASH_STR_TABLE_H_
+
+#include "upb/hash/common.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+typedef struct {
+ upb_table t;
+} upb_strtable;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Initialize a table. If memory allocation failed, false is returned and
+// the table is uninitialized.
+bool upb_strtable_init(upb_strtable* table, size_t expected_size, upb_Arena* a);
+
+// Returns the number of values in the table.
+UPB_INLINE size_t upb_strtable_count(const upb_strtable* t) {
+ return t->t.count;
+}
+
+void upb_strtable_clear(upb_strtable* t);
+
+// Inserts the given key into the hashtable with the given value.
+// The key must not already exist in the hash table. The key is not required
+// to be NULL-terminated, and the table will make an internal copy of the key.
+//
+// If a table resize was required but memory allocation failed, false is
+// returned and the table is unchanged. */
+bool upb_strtable_insert(upb_strtable* t, const char* key, size_t len,
+ upb_value val, upb_Arena* a);
+
+// Looks up key in this table, returning "true" if the key was found.
+// If v is non-NULL, copies the value for this key into *v.
+bool upb_strtable_lookup2(const upb_strtable* t, const char* key, size_t len,
+ upb_value* v);
+
+// For NULL-terminated strings.
+UPB_INLINE bool upb_strtable_lookup(const upb_strtable* t, const char* key,
+ upb_value* v) {
+ return upb_strtable_lookup2(t, key, strlen(key), v);
+}
+
+// Removes an item from the table. Returns true if the remove was successful,
+// and stores the removed item in *val if non-NULL.
+bool upb_strtable_remove2(upb_strtable* t, const char* key, size_t len,
+ upb_value* val);
+
+UPB_INLINE bool upb_strtable_remove(upb_strtable* t, const char* key,
+ upb_value* v) {
+ return upb_strtable_remove2(t, key, strlen(key), v);
+}
+
+// Exposed for testing only.
+bool upb_strtable_resize(upb_strtable* t, size_t size_lg2, upb_Arena* a);
+
+/* Iteration over strtable:
+ *
+ * intptr_t iter = UPB_STRTABLE_BEGIN;
+ * upb_StringView key;
+ * upb_value val;
+ * while (upb_strtable_next2(t, &key, &val, &iter)) {
+ * // ...
+ * }
+ */
+
+#define UPB_STRTABLE_BEGIN -1
+
+bool upb_strtable_next2(const upb_strtable* t, upb_StringView* key,
+ upb_value* val, intptr_t* iter);
+void upb_strtable_removeiter(upb_strtable* t, intptr_t* iter);
+void upb_strtable_setentryvalue(upb_strtable* t, intptr_t iter, upb_value v);
+
+/* DEPRECATED iterators, slated for removal.
+ *
+ * Iterators for string tables. We are subject to some kind of unusual
+ * design constraints:
+ *
+ * For high-level languages:
+ * - we must be able to guarantee that we don't crash or corrupt memory even if
+ * the program accesses an invalidated iterator.
+ *
+ * For C++11 range-based for:
+ * - iterators must be copyable
+ * - iterators must be comparable
+ * - it must be possible to construct an "end" value.
+ *
+ * Iteration order is undefined.
+ *
+ * Modifying the table invalidates iterators. upb_{str,int}table_done() is
+ * guaranteed to work even on an invalidated iterator, as long as the table it
+ * is iterating over has not been freed. Calling next() or accessing data from
+ * an invalidated iterator yields unspecified elements from the table, but it is
+ * guaranteed not to crash and to return real table elements (except when done()
+ * is true). */
+/* upb_strtable_iter **********************************************************/
+
+/* upb_strtable_iter i;
+ * upb_strtable_begin(&i, t);
+ * for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+ * const char *key = upb_strtable_iter_key(&i);
+ * const upb_value val = upb_strtable_iter_value(&i);
+ * // ...
+ * }
+ */
+
+typedef struct {
+ const upb_strtable* t;
+ size_t index;
+} upb_strtable_iter;
+
+UPB_INLINE const upb_tabent* str_tabent(const upb_strtable_iter* i) {
+ return &i->t->t.entries[i->index];
+}
+
+void upb_strtable_begin(upb_strtable_iter* i, const upb_strtable* t);
+void upb_strtable_next(upb_strtable_iter* i);
+bool upb_strtable_done(const upb_strtable_iter* i);
+upb_StringView upb_strtable_iter_key(const upb_strtable_iter* i);
+upb_value upb_strtable_iter_value(const upb_strtable_iter* i);
+void upb_strtable_iter_setdone(upb_strtable_iter* i);
+bool upb_strtable_iter_isequal(const upb_strtable_iter* i1,
+ const upb_strtable_iter* i2);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_HASH_STR_TABLE_H_ */
diff --git a/upb/upb/hash/test.cc b/upb/upb/hash/test.cc
new file mode 100644
index 0000000..c9a4b8c
--- /dev/null
+++ b/upb/upb/hash/test.cc
@@ -0,0 +1,263 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Tests for upb_table.
+
+#include <limits.h>
+#include <string.h>
+
+#include <iostream>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/container/flat_hash_map.h"
+#include "upb/hash/int_table.h"
+#include "upb/hash/str_table.h"
+#include "upb/mem/arena.hpp"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+using std::vector;
+
+TEST(Table, StringTable) {
+ vector<std::string> keys;
+ keys.push_back("google.protobuf.FileDescriptorSet");
+ keys.push_back("google.protobuf.FileDescriptorProto");
+ keys.push_back("google.protobuf.DescriptorProto");
+ keys.push_back("google.protobuf.DescriptorProto.ExtensionRange");
+ keys.push_back("google.protobuf.FieldDescriptorProto");
+ keys.push_back("google.protobuf.EnumDescriptorProto");
+ keys.push_back("google.protobuf.EnumValueDescriptorProto");
+ keys.push_back("google.protobuf.ServiceDescriptorProto");
+ keys.push_back("google.protobuf.MethodDescriptorProto");
+ keys.push_back("google.protobuf.FileOptions");
+ keys.push_back("google.protobuf.MessageOptions");
+ keys.push_back("google.protobuf.FieldOptions");
+ keys.push_back("google.protobuf.EnumOptions");
+ keys.push_back("google.protobuf.EnumValueOptions");
+ keys.push_back("google.protobuf.ServiceOptions");
+ keys.push_back("google.protobuf.MethodOptions");
+ keys.push_back("google.protobuf.UninterpretedOption");
+ keys.push_back("google.protobuf.UninterpretedOption.NamePart");
+
+ /* Initialize structures. */
+ upb::Arena arena;
+ upb_strtable t;
+ upb_strtable_init(&t, keys.size(), arena.ptr());
+ std::map<std::string, int32_t> m;
+ std::set<std::string> all;
+ for (const auto& key : keys) {
+ all.insert(key);
+ upb_value val = {uint64_t(key[0])};
+ upb_strtable_insert(&t, key.data(), key.size(), val, arena.ptr());
+ m[key] = key[0];
+ }
+
+ /* Test correctness. */
+ for (const auto& key : keys) {
+ upb_value val;
+ bool ok = upb_strtable_lookup2(&t, key.data(), key.size(), &val);
+ EXPECT_TRUE(ok);
+ EXPECT_EQ(val.val, uint64_t(key[0]));
+ EXPECT_EQ(m[key], key[0]);
+ }
+
+ intptr_t iter = UPB_STRTABLE_BEGIN;
+ upb_StringView key;
+ upb_value val;
+ while (upb_strtable_next2(&t, &key, &val, &iter)) {
+ std::set<std::string>::iterator i = all.find(key.data);
+ EXPECT_NE(i, all.end());
+ all.erase(i);
+ }
+ EXPECT_TRUE(all.empty());
+
+ // Test iteration with resizes.
+
+ for (int i = 0; i < 10; i++) {
+ intptr_t iter = UPB_STRTABLE_BEGIN;
+ while (upb_strtable_next2(&t, &key, &val, &iter)) {
+ // Even if we invalidate the iterator it should only return real elements.
+ EXPECT_EQ(val.val, m[key.data]);
+
+ // Force a resize even though the size isn't changing.
+ // Also forces the table size to grow so some new buckets end up empty.
+ bool ok = upb_strtable_resize(&t, 5 + i, arena.ptr());
+ EXPECT_TRUE(ok);
+ }
+ }
+}
+
+class IntTableTest : public testing::TestWithParam<int> {
+ void SetUp() override {
+ if (GetParam() > 0) {
+ for (int i = 0; i < GetParam(); i++) {
+ keys_.push_back(i + 1);
+ }
+ } else {
+ for (int32_t i = 0; i < 64; i++) {
+ if (i < 32)
+ keys_.push_back(i + 1);
+ else
+ keys_.push_back(10101 + i);
+ }
+ }
+ }
+
+ protected:
+ std::vector<int32_t> keys_;
+};
+
+TEST_P(IntTableTest, TestIntTable) {
+ /* Initialize structures. */
+ upb::Arena arena;
+ upb_inttable t;
+ upb_inttable_init(&t, arena.ptr());
+ uint32_t largest_key = 0;
+ std::map<uint32_t, uint32_t> m;
+ absl::flat_hash_map<uint32_t, uint32_t> hm;
+ for (const auto& key : keys_) {
+ largest_key = UPB_MAX((int32_t)largest_key, key);
+ upb_value val = upb_value_uint32(key * 2);
+ bool ok = upb_inttable_insert(&t, key, val, arena.ptr());
+ EXPECT_TRUE(ok);
+ m[key] = key * 2;
+ hm[key] = key * 2;
+ }
+ EXPECT_EQ(upb_inttable_count(&t), keys_.size());
+
+ /* Test correctness. */
+ int count = 0;
+ for (uint32_t i = 0; i <= largest_key; i++) {
+ upb_value val;
+ bool ok = upb_inttable_lookup(&t, i, &val);
+ if (ok) { /* Assume map implementation is correct. */
+ EXPECT_EQ(val.val, i * 2);
+ EXPECT_EQ(m[i], i * 2);
+ EXPECT_EQ(hm[i], i * 2);
+ count++;
+ }
+ }
+ EXPECT_EQ(count, keys_.size());
+ EXPECT_EQ(count, upb_inttable_count(&t));
+
+ // Test replace.
+ count = 0;
+ for (uint32_t i = 0; i <= largest_key; i++) {
+ upb_value val = upb_value_uint32(i * 3);
+ bool ok = upb_inttable_replace(&t, i, val);
+ if (ok) { /* Assume map implementation is correct. */
+ m[i] = i * 3;
+ hm[i] = i * 3;
+ count++;
+ }
+ }
+ EXPECT_EQ(count, keys_.size());
+ EXPECT_EQ(count, upb_inttable_count(&t));
+
+ // Compact and test correctness again.
+ upb_inttable_compact(&t, arena.ptr());
+ count = 0;
+ for (uint32_t i = 0; i <= largest_key; i++) {
+ upb_value val;
+ bool ok = upb_inttable_lookup(&t, i, &val);
+ if (ok) { /* Assume map implementation is correct. */
+ EXPECT_EQ(val.val, i * 3);
+ EXPECT_EQ(m[i], i * 3);
+ EXPECT_EQ(hm[i], i * 3);
+ count++;
+ }
+ }
+ EXPECT_EQ(count, keys_.size());
+ EXPECT_EQ(count, upb_inttable_count(&t));
+
+ for (const auto& key : keys_) {
+ upb_value val;
+ bool ok = upb_inttable_remove(&t, key, &val);
+ EXPECT_TRUE(ok);
+ EXPECT_EQ(val.val, (uint32_t)key * 3);
+ count--;
+ EXPECT_EQ(count, upb_inttable_count(&t));
+ }
+ EXPECT_EQ(0, upb_inttable_count(&t));
+}
+
+INSTANTIATE_TEST_SUITE_P(IntTableParams, IntTableTest,
+ testing::Values(8, 64, 512, -32));
+
+/*
+ * This test can't pass right now because the table can't store a value of
+ * (uint64_t)-1.
+ */
+TEST(Table, MaxValue) {
+ /*
+ typedef upb::TypedIntTable<uint64_t> Table;
+ Table table;
+ uintptr_t uint64_max = (uint64_t)-1;
+ table.Insert(1, uint64_max);
+ std::pair<bool, uint64_t> found = table.Lookup(1);
+ ASSERT(found.first);
+ ASSERT(found.second == uint64_max);
+ */
+}
+
+TEST(Table, Delete) {
+ upb::Arena arena;
+ upb_inttable t;
+ upb_inttable_init(&t, arena.ptr());
+ upb_inttable_insert(&t, 0, upb_value_bool(true), arena.ptr());
+ upb_inttable_insert(&t, 2, upb_value_bool(true), arena.ptr());
+ upb_inttable_insert(&t, 4, upb_value_bool(true), arena.ptr());
+ upb_inttable_compact(&t, arena.ptr());
+ upb_inttable_remove(&t, 0, nullptr);
+ upb_inttable_remove(&t, 2, nullptr);
+ upb_inttable_remove(&t, 4, nullptr);
+
+ intptr_t iter = UPB_INTTABLE_BEGIN;
+ uintptr_t key;
+ upb_value val;
+ while (upb_inttable_next(&t, &key, &val, &iter)) {
+ FAIL();
+ }
+}
+
+TEST(Table, Init) {
+ for (int i = 0; i < 2048; i++) {
+ /* Tests that the size calculations in init() (lg2 size for target load)
+ * work for all expected sizes. */
+ upb::Arena arena;
+ upb_strtable t;
+ upb_strtable_init(&t, i, arena.ptr());
+ }
+}
diff --git a/upb/upb/io/BUILD b/upb/upb/io/BUILD
new file mode 100644
index 0000000..7a512a3
--- /dev/null
+++ b/upb/upb/io/BUILD
@@ -0,0 +1,103 @@
+# begin:google_only
+# package(default_applicable_licenses = ["//:license"])
+# end:google_only
+
+cc_library(
+ name = "string",
+ hdrs = ["string.h"],
+ deps = [
+ "//:mem",
+ "//:port",
+ ],
+)
+
+cc_library(
+ name = "tokenizer",
+ srcs = ["tokenizer.c"],
+ hdrs = ["tokenizer.h"],
+ deps = [
+ ":string",
+ ":zero_copy_stream",
+ "//:base",
+ "//:lex",
+ "//:mem",
+ "//:port",
+ ],
+)
+
+cc_library(
+ name = "zero_copy_stream",
+ hdrs = [
+ "zero_copy_input_stream.h",
+ "zero_copy_output_stream.h",
+ ],
+ deps = [
+ "//:base",
+ "//:mem",
+ "//:port",
+ ],
+)
+
+# ====================================================================
+
+cc_library(
+ name = "chunked_stream",
+ testonly = 1,
+ srcs = [
+ "chunked_input_stream.c",
+ "chunked_output_stream.c",
+ ],
+ hdrs = [
+ "chunked_input_stream.h",
+ "chunked_output_stream.h",
+ ],
+ deps = [
+ ":zero_copy_stream",
+ "//:mem",
+ "//:port",
+ ],
+)
+
+cc_test(
+ name = "string_test",
+ size = "small",
+ srcs = ["string_test.cc"],
+ deps = [
+ ":string",
+ "//:mem",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "tokenizer_test",
+ size = "small",
+ srcs = ["tokenizer_test.cc"],
+ deps = [
+ ":chunked_stream",
+ ":string",
+ ":tokenizer",
+ ":zero_copy_stream",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/strings:str_format",
+ "//:lex",
+ "//:mem",
+ "//:port",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "zero_copy_stream_test",
+ size = "small",
+ srcs = [
+ "zero_copy_stream_test.cc",
+ ],
+ deps = [
+ ":chunked_stream",
+ ":zero_copy_stream",
+ "//:base",
+ "//:mem",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
diff --git a/upb/upb/io/README.md b/upb/upb/io/README.md
new file mode 100644
index 0000000..e0a7011
--- /dev/null
+++ b/upb/upb/io/README.md
@@ -0,0 +1,4 @@
+This subdir originated as a best-effort C approximation of the C++ code in
+in third_party/protobuf/io/ but over time the two will invariably diverge.
+Comments have generally been copied verbatim and may therefore refer to C++
+symbol names instead of C symbol names.
diff --git a/upb/upb/io/chunked_input_stream.c b/upb/upb/io/chunked_input_stream.c
new file mode 100644
index 0000000..58a1c95
--- /dev/null
+++ b/upb/upb/io/chunked_input_stream.c
@@ -0,0 +1,113 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/io/chunked_input_stream.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+typedef struct {
+ upb_ZeroCopyInputStream base;
+
+ const char* data;
+ size_t size;
+ size_t limit;
+ size_t position;
+ size_t last_returned_size;
+} upb_ChunkedInputStream;
+
+static const void* upb_ChunkedInputStream_Next(upb_ZeroCopyInputStream* z,
+ size_t* count,
+ upb_Status* status) {
+ upb_ChunkedInputStream* c = (upb_ChunkedInputStream*)z;
+ UPB_ASSERT(c->position <= c->size);
+
+ const char* out = c->data + c->position;
+
+ const size_t chunk = UPB_MIN(c->limit, c->size - c->position);
+ c->position += chunk;
+ c->last_returned_size = chunk;
+ *count = chunk;
+
+ return chunk ? out : NULL;
+}
+
+static void upb_ChunkedInputStream_BackUp(upb_ZeroCopyInputStream* z,
+ size_t count) {
+ upb_ChunkedInputStream* c = (upb_ChunkedInputStream*)z;
+
+ UPB_ASSERT(c->last_returned_size >= count);
+ c->position -= count;
+ c->last_returned_size -= count;
+}
+
+static bool upb_ChunkedInputStream_Skip(upb_ZeroCopyInputStream* z,
+ size_t count) {
+ upb_ChunkedInputStream* c = (upb_ChunkedInputStream*)z;
+
+ c->last_returned_size = 0; // Don't let caller back up.
+ if (count > c->size - c->position) {
+ c->position = c->size;
+ return false;
+ }
+
+ c->position += count;
+ return true;
+}
+
+static size_t upb_ChunkedInputStream_ByteCount(
+ const upb_ZeroCopyInputStream* z) {
+ const upb_ChunkedInputStream* c = (const upb_ChunkedInputStream*)z;
+
+ return c->position;
+}
+
+static const _upb_ZeroCopyInputStream_VTable upb_ChunkedInputStream_vtable = {
+ upb_ChunkedInputStream_Next,
+ upb_ChunkedInputStream_BackUp,
+ upb_ChunkedInputStream_Skip,
+ upb_ChunkedInputStream_ByteCount,
+};
+
+upb_ZeroCopyInputStream* upb_ChunkedInputStream_New(const void* data,
+ size_t size, size_t limit,
+ upb_Arena* arena) {
+ upb_ChunkedInputStream* c = upb_Arena_Malloc(arena, sizeof(*c));
+ if (!c || !limit) return NULL;
+
+ c->base.vtable = &upb_ChunkedInputStream_vtable;
+ c->data = data;
+ c->size = size;
+ c->limit = limit;
+ c->position = 0;
+ c->last_returned_size = 0;
+
+ return (upb_ZeroCopyInputStream*)c;
+}
diff --git a/upb/upb/io/chunked_input_stream.h b/upb/upb/io/chunked_input_stream.h
new file mode 100644
index 0000000..4497266
--- /dev/null
+++ b/upb/upb/io/chunked_input_stream.h
@@ -0,0 +1,56 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_IO_CHUNKED_INPUT_STREAM_H_
+#define UPB_IO_CHUNKED_INPUT_STREAM_H_
+
+#include "upb/io/zero_copy_input_stream.h"
+#include "upb/mem/arena.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// A ZeroCopyInputStream which wraps a flat buffer and limits the number of
+// bytes that can be returned by a single call to Next().
+upb_ZeroCopyInputStream* upb_ChunkedInputStream_New(const void* data,
+ size_t size, size_t limit,
+ upb_Arena* arena);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_IO_CHUNKED_INPUT_STREAM_H_ */
diff --git a/upb/upb/io/chunked_output_stream.c b/upb/upb/io/chunked_output_stream.c
new file mode 100644
index 0000000..6546124
--- /dev/null
+++ b/upb/upb/io/chunked_output_stream.c
@@ -0,0 +1,97 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/io/chunked_output_stream.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+typedef struct {
+ upb_ZeroCopyOutputStream base;
+
+ char* data;
+ size_t size;
+ size_t limit;
+ size_t position;
+ size_t last_returned_size;
+} upb_ChunkedOutputStream;
+
+static void* upb_ChunkedOutputStream_Next(upb_ZeroCopyOutputStream* z,
+ size_t* count, upb_Status* status) {
+ upb_ChunkedOutputStream* c = (upb_ChunkedOutputStream*)z;
+ UPB_ASSERT(c->position <= c->size);
+
+ char* out = c->data + c->position;
+
+ const size_t chunk = UPB_MIN(c->limit, c->size - c->position);
+ c->position += chunk;
+ c->last_returned_size = chunk;
+ *count = chunk;
+
+ return chunk ? out : NULL;
+}
+
+static void upb_ChunkedOutputStream_BackUp(upb_ZeroCopyOutputStream* z,
+ size_t count) {
+ upb_ChunkedOutputStream* c = (upb_ChunkedOutputStream*)z;
+
+ UPB_ASSERT(c->last_returned_size >= count);
+ c->position -= count;
+ c->last_returned_size -= count;
+}
+
+static size_t upb_ChunkedOutputStream_ByteCount(
+ const upb_ZeroCopyOutputStream* z) {
+ const upb_ChunkedOutputStream* c = (const upb_ChunkedOutputStream*)z;
+
+ return c->position;
+}
+
+static const _upb_ZeroCopyOutputStream_VTable upb_ChunkedOutputStream_vtable = {
+ upb_ChunkedOutputStream_Next,
+ upb_ChunkedOutputStream_BackUp,
+ upb_ChunkedOutputStream_ByteCount,
+};
+
+upb_ZeroCopyOutputStream* upb_ChunkedOutputStream_New(void* data, size_t size,
+ size_t limit,
+ upb_Arena* arena) {
+ upb_ChunkedOutputStream* c = upb_Arena_Malloc(arena, sizeof(*c));
+ if (!c || !limit) return NULL;
+
+ c->base.vtable = &upb_ChunkedOutputStream_vtable;
+ c->data = data;
+ c->size = size;
+ c->limit = limit;
+ c->position = 0;
+ c->last_returned_size = 0;
+
+ return (upb_ZeroCopyOutputStream*)c;
+}
diff --git a/upb/upb/io/chunked_output_stream.h b/upb/upb/io/chunked_output_stream.h
new file mode 100644
index 0000000..f52b6ab
--- /dev/null
+++ b/upb/upb/io/chunked_output_stream.h
@@ -0,0 +1,56 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_IO_CHUNKED_OUTPUT_STREAM_H_
+#define UPB_IO_CHUNKED_OUTPUT_STREAM_H_
+
+#include "upb/io/zero_copy_output_stream.h"
+#include "upb/mem/arena.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// A ZeroCopyOutputStream which wraps a flat buffer and limits the number of
+// bytes that can be returned by a single call to Next().
+upb_ZeroCopyOutputStream* upb_ChunkedOutputStream_New(void* data, size_t size,
+ size_t limit,
+ upb_Arena* arena);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_IO_CHUNKED_OUTPUT_STREAM_H_ */
diff --git a/upb/upb/io/string.h b/upb/upb/io/string.h
new file mode 100644
index 0000000..7aa8bdf
--- /dev/null
+++ b/upb/upb/io/string.h
@@ -0,0 +1,170 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// An attempt to provide some of the C++ string functionality in C.
+// Function names generally match those of corresponding C++ string methods.
+// All buffers are copied so operations are relatively expensive.
+// Internal character strings are always NULL-terminated.
+// All bool functions return true on success, false on failure.
+
+#ifndef UPB_IO_STRING_H_
+#define UPB_IO_STRING_H_
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "upb/mem/arena.h"
+#include "upb/port/vsnprintf_compat.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Do not directly access the fields of this struct - use the accessors only.
+// TODO(salo): Add a small (16 bytes, maybe?) internal buffer so we can avoid
+// hitting the arena for short strings.
+typedef struct {
+ size_t size_;
+ size_t capacity_;
+ char* data_;
+ upb_Arena* arena_;
+} upb_String;
+
+// Initialize an already-allocted upb_String object.
+UPB_INLINE bool upb_String_Init(upb_String* s, upb_Arena* a) {
+ static const int kDefaultCapacity = 16;
+
+ s->size_ = 0;
+ s->capacity_ = kDefaultCapacity;
+ s->data_ = (char*)upb_Arena_Malloc(a, kDefaultCapacity);
+ s->arena_ = a;
+ if (!s->data_) return false;
+ s->data_[0] = '\0';
+ return true;
+}
+
+UPB_INLINE void upb_String_Clear(upb_String* s) {
+ s->size_ = 0;
+ s->data_[0] = '\0';
+}
+
+UPB_INLINE char* upb_String_Data(const upb_String* s) { return s->data_; }
+
+UPB_INLINE size_t upb_String_Size(const upb_String* s) { return s->size_; }
+
+UPB_INLINE bool upb_String_Empty(const upb_String* s) { return s->size_ == 0; }
+
+UPB_INLINE void upb_String_Erase(upb_String* s, size_t pos, size_t len) {
+ if (pos >= s->size_) return;
+ char* des = s->data_ + pos;
+ if (pos + len > s->size_) len = s->size_ - pos;
+ char* src = des + len;
+ memmove(des, src, s->size_ - (src - s->data_) + 1);
+ s->size_ -= len;
+}
+
+UPB_INLINE bool upb_String_Reserve(upb_String* s, size_t size) {
+ if (s->capacity_ <= size) {
+ const size_t new_cap = size + 1;
+ s->data_ =
+ (char*)upb_Arena_Realloc(s->arena_, s->data_, s->capacity_, new_cap);
+ if (!s->data_) return false;
+ s->capacity_ = new_cap;
+ }
+ return true;
+}
+
+UPB_INLINE bool upb_String_Append(upb_String* s, const char* data,
+ size_t size) {
+ if (s->capacity_ <= s->size_ + size) {
+ const size_t new_cap = 2 * (s->size_ + size) + 1;
+ if (!upb_String_Reserve(s, new_cap)) return false;
+ }
+
+ memcpy(s->data_ + s->size_, data, size);
+ s->size_ += size;
+ s->data_[s->size_] = '\0';
+ return true;
+}
+
+UPB_PRINTF(2, 0)
+UPB_INLINE bool upb_String_AppendFmtV(upb_String* s, const char* fmt,
+ va_list args) {
+ size_t capacity = 1000;
+ char* buf = (char*)malloc(capacity);
+ bool out = false;
+ for (;;) {
+ const int n = _upb_vsnprintf(buf, capacity, fmt, args);
+ if (n < 0) break;
+ if (n < capacity) {
+ out = upb_String_Append(s, buf, n);
+ break;
+ }
+ capacity *= 2;
+ buf = (char*)realloc(buf, capacity);
+ }
+ free(buf);
+ return out;
+}
+
+UPB_PRINTF(2, 3)
+UPB_INLINE bool upb_String_AppendFmt(upb_String* s, const char* fmt, ...) {
+ va_list args;
+ va_start(args, fmt);
+ const bool ok = upb_String_AppendFmtV(s, fmt, args);
+ va_end(args);
+ return ok;
+}
+
+UPB_INLINE bool upb_String_Assign(upb_String* s, const char* data,
+ size_t size) {
+ upb_String_Clear(s);
+ return upb_String_Append(s, data, size);
+}
+
+UPB_INLINE bool upb_String_Copy(upb_String* des, const upb_String* src) {
+ return upb_String_Assign(des, src->data_, src->size_);
+}
+
+UPB_INLINE bool upb_String_PushBack(upb_String* s, char ch) {
+ return upb_String_Append(s, &ch, 1);
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_IO_STRING_H_ */
diff --git a/upb/upb/io/string_test.cc b/upb/upb/io/string_test.cc
new file mode 100644
index 0000000..f6c2b38
--- /dev/null
+++ b/upb/upb/io/string_test.cc
@@ -0,0 +1,130 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/io/string.h"
+
+#include <string.h>
+
+#include "gtest/gtest.h"
+#include "upb/mem/arena.hpp"
+
+TEST(StringTest, Append) {
+ upb::Arena arena;
+
+ upb_String foo;
+ EXPECT_TRUE(upb_String_Init(&foo, arena.ptr()));
+ EXPECT_EQ(upb_String_Size(&foo), 0);
+
+ EXPECT_TRUE(upb_String_Assign(&foo, "foobar", 3));
+ EXPECT_EQ(upb_String_Size(&foo), 3);
+ EXPECT_EQ(strcmp(upb_String_Data(&foo), "foo"), 0);
+
+ EXPECT_TRUE(upb_String_Append(&foo, "bar", 3));
+ EXPECT_EQ(upb_String_Size(&foo), 6);
+ EXPECT_EQ(strcmp(upb_String_Data(&foo), "foobar"), 0);
+
+ EXPECT_TRUE(upb_String_Append(&foo, "baz", 3));
+ EXPECT_EQ(upb_String_Size(&foo), 9);
+ EXPECT_EQ(strcmp(upb_String_Data(&foo), "foobarbaz"), 0);
+
+ EXPECT_TRUE(upb_String_Append(&foo, "bat", 3));
+ EXPECT_EQ(upb_String_Size(&foo), 12);
+ EXPECT_EQ(strcmp(upb_String_Data(&foo), "foobarbazbat"), 0);
+
+ EXPECT_TRUE(upb_String_Append(&foo, "feefiefoefoo", 12));
+ EXPECT_EQ(upb_String_Size(&foo), 24);
+ EXPECT_EQ(strcmp(upb_String_Data(&foo), "foobarbazbatfeefiefoefoo"), 0);
+
+ const char* password = "fiddlesnarf";
+ EXPECT_TRUE(upb_String_Assign(&foo, password, strlen(password)));
+ EXPECT_EQ(upb_String_Size(&foo), strlen(password));
+ EXPECT_EQ(strcmp(upb_String_Data(&foo), password), 0);
+}
+
+TEST(StringTest, PushBack) {
+ upb::Arena arena;
+
+ upb_String foo;
+ EXPECT_TRUE(upb_String_Init(&foo, arena.ptr()));
+ EXPECT_EQ(upb_String_Size(&foo), 0);
+
+ const std::string big =
+ "asfashfxauwhfwu4fuwafxasnfwxnxwunxuwxufhwfaiwj4w9jvwxssldfjlasviorwnvwij"
+ "grsdjrfiasrjrasijgraisjvrvoiasjspjfsjgfasjgiasjidjsrvjsrjrasjfrijwjajsrF"
+ "JWJGF4WWJSAVSLJArSJGFrAISJGASrlafjgrivarijrraisrgjiawrijg3874f87f7hqfhpf"
+ "f8929hr32p8475902387459023475297328-22-3776-26";
+ EXPECT_TRUE(upb_String_Reserve(&foo, big.size() + 1));
+ EXPECT_TRUE(upb_String_Append(&foo, big.data(), big.size()));
+ EXPECT_EQ(upb_String_Size(&foo), big.size());
+ EXPECT_EQ(strcmp(upb_String_Data(&foo), big.data()), 0);
+
+ upb_String bar;
+ EXPECT_TRUE(upb_String_Init(&bar, arena.ptr()));
+ EXPECT_EQ(upb_String_Size(&bar), 0);
+
+ EXPECT_TRUE(upb_String_PushBack(&bar, 'x'));
+ EXPECT_TRUE(upb_String_PushBack(&bar, 'y'));
+ EXPECT_TRUE(upb_String_PushBack(&bar, 'z'));
+ EXPECT_TRUE(upb_String_PushBack(&bar, 'z'));
+ EXPECT_TRUE(upb_String_PushBack(&bar, 'y'));
+ EXPECT_EQ(upb_String_Size(&bar), 5);
+ EXPECT_EQ(strcmp(upb_String_Data(&bar), "xyzzy"), 0);
+}
+
+TEST(StringTest, Erase) {
+ upb::Arena arena;
+
+ upb_String foo;
+ EXPECT_TRUE(upb_String_Init(&foo, arena.ptr()));
+
+ const char* sent = "This is an example sentence.";
+ EXPECT_TRUE(upb_String_Assign(&foo, sent, strlen(sent)));
+ EXPECT_EQ(upb_String_Size(&foo), 28);
+
+ upb_String_Erase(&foo, 10, 8);
+ EXPECT_EQ(upb_String_Size(&foo), 20);
+ EXPECT_EQ(strcmp(upb_String_Data(&foo), "This is an sentence."), 0);
+
+ upb_String_Erase(&foo, 9, 1);
+ EXPECT_EQ(upb_String_Size(&foo), 19);
+ EXPECT_EQ(strcmp(upb_String_Data(&foo), "This is a sentence."), 0);
+
+ upb_String_Erase(&foo, 5, 5);
+ EXPECT_EQ(upb_String_Size(&foo), 14);
+ EXPECT_EQ(strcmp(upb_String_Data(&foo), "This sentence."), 0);
+
+ upb_String_Erase(&foo, 4, 99);
+ EXPECT_EQ(upb_String_Size(&foo), 4);
+ EXPECT_EQ(strcmp(upb_String_Data(&foo), "This"), 0);
+
+ upb_String_Erase(&foo, 0, 4);
+ EXPECT_EQ(upb_String_Size(&foo), 0);
+ EXPECT_EQ(strcmp(upb_String_Data(&foo), ""), 0);
+}
diff --git a/upb/upb/io/tokenizer.c b/upb/upb/io/tokenizer.c
new file mode 100644
index 0000000..d82618a
--- /dev/null
+++ b/upb/upb/io/tokenizer.c
@@ -0,0 +1,1023 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/io/tokenizer.h"
+
+#include "upb/io/string.h"
+#include "upb/lex/strtod.h"
+#include "upb/lex/unicode.h"
+
+// Must be included last.
+#include "upb/port/def.inc"
+
+typedef enum {
+ // Started a line comment.
+ kUpb_CommentType_Line,
+
+ // Started a block comment.
+ kUpb_CommentType_Block,
+
+ // Consumed a slash, then realized it wasn't a comment. current_ has
+ // been filled in with a slash token. The caller should return it.
+ kUpb_CommentType_SlashNot,
+
+ // We do not appear to be starting a comment here.
+ kUpb_CommentType_None,
+} upb_CommentType;
+
+static bool upb_Tokenizer_IsUnprintable(char c) { return '\0' < c && c < ' '; }
+
+// Since we count columns we need to interpret tabs somehow. We'll take
+// the standard 8-character definition for lack of any way to do better.
+static const int kUpb_Tokenizer_TabWidth = 8;
+
+// Given a char, interpret it as a numeric digit and return its value.
+// This supports any number base up to 36.
+// Represents integer values of digits.
+// Uses 36 to indicate an invalid character since we support
+// bases up to 36.
+static const int8_t kUpb_Tokenizer_AsciiToInt[256] = {
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 00-0F
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 10-1F
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // ' '-'/'
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, // '0'-'9'
+ 36, 36, 36, 36, 36, 36, 36, // ':'-'@'
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 'A'-'P'
+ 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, // 'Q'-'Z'
+ 36, 36, 36, 36, 36, 36, // '['-'`'
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 'a'-'p'
+ 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, // 'q'-'z'
+ 36, 36, 36, 36, 36, // '{'-DEL
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 80-8F
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 90-9F
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // A0-AF
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // B0-BF
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // C0-CF
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // D0-DF
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // E0-EF
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // F0-FF
+};
+
+static int DigitValue(char digit) {
+ return kUpb_Tokenizer_AsciiToInt[digit & 0xFF];
+}
+
+static bool upb_Tokenizer_IsLetter(char c) {
+ return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || (c == '_');
+}
+
+static bool upb_Tokenizer_IsDigit(char c) { return '0' <= c && c <= '9'; }
+
+static bool upb_Tokenizer_IsOctalDigit(char c) { return '0' <= c && c <= '7'; }
+
+static bool upb_Tokenizer_IsHexDigit(char c) {
+ return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') ||
+ ('A' <= c && c <= 'F');
+}
+
+static bool upb_Tokenizer_IsAlphanumeric(char c) {
+ return upb_Tokenizer_IsLetter(c) || upb_Tokenizer_IsDigit(c);
+}
+
+static bool upb_Tokenizer_IsWhitespaceNoNewline(char c) {
+ return c == ' ' || c == '\t' || c == '\r' || c == '\v' || c == '\f';
+}
+
+static bool upb_Tokenizer_IsWhitespace(char c) {
+ return c == '\n' || upb_Tokenizer_IsWhitespaceNoNewline(c);
+}
+
+static bool upb_Tokenizer_IsEscape(char c) {
+ return c == 'a' || c == 'b' || c == 'f' || c == 'n' || c == 'r' || c == 't' ||
+ c == 'v' || c == '\\' || c == '?' || c == '\'' || c == '\"';
+}
+
+static char TranslateEscape(char c) {
+ switch (c) {
+ case 'a':
+ return '\a';
+ case 'b':
+ return '\b';
+ case 'f':
+ return '\f';
+ case 'n':
+ return '\n';
+ case 'r':
+ return '\r';
+ case 't':
+ return '\t';
+ case 'v':
+ return '\v';
+ case '\\':
+ return '\\';
+ case '?':
+ return '\?'; // Trigraphs = :(
+ case '\'':
+ return '\'';
+ case '"':
+ return '\"';
+
+ // We expect escape sequences to have been validated separately.
+ default:
+ return '?';
+ }
+}
+
+// ===================================================================
+
+struct upb_Tokenizer {
+ upb_TokenType token_type; // The type of the current token.
+
+ // The exact text of the current token as it appeared in the input.
+ // e.g. tokens of TYPE_STRING will still be escaped and in quotes.
+ upb_String token_text;
+
+ // "line" and "column" specify the position of the first character of
+ // the token within the input stream. They are zero-based.
+ int token_line;
+ int token_column;
+ int token_end_column;
+
+ upb_ZeroCopyInputStream* input;
+ upb_Arena* arena;
+ upb_Status* status;
+
+ char current_char; // == buffer_[buffer_pos_], updated by NextChar().
+ const char* buffer; // Current buffer returned from input_.
+ size_t buffer_size; // Size of buffer_.
+ size_t buffer_pos; // Current position within the buffer.
+ bool read_error; // Did we previously encounter a read error?
+
+ // Line and column number of current_char_ within the whole input stream.
+ int line;
+
+ // By "column number", the proto compiler refers to a count of the number
+ // of bytes before a given byte, except that a tab character advances to
+ // the next multiple of 8 bytes. Note in particular that column numbers
+ // are zero-based, while many user interfaces use one-based column numbers.
+ int column;
+
+ // Cached values from before the most recent call to Next()
+ upb_TokenType previous_type;
+ int previous_line;
+ int previous_column;
+ int previous_end_column;
+
+ // String to which text should be appended as we advance through it.
+ // Call RecordTo(&str) to start recording and StopRecording() to stop.
+ // E.g. StartToken() calls RecordTo(¤t_.text). record_start_ is the
+ // position within the current buffer where recording started.
+ upb_String* record_target;
+ int record_start;
+ int options;
+ jmp_buf err;
+};
+
+// Convenience methods to return an error at the current line and column.
+
+UPB_NORETURN static void ReportError(upb_Tokenizer* t, const char* msg) {
+ upb_Status_SetErrorFormat(t->status, "%d:%d: %s", t->line, t->column, msg);
+ UPB_LONGJMP(t->err, 1);
+}
+
+UPB_NORETURN UPB_PRINTF(2, 3) static void ReportErrorFormat(upb_Tokenizer* t,
+ const char* fmt,
+ ...) {
+ va_list args;
+ va_start(args, fmt);
+ char msg[128];
+ vsnprintf(msg, sizeof(msg), fmt, args);
+ ReportError(t, msg);
+}
+
+// Read a new buffer from the input.
+static void Refresh(upb_Tokenizer* t) {
+ if (t->read_error) {
+ t->current_char = '\0';
+ return;
+ }
+
+ // If we're in a token, append the rest of the buffer to it.
+ if (t->record_target != NULL && t->record_start < t->buffer_size) {
+ upb_String_Append(t->record_target, t->buffer + t->record_start,
+ t->buffer_size - t->record_start);
+ t->record_start = 0;
+ }
+
+ t->buffer = NULL;
+ t->buffer_pos = 0;
+
+ upb_Status status;
+ const void* data =
+ upb_ZeroCopyInputStream_Next(t->input, &t->buffer_size, &status);
+
+ if (t->buffer_size > 0) {
+ t->buffer = data;
+ t->current_char = t->buffer[0];
+ } else {
+ // end of stream (or read error)
+ t->buffer_size = 0;
+ t->read_error = true;
+ t->current_char = '\0';
+ }
+}
+
+// Consume this character and advance to the next one.
+static void NextChar(upb_Tokenizer* t) {
+ // Update our line and column counters based on the character being
+ // consumed.
+ if (t->current_char == '\n') {
+ t->line++;
+ t->column = 0;
+ } else if (t->current_char == '\t') {
+ t->column += kUpb_Tokenizer_TabWidth - t->column % kUpb_Tokenizer_TabWidth;
+ } else {
+ t->column++;
+ }
+
+ // Advance to the next character.
+ t->buffer_pos++;
+ if (t->buffer_pos < t->buffer_size) {
+ t->current_char = t->buffer[t->buffer_pos];
+ } else {
+ Refresh(t);
+ }
+}
+
+static void RecordTo(upb_Tokenizer* t, upb_String* target) {
+ t->record_target = target;
+ t->record_start = t->buffer_pos;
+}
+
+static void StopRecording(upb_Tokenizer* t) {
+ if (t->buffer_pos > t->record_start) {
+ upb_String_Append(t->record_target, t->buffer + t->record_start,
+ t->buffer_pos - t->record_start);
+ }
+ t->record_target = NULL;
+ t->record_start = -1;
+}
+
+// Called when the current character is the first character of a new
+// token (not including whitespace or comments).
+static void StartToken(upb_Tokenizer* t) {
+ t->token_type = kUpb_TokenType_Start;
+ upb_String_Clear(&t->token_text);
+ t->token_line = t->line;
+ t->token_column = t->column;
+ RecordTo(t, &t->token_text);
+}
+
+// Called when the current character is the first character after the
+// end of the last token. After this returns, current_.text will
+// contain all text consumed since StartToken() was called.
+static void EndToken(upb_Tokenizer* t) {
+ StopRecording(t);
+ t->token_end_column = t->column;
+}
+
+// -----------------------------------------------------------------
+// These helper methods make the parsing code more readable.
+// The "character classes" referred to are defined at the top of the file.
+// The method returns true if c is a member of this "class", like "Letter"
+// or "Digit".
+
+// Returns true if the current character is of the given character
+// class, but does not consume anything.
+static bool LookingAt(const upb_Tokenizer* t, bool (*f)(char)) {
+ return f(t->current_char);
+}
+
+// If the current character is in the given class, consume it and return true.
+// Otherwise return false.
+static bool TryConsumeOne(upb_Tokenizer* t, bool (*f)(char)) {
+ if (f(t->current_char)) {
+ NextChar(t);
+ return true;
+ } else {
+ return false;
+ }
+}
+
+// Like above, but try to consume the specific character indicated.
+static bool TryConsume(upb_Tokenizer* t, char c) {
+ if (t->current_char == c) {
+ NextChar(t);
+ return true;
+ } else {
+ return false;
+ }
+}
+
+// Consume zero or more of the given character class.
+static void ConsumeZeroOrMore(upb_Tokenizer* t, bool (*f)(char)) {
+ while (f(t->current_char)) {
+ NextChar(t);
+ }
+}
+
+// Consume one or more of the given character class or log the given
+// error message.
+static void ConsumeOneOrMore(upb_Tokenizer* t, bool (*f)(char),
+ const char* err_msg) {
+ if (!f(t->current_char)) {
+ ReportError(t, err_msg);
+ }
+
+ do {
+ NextChar(t);
+ } while (f(t->current_char));
+}
+
+// -----------------------------------------------------------------
+// The following four methods are used to consume tokens of specific
+// types. They are actually used to consume all characters *after*
+// the first, since the calling function consumes the first character
+// in order to decide what kind of token is being read.
+
+// Read and consume a string, ending when the given delimiter is consumed.
+static void ConsumeString(upb_Tokenizer* t, char delimiter) {
+ while (true) {
+ switch (t->current_char) {
+ case '\0':
+ ReportError(t, "Unexpected end of string.");
+
+ case '\n':
+ ReportError(t, "String literals cannot cross line boundaries.");
+
+ case '\\': {
+ // An escape sequence.
+ NextChar(t);
+ if (TryConsumeOne(t, upb_Tokenizer_IsEscape)) {
+ // Valid escape sequence.
+ } else if (TryConsumeOne(t, upb_Tokenizer_IsOctalDigit)) {
+ // Possibly followed by two more octal digits, but these will
+ // just be consumed by the main loop anyway so we don't need
+ // to do so explicitly here.
+ } else if (TryConsume(t, 'x')) {
+ if (!TryConsumeOne(t, upb_Tokenizer_IsHexDigit)) {
+ ReportError(t, "Expected hex digits for escape sequence.");
+ }
+ // Possibly followed by another hex digit, but again we don't care.
+ } else if (TryConsume(t, 'u')) {
+ if (!TryConsumeOne(t, upb_Tokenizer_IsHexDigit) ||
+ !TryConsumeOne(t, upb_Tokenizer_IsHexDigit) ||
+ !TryConsumeOne(t, upb_Tokenizer_IsHexDigit) ||
+ !TryConsumeOne(t, upb_Tokenizer_IsHexDigit)) {
+ ReportError(t, "Expected four hex digits for \\u escape sequence.");
+ }
+ } else if (TryConsume(t, 'U')) {
+ // We expect 8 hex digits; but only the range up to 0x10ffff is
+ // legal.
+ if (!TryConsume(t, '0') || !TryConsume(t, '0') ||
+ !(TryConsume(t, '0') || TryConsume(t, '1')) ||
+ !TryConsumeOne(t, upb_Tokenizer_IsHexDigit) ||
+ !TryConsumeOne(t, upb_Tokenizer_IsHexDigit) ||
+ !TryConsumeOne(t, upb_Tokenizer_IsHexDigit) ||
+ !TryConsumeOne(t, upb_Tokenizer_IsHexDigit) ||
+ !TryConsumeOne(t, upb_Tokenizer_IsHexDigit)) {
+ ReportError(t,
+ "Expected eight hex digits up to 10ffff for \\U escape "
+ "sequence");
+ }
+ } else {
+ ReportError(t, "Invalid escape sequence in string literal.");
+ }
+ break;
+ }
+
+ default: {
+ if (t->current_char == delimiter) {
+ NextChar(t);
+ return;
+ }
+ NextChar(t);
+ break;
+ }
+ }
+ }
+}
+
+// Read and consume a number, returning TYPE_FLOAT or TYPE_INTEGER depending
+// on what was read. This needs to know if the first characer was a zero in
+// order to correctly recognize hex and octal numbers. It also needs to know
+// whether the first character was a '.' to parse floating point correctly.
+static upb_TokenType ConsumeNumber(upb_Tokenizer* t, bool started_with_zero,
+ bool started_with_dot) {
+ bool is_float = false;
+
+ if (started_with_zero && (TryConsume(t, 'x') || TryConsume(t, 'X'))) {
+ // A hex number (started with "0x").
+ ConsumeOneOrMore(t, upb_Tokenizer_IsHexDigit,
+ "\"0x\" must be followed by hex digits.");
+
+ } else if (started_with_zero && LookingAt(t, upb_Tokenizer_IsDigit)) {
+ // An octal number (had a leading zero).
+ ConsumeZeroOrMore(t, upb_Tokenizer_IsOctalDigit);
+ if (LookingAt(t, upb_Tokenizer_IsDigit)) {
+ ReportError(t, "Numbers starting with leading zero must be in octal.");
+ }
+
+ } else {
+ // A decimal number.
+ if (started_with_dot) {
+ is_float = true;
+ ConsumeZeroOrMore(t, upb_Tokenizer_IsDigit);
+ } else {
+ ConsumeZeroOrMore(t, upb_Tokenizer_IsDigit);
+
+ if (TryConsume(t, '.')) {
+ is_float = true;
+ ConsumeZeroOrMore(t, upb_Tokenizer_IsDigit);
+ }
+ }
+
+ if (TryConsume(t, 'e') || TryConsume(t, 'E')) {
+ is_float = true;
+ if (!TryConsume(t, '-')) TryConsume(t, '+');
+ ConsumeOneOrMore(t, upb_Tokenizer_IsDigit,
+ "\"e\" must be followed by exponent.");
+ }
+
+ if (t->options & kUpb_TokenizerOption_AllowFAfterFloat) {
+ if (TryConsume(t, 'f') || TryConsume(t, 'F')) is_float = true;
+ }
+ }
+
+ if (LookingAt(t, upb_Tokenizer_IsLetter)) {
+ ReportError(t, "Need space between number and identifier.");
+ }
+
+ if (t->current_char == '.') {
+ if (is_float) {
+ ReportError(
+ t, "Already saw decimal point or exponent; can't have another one.");
+ } else {
+ ReportError(t, "Hex and octal numbers must be integers.");
+ }
+ }
+
+ return is_float ? kUpb_TokenType_Float : kUpb_TokenType_Integer;
+}
+
+// Consume the rest of a line.
+static void ConsumeLineComment(upb_Tokenizer* t, upb_String* content) {
+ if (content != NULL) RecordTo(t, content);
+
+ while (t->current_char != '\0' && t->current_char != '\n') {
+ NextChar(t);
+ }
+ TryConsume(t, '\n');
+
+ if (content != NULL) StopRecording(t);
+}
+
+static void ConsumeBlockComment(upb_Tokenizer* t, upb_String* content) {
+ const int start_line = t->line;
+ const int start_column = t->column - 2;
+
+ if (content != NULL) RecordTo(t, content);
+
+ while (true) {
+ while (t->current_char != '\0' && t->current_char != '*' &&
+ t->current_char != '/' && t->current_char != '\n') {
+ NextChar(t);
+ }
+
+ if (TryConsume(t, '\n')) {
+ if (content != NULL) StopRecording(t);
+
+ // Consume leading whitespace and asterisk;
+ ConsumeZeroOrMore(t, upb_Tokenizer_IsWhitespaceNoNewline);
+ if (TryConsume(t, '*')) {
+ if (TryConsume(t, '/')) {
+ // End of comment.
+ break;
+ }
+ }
+
+ if (content != NULL) RecordTo(t, content);
+ } else if (TryConsume(t, '*') && TryConsume(t, '/')) {
+ // End of comment.
+ if (content != NULL) {
+ StopRecording(t);
+ // Strip trailing "*/".
+ upb_String_Erase(content, upb_String_Size(content) - 2, 2);
+ }
+ break;
+ } else if (TryConsume(t, '/') && t->current_char == '*') {
+ // Note: We didn't consume the '*' because if there is a '/' after it
+ // we want to interpret that as the end of the comment.
+ ReportError(
+ t, "\"/*\" inside block comment. Block comments cannot be nested.");
+ } else if (t->current_char == '\0') {
+ ReportErrorFormat(
+ t, "End-of-file inside block comment.\n%d:%d: Comment started here.",
+ start_line, start_column);
+ }
+ }
+}
+
+// If we're at the start of a new comment, consume it and return what kind
+// of comment it is.
+static upb_CommentType TryConsumeCommentStart(upb_Tokenizer* t) {
+ const bool style_sh = t->options & kUpb_TokenizerOption_CommentStyleShell;
+ const bool style_cpp = !style_sh;
+
+ if (style_cpp && TryConsume(t, '/')) {
+ if (TryConsume(t, '/')) {
+ return kUpb_CommentType_Line;
+ } else if (TryConsume(t, '*')) {
+ return kUpb_CommentType_Block;
+ } else {
+ // Oops, it was just a slash. Return it.
+ t->token_type = kUpb_TokenType_Symbol;
+ upb_String_Assign(&t->token_text, "/", 1);
+ t->token_line = t->line;
+ t->token_column = t->column - 1;
+ t->token_end_column = t->column;
+ return kUpb_CommentType_SlashNot;
+ }
+ } else if (style_sh && TryConsume(t, '#')) {
+ return kUpb_CommentType_Line;
+ } else {
+ return kUpb_CommentType_None;
+ }
+}
+
+// If we're looking at a TYPE_WHITESPACE token and `report_whitespace` is true,
+// consume it and return true.
+static bool TryConsumeWhitespace(upb_Tokenizer* t) {
+ if (t->options & kUpb_TokenizerOption_ReportNewlines) {
+ if (TryConsumeOne(t, upb_Tokenizer_IsWhitespaceNoNewline)) {
+ ConsumeZeroOrMore(t, upb_Tokenizer_IsWhitespaceNoNewline);
+ t->token_type = kUpb_TokenType_Whitespace;
+ return true;
+ }
+ return false;
+ }
+ if (TryConsumeOne(t, upb_Tokenizer_IsWhitespace)) {
+ ConsumeZeroOrMore(t, upb_Tokenizer_IsWhitespace);
+ t->token_type = kUpb_TokenType_Whitespace;
+ return (t->options & kUpb_TokenizerOption_ReportWhitespace) != 0;
+ }
+ return false;
+}
+
+// If we're looking at a TYPE_NEWLINE token and `report_newlines` is true,
+// consume it and return true.
+static bool TryConsumeNewline(upb_Tokenizer* t) {
+ if (t->options & kUpb_TokenizerOption_ReportNewlines) {
+ if (TryConsume(t, '\n')) {
+ t->token_type = kUpb_TokenType_Newline;
+ return true;
+ }
+ }
+ return false;
+}
+
+// -------------------------------------------------------------------
+
+int upb_Tokenizer_Column(const upb_Tokenizer* t) { return t->token_column; }
+
+int upb_Tokenizer_EndColumn(const upb_Tokenizer* t) {
+ return t->token_end_column;
+}
+
+int upb_Tokenizer_Line(const upb_Tokenizer* t) { return t->token_line; }
+
+int upb_Tokenizer_TextSize(const upb_Tokenizer* t) {
+ return t->token_text.size_;
+}
+
+const char* upb_Tokenizer_TextData(const upb_Tokenizer* t) {
+ return t->token_text.data_;
+}
+
+upb_TokenType upb_Tokenizer_Type(const upb_Tokenizer* t) {
+ return t->token_type;
+}
+
+bool upb_Tokenizer_Next(upb_Tokenizer* t, upb_Status* status) {
+ t->status = status;
+ t->previous_type = t->token_type;
+ t->previous_line = t->token_line;
+ t->previous_column = t->token_column;
+ t->previous_end_column = t->token_end_column;
+
+ if (UPB_SETJMP(t->err)) return false;
+
+ while (!t->read_error) {
+ StartToken(t);
+ bool report_token = TryConsumeWhitespace(t) || TryConsumeNewline(t);
+ EndToken(t);
+ if (report_token) return true;
+
+ switch (TryConsumeCommentStart(t)) {
+ case kUpb_CommentType_Line:
+ ConsumeLineComment(t, NULL);
+ continue;
+ case kUpb_CommentType_Block:
+ ConsumeBlockComment(t, NULL);
+ continue;
+ case kUpb_CommentType_SlashNot:
+ return true;
+ case kUpb_CommentType_None:
+ break;
+ }
+
+ // Check for EOF before continuing.
+ if (t->read_error) break;
+
+ if (LookingAt(t, upb_Tokenizer_IsUnprintable) || t->current_char == '\0') {
+ ReportError(t, "Invalid control characters encountered in text.");
+ }
+
+ // Reading some sort of token.
+ StartToken(t);
+
+ if (TryConsumeOne(t, upb_Tokenizer_IsLetter)) {
+ ConsumeZeroOrMore(t, upb_Tokenizer_IsAlphanumeric);
+ t->token_type = kUpb_TokenType_Identifier;
+ } else if (TryConsume(t, '0')) {
+ t->token_type = ConsumeNumber(t, true, false);
+ } else if (TryConsume(t, '.')) {
+ // This could be the beginning of a floating-point number, or it could
+ // just be a '.' symbol.
+
+ if (TryConsumeOne(t, upb_Tokenizer_IsDigit)) {
+ // It's a floating-point number.
+ if (t->previous_type == kUpb_TokenType_Identifier &&
+ t->token_line == t->previous_line &&
+ t->token_column == t->previous_end_column) {
+ // We don't accept syntax like "blah.123".
+ t->column -= 2;
+ ReportError(t, "Need space between identifier and decimal point.");
+ }
+ t->token_type = ConsumeNumber(t, false, true);
+ } else {
+ t->token_type = kUpb_TokenType_Symbol;
+ }
+ } else if (TryConsumeOne(t, upb_Tokenizer_IsDigit)) {
+ t->token_type = ConsumeNumber(t, false, false);
+ } else if (TryConsume(t, '\"')) {
+ ConsumeString(t, '\"');
+ t->token_type = kUpb_TokenType_String;
+ } else if (TryConsume(t, '\'')) {
+ ConsumeString(t, '\'');
+ t->token_type = kUpb_TokenType_String;
+ } else {
+ // Check if the high order bit is set.
+ if (t->current_char & 0x80) {
+ ReportErrorFormat(t, "Interpreting non ascii codepoint %d.",
+ (uint8_t)t->current_char);
+ }
+ NextChar(t);
+ t->token_type = kUpb_TokenType_Symbol;
+ }
+
+ EndToken(t);
+ return true;
+ }
+
+ // EOF
+ t->token_type = kUpb_TokenType_End;
+ upb_String_Clear(&t->token_text);
+ t->token_line = t->line;
+ t->token_column = t->column;
+ t->token_end_column = t->column;
+ upb_Status_Clear(status);
+ return false;
+}
+
+// -------------------------------------------------------------------
+// Token-parsing helpers. Remember that these don't need to report
+// errors since any errors should already have been reported while
+// tokenizing. Also, these can assume that whatever text they
+// are given is text that the tokenizer actually parsed as a token
+// of the given type.
+
+bool upb_Parse_Integer(const char* text, uint64_t max_value, uint64_t* output) {
+ // We can't just use strtoull() because (a) it accepts negative numbers,
+ // (b) We want additional range checks, (c) it reports overflows via errno.
+
+ const char* ptr = text;
+ int base = 10;
+ uint64_t overflow_if_mul_base = (UINT64_MAX / 10) + 1;
+ if (ptr[0] == '0') {
+ if (ptr[1] == 'x' || ptr[1] == 'X') {
+ // This is hex.
+ base = 16;
+ overflow_if_mul_base = (UINT64_MAX / 16) + 1;
+ ptr += 2;
+ } else {
+ // This is octal.
+ base = 8;
+ overflow_if_mul_base = (UINT64_MAX / 8) + 1;
+ }
+ }
+
+ uint64_t result = 0;
+ // For all the leading '0's, and also the first non-zero character, we
+ // don't need to multiply.
+ while (*ptr != '\0') {
+ int digit = DigitValue(*ptr++);
+ if (digit >= base) {
+ // The token provided by Tokenizer is invalid. i.e., 099 is an invalid
+ // token, but Tokenizer still think it's integer.
+ return false;
+ }
+ if (digit != 0) {
+ result = digit;
+ break;
+ }
+ }
+ for (; *ptr != '\0'; ptr++) {
+ int digit = DigitValue(*ptr);
+ if (digit < 0 || digit >= base) {
+ // The token provided by Tokenizer is invalid. i.e., 099 is an invalid
+ // token, but Tokenizer still think it's integer.
+ return false;
+ }
+ if (result >= overflow_if_mul_base) {
+ // We know the multiply we're about to do will overflow, so exit now.
+ return false;
+ }
+ // We know that result * base won't overflow, but adding digit might...
+ result = result * base + digit;
+ // C++ guarantees defined "wrap" semantics when unsigned integer
+ // operations overflow, making this a fast way to check if adding
+ // digit made result overflow, and thus, wrap around.
+ if (result < (uint64_t)base) return false;
+ }
+ if (result > max_value) return false;
+
+ *output = result;
+ return true;
+}
+
+double upb_Parse_Float(const char* text) {
+ char* end;
+ double result = _upb_NoLocaleStrtod(text, &end);
+
+ // "1e" is not a valid float, but if the tokenizer reads it, it will
+ // report an error but still return it as a valid token. We need to
+ // accept anything the tokenizer could possibly return, error or not.
+ if (*end == 'e' || *end == 'E') {
+ ++end;
+ if (*end == '-' || *end == '+') ++end;
+ }
+
+ // If the Tokenizer had allow_f_after_float_ enabled, the float may be
+ // suffixed with the letter 'f'.
+ if (*end == 'f' || *end == 'F') {
+ ++end;
+ }
+
+ if ((end - text) != strlen(text) || *text == '-') {
+ fprintf(stderr,
+ "upb_Parse_Float() passed text that could not have"
+ " been tokenized as a float: %s\n",
+ text);
+ UPB_ASSERT(0);
+ }
+ return result;
+}
+
+// Append a Unicode code point to a string as UTF8.
+static void AppendUTF8(uint32_t code_point, upb_String* output) {
+ char temp[24];
+ int len = upb_Unicode_ToUTF8(code_point, temp);
+ if (len == 0) {
+ // ConsumeString permits hex values up to 0x1FFFFF,
+ // and FetchUnicodePoint doesn't perform a range check.
+ // Unicode code points end at 0x10FFFF, so this is out-of-range.
+ len = snprintf(temp, sizeof temp, "\\U%08x", code_point);
+ }
+ upb_String_Append(output, temp, len);
+}
+
+// Try to read <len> hex digits from ptr, and stuff the numeric result into
+// *result. Returns true if that many digits were successfully consumed.
+static bool ReadHexDigits(const char* ptr, int len, uint32_t* result) {
+ *result = 0;
+ if (len == 0) return false;
+ for (const char* end = ptr + len; ptr < end; ++ptr) {
+ if (*ptr == '\0') return false;
+ *result = (*result << 4) + DigitValue(*ptr);
+ }
+ return true;
+}
+
+// Convert the escape sequence parameter to a number of expected hex digits.
+static int UnicodeLength(char key) {
+ if (key == 'u') return 4;
+ if (key == 'U') return 8;
+ return 0;
+}
+
+// Given a pointer to the 'u' or 'U' starting a Unicode escape sequence, attempt
+// to parse that sequence. On success, returns a pointer to the first char
+// beyond that sequence, and fills in *code_point. On failure, returns ptr
+// itself.
+static const char* FetchUnicodePoint(const char* ptr, uint32_t* code_point) {
+ const char* p = ptr;
+ // Fetch the code point.
+ const int len = UnicodeLength(*p++);
+ if (!ReadHexDigits(p, len, code_point)) return ptr;
+ p += len;
+
+ // Check if the code point we read is a "head surrogate." If so, then we
+ // expect it to be immediately followed by another code point which is a valid
+ // "trail surrogate," and together they form a UTF-16 pair which decodes into
+ // a single Unicode point. Trail surrogates may only use \u, not \U.
+ if (upb_Unicode_IsHigh(*code_point) && *p == '\\' && *(p + 1) == 'u') {
+ uint32_t trail_surrogate;
+ if (ReadHexDigits(p + 2, 4, &trail_surrogate) &&
+ upb_Unicode_IsLow(trail_surrogate)) {
+ *code_point = upb_Unicode_FromPair(*code_point, trail_surrogate);
+ p += 6;
+ }
+ // If this failed, then we just emit the head surrogate as a code point.
+ // It's bogus, but so is the string.
+ }
+
+ return p;
+}
+
+// The text string must begin and end with single or double quote characters.
+upb_StringView upb_Parse_String(const char* text, upb_Arena* arena) {
+ const size_t size = strlen(text);
+
+ upb_String output;
+ upb_String_Init(&output, arena);
+
+ // Reminder: text[0] is always a quote character.
+ // (If text is empty, it's invalid, so we'll just return).
+ if (size == 0) {
+ fprintf(stderr,
+ "Tokenizer::ParseStringAppend() passed text that could not"
+ " have been tokenized as a string: %s",
+ text);
+ UPB_ASSERT(0);
+ return upb_StringView_FromDataAndSize(NULL, 0);
+ }
+
+ // Reserve room for new string.
+ const size_t new_len = size + upb_String_Size(&output);
+ upb_String_Reserve(&output, new_len);
+
+ // Loop through the string copying characters to "output" and
+ // interpreting escape sequences. Note that any invalid escape
+ // sequences or other errors were already reported while tokenizing.
+ // In this case we do not need to produce valid results.
+ for (const char* ptr = text + 1; *ptr != '\0'; ptr++) {
+ if (*ptr == '\\' && ptr[1] != '\0') {
+ // An escape sequence.
+ ++ptr;
+
+ if (upb_Tokenizer_IsOctalDigit(*ptr)) {
+ // An octal escape. May one, two, or three digits.
+ int code = DigitValue(*ptr);
+ if (upb_Tokenizer_IsOctalDigit(ptr[1])) {
+ ++ptr;
+ code = code * 8 + DigitValue(*ptr);
+ }
+ if (upb_Tokenizer_IsOctalDigit(ptr[1])) {
+ ++ptr;
+ code = code * 8 + DigitValue(*ptr);
+ }
+ upb_String_PushBack(&output, (char)code);
+
+ } else if (*ptr == 'x') {
+ // A hex escape. May zero, one, or two digits. (The zero case
+ // will have been caught as an error earlier.)
+ int code = 0;
+ if (upb_Tokenizer_IsHexDigit(ptr[1])) {
+ ++ptr;
+ code = DigitValue(*ptr);
+ }
+ if (upb_Tokenizer_IsHexDigit(ptr[1])) {
+ ++ptr;
+ code = code * 16 + DigitValue(*ptr);
+ }
+ upb_String_PushBack(&output, (char)code);
+
+ } else if (*ptr == 'u' || *ptr == 'U') {
+ uint32_t unicode;
+ const char* end = FetchUnicodePoint(ptr, &unicode);
+ if (end == ptr) {
+ // Failure: Just dump out what we saw, don't try to parse it.
+ upb_String_PushBack(&output, *ptr);
+ } else {
+ AppendUTF8(unicode, &output);
+ ptr = end - 1; // Because we're about to ++ptr.
+ }
+ } else {
+ // Some other escape code.
+ upb_String_PushBack(&output, TranslateEscape(*ptr));
+ }
+
+ } else if (*ptr == text[0] && ptr[1] == '\0') {
+ // Ignore final quote matching the starting quote.
+ } else {
+ upb_String_PushBack(&output, *ptr);
+ }
+ }
+
+ return upb_StringView_FromDataAndSize(upb_String_Data(&output),
+ upb_String_Size(&output));
+}
+
+static bool AllInClass(bool (*f)(char), const char* text, int size) {
+ for (int i = 0; i < size; i++) {
+ if (!f(text[i])) return false;
+ }
+ return true;
+}
+
+bool upb_Tokenizer_IsIdentifier(const char* data, int size) {
+ // Mirrors IDENTIFIER definition in Tokenizer::Next() above.
+ if (size == 0) return false;
+ if (!upb_Tokenizer_IsLetter(data[0])) return false;
+ if (!AllInClass(upb_Tokenizer_IsAlphanumeric, data + 1, size - 1))
+ return false;
+ return true;
+}
+
+upb_Tokenizer* upb_Tokenizer_New(const void* data, size_t size,
+ upb_ZeroCopyInputStream* input, int options,
+ upb_Arena* arena) {
+ upb_Tokenizer* t = upb_Arena_Malloc(arena, sizeof(upb_Tokenizer));
+ if (!t) return NULL;
+
+ t->input = input;
+ t->arena = arena;
+ t->buffer = data;
+ t->buffer_size = size;
+ t->buffer_pos = 0;
+ t->read_error = false;
+ t->line = 0;
+ t->column = 0;
+ t->record_target = NULL;
+ t->record_start = -1;
+
+ // ReportNewlines implies ReportWhitespace.
+ if (options & kUpb_TokenizerOption_ReportNewlines) {
+ options |= kUpb_TokenizerOption_ReportWhitespace;
+ }
+ t->options = options;
+
+ upb_String_Init(&t->token_text, arena);
+ t->token_type = kUpb_TokenType_Start;
+ t->token_line = 0;
+ t->token_column = 0;
+ t->token_end_column = 0;
+
+ t->previous_type = kUpb_TokenType_Start;
+ t->previous_line = 0;
+ t->previous_column = 0;
+ t->previous_end_column = 0;
+
+ if (size) {
+ t->current_char = t->buffer[0];
+ } else {
+ Refresh(t);
+ }
+ return t;
+}
+
+void upb_Tokenizer_Fini(upb_Tokenizer* t) {
+ // If we had any buffer left unread, return it to the underlying stream
+ // so that someone else can read it.
+ if (t->buffer_size > t->buffer_pos) {
+ upb_ZeroCopyInputStream_BackUp(t->input, t->buffer_size - t->buffer_pos);
+ }
+}
diff --git a/upb/upb/io/tokenizer.h b/upb/upb/io/tokenizer.h
new file mode 100644
index 0000000..ca86757
--- /dev/null
+++ b/upb/upb/io/tokenizer.h
@@ -0,0 +1,156 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Class for parsing tokenized text from a ZeroCopyInputStream.
+
+#ifndef UPB_IO_TOKENIZER_H_
+#define UPB_IO_TOKENIZER_H_
+
+#include "upb/base/status.h"
+#include "upb/base/string_view.h"
+#include "upb/io/zero_copy_input_stream.h"
+#include "upb/mem/arena.h"
+
+// Must be included last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum {
+ kUpb_TokenType_Start, // Next() has not yet been called.
+ kUpb_TokenType_End, // End of input reached. "text" is empty.
+
+ // A sequence of letters, digits, and underscores, not starting with a digit.
+ // It is an error for a number to be followed by an identifier with no space
+ // in between.
+ kUpb_TokenType_Identifier,
+
+ // A sequence of digits representing an integer. Normally the digits are
+ // decimal, but a prefix of "0x" indicates a hex number and a leading zero
+ // indicates octal, just like with C numeric literals. A leading negative
+ // sign is NOT included in the token; it's up to the parser to interpret the
+ // unary minus operator on its own.
+ kUpb_TokenType_Integer,
+
+ // A floating point literal, with a fractional part and/or an exponent.
+ // Always in decimal. Again, never negative.
+ kUpb_TokenType_Float,
+
+ // A quoted sequence of escaped characters.
+ // Either single or double quotes can be used, but they must match.
+ // A string literal cannot cross a line break.
+ kUpb_TokenType_String,
+
+ // Any other printable character, like '!' or '+'.
+ // Symbols are always a single character, so "!+$%" is four tokens.
+ kUpb_TokenType_Symbol,
+
+ // A sequence of whitespace.
+ // This token type is only produced if report_whitespace() is true.
+ // It is not reported for whitespace within comments or strings.
+ kUpb_TokenType_Whitespace,
+
+ // A newline ('\n'). This token type is only produced if report_whitespace()
+ // is true and report_newlines() is also true.
+ // It is not reported for newlines in comments or strings.
+ kUpb_TokenType_Newline,
+} upb_TokenType;
+
+typedef enum {
+ // Set to allow floats to be suffixed with the letter 'f'. Tokens which would
+ // otherwise be integers but which have the 'f' suffix will be forced to be
+ // interpreted as floats. For all other purposes, the 'f' is ignored.
+ kUpb_TokenizerOption_AllowFAfterFloat = 1 << 0,
+
+ // If set, whitespace tokens are reported by Next().
+ kUpb_TokenizerOption_ReportWhitespace = 1 << 1,
+
+ // If set, newline tokens are reported by Next().
+ // This is a superset of ReportWhitespace.
+ kUpb_TokenizerOption_ReportNewlines = 1 << 2,
+
+ // By default the tokenizer expects C-style (/* */) comments.
+ // If set, it expects shell-style (#) comments instead.
+ kUpb_TokenizerOption_CommentStyleShell = 1 << 3,
+} upb_Tokenizer_Option;
+
+typedef struct upb_Tokenizer upb_Tokenizer;
+
+// Can be passed a flat array and/or a ZCIS as input.
+// The array will be read first (if non-NULL), then the stream (if non-NULL).
+upb_Tokenizer* upb_Tokenizer_New(const void* data, size_t size,
+ upb_ZeroCopyInputStream* input, int options,
+ upb_Arena* arena);
+
+void upb_Tokenizer_Fini(upb_Tokenizer* t);
+
+// Advance the tokenizer to the next input token. Returns True on success.
+// Returns False and (clears *status on EOF, sets *status on error).
+bool upb_Tokenizer_Next(upb_Tokenizer* t, upb_Status* status);
+
+// Accessors for inspecting current/previous parse tokens,
+// which are opaque to the tokenizer (to reduce copying).
+
+upb_TokenType upb_Tokenizer_Type(const upb_Tokenizer* t);
+int upb_Tokenizer_Column(const upb_Tokenizer* t);
+int upb_Tokenizer_EndColumn(const upb_Tokenizer* t);
+int upb_Tokenizer_Line(const upb_Tokenizer* t);
+int upb_Tokenizer_TextSize(const upb_Tokenizer* t);
+const char* upb_Tokenizer_TextData(const upb_Tokenizer* t);
+
+// External helper: validate an identifier.
+bool upb_Tokenizer_IsIdentifier(const char* data, int size);
+
+// Parses a TYPE_INTEGER token. Returns false if the result would be
+// greater than max_value. Otherwise, returns true and sets *output to the
+// result. If the text is not from a Token of type TYPE_INTEGER originally
+// parsed by a Tokenizer, the result is undefined (possibly an assert
+// failure).
+bool upb_Parse_Integer(const char* text, uint64_t max_value, uint64_t* output);
+
+// Parses a TYPE_FLOAT token. This never fails, so long as the text actually
+// comes from a TYPE_FLOAT token parsed by Tokenizer. If it doesn't, the
+// result is undefined (possibly an assert failure).
+double upb_Parse_Float(const char* text);
+
+// Parses a TYPE_STRING token. This never fails, so long as the text actually
+// comes from a TYPE_STRING token parsed by Tokenizer. If it doesn't, the
+// result is undefined (possibly an assert failure).
+upb_StringView upb_Parse_String(const char* text, upb_Arena* arena);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif // UPB_IO_TOKENIZER_H_
diff --git a/upb/upb/io/tokenizer_test.cc b/upb/upb/io/tokenizer_test.cc
new file mode 100644
index 0000000..59cff71
--- /dev/null
+++ b/upb/upb/io/tokenizer_test.cc
@@ -0,0 +1,1256 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/io/tokenizer.h"
+
+#include "gtest/gtest.h"
+#include "absl/strings/escaping.h"
+#include "absl/strings/str_format.h"
+#include "upb/io/chunked_input_stream.h"
+#include "upb/io/string.h"
+#include "upb/lex/unicode.h"
+#include "upb/mem/arena.hpp"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+namespace proto2 {
+namespace io {
+namespace {
+
+#ifndef arraysize
+#define arraysize(a) (sizeof(a) / sizeof(a[0]))
+#endif
+
+static bool StringEquals(const char* a, const char* b) {
+ return strcmp(a, b) == 0;
+}
+
+// ===================================================================
+// Data-Driven Test Infrastructure
+
+// TODO(kenton): This is copied from coded_stream_unittest. This is
+// temporary until these features are integrated into gUnit itself.
+
+// TEST_1D and TEST_2D are macros I'd eventually like to see added to
+// gUnit. These macros can be used to declare tests which should be
+// run multiple times, once for each item in some input array. TEST_1D
+// tests all cases in a single input array. TEST_2D tests all
+// combinations of cases from two arrays. The arrays must be statically
+// defined such that the arraysize() macro works on them. Example:
+//
+// int kCases[] = {1, 2, 3, 4}
+// TEST_1D(MyFixture, MyTest, kCases) {
+// EXPECT_GT(kCases_case, 0);
+// }
+//
+// This test iterates through the numbers 1, 2, 3, and 4 and tests that
+// they are all grater than zero. In case of failure, the exact case
+// which failed will be printed. The case type must be printable using
+// ostream::operator<<.
+
+#define TEST_1D(FIXTURE, NAME, CASES) \
+ class FIXTURE##_##NAME##_DD : public FIXTURE { \
+ protected: \
+ template <typename CaseType> \
+ void DoSingleCase(const CaseType& CASES##_case); \
+ }; \
+ \
+ TEST_F(FIXTURE##_##NAME##_DD, NAME) { \
+ for (size_t i = 0; i < arraysize(CASES); i++) { \
+ SCOPED_TRACE(testing::Message() \
+ << #CASES " case #" << i << ": " << CASES[i]); \
+ DoSingleCase(CASES[i]); \
+ } \
+ } \
+ \
+ template <typename CaseType> \
+ void FIXTURE##_##NAME##_DD::DoSingleCase(const CaseType& CASES##_case)
+
+#define TEST_2D(FIXTURE, NAME, CASES1, CASES2) \
+ class FIXTURE##_##NAME##_DD : public FIXTURE { \
+ protected: \
+ template <typename CaseType1, typename CaseType2> \
+ void DoSingleCase(const CaseType1& CASES1##_case, \
+ const CaseType2& CASES2##_case); \
+ }; \
+ \
+ TEST_F(FIXTURE##_##NAME##_DD, NAME) { \
+ for (size_t i = 0; i < arraysize(CASES1); i++) { \
+ for (size_t j = 0; j < arraysize(CASES2); j++) { \
+ SCOPED_TRACE(testing::Message() \
+ << #CASES1 " case #" << i << ": " << CASES1[i] << ", " \
+ << #CASES2 " case #" << j << ": " << CASES2[j]); \
+ DoSingleCase(CASES1[i], CASES2[j]); \
+ } \
+ } \
+ } \
+ \
+ template <typename CaseType1, typename CaseType2> \
+ void FIXTURE##_##NAME##_DD::DoSingleCase(const CaseType1& CASES1##_case, \
+ const CaseType2& CASES2##_case)
+
+// -------------------------------------------------------------------
+
+// In C, a size of zero from ZCIS_Next() means EOF so we can't play the same
+// trick here that happens in the C++ version. Use ChunkedInputStream instead.
+upb_ZeroCopyInputStream* TestInputStream(const void* data, size_t size,
+ size_t block_size, upb_Arena* arena) {
+ return upb_ChunkedInputStream_New(data, size, block_size, arena);
+}
+
+// -------------------------------------------------------------------
+
+// We test each operation over a variety of block sizes to insure that
+// we test cases where reads cross buffer boundaries as well as cases
+// where they don't. This is sort of a brute-force approach to this,
+// but it's easy to write and easy to understand.
+const int kBlockSizes[] = {1, 2, 3, 5, 7, 13, 32, 1024};
+
+class TokenizerTest : public testing::Test {
+ protected:
+ // For easy testing.
+ uint64_t ParseInteger(const std::string& text) {
+ uint64_t result;
+ EXPECT_TRUE(upb_Parse_Integer(text.data(), UINT64_MAX, &result))
+ << "'" << text << "'";
+ return result;
+ }
+};
+
+// ===================================================================
+
+// These tests causes gcc 3.3.5 (and earlier?) to give the cryptic error:
+// "sorry, unimplemented: `method_call_expr' not supported by dump_expr"
+#if !defined(__GNUC__) || __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3)
+
+// In each test case, the entire input text should parse as a single token
+// of the given type.
+struct SimpleTokenCase {
+ std::string input;
+ upb_TokenType type;
+};
+
+inline std::ostream& operator<<(std::ostream& out,
+ const SimpleTokenCase& test_case) {
+ return out << absl::CEscape(test_case.input);
+}
+
+SimpleTokenCase kSimpleTokenCases[] = {
+ // Test identifiers.
+ {"hello", kUpb_TokenType_Identifier},
+
+ // Test integers.
+ {"123", kUpb_TokenType_Integer},
+ {"0xab6", kUpb_TokenType_Integer},
+ {"0XAB6", kUpb_TokenType_Integer},
+ {"0X1234567", kUpb_TokenType_Integer},
+ {"0x89abcdef", kUpb_TokenType_Integer},
+ {"0x89ABCDEF", kUpb_TokenType_Integer},
+ {"01234567", kUpb_TokenType_Integer},
+
+ // Test floats.
+ {"123.45", kUpb_TokenType_Float},
+ {"1.", kUpb_TokenType_Float},
+ {"1e3", kUpb_TokenType_Float},
+ {"1E3", kUpb_TokenType_Float},
+ {"1e-3", kUpb_TokenType_Float},
+ {"1e+3", kUpb_TokenType_Float},
+ {"1.e3", kUpb_TokenType_Float},
+ {"1.2e3", kUpb_TokenType_Float},
+ {".1", kUpb_TokenType_Float},
+ {".1e3", kUpb_TokenType_Float},
+ {".1e-3", kUpb_TokenType_Float},
+ {".1e+3", kUpb_TokenType_Float},
+
+ // Test strings.
+ {"'hello'", kUpb_TokenType_String},
+ {"\"foo\"", kUpb_TokenType_String},
+ {"'a\"b'", kUpb_TokenType_String},
+ {"\"a'b\"", kUpb_TokenType_String},
+ {"'a\\'b'", kUpb_TokenType_String},
+ {"\"a\\\"b\"", kUpb_TokenType_String},
+ {"'\\xf'", kUpb_TokenType_String},
+ {"'\\0'", kUpb_TokenType_String},
+
+ // Test symbols.
+ {"+", kUpb_TokenType_Symbol},
+ {".", kUpb_TokenType_Symbol},
+};
+
+TEST_2D(TokenizerTest, SimpleTokens, kSimpleTokenCases, kBlockSizes) {
+ upb::Arena arena;
+
+ // Set up the tokenizer.
+ auto input = TestInputStream(kSimpleTokenCases_case.input.data(),
+ kSimpleTokenCases_case.input.size(),
+ kBlockSizes_case, arena.ptr());
+ auto t = upb_Tokenizer_New(NULL, 0, input, 0, arena.ptr());
+
+ // Before Next() is called, the initial token should always be TYPE_START.
+ EXPECT_EQ(upb_Tokenizer_Type(t), kUpb_TokenType_Start);
+ EXPECT_EQ(upb_Tokenizer_Line(t), 0);
+ EXPECT_EQ(upb_Tokenizer_Column(t), 0);
+ EXPECT_EQ(upb_Tokenizer_EndColumn(t), 0);
+ EXPECT_TRUE(StringEquals(upb_Tokenizer_TextData(t), ""));
+
+ // Parse the token.
+ EXPECT_TRUE(upb_Tokenizer_Next(t, NULL));
+ // Check that it has the right type.
+ EXPECT_EQ(upb_Tokenizer_Type(t), kSimpleTokenCases_case.type);
+ // Check that it contains the complete input text.
+ EXPECT_TRUE(StringEquals(upb_Tokenizer_TextData(t),
+ kSimpleTokenCases_case.input.data()));
+
+ // Check that it is located at the beginning of the input
+ EXPECT_EQ(upb_Tokenizer_Line(t), 0);
+ EXPECT_EQ(upb_Tokenizer_Column(t), 0);
+ EXPECT_EQ(upb_Tokenizer_EndColumn(t), kSimpleTokenCases_case.input.size());
+
+ upb_Status status;
+ upb_Status_Clear(&status);
+
+ // There should be no more input and no errors..
+ EXPECT_FALSE(upb_Tokenizer_Next(t, &status));
+ EXPECT_TRUE(upb_Status_IsOk(&status));
+
+ // After Next() returns false, the token should have type TYPE_END.
+ EXPECT_EQ(upb_Tokenizer_Type(t), kUpb_TokenType_End);
+ EXPECT_EQ(upb_Tokenizer_Line(t), 0);
+ EXPECT_EQ(upb_Tokenizer_Column(t), kSimpleTokenCases_case.input.size());
+ EXPECT_EQ(upb_Tokenizer_EndColumn(t), kSimpleTokenCases_case.input.size());
+ EXPECT_TRUE(StringEquals(upb_Tokenizer_TextData(t), ""));
+}
+
+TEST_1D(TokenizerTest, FloatSuffix, kBlockSizes) {
+ // Test the "allow_f_after_float" option.
+
+ // Set up the tokenizer.
+ upb::Arena arena;
+ const char* text = "1f 2.5f 6e3f 7F";
+ auto input =
+ TestInputStream(text, strlen(text), kBlockSizes_case, arena.ptr());
+ const int options = kUpb_TokenizerOption_AllowFAfterFloat;
+ auto t = upb_Tokenizer_New(NULL, 0, input, options, arena.ptr());
+
+ // Advance through tokens and check that they are parsed as expected.
+
+ EXPECT_TRUE(upb_Tokenizer_Next(t, NULL));
+ EXPECT_EQ(upb_Tokenizer_Type(t), kUpb_TokenType_Float);
+ EXPECT_TRUE(StringEquals(upb_Tokenizer_TextData(t), "1f"));
+
+ EXPECT_TRUE(upb_Tokenizer_Next(t, NULL));
+ EXPECT_EQ(upb_Tokenizer_Type(t), kUpb_TokenType_Float);
+ EXPECT_TRUE(StringEquals(upb_Tokenizer_TextData(t), "2.5f"));
+
+ EXPECT_TRUE(upb_Tokenizer_Next(t, NULL));
+ EXPECT_EQ(upb_Tokenizer_Type(t), kUpb_TokenType_Float);
+ EXPECT_TRUE(StringEquals(upb_Tokenizer_TextData(t), "6e3f"));
+
+ EXPECT_TRUE(upb_Tokenizer_Next(t, NULL));
+ EXPECT_EQ(upb_Tokenizer_Type(t), kUpb_TokenType_Float);
+ EXPECT_TRUE(StringEquals(upb_Tokenizer_TextData(t), "7F"));
+
+ upb_Status status;
+ upb_Status_Clear(&status);
+
+ // There should be no more input and no errors..
+ EXPECT_FALSE(upb_Tokenizer_Next(t, &status));
+ EXPECT_TRUE(upb_Status_IsOk(&status));
+}
+
+SimpleTokenCase kWhitespaceTokenCases[] = {
+ {" ", kUpb_TokenType_Whitespace},
+ {" ", kUpb_TokenType_Whitespace},
+ {"\t", kUpb_TokenType_Whitespace},
+ {"\v", kUpb_TokenType_Whitespace},
+ {"\t ", kUpb_TokenType_Whitespace},
+ {"\v\t", kUpb_TokenType_Whitespace},
+ {" \t\r", kUpb_TokenType_Whitespace},
+ // Newlines:
+ {"\n", kUpb_TokenType_Newline},
+};
+
+TEST_2D(TokenizerTest, Whitespace, kWhitespaceTokenCases, kBlockSizes) {
+ upb::Arena arena;
+ {
+ auto input = TestInputStream(kWhitespaceTokenCases_case.input.data(),
+ kWhitespaceTokenCases_case.input.size(),
+ kBlockSizes_case, arena.ptr());
+ auto t = upb_Tokenizer_New(NULL, 0, input, 0, arena.ptr());
+
+ EXPECT_FALSE(upb_Tokenizer_Next(t, NULL));
+ }
+ {
+ auto input = TestInputStream(kWhitespaceTokenCases_case.input.data(),
+ kWhitespaceTokenCases_case.input.size(),
+ kBlockSizes_case, arena.ptr());
+ const int options = kUpb_TokenizerOption_ReportNewlines;
+ auto t = upb_Tokenizer_New(NULL, 0, input, options, arena.ptr());
+
+ EXPECT_TRUE(upb_Tokenizer_Next(t, NULL));
+
+ EXPECT_EQ(upb_Tokenizer_Type(t), kWhitespaceTokenCases_case.type);
+ EXPECT_TRUE(StringEquals(upb_Tokenizer_TextData(t),
+ kWhitespaceTokenCases_case.input.data()));
+ EXPECT_FALSE(upb_Tokenizer_Next(t, NULL));
+ }
+}
+
+#endif
+
+// -------------------------------------------------------------------
+
+struct TokenFields {
+ upb_TokenType type;
+ std::string text;
+ size_t line;
+ size_t column;
+ size_t end_column;
+};
+
+// In each case, the input is parsed to produce a list of tokens. The
+// last token in "output" must have type TYPE_END.
+struct MultiTokenCase {
+ std::string input;
+ std::vector<TokenFields> output;
+};
+
+inline std::ostream& operator<<(std::ostream& out,
+ const MultiTokenCase& test_case) {
+ return out << absl::CEscape(test_case.input);
+}
+
+MultiTokenCase kMultiTokenCases[] = {
+ // Test empty input.
+ {"",
+ {
+ {kUpb_TokenType_End, "", 0, 0, 0},
+ }},
+ // Test all token types at the same time.
+ {"foo 1 1.2 + 'bar'",
+ {
+ {kUpb_TokenType_Identifier, "foo", 0, 0, 3},
+ {kUpb_TokenType_Integer, "1", 0, 4, 5},
+ {kUpb_TokenType_Float, "1.2", 0, 6, 9},
+ {kUpb_TokenType_Symbol, "+", 0, 10, 11},
+ {kUpb_TokenType_String, "'bar'", 0, 12, 17},
+ {kUpb_TokenType_End, "", 0, 17, 17},
+ }},
+
+ // Test that consecutive symbols are parsed as separate tokens.
+ {"!@+%",
+ {
+ {kUpb_TokenType_Symbol, "!", 0, 0, 1},
+ {kUpb_TokenType_Symbol, "@", 0, 1, 2},
+ {kUpb_TokenType_Symbol, "+", 0, 2, 3},
+ {kUpb_TokenType_Symbol, "%", 0, 3, 4},
+ {kUpb_TokenType_End, "", 0, 4, 4},
+ }},
+
+ // Test that newlines affect line numbers correctly.
+ {"foo bar\nrab oof",
+ {
+ {kUpb_TokenType_Identifier, "foo", 0, 0, 3},
+ {kUpb_TokenType_Identifier, "bar", 0, 4, 7},
+ {kUpb_TokenType_Identifier, "rab", 1, 0, 3},
+ {kUpb_TokenType_Identifier, "oof", 1, 4, 7},
+ {kUpb_TokenType_End, "", 1, 7, 7},
+ }},
+
+ // Test that tabs affect column numbers correctly.
+ {"foo\tbar \tbaz",
+ {
+ {kUpb_TokenType_Identifier, "foo", 0, 0, 3},
+ {kUpb_TokenType_Identifier, "bar", 0, 8, 11},
+ {kUpb_TokenType_Identifier, "baz", 0, 16, 19},
+ {kUpb_TokenType_End, "", 0, 19, 19},
+ }},
+
+ // Test that tabs in string literals affect column numbers correctly.
+ {"\"foo\tbar\" baz",
+ {
+ {kUpb_TokenType_String, "\"foo\tbar\"", 0, 0, 12},
+ {kUpb_TokenType_Identifier, "baz", 0, 13, 16},
+ {kUpb_TokenType_End, "", 0, 16, 16},
+ }},
+
+ // Test that line comments are ignored.
+ {"foo // This is a comment\n"
+ "bar // This is another comment",
+ {
+ {kUpb_TokenType_Identifier, "foo", 0, 0, 3},
+ {kUpb_TokenType_Identifier, "bar", 1, 0, 3},
+ {kUpb_TokenType_End, "", 1, 30, 30},
+ }},
+
+ // Test that block comments are ignored.
+ {"foo /* This is a block comment */ bar",
+ {
+ {kUpb_TokenType_Identifier, "foo", 0, 0, 3},
+ {kUpb_TokenType_Identifier, "bar", 0, 34, 37},
+ {kUpb_TokenType_End, "", 0, 37, 37},
+ }},
+
+ // Test that sh-style comments are not ignored by default.
+ {"foo # bar\n"
+ "baz",
+ {
+ {kUpb_TokenType_Identifier, "foo", 0, 0, 3},
+ {kUpb_TokenType_Symbol, "#", 0, 4, 5},
+ {kUpb_TokenType_Identifier, "bar", 0, 6, 9},
+ {kUpb_TokenType_Identifier, "baz", 1, 0, 3},
+ {kUpb_TokenType_End, "", 1, 3, 3},
+ }},
+
+ // Test all whitespace chars
+ {"foo\n\t\r\v\fbar",
+ {
+ {kUpb_TokenType_Identifier, "foo", 0, 0, 3},
+ {kUpb_TokenType_Identifier, "bar", 1, 11, 14},
+ {kUpb_TokenType_End, "", 1, 14, 14},
+ }},
+};
+
+TEST_2D(TokenizerTest, MultipleTokens, kMultiTokenCases, kBlockSizes) {
+ // Set up the tokenizer.
+ upb::Arena arena;
+ auto input = TestInputStream(kMultiTokenCases_case.input.data(),
+ kMultiTokenCases_case.input.size(),
+ kBlockSizes_case, arena.ptr());
+ auto t = upb_Tokenizer_New(NULL, 0, input, 0, arena.ptr());
+
+ // Before Next() is called, the initial token should always be TYPE_START.
+ EXPECT_EQ(upb_Tokenizer_Type(t), kUpb_TokenType_Start);
+ EXPECT_EQ(upb_Tokenizer_Line(t), 0);
+ EXPECT_EQ(upb_Tokenizer_Column(t), 0);
+ EXPECT_EQ(upb_Tokenizer_EndColumn(t), 0);
+ EXPECT_TRUE(StringEquals(upb_Tokenizer_TextData(t), ""));
+
+ // Loop through all expected tokens.
+ TokenFields token_fields;
+ upb_Status status;
+ upb_Status_Clear(&status);
+ int i = 0;
+ do {
+ token_fields = kMultiTokenCases_case.output[i++];
+
+ SCOPED_TRACE(testing::Message()
+ << "Token #" << i << ": " << absl::CEscape(token_fields.text));
+
+ // Next() should only return false when it hits the end token.
+ if (token_fields.type == kUpb_TokenType_End) {
+ EXPECT_FALSE(upb_Tokenizer_Next(t, &status));
+ EXPECT_TRUE(upb_Status_IsOk(&status));
+ } else {
+ EXPECT_TRUE(upb_Tokenizer_Next(t, NULL));
+ }
+
+ // Check that the token matches the expected one.
+ EXPECT_EQ(upb_Tokenizer_Type(t), token_fields.type);
+ EXPECT_EQ(upb_Tokenizer_Line(t), token_fields.line);
+ EXPECT_EQ(upb_Tokenizer_Column(t), token_fields.column);
+ EXPECT_EQ(upb_Tokenizer_EndColumn(t), token_fields.end_column);
+ EXPECT_EQ(upb_Tokenizer_TextSize(t), token_fields.text.size());
+ EXPECT_TRUE(
+ StringEquals(upb_Tokenizer_TextData(t), token_fields.text.data()));
+ } while (token_fields.type != kUpb_TokenType_End);
+}
+
+MultiTokenCase kMultiWhitespaceTokenCases[] = {
+ // Test all token types at the same time.
+ {"foo 1 \t1.2 \n +\v'bar'",
+ {
+ {kUpb_TokenType_Identifier, "foo", 0, 0, 3},
+ {kUpb_TokenType_Whitespace, " ", 0, 3, 4},
+ {kUpb_TokenType_Integer, "1", 0, 4, 5},
+ {kUpb_TokenType_Whitespace, " \t", 0, 5, 8},
+ {kUpb_TokenType_Float, "1.2", 0, 8, 11},
+ {kUpb_TokenType_Whitespace, " ", 0, 11, 13},
+ {kUpb_TokenType_Newline, "\n", 0, 13, 0},
+ {kUpb_TokenType_Whitespace, " ", 1, 0, 3},
+ {kUpb_TokenType_Symbol, "+", 1, 3, 4},
+ {kUpb_TokenType_Whitespace, "\v", 1, 4, 5},
+ {kUpb_TokenType_String, "'bar'", 1, 5, 10},
+ {kUpb_TokenType_End, "", 1, 10, 10},
+ }},
+
+};
+
+TEST_2D(TokenizerTest, MultipleWhitespaceTokens, kMultiWhitespaceTokenCases,
+ kBlockSizes) {
+ // Set up the tokenizer.
+ upb::Arena arena;
+ auto input = TestInputStream(kMultiWhitespaceTokenCases_case.input.data(),
+ kMultiWhitespaceTokenCases_case.input.size(),
+ kBlockSizes_case, arena.ptr());
+ const int options = kUpb_TokenizerOption_ReportNewlines;
+ auto t = upb_Tokenizer_New(NULL, 0, input, options, arena.ptr());
+
+ // Before Next() is called, the initial token should always be TYPE_START.
+ EXPECT_EQ(upb_Tokenizer_Type(t), kUpb_TokenType_Start);
+ EXPECT_EQ(upb_Tokenizer_Line(t), 0);
+ EXPECT_EQ(upb_Tokenizer_Column(t), 0);
+ EXPECT_EQ(upb_Tokenizer_EndColumn(t), 0);
+ EXPECT_TRUE(StringEquals(upb_Tokenizer_TextData(t), ""));
+
+ // Loop through all expected tokens.
+ TokenFields token_fields;
+ upb_Status status;
+ upb_Status_Clear(&status);
+ int i = 0;
+ do {
+ token_fields = kMultiWhitespaceTokenCases_case.output[i++];
+
+ SCOPED_TRACE(testing::Message()
+ << "Token #" << i << ": " << token_fields.text);
+
+ // Next() should only return false when it hits the end token.
+ if (token_fields.type == kUpb_TokenType_End) {
+ EXPECT_FALSE(upb_Tokenizer_Next(t, &status));
+ EXPECT_TRUE(upb_Status_IsOk(&status));
+ } else {
+ EXPECT_TRUE(upb_Tokenizer_Next(t, NULL));
+ }
+
+ // Check that the token matches the expected one.
+ EXPECT_EQ(upb_Tokenizer_Type(t), token_fields.type);
+ EXPECT_EQ(upb_Tokenizer_Line(t), token_fields.line);
+ EXPECT_EQ(upb_Tokenizer_Column(t), token_fields.column);
+ EXPECT_EQ(upb_Tokenizer_EndColumn(t), token_fields.end_column);
+ EXPECT_TRUE(
+ StringEquals(upb_Tokenizer_TextData(t), token_fields.text.data()));
+ } while (token_fields.type != kUpb_TokenType_End);
+}
+
+// This test causes gcc 3.3.5 (and earlier?) to give the cryptic error:
+// "sorry, unimplemented: `method_call_expr' not supported by dump_expr"
+#if !defined(__GNUC__) || __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3)
+
+TEST_1D(TokenizerTest, ShCommentStyle, kBlockSizes) {
+ // Test the "comment_style" option.
+
+ const char* text =
+ "foo # bar\n"
+ "baz // qux\n"
+ "corge /* grault */\n"
+ "garply";
+ const char* const kTokens[] = {"foo", // "# bar" is ignored
+ "baz", "/", "/", "qux", "corge", "/",
+ "*", "grault", "*", "/", "garply"};
+
+ // Set up the tokenizer.
+ upb::Arena arena;
+ auto input =
+ TestInputStream(text, strlen(text), kBlockSizes_case, arena.ptr());
+ const int options = kUpb_TokenizerOption_CommentStyleShell;
+ auto t = upb_Tokenizer_New(NULL, 0, input, options, arena.ptr());
+
+ // Advance through tokens and check that they are parsed as expected.
+ for (size_t i = 0; i < arraysize(kTokens); i++) {
+ EXPECT_TRUE(upb_Tokenizer_Next(t, NULL));
+ EXPECT_TRUE(StringEquals(upb_Tokenizer_TextData(t), kTokens[i]));
+ }
+
+ // There should be no more input and no errors.
+ upb_Status status;
+ upb_Status_Clear(&status);
+ EXPECT_FALSE(upb_Tokenizer_Next(t, &status));
+ EXPECT_TRUE(upb_Status_IsOk(&status));
+}
+
+#endif
+
+// -------------------------------------------------------------------
+
+#if 0 // TODO(salo): Extended comments are currently unimplemented.
+
+// In each case, the input is expected to have two tokens named "prev" and
+// "next" with comments in between.
+struct DocCommentCase {
+ std::string input;
+
+ const char* prev_trailing_comments;
+ const char* detached_comments[10];
+ const char* next_leading_comments;
+};
+
+inline std::ostream& operator<<(std::ostream& out,
+ const DocCommentCase& test_case) {
+ return out << absl::CEscape(test_case.input);
+}
+
+DocCommentCase kDocCommentCases[] = {
+ {"prev next",
+
+ "",
+ {},
+ ""},
+
+ {"prev /* ignored */ next",
+
+ "",
+ {},
+ ""},
+
+ {"prev // trailing comment\n"
+ "next",
+
+ " trailing comment\n",
+ {},
+ ""},
+
+ {"prev\n"
+ "// leading comment\n"
+ "// line 2\n"
+ "next",
+
+ "",
+ {},
+ " leading comment\n"
+ " line 2\n"},
+
+ {"prev\n"
+ "// trailing comment\n"
+ "// line 2\n"
+ "\n"
+ "next",
+
+ " trailing comment\n"
+ " line 2\n",
+ {},
+ ""},
+
+ {"prev // trailing comment\n"
+ "// leading comment\n"
+ "// line 2\n"
+ "next",
+
+ " trailing comment\n",
+ {},
+ " leading comment\n"
+ " line 2\n"},
+
+ {"prev /* trailing block comment */\n"
+ "/* leading block comment\n"
+ " * line 2\n"
+ " * line 3 */"
+ "next",
+
+ " trailing block comment ",
+ {},
+ " leading block comment\n"
+ " line 2\n"
+ " line 3 "},
+
+ {"prev\n"
+ "/* trailing block comment\n"
+ " * line 2\n"
+ " * line 3\n"
+ " */\n"
+ "/* leading block comment\n"
+ " * line 2\n"
+ " * line 3 */"
+ "next",
+
+ " trailing block comment\n"
+ " line 2\n"
+ " line 3\n",
+ {},
+ " leading block comment\n"
+ " line 2\n"
+ " line 3 "},
+
+ {"prev\n"
+ "// trailing comment\n"
+ "\n"
+ "// detached comment\n"
+ "// line 2\n"
+ "\n"
+ "// second detached comment\n"
+ "/* third detached comment\n"
+ " * line 2 */\n"
+ "// leading comment\n"
+ "next",
+
+ " trailing comment\n",
+ {" detached comment\n"
+ " line 2\n",
+ " second detached comment\n",
+ " third detached comment\n"
+ " line 2 "},
+ " leading comment\n"},
+
+ {"prev /**/\n"
+ "\n"
+ "// detached comment\n"
+ "\n"
+ "// leading comment\n"
+ "next",
+
+ "",
+ {" detached comment\n"},
+ " leading comment\n"},
+
+ {"prev /**/\n"
+ "// leading comment\n"
+ "next",
+
+ "",
+ {},
+ " leading comment\n"},
+};
+
+TEST_2D(TokenizerTest, DocComments, kDocCommentCases, kBlockSizes) {
+ // Set up the tokenizer.
+ TestInputStream input(kDocCommentCases_case.input.data(),
+ kDocCommentCases_case.input.size(), kBlockSizes_case);
+ TestErrorCollector error_collector;
+ Tokenizer tokenizer(&input, &error_collector);
+
+ // Set up a second tokenizer where we'll pass all NULLs to NextWithComments().
+ TestInputStream input2(kDocCommentCases_case.input.data(),
+ kDocCommentCases_case.input.size(), kBlockSizes_case);
+ Tokenizer tokenizer2(&input2, &error_collector);
+
+ tokenizer.Next();
+ tokenizer2.Next();
+
+ EXPECT_EQ("prev", tokenizer.current().text);
+ EXPECT_EQ("prev", tokenizer2.current().text);
+
+ std::string prev_trailing_comments;
+ std::vector<std::string> detached_comments;
+ std::string next_leading_comments;
+ tokenizer.NextWithComments(&prev_trailing_comments, &detached_comments,
+ &next_leading_comments);
+ tokenizer2.NextWithComments(NULL, NULL, NULL);
+ EXPECT_EQ("next", tokenizer.current().text);
+ EXPECT_EQ("next", tokenizer2.current().text);
+
+ EXPECT_EQ(kDocCommentCases_case.prev_trailing_comments,
+ prev_trailing_comments);
+
+ for (int i = 0; i < detached_comments.size(); i++) {
+ EXPECT_LT(i, arraysize(kDocCommentCases));
+ EXPECT_TRUE(kDocCommentCases_case.detached_comments[i] != NULL);
+ EXPECT_EQ(kDocCommentCases_case.detached_comments[i], detached_comments[i]);
+ }
+
+ // Verify that we matched all the detached comments.
+ EXPECT_EQ(NULL,
+ kDocCommentCases_case.detached_comments[detached_comments.size()]);
+
+ EXPECT_EQ(kDocCommentCases_case.next_leading_comments, next_leading_comments);
+}
+
+#endif // 0
+
+// -------------------------------------------------------------------
+
+// Test parse helpers.
+// TODO(b/225783758): Add a fuzz test for this.
+TEST_F(TokenizerTest, ParseInteger) {
+ EXPECT_EQ(0, ParseInteger("0"));
+ EXPECT_EQ(123, ParseInteger("123"));
+ EXPECT_EQ(0xabcdef12u, ParseInteger("0xabcdef12"));
+ EXPECT_EQ(0xabcdef12u, ParseInteger("0xABCDEF12"));
+ EXPECT_EQ(UINT64_MAX, ParseInteger("0xFFFFFFFFFFFFFFFF"));
+ EXPECT_EQ(01234567, ParseInteger("01234567"));
+ EXPECT_EQ(0X123, ParseInteger("0X123"));
+
+ // Test invalid integers that may still be tokenized as integers.
+ EXPECT_EQ(0, ParseInteger("0x"));
+
+ uint64_t i;
+
+ // Test invalid integers that will never be tokenized as integers.
+ EXPECT_FALSE(upb_Parse_Integer("zxy", UINT64_MAX, &i));
+ EXPECT_FALSE(upb_Parse_Integer("1.2", UINT64_MAX, &i));
+ EXPECT_FALSE(upb_Parse_Integer("08", UINT64_MAX, &i));
+ EXPECT_FALSE(upb_Parse_Integer("0xg", UINT64_MAX, &i));
+ EXPECT_FALSE(upb_Parse_Integer("-1", UINT64_MAX, &i));
+
+ // Test overflows.
+ EXPECT_TRUE(upb_Parse_Integer("0", 0, &i));
+ EXPECT_FALSE(upb_Parse_Integer("1", 0, &i));
+ EXPECT_TRUE(upb_Parse_Integer("1", 1, &i));
+ EXPECT_TRUE(upb_Parse_Integer("12345", 12345, &i));
+ EXPECT_FALSE(upb_Parse_Integer("12346", 12345, &i));
+ EXPECT_TRUE(upb_Parse_Integer("0xFFFFFFFFFFFFFFFF", UINT64_MAX, &i));
+ EXPECT_FALSE(upb_Parse_Integer("0x10000000000000000", UINT64_MAX, &i));
+
+ // Test near the limits of signed parsing (values in INT64_MAX +/- 1600)
+ for (int64_t offset = -1600; offset <= 1600; ++offset) {
+ // We make sure to perform an unsigned addition so that we avoid signed
+ // overflow, which would be undefined behavior.
+ uint64_t i = 0x7FFFFFFFFFFFFFFFu + static_cast<uint64_t>(offset);
+ char decimal[32];
+ snprintf(decimal, 32, "%llu", static_cast<unsigned long long>(i));
+ if (offset > 0) {
+ uint64_t parsed = -1;
+ EXPECT_FALSE(upb_Parse_Integer(decimal, INT64_MAX, &parsed))
+ << decimal << "=>" << parsed;
+ } else {
+ uint64_t parsed = -1;
+ EXPECT_TRUE(upb_Parse_Integer(decimal, INT64_MAX, &parsed))
+ << decimal << "=>" << parsed;
+ EXPECT_EQ(parsed, i);
+ }
+ char octal[32];
+ snprintf(octal, 32, "0%llo", static_cast<unsigned long long>(i));
+ if (offset > 0) {
+ uint64_t parsed = -1;
+ EXPECT_FALSE(upb_Parse_Integer(octal, INT64_MAX, &parsed))
+ << octal << "=>" << parsed;
+ } else {
+ uint64_t parsed = -1;
+ EXPECT_TRUE(upb_Parse_Integer(octal, INT64_MAX, &parsed))
+ << octal << "=>" << parsed;
+ EXPECT_EQ(parsed, i);
+ }
+ char hex[32];
+ snprintf(hex, 32, "0x%llx", static_cast<unsigned long long>(i));
+ if (offset > 0) {
+ uint64_t parsed = -1;
+ EXPECT_FALSE(upb_Parse_Integer(hex, INT64_MAX, &parsed))
+ << hex << "=>" << parsed;
+ } else {
+ uint64_t parsed = -1;
+ EXPECT_TRUE(upb_Parse_Integer(hex, INT64_MAX, &parsed)) << hex;
+ EXPECT_EQ(parsed, i);
+ }
+ // EXPECT_NE(offset, -237);
+ }
+
+ // Test near the limits of unsigned parsing (values in UINT64_MAX +/- 1600)
+ // By definition, values greater than UINT64_MAX cannot be held in a uint64_t
+ // variable, so printing them is a little tricky; fortunately all but the
+ // last four digits are known, so we can hard-code them in the printf string,
+ // and we only need to format the last 4.
+ for (int64_t offset = -1600; offset <= 1600; ++offset) {
+ {
+ uint64_t i = 18446744073709551615u + offset;
+ char decimal[32];
+ snprintf(decimal, 32, "1844674407370955%04llu",
+ static_cast<unsigned long long>(1615 + offset));
+ if (offset > 0) {
+ uint64_t parsed = -1;
+ EXPECT_FALSE(upb_Parse_Integer(decimal, UINT64_MAX, &parsed))
+ << decimal << "=>" << parsed;
+ } else {
+ uint64_t parsed = -1;
+ EXPECT_TRUE(upb_Parse_Integer(decimal, UINT64_MAX, &parsed)) << decimal;
+ EXPECT_EQ(parsed, i);
+ }
+ }
+ {
+ uint64_t i = 01777777777777777777777u + offset;
+ if (offset > 0) {
+ char octal[32];
+ snprintf(octal, 32, "0200000000000000000%04llo",
+ static_cast<unsigned long long>(offset - 1));
+ uint64_t parsed = -1;
+ EXPECT_FALSE(upb_Parse_Integer(octal, UINT64_MAX, &parsed))
+ << octal << "=>" << parsed;
+ } else {
+ char octal[32];
+ snprintf(octal, 32, "0%llo", static_cast<unsigned long long>(i));
+ uint64_t parsed = -1;
+ EXPECT_TRUE(upb_Parse_Integer(octal, UINT64_MAX, &parsed)) << octal;
+ EXPECT_EQ(parsed, i);
+ }
+ }
+ {
+ uint64_t ui = 0xffffffffffffffffu + offset;
+ char hex[32];
+ if (offset > 0) {
+ snprintf(hex, 32, "0x1000000000000%04llx",
+ static_cast<unsigned long long>(offset - 1));
+ uint64_t parsed = -1;
+ EXPECT_FALSE(upb_Parse_Integer(hex, UINT64_MAX, &parsed))
+ << hex << "=>" << parsed;
+ } else {
+ snprintf(hex, 32, "0x%llx", static_cast<unsigned long long>(ui));
+ uint64_t parsed = -1;
+ EXPECT_TRUE(upb_Parse_Integer(hex, UINT64_MAX, &parsed)) << hex;
+ EXPECT_EQ(parsed, ui);
+ }
+ }
+ }
+}
+
+TEST_F(TokenizerTest, ParseFloat) {
+ EXPECT_DOUBLE_EQ(1, upb_Parse_Float("1."));
+ EXPECT_DOUBLE_EQ(1e3, upb_Parse_Float("1e3"));
+ EXPECT_DOUBLE_EQ(1e3, upb_Parse_Float("1E3"));
+ EXPECT_DOUBLE_EQ(1.5e3, upb_Parse_Float("1.5e3"));
+ EXPECT_DOUBLE_EQ(.1, upb_Parse_Float(".1"));
+ EXPECT_DOUBLE_EQ(.25, upb_Parse_Float(".25"));
+ EXPECT_DOUBLE_EQ(.1e3, upb_Parse_Float(".1e3"));
+ EXPECT_DOUBLE_EQ(.25e3, upb_Parse_Float(".25e3"));
+ EXPECT_DOUBLE_EQ(.1e+3, upb_Parse_Float(".1e+3"));
+ EXPECT_DOUBLE_EQ(.1e-3, upb_Parse_Float(".1e-3"));
+ EXPECT_DOUBLE_EQ(5, upb_Parse_Float("5"));
+ EXPECT_DOUBLE_EQ(6e-12, upb_Parse_Float("6e-12"));
+ EXPECT_DOUBLE_EQ(1.2, upb_Parse_Float("1.2"));
+ EXPECT_DOUBLE_EQ(1.e2, upb_Parse_Float("1.e2"));
+
+ // Test invalid integers that may still be tokenized as integers.
+ EXPECT_DOUBLE_EQ(1, upb_Parse_Float("1e"));
+ EXPECT_DOUBLE_EQ(1, upb_Parse_Float("1e-"));
+ EXPECT_DOUBLE_EQ(1, upb_Parse_Float("1.e"));
+
+ // Test 'f' suffix.
+ EXPECT_DOUBLE_EQ(1, upb_Parse_Float("1f"));
+ EXPECT_DOUBLE_EQ(1, upb_Parse_Float("1.0f"));
+ EXPECT_DOUBLE_EQ(1, upb_Parse_Float("1F"));
+
+ // These should parse successfully even though they are out of range.
+ // Overflows become infinity and underflows become zero.
+ EXPECT_EQ(0.0, upb_Parse_Float("1e-9999999999999999999999999999"));
+ EXPECT_EQ(HUGE_VAL, upb_Parse_Float("1e+9999999999999999999999999999"));
+
+#if GTEST_HAS_DEATH_TEST // death tests do not work on Windows yet
+ // Test invalid integers that will never be tokenized as integers.
+ EXPECT_DEBUG_DEATH(
+ upb_Parse_Float("zxy"),
+ "passed text that could not have been tokenized as a float");
+ EXPECT_DEBUG_DEATH(
+ upb_Parse_Float("1-e0"),
+ "passed text that could not have been tokenized as a float");
+ EXPECT_DEBUG_DEATH(
+ upb_Parse_Float("-1.0"),
+ "passed text that could not have been tokenized as a float");
+#endif // GTEST_HAS_DEATH_TEST
+}
+
+TEST_F(TokenizerTest, ParseString) {
+ const std::string inputs[] = {
+ "'hello'",
+ "\"blah\\nblah2\"",
+ "'\\1x\\1\\123\\739\\52\\334n\\3'",
+ "'\\x20\\x4'",
+
+ // Test invalid strings that may still be tokenized as strings.
+ "\"\\a\\l\\v\\t", // \l is invalid
+ "'",
+ "'\\",
+
+ // Experiment with Unicode escapes.
+ // Here are one-, two- and three-byte Unicode characters.
+ "'\\u0024\\u00a2\\u20ac\\U00024b62XX'",
+ "'\\u0024\\u00a2\\u20ac\\ud852\\udf62XX'", // Same, encoded using UTF16.
+
+ // Here's some broken UTF16: a head surrogate with no tail surrogate.
+ // We just output this as if it were UTF8; it's not a defined code point,
+ // but it has a defined encoding.
+ "'\\ud852XX'",
+
+ // Malformed escape: Demons may fly out of the nose.
+ "'\\u0'",
+
+ // Beyond the range of valid UTF-32 code units.
+ "'\\U00110000\\U00200000\\UFFFFFFFF'",
+ };
+
+ const std::string outputs[] = {
+ "hello",
+ "blah\nblah2",
+ "\1x\1\123\739\52\334n\3",
+ "\x20\x4",
+
+ "\a?\v\t",
+ "",
+ "\\",
+
+ "$¢€ð¤¢XX",
+ "$¢€ð¤¢XX",
+
+ "\xed\xa1\x92XX",
+
+ "u0",
+
+ "\\U00110000\\U00200000\\Uffffffff",
+ };
+
+ upb::Arena arena;
+
+ for (size_t i = 0; i < sizeof(inputs) / sizeof(inputs[0]); i++) {
+ auto sv = upb_Parse_String(inputs[i].data(), arena.ptr());
+ EXPECT_TRUE(StringEquals(sv.data, outputs[i].data()));
+ }
+
+ // Test invalid strings that will never be tokenized as strings.
+#if GTEST_HAS_DEATH_TEST // death tests do not work on Windows yet
+ EXPECT_DEBUG_DEATH(
+ upb_Parse_String("", arena.ptr()),
+ "passed text that could not have been tokenized as a string");
+#endif // GTEST_HAS_DEATH_TEST
+}
+
+TEST_F(TokenizerTest, ParseStringAppend) {
+ upb::Arena arena;
+ upb_String output;
+ upb_String_Init(&output, arena.ptr());
+
+ upb_String_Assign(&output, "stuff+", 6);
+ auto sv = upb_Parse_String("'hello'", arena.ptr());
+ EXPECT_TRUE(StringEquals(sv.data, "hello"));
+ upb_String_Append(&output, sv.data, sv.size);
+ EXPECT_TRUE(StringEquals(upb_String_Data(&output), "stuff+hello"));
+}
+
+// -------------------------------------------------------------------
+
+// Each case parses some input text, ignoring the tokens produced, and
+// checks that the error output matches what is expected.
+struct ErrorCase {
+ std::string input;
+ const char* errors;
+};
+
+inline std::ostream& operator<<(std::ostream& out, const ErrorCase& test_case) {
+ return out << absl::CEscape(test_case.input);
+}
+
+ErrorCase kErrorCases[] = {
+ // String errors.
+ {"'\\l'", "0:2: Invalid escape sequence in string literal."},
+ {"'\\X'", "0:2: Invalid escape sequence in string literal."},
+ {"'\\x'", "0:3: Expected hex digits for escape sequence."},
+ {"'foo", "0:4: Unexpected end of string."},
+ {"'bar\nfoo", "0:4: String literals cannot cross line boundaries."},
+ {"'\\u01'", "0:5: Expected four hex digits for \\u escape sequence."},
+ {"'\\uXYZ'", "0:3: Expected four hex digits for \\u escape sequence."},
+
+ // Integer errors.
+ {"123foo", "0:3: Need space between number and identifier."},
+
+ // Hex/octal errors.
+ {"0x foo", "0:2: \"0x\" must be followed by hex digits."},
+ {"0541823", "0:4: Numbers starting with leading zero must be in octal."},
+ {"0x123z", "0:5: Need space between number and identifier."},
+ {"0x123.4", "0:5: Hex and octal numbers must be integers."},
+ {"0123.4", "0:4: Hex and octal numbers must be integers."},
+
+ // Float errors.
+ {"1e foo", "0:2: \"e\" must be followed by exponent."},
+ {"1e- foo", "0:3: \"e\" must be followed by exponent."},
+ {"1.2.3",
+ "0:3: Already saw decimal point or exponent; can't have another one."},
+ {"1e2.3",
+ "0:3: Already saw decimal point or exponent; can't have another one."},
+ {"a.1", "0:1: Need space between identifier and decimal point."},
+ // allow_f_after_float not enabled, so this should be an error.
+ {"1.0f", "0:3: Need space between number and identifier."},
+
+ // Block comment errors.
+ {"/*",
+ "0:2: End-of-file inside block comment.\n0:0: Comment started here."},
+ {"/*/*/ foo",
+ "0:3: \"/*\" inside block comment. Block comments cannot be nested."},
+
+ // Control characters. Multiple consecutive control characters should only
+ // produce one error.
+ {"\b foo", "0:0: Invalid control characters encountered in text."},
+ {"\b\b foo", "0:0: Invalid control characters encountered in text."},
+
+ // Check that control characters at end of input don't result in an
+ // infinite loop.
+ {"\b", "0:0: Invalid control characters encountered in text."},
+
+ // Check recovery from '\0'. We have to explicitly specify the length of
+ // these strings because otherwise the string constructor will just call
+ // strlen() which will see the first '\0' and think that is the end of the
+ // string.
+ {std::string("\0foo", 4),
+ "0:0: Invalid control characters encountered in text."},
+ {std::string("\0\0foo", 5),
+ "0:0: Invalid control characters encountered in text."},
+
+ // Check error from high order bits set
+ {"\300", "0:0: Interpreting non ascii codepoint 192."},
+};
+
+TEST_2D(TokenizerTest, Errors, kErrorCases, kBlockSizes) {
+ // Set up the tokenizer.
+ upb::Arena arena;
+ auto input = TestInputStream(kErrorCases_case.input.data(),
+ kErrorCases_case.input.size(), kBlockSizes_case,
+ arena.ptr());
+ auto t = upb_Tokenizer_New(NULL, 0, input, 0, arena.ptr());
+
+ upb_Status status;
+ upb_Status_Clear(&status);
+
+ while (upb_Tokenizer_Next(t, &status))
+ ; // just keep looping
+ EXPECT_TRUE(
+ StringEquals(upb_Status_ErrorMessage(&status), kErrorCases_case.errors));
+}
+
+// -------------------------------------------------------------------
+
+TEST_1D(TokenizerTest, BackUpOnDestruction, kBlockSizes) {
+ const std::string text = "foo bar";
+ upb::Arena arena;
+ auto input =
+ TestInputStream(text.data(), text.size(), kBlockSizes_case, arena.ptr());
+
+ // Create a tokenizer, read one token, then destroy it.
+ auto t = upb_Tokenizer_New(NULL, 0, input, 0, arena.ptr());
+ upb_Tokenizer_Next(t, NULL);
+ upb_Tokenizer_Fini(t);
+
+ // Only "foo" should have been read.
+ EXPECT_EQ(strlen("foo"), upb_ZeroCopyInputStream_ByteCount(input));
+}
+
+static const char* kParseBenchmark[] = {
+ "\"partner-google-mobile-modes-print\"",
+ "\"partner-google-mobile-modes-products\"",
+ "\"partner-google-mobile-modes-realtime\"",
+ "\"partner-google-mobile-modes-video\"",
+ "\"partner-google-modes-news\"",
+ "\"partner-google-modes-places\"",
+ "\"partner-google-news\"",
+ "\"partner-google-print\"",
+ "\"partner-google-products\"",
+ "\"partner-google-realtime\"",
+ "\"partner-google-video\"",
+ "\"true\"",
+ "\"BigImagesHover__js_list\"",
+ "\"XFEExternJsVersionParameters\"",
+ "\"Available versions of the big images hover javascript\"",
+ "\"Version: {\n\"",
+ "\" script_name: \"extern_js/dummy_file_compiled_post20070813.js\"\n\"",
+ "\" version_number: 0\n\"",
+ "\"}\"",
+ "\"BigImagesHover__js_selection\"",
+ "\"XFEExternJsVersionParameters\"",
+ "\"Versioning info for the big images hover javascript.\"",
+ "\"current_version: 0\"",
+ "\"BigImagesHover__js_suppressed\"",
+ "\"Indicates if the client-side javascript associated with big images.\"",
+ "\"true\"",
+ "\"BrowserAnyOf\"",
+ "\"IsChrome5OrAbove\"",
+ "\"IsFirefox3OrAbove\"",
+ "IsIE8OrAboveBinary",
+ "\"Abe \"Sausage King\" Froman\"",
+ "\"Frank \"Meatball\" Febbraro\"",
+};
+
+TEST(Benchmark, ParseStringAppendAccumulate) {
+ upb::Arena arena;
+ size_t outsize = 0;
+ int benchmark_len = arraysize(kParseBenchmark);
+ for (int i = 0; i < benchmark_len; i++) {
+ auto sv = upb_Parse_String(kParseBenchmark[i], arena.ptr());
+ outsize += sv.size;
+ }
+ EXPECT_NE(0, outsize);
+}
+
+TEST(Benchmark, ParseStringAppend) {
+ upb::Arena arena;
+ upb_String output;
+ upb_String_Init(&output, arena.ptr());
+ int benchmark_len = arraysize(kParseBenchmark);
+ for (int i = 0; i < benchmark_len; i++) {
+ auto sv = upb_Parse_String(kParseBenchmark[i], arena.ptr());
+ upb_String_Append(&output, sv.data, sv.size);
+ }
+ EXPECT_NE(0, upb_String_Size(&output));
+}
+
+// These tests validate the Tokenizer's handling of Unicode escapes.
+
+// Encode a single code point as UTF8.
+static std::string StandardUTF8(uint32_t code_point) {
+ char buffer[4];
+ int count = upb_Unicode_ToUTF8(code_point, &buffer[0]);
+
+ EXPECT_NE(count, 0) << "Failed to encode point " << std::hex << code_point;
+ return std::string(reinterpret_cast<const char*>(buffer), count);
+}
+
+static std::string DisplayHex(const std::string& data) {
+ std::string output;
+ for (size_t i = 0; i < data.size(); ++i) {
+ absl::StrAppendFormat(&output, "%02x ", data[i]);
+ }
+ return output;
+}
+
+static void ExpectFormat(const std::string& expectation,
+ const std::string& formatted) {
+ upb::Arena arena;
+ auto sv = upb_Parse_String(formatted.data(), arena.ptr());
+ EXPECT_EQ(strcmp(sv.data, expectation.data()), 0)
+ << ": Incorrectly parsed " << formatted << ":\nGot "
+ << DisplayHex(sv.data) << "\nExpected " << DisplayHex(expectation);
+}
+
+TEST(TokenizerHandlesUnicode, BMPCodes) {
+ for (uint32_t code_point = 0; code_point < 0x10000; ++code_point) {
+ // The UTF8 encoding of surrogates as single entities is not defined.
+ if (upb_Unicode_IsHigh(code_point)) continue;
+ if (upb_Unicode_IsLow(code_point)) continue;
+
+ const std::string expectation = StandardUTF8(code_point);
+
+ // Points in the BMP pages can be encoded using either \u with four hex
+ // digits, or \U with eight hex digits.
+ ExpectFormat(expectation, absl::StrFormat("'\\u%04x'", code_point));
+ ExpectFormat(expectation, absl::StrFormat("'\\u%04X'", code_point));
+ ExpectFormat(expectation, absl::StrFormat("'\\U%08x'", code_point));
+ ExpectFormat(expectation, absl::StrFormat("'\\U%08X'", code_point));
+ }
+}
+
+TEST(TokenizerHandlesUnicode, NonBMPCodes) {
+ for (uint32_t code_point = 0x10000; code_point < 0x110000; ++code_point) {
+ const std::string expectation = StandardUTF8(code_point);
+
+ // Points in the non-BMP pages can be encoded using either \U with eight hex
+ // digits, or using UTF-16 surrogate pairs.
+ ExpectFormat(expectation, absl::StrFormat("'\\U%08x'", code_point));
+ ExpectFormat(expectation, absl::StrFormat("'\\U%08X'", code_point));
+ ExpectFormat(expectation, absl::StrFormat("'\\u%04x\\u%04x'",
+ upb_Unicode_ToHigh(code_point),
+ upb_Unicode_ToLow(code_point)));
+ }
+}
+
+} // namespace
+} // namespace io
+} // namespace proto2
diff --git a/upb/upb/io/zero_copy_input_stream.h b/upb/upb/io/zero_copy_input_stream.h
new file mode 100644
index 0000000..b5eef3e
--- /dev/null
+++ b/upb/upb/io/zero_copy_input_stream.h
@@ -0,0 +1,132 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_IO_ZERO_COPY_INPUT_STREAM_H_
+#define UPB_IO_ZERO_COPY_INPUT_STREAM_H_
+
+#include "upb/base/status.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct upb_ZeroCopyInputStream upb_ZeroCopyInputStream;
+
+typedef struct {
+ // Obtains a chunk of data from the stream.
+ //
+ // Preconditions:
+ // "count" and "status" are not NULL.
+ //
+ // Postconditions:
+ // All errors are permanent. If an error occurs then:
+ // - NULL will be returned to the caller.
+ // - *count will be set to zero.
+ // - *status will be set to the error.
+ // EOF is permanent. If EOF is reached then:
+ // - NULL will be returned to the caller.
+ // - *count will be set to zero.
+ // - *status will not be touched.
+ // Otherwise:
+ // - The returned value will point to a buffer containing the bytes read.
+ // - *count will be set to the number of bytes read.
+ // - *status will not be touched.
+ //
+ // Ownership of this buffer remains with the stream, and the buffer
+ // remains valid only until some other method of the stream is called
+ // or the stream is destroyed.
+ const void* (*Next)(struct upb_ZeroCopyInputStream* z, size_t* count,
+ upb_Status* status);
+
+ // Backs up a number of bytes, so that the next call to Next() returns
+ // data again that was already returned by the last call to Next(). This
+ // is useful when writing procedures that are only supposed to read up
+ // to a certain point in the input, then return. If Next() returns a
+ // buffer that goes beyond what you wanted to read, you can use BackUp()
+ // to return to the point where you intended to finish.
+ //
+ // Preconditions:
+ // * The last method called must have been Next().
+ // * count must be less than or equal to the size of the last buffer
+ // returned by Next().
+ //
+ // Postconditions:
+ // * The last "count" bytes of the last buffer returned by Next() will be
+ // pushed back into the stream. Subsequent calls to Next() will return
+ // the same data again before producing new data.
+ void (*BackUp)(struct upb_ZeroCopyInputStream* z, size_t count);
+
+ // Skips a number of bytes. Returns false if the end of the stream is
+ // reached or some input error occurred. In the end-of-stream case, the
+ // stream is advanced to the end of the stream (so ByteCount() will return
+ // the total size of the stream).
+ bool (*Skip)(struct upb_ZeroCopyInputStream* z, size_t count);
+
+ // Returns the total number of bytes read since this object was created.
+ size_t (*ByteCount)(const struct upb_ZeroCopyInputStream* z);
+} _upb_ZeroCopyInputStream_VTable;
+
+struct upb_ZeroCopyInputStream {
+ const _upb_ZeroCopyInputStream_VTable* vtable;
+};
+
+UPB_INLINE const void* upb_ZeroCopyInputStream_Next(upb_ZeroCopyInputStream* z,
+ size_t* count,
+ upb_Status* status) {
+ const void* out = z->vtable->Next(z, count, status);
+ UPB_ASSERT((out == NULL) == (*count == 0));
+ return out;
+}
+
+UPB_INLINE void upb_ZeroCopyInputStream_BackUp(upb_ZeroCopyInputStream* z,
+ size_t count) {
+ return z->vtable->BackUp(z, count);
+}
+
+UPB_INLINE bool upb_ZeroCopyInputStream_Skip(upb_ZeroCopyInputStream* z,
+ size_t count) {
+ return z->vtable->Skip(z, count);
+}
+
+UPB_INLINE size_t
+upb_ZeroCopyInputStream_ByteCount(const upb_ZeroCopyInputStream* z) {
+ return z->vtable->ByteCount(z);
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_IO_ZERO_COPY_INPUT_STREAM_H_ */
diff --git a/upb/upb/io/zero_copy_output_stream.h b/upb/upb/io/zero_copy_output_stream.h
new file mode 100644
index 0000000..485a94f
--- /dev/null
+++ b/upb/upb/io/zero_copy_output_stream.h
@@ -0,0 +1,133 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_IO_ZERO_COPY_OUTPUT_STREAM_H_
+#define UPB_IO_ZERO_COPY_OUTPUT_STREAM_H_
+
+#include "upb/base/status.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct upb_ZeroCopyOutputStream upb_ZeroCopyOutputStream;
+
+typedef struct {
+ // Obtains a buffer into which data can be written. Any data written
+ // into this buffer will eventually (maybe instantly, maybe later on)
+ // be written to the output.
+ //
+ // Preconditions:
+ // "count" and "status" are not NULL.
+ //
+ // Postconditions:
+ // All errors are permanent. If an error occurs then:
+ // - NULL will be returned to the caller.
+ // - *count will be set to zero.
+ // - *status will be set to the error.
+ // EOF is permanent. If EOF is reached then:
+ // - NULL will be returned to the caller.
+ // - *count will be set to zero.
+ // - *status will not be touched.
+ // Otherwise:
+ // - The returned value will point to a buffer containing the bytes read.
+ // - *count will be set to the number of bytes read.
+ // - *status will not be touched.
+ //
+ // Ownership of this buffer remains with the stream, and the buffer
+ // remains valid only until some other method of the stream is called
+ // or the stream is destroyed.
+ //
+ // Any data which the caller stores in this buffer will eventually be
+ // written to the output (unless BackUp() is called).
+ void* (*Next)(struct upb_ZeroCopyOutputStream* z, size_t* count,
+ upb_Status* status);
+
+ // Backs up a number of bytes, so that the end of the last buffer returned
+ // by Next() is not actually written. This is needed when you finish
+ // writing all the data you want to write, but the last buffer was bigger
+ // than you needed. You don't want to write a bunch of garbage after the
+ // end of your data, so you use BackUp() to back up.
+ //
+ // Preconditions:
+ // * The last method called must have been Next().
+ // * count must be less than or equal to the size of the last buffer
+ // returned by Next().
+ // * The caller must not have written anything to the last "count" bytes
+ // of that buffer.
+ //
+ // Postconditions:
+ // * The last "count" bytes of the last buffer returned by Next() will be
+ // ignored.
+ //
+ // This method can be called with `count = 0` to finalize (flush) any
+ // previously returned buffer. For example, a file output stream can
+ // flush buffers returned from a previous call to Next() upon such
+ // BackUp(0) invocations. ZeroCopyOutputStream callers should always
+ // invoke BackUp() after a final Next() call, even if there is no
+ // excess buffer data to be backed up to indicate a flush point.
+ void (*BackUp)(struct upb_ZeroCopyOutputStream* z, size_t count);
+
+ // Returns the total number of bytes written since this object was created.
+ size_t (*ByteCount)(const struct upb_ZeroCopyOutputStream* z);
+} _upb_ZeroCopyOutputStream_VTable;
+
+struct upb_ZeroCopyOutputStream {
+ const _upb_ZeroCopyOutputStream_VTable* vtable;
+};
+
+UPB_INLINE void* upb_ZeroCopyOutputStream_Next(upb_ZeroCopyOutputStream* z,
+ size_t* count,
+ upb_Status* status) {
+ void* out = z->vtable->Next(z, count, status);
+ UPB_ASSERT((out == NULL) == (*count == 0));
+ return out;
+}
+
+UPB_INLINE void upb_ZeroCopyOutputStream_BackUp(upb_ZeroCopyOutputStream* z,
+ size_t count) {
+ return z->vtable->BackUp(z, count);
+}
+
+UPB_INLINE size_t
+upb_ZeroCopyOutputStream_ByteCount(const upb_ZeroCopyOutputStream* z) {
+ return z->vtable->ByteCount(z);
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_IO_ZERO_COPY_OUTPUT_STREAM_H_ */
diff --git a/upb/upb/io/zero_copy_stream_test.cc b/upb/upb/io/zero_copy_stream_test.cc
new file mode 100644
index 0000000..475af83
--- /dev/null
+++ b/upb/upb/io/zero_copy_stream_test.cc
@@ -0,0 +1,294 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Testing strategy: For each type of I/O (array, string, file, etc.) we
+// create an output stream and write some data to it, then create a
+// corresponding input stream to read the same data back and expect it to
+// match. When the data is written, it is written in several small chunks
+// of varying sizes, with a BackUp() after each chunk. It is read back
+// similarly, but with chunks separated at different points. The whole
+// process is run with a variety of block sizes for both the input and
+// the output.
+
+#include "gtest/gtest.h"
+#include "upb/base/status.hpp"
+#include "upb/io/chunked_input_stream.h"
+#include "upb/io/chunked_output_stream.h"
+#include "upb/mem/arena.hpp"
+
+namespace upb {
+namespace {
+
+class IoTest : public testing::Test {
+ protected:
+ // Test helpers.
+
+ // Helper to write an array of data to an output stream.
+ bool WriteToOutput(upb_ZeroCopyOutputStream* output, const void* data,
+ int size);
+ // Helper to read a fixed-length array of data from an input stream.
+ int ReadFromInput(upb_ZeroCopyInputStream* input, void* data, int size);
+ // Write a string to the output stream.
+ void WriteString(upb_ZeroCopyOutputStream* output, const std::string& str);
+ // Read a number of bytes equal to the size of the given string and checks
+ // that it matches the string.
+ void ReadString(upb_ZeroCopyInputStream* input, const std::string& str);
+ // Writes some text to the output stream in a particular order. Returns
+ // the number of bytes written, in case the caller needs that to set up an
+ // input stream.
+ int WriteStuff(upb_ZeroCopyOutputStream* output);
+ // Reads text from an input stream and expects it to match what
+ // WriteStuff() writes.
+ void ReadStuff(upb_ZeroCopyInputStream* input, bool read_eof = true);
+
+ // Similar to WriteStuff, but performs more sophisticated testing.
+ int WriteStuffLarge(upb_ZeroCopyOutputStream* output);
+ // Reads and tests a stream that should have been written to
+ // via WriteStuffLarge().
+ void ReadStuffLarge(upb_ZeroCopyInputStream* input);
+
+ static const int kBlockSizes[];
+ static const int kBlockSizeCount;
+};
+
+const int IoTest::kBlockSizes[] = {1, 2, 5, 7, 10, 23, 64};
+const int IoTest::kBlockSizeCount = sizeof(IoTest::kBlockSizes) / sizeof(int);
+
+bool IoTest::WriteToOutput(upb_ZeroCopyOutputStream* output, const void* data,
+ int size) {
+ const uint8_t* in = reinterpret_cast<const uint8_t*>(data);
+ size_t in_size = size;
+ size_t out_size;
+
+ while (true) {
+ upb::Status status;
+ void* out = upb_ZeroCopyOutputStream_Next(output, &out_size, status.ptr());
+ if (out_size == 0) return false;
+
+ if (in_size <= out_size) {
+ memcpy(out, in, in_size);
+ upb_ZeroCopyOutputStream_BackUp(output, out_size - in_size);
+ return true;
+ }
+
+ memcpy(out, in, out_size);
+ in += out_size;
+ in_size -= out_size;
+ }
+}
+
+int IoTest::ReadFromInput(upb_ZeroCopyInputStream* input, void* data,
+ int size) {
+ uint8_t* out = reinterpret_cast<uint8_t*>(data);
+ size_t out_size = size;
+
+ const void* in;
+ size_t in_size = 0;
+
+ while (true) {
+ upb::Status status;
+ in = upb_ZeroCopyInputStream_Next(input, &in_size, status.ptr());
+
+ if (in_size == 0) {
+ return size - out_size;
+ }
+
+ if (out_size <= in_size) {
+ memcpy(out, in, out_size);
+ if (in_size > out_size) {
+ upb_ZeroCopyInputStream_BackUp(input, in_size - out_size);
+ }
+ return size; // Copied all of it.
+ }
+
+ memcpy(out, in, in_size);
+ out += in_size;
+ out_size -= in_size;
+ }
+}
+
+void IoTest::WriteString(upb_ZeroCopyOutputStream* output,
+ const std::string& str) {
+ EXPECT_TRUE(WriteToOutput(output, str.c_str(), str.size()));
+}
+
+void IoTest::ReadString(upb_ZeroCopyInputStream* input,
+ const std::string& str) {
+ std::unique_ptr<char[]> buffer(new char[str.size() + 1]);
+ buffer[str.size()] = '\0';
+ EXPECT_EQ(ReadFromInput(input, buffer.get(), str.size()), str.size());
+ EXPECT_STREQ(str.c_str(), buffer.get());
+}
+
+int IoTest::WriteStuff(upb_ZeroCopyOutputStream* output) {
+ WriteString(output, "Hello world!\n");
+ WriteString(output, "Some te");
+ WriteString(output, "xt. Blah blah.");
+ WriteString(output, "abcdefg");
+ WriteString(output, "01234567890123456789");
+ WriteString(output, "foobar");
+
+ const int result = upb_ZeroCopyOutputStream_ByteCount(output);
+ EXPECT_EQ(result, 68);
+ return result;
+}
+
+// Reads text from an input stream and expects it to match what WriteStuff()
+// writes.
+void IoTest::ReadStuff(upb_ZeroCopyInputStream* input, bool read_eof) {
+ ReadString(input, "Hello world!\n");
+ ReadString(input, "Some text. ");
+ ReadString(input, "Blah ");
+ ReadString(input, "blah.");
+ ReadString(input, "abcdefg");
+ EXPECT_TRUE(upb_ZeroCopyInputStream_Skip(input, 20));
+ ReadString(input, "foo");
+ ReadString(input, "bar");
+
+ EXPECT_EQ(upb_ZeroCopyInputStream_ByteCount(input), 68);
+
+ if (read_eof) {
+ uint8_t byte;
+ EXPECT_EQ(ReadFromInput(input, &byte, 1), 0);
+ }
+}
+
+int IoTest::WriteStuffLarge(upb_ZeroCopyOutputStream* output) {
+ WriteString(output, "Hello world!\n");
+ WriteString(output, "Some te");
+ WriteString(output, "xt. Blah blah.");
+ WriteString(output, std::string(100000, 'x')); // A very long string
+ WriteString(output, std::string(100000, 'y')); // A very long string
+ WriteString(output, "01234567890123456789");
+
+ const int result = upb_ZeroCopyOutputStream_ByteCount(output);
+ EXPECT_EQ(result, 200055);
+ return result;
+}
+
+// Reads text from an input stream and expects it to match what WriteStuff()
+// writes.
+void IoTest::ReadStuffLarge(upb_ZeroCopyInputStream* input) {
+ ReadString(input, "Hello world!\nSome text. ");
+ EXPECT_TRUE(upb_ZeroCopyInputStream_Skip(input, 5));
+ ReadString(input, "blah.");
+ EXPECT_TRUE(upb_ZeroCopyInputStream_Skip(input, 100000 - 10));
+ ReadString(input, std::string(10, 'x') + std::string(100000 - 20000, 'y'));
+ EXPECT_TRUE(upb_ZeroCopyInputStream_Skip(input, 20000 - 10));
+ ReadString(input, "yyyyyyyyyy01234567890123456789");
+ EXPECT_EQ(upb_ZeroCopyInputStream_ByteCount(input), 200055);
+
+ uint8_t byte;
+ EXPECT_EQ(ReadFromInput(input, &byte, 1), 0);
+}
+
+// ===================================================================
+
+TEST_F(IoTest, ArrayIo) {
+ const int kBufferSize = 256;
+ uint8_t buffer[kBufferSize];
+
+ upb::Arena arena;
+ for (int i = 0; i < kBlockSizeCount; i++) {
+ for (int j = 0; j < kBlockSizeCount; j++) {
+ auto output = upb_ChunkedOutputStream_New(buffer, kBufferSize,
+ kBlockSizes[j], arena.ptr());
+ int size = WriteStuff(output);
+ auto input =
+ upb_ChunkedInputStream_New(buffer, size, kBlockSizes[j], arena.ptr());
+ ReadStuff(input);
+ }
+ }
+}
+
+TEST(ChunkedStream, SingleInput) {
+ const int kBufferSize = 256;
+ uint8_t buffer[kBufferSize];
+ upb::Arena arena;
+ auto input =
+ upb_ChunkedInputStream_New(buffer, kBufferSize, kBufferSize, arena.ptr());
+ const void* data;
+ size_t size;
+
+ upb::Status status;
+ data = upb_ZeroCopyInputStream_Next(input, &size, status.ptr());
+ EXPECT_EQ(size, kBufferSize);
+
+ data = upb_ZeroCopyInputStream_Next(input, &size, status.ptr());
+ EXPECT_EQ(data, nullptr);
+ EXPECT_EQ(size, 0);
+ EXPECT_TRUE(upb_Status_IsOk(status.ptr()));
+}
+
+TEST(ChunkedStream, SingleOutput) {
+ const int kBufferSize = 256;
+ uint8_t buffer[kBufferSize];
+ upb::Arena arena;
+ auto output = upb_ChunkedOutputStream_New(buffer, kBufferSize, kBufferSize,
+ arena.ptr());
+ size_t size;
+ upb::Status status;
+ void* data = upb_ZeroCopyOutputStream_Next(output, &size, status.ptr());
+ EXPECT_EQ(size, kBufferSize);
+
+ data = upb_ZeroCopyOutputStream_Next(output, &size, status.ptr());
+ EXPECT_EQ(data, nullptr);
+ EXPECT_EQ(size, 0);
+ EXPECT_TRUE(upb_Status_IsOk(status.ptr()));
+}
+
+// Check that a zero-size input array doesn't confuse the code.
+TEST(ChunkedStream, InputEOF) {
+ upb::Arena arena;
+ char buf;
+ auto input = upb_ChunkedInputStream_New(&buf, 0, 1, arena.ptr());
+ size_t size;
+ upb::Status status;
+ const void* data = upb_ZeroCopyInputStream_Next(input, &size, status.ptr());
+ EXPECT_EQ(data, nullptr);
+ EXPECT_EQ(size, 0);
+ EXPECT_TRUE(upb_Status_IsOk(status.ptr()));
+}
+
+// Check that a zero-size output array doesn't confuse the code.
+TEST(ChunkedStream, OutputEOF) {
+ upb::Arena arena;
+ char buf;
+ auto output = upb_ChunkedOutputStream_New(&buf, 0, 1, arena.ptr());
+ size_t size;
+ upb::Status status;
+ void* data = upb_ZeroCopyOutputStream_Next(output, &size, status.ptr());
+ EXPECT_EQ(data, nullptr);
+ EXPECT_EQ(size, 0);
+ EXPECT_TRUE(upb_Status_IsOk(status.ptr()));
+}
+
+} // namespace
+} // namespace upb
diff --git a/upb/upb/json/BUILD b/upb/upb/json/BUILD
new file mode 100644
index 0000000..538c2c8
--- /dev/null
+++ b/upb/upb/json/BUILD
@@ -0,0 +1,84 @@
+# TODO(haberman): describe this package.
+
+load("//bazel:build_defs.bzl", "UPB_DEFAULT_COPTS")
+load(
+ "//bazel:upb_proto_library.bzl",
+ "upb_proto_library",
+ "upb_proto_reflection_library",
+)
+
+cc_library(
+ name = "json",
+ srcs = [
+ "decode.c",
+ "encode.c",
+ ],
+ hdrs = [
+ "decode.h",
+ "encode.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ "//:collections",
+ "//:lex",
+ "//:port",
+ "//:reflection",
+ "//:wire",
+ ],
+)
+
+cc_test(
+ name = "decode_test",
+ srcs = ["decode_test.cc"],
+ deps = [
+ ":json",
+ ":struct_upb_proto",
+ ":test_upb_proto",
+ ":test_upb_proto_reflection",
+ "//:mem",
+ "//:reflection",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "encode_test",
+ srcs = ["encode_test.cc"],
+ deps = [
+ ":json",
+ ":struct_upb_proto",
+ ":test_upb_proto",
+ ":test_upb_proto_reflection",
+ "//:base",
+ "//:mem",
+ "//:reflection",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+proto_library(
+ name = "test_proto",
+ testonly = 1,
+ srcs = ["test.proto"],
+ deps = ["@com_google_protobuf//:struct_proto"],
+)
+
+upb_proto_library(
+ name = "test_upb_proto",
+ testonly = 1,
+ deps = [":test_proto"],
+)
+
+upb_proto_reflection_library(
+ name = "test_upb_proto_reflection",
+ testonly = 1,
+ deps = [":test_proto"],
+)
+
+# TODO: This target arguably belongs in //google/protobuf/BUILD
+upb_proto_library(
+ name = "struct_upb_proto",
+ testonly = 1,
+ deps = ["@com_google_protobuf//:struct_proto"],
+)
diff --git a/upb/upb/json/decode.c b/upb/upb/json/decode.c
new file mode 100644
index 0000000..dea0c83
--- /dev/null
+++ b/upb/upb/json/decode.c
@@ -0,0 +1,1480 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/json/decode.h"
+
+#include <errno.h>
+#include <float.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "upb/collections/map.h"
+#include "upb/lex/atoi.h"
+#include "upb/lex/unicode.h"
+#include "upb/reflection/message.h"
+#include "upb/wire/encode.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+typedef struct {
+ const char *ptr, *end;
+ upb_Arena* arena; /* TODO: should we have a tmp arena for tmp data? */
+ const upb_DefPool* symtab;
+ int depth;
+ upb_Status* status;
+ jmp_buf err;
+ int line;
+ const char* line_begin;
+ bool is_first;
+ int options;
+ const upb_FieldDef* debug_field;
+} jsondec;
+
+enum { JD_OBJECT, JD_ARRAY, JD_STRING, JD_NUMBER, JD_TRUE, JD_FALSE, JD_NULL };
+
+/* Forward declarations of mutually-recursive functions. */
+static void jsondec_wellknown(jsondec* d, upb_Message* msg,
+ const upb_MessageDef* m);
+static upb_MessageValue jsondec_value(jsondec* d, const upb_FieldDef* f);
+static void jsondec_wellknownvalue(jsondec* d, upb_Message* msg,
+ const upb_MessageDef* m);
+static void jsondec_object(jsondec* d, upb_Message* msg,
+ const upb_MessageDef* m);
+
+static bool jsondec_streql(upb_StringView str, const char* lit) {
+ return str.size == strlen(lit) && memcmp(str.data, lit, str.size) == 0;
+}
+
+static bool jsondec_isnullvalue(const upb_FieldDef* f) {
+ return upb_FieldDef_CType(f) == kUpb_CType_Enum &&
+ strcmp(upb_EnumDef_FullName(upb_FieldDef_EnumSubDef(f)),
+ "google.protobuf.NullValue") == 0;
+}
+
+static bool jsondec_isvalue(const upb_FieldDef* f) {
+ return (upb_FieldDef_CType(f) == kUpb_CType_Message &&
+ upb_MessageDef_WellKnownType(upb_FieldDef_MessageSubDef(f)) ==
+ kUpb_WellKnown_Value) ||
+ jsondec_isnullvalue(f);
+}
+
+UPB_NORETURN static void jsondec_err(jsondec* d, const char* msg) {
+ upb_Status_SetErrorFormat(d->status, "Error parsing JSON @%d:%d: %s", d->line,
+ (int)(d->ptr - d->line_begin), msg);
+ UPB_LONGJMP(d->err, 1);
+}
+
+UPB_PRINTF(2, 3)
+UPB_NORETURN static void jsondec_errf(jsondec* d, const char* fmt, ...) {
+ va_list argp;
+ upb_Status_SetErrorFormat(d->status, "Error parsing JSON @%d:%d: ", d->line,
+ (int)(d->ptr - d->line_begin));
+ va_start(argp, fmt);
+ upb_Status_VAppendErrorFormat(d->status, fmt, argp);
+ va_end(argp);
+ UPB_LONGJMP(d->err, 1);
+}
+
+static void jsondec_skipws(jsondec* d) {
+ while (d->ptr != d->end) {
+ switch (*d->ptr) {
+ case '\n':
+ d->line++;
+ d->line_begin = d->ptr;
+ /* Fallthrough. */
+ case '\r':
+ case '\t':
+ case ' ':
+ d->ptr++;
+ break;
+ default:
+ return;
+ }
+ }
+ jsondec_err(d, "Unexpected EOF");
+}
+
+static bool jsondec_tryparsech(jsondec* d, char ch) {
+ if (d->ptr == d->end || *d->ptr != ch) return false;
+ d->ptr++;
+ return true;
+}
+
+static void jsondec_parselit(jsondec* d, const char* lit) {
+ size_t avail = d->end - d->ptr;
+ size_t len = strlen(lit);
+ if (avail < len || memcmp(d->ptr, lit, len) != 0) {
+ jsondec_errf(d, "Expected: '%s'", lit);
+ }
+ d->ptr += len;
+}
+
+static void jsondec_wsch(jsondec* d, char ch) {
+ jsondec_skipws(d);
+ if (!jsondec_tryparsech(d, ch)) {
+ jsondec_errf(d, "Expected: '%c'", ch);
+ }
+}
+
+static void jsondec_true(jsondec* d) { jsondec_parselit(d, "true"); }
+static void jsondec_false(jsondec* d) { jsondec_parselit(d, "false"); }
+static void jsondec_null(jsondec* d) { jsondec_parselit(d, "null"); }
+
+static void jsondec_entrysep(jsondec* d) {
+ jsondec_skipws(d);
+ jsondec_parselit(d, ":");
+}
+
+static int jsondec_rawpeek(jsondec* d) {
+ switch (*d->ptr) {
+ case '{':
+ return JD_OBJECT;
+ case '[':
+ return JD_ARRAY;
+ case '"':
+ return JD_STRING;
+ case '-':
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ return JD_NUMBER;
+ case 't':
+ return JD_TRUE;
+ case 'f':
+ return JD_FALSE;
+ case 'n':
+ return JD_NULL;
+ default:
+ jsondec_errf(d, "Unexpected character: '%c'", *d->ptr);
+ }
+}
+
+/* JSON object/array **********************************************************/
+
+/* These are used like so:
+ *
+ * jsondec_objstart(d);
+ * while (jsondec_objnext(d)) {
+ * ...
+ * }
+ * jsondec_objend(d) */
+
+static int jsondec_peek(jsondec* d) {
+ jsondec_skipws(d);
+ return jsondec_rawpeek(d);
+}
+
+static void jsondec_push(jsondec* d) {
+ if (--d->depth < 0) {
+ jsondec_err(d, "Recursion limit exceeded");
+ }
+ d->is_first = true;
+}
+
+static bool jsondec_seqnext(jsondec* d, char end_ch) {
+ bool is_first = d->is_first;
+ d->is_first = false;
+ jsondec_skipws(d);
+ if (*d->ptr == end_ch) return false;
+ if (!is_first) jsondec_parselit(d, ",");
+ return true;
+}
+
+static void jsondec_arrstart(jsondec* d) {
+ jsondec_push(d);
+ jsondec_wsch(d, '[');
+}
+
+static void jsondec_arrend(jsondec* d) {
+ d->depth++;
+ jsondec_wsch(d, ']');
+}
+
+static bool jsondec_arrnext(jsondec* d) { return jsondec_seqnext(d, ']'); }
+
+static void jsondec_objstart(jsondec* d) {
+ jsondec_push(d);
+ jsondec_wsch(d, '{');
+}
+
+static void jsondec_objend(jsondec* d) {
+ d->depth++;
+ jsondec_wsch(d, '}');
+}
+
+static bool jsondec_objnext(jsondec* d) {
+ if (!jsondec_seqnext(d, '}')) return false;
+ if (jsondec_peek(d) != JD_STRING) {
+ jsondec_err(d, "Object must start with string");
+ }
+ return true;
+}
+
+/* JSON number ****************************************************************/
+
+static bool jsondec_tryskipdigits(jsondec* d) {
+ const char* start = d->ptr;
+
+ while (d->ptr < d->end) {
+ if (*d->ptr < '0' || *d->ptr > '9') {
+ break;
+ }
+ d->ptr++;
+ }
+
+ return d->ptr != start;
+}
+
+static void jsondec_skipdigits(jsondec* d) {
+ if (!jsondec_tryskipdigits(d)) {
+ jsondec_err(d, "Expected one or more digits");
+ }
+}
+
+static double jsondec_number(jsondec* d) {
+ const char* start = d->ptr;
+
+ assert(jsondec_rawpeek(d) == JD_NUMBER);
+
+ /* Skip over the syntax of a number, as specified by JSON. */
+ if (*d->ptr == '-') d->ptr++;
+
+ if (jsondec_tryparsech(d, '0')) {
+ if (jsondec_tryskipdigits(d)) {
+ jsondec_err(d, "number cannot have leading zero");
+ }
+ } else {
+ jsondec_skipdigits(d);
+ }
+
+ if (d->ptr == d->end) goto parse;
+ if (jsondec_tryparsech(d, '.')) {
+ jsondec_skipdigits(d);
+ }
+ if (d->ptr == d->end) goto parse;
+
+ if (*d->ptr == 'e' || *d->ptr == 'E') {
+ d->ptr++;
+ if (d->ptr == d->end) {
+ jsondec_err(d, "Unexpected EOF in number");
+ }
+ if (*d->ptr == '+' || *d->ptr == '-') {
+ d->ptr++;
+ }
+ jsondec_skipdigits(d);
+ }
+
+parse:
+ /* Having verified the syntax of a JSON number, use strtod() to parse
+ * (strtod() accepts a superset of JSON syntax). */
+ errno = 0;
+ {
+ char* end;
+ double val = strtod(start, &end);
+ assert(end == d->ptr);
+
+ /* Currently the min/max-val conformance tests fail if we check this. Does
+ * this mean the conformance tests are wrong or strtod() is wrong, or
+ * something else? Investigate further. */
+ /*
+ if (errno == ERANGE) {
+ jsondec_err(d, "Number out of range");
+ }
+ */
+
+ if (val > DBL_MAX || val < -DBL_MAX) {
+ jsondec_err(d, "Number out of range");
+ }
+
+ return val;
+ }
+}
+
+/* JSON string ****************************************************************/
+
+static char jsondec_escape(jsondec* d) {
+ switch (*d->ptr++) {
+ case '"':
+ return '\"';
+ case '\\':
+ return '\\';
+ case '/':
+ return '/';
+ case 'b':
+ return '\b';
+ case 'f':
+ return '\f';
+ case 'n':
+ return '\n';
+ case 'r':
+ return '\r';
+ case 't':
+ return '\t';
+ default:
+ jsondec_err(d, "Invalid escape char");
+ }
+}
+
+static uint32_t jsondec_codepoint(jsondec* d) {
+ uint32_t cp = 0;
+ const char* end;
+
+ if (d->end - d->ptr < 4) {
+ jsondec_err(d, "EOF inside string");
+ }
+
+ end = d->ptr + 4;
+ while (d->ptr < end) {
+ char ch = *d->ptr++;
+ if (ch >= '0' && ch <= '9') {
+ ch -= '0';
+ } else if (ch >= 'a' && ch <= 'f') {
+ ch = ch - 'a' + 10;
+ } else if (ch >= 'A' && ch <= 'F') {
+ ch = ch - 'A' + 10;
+ } else {
+ jsondec_err(d, "Invalid hex digit");
+ }
+ cp = (cp << 4) | ch;
+ }
+
+ return cp;
+}
+
+/* Parses a \uXXXX unicode escape (possibly a surrogate pair). */
+static size_t jsondec_unicode(jsondec* d, char* out) {
+ uint32_t cp = jsondec_codepoint(d);
+ if (upb_Unicode_IsHigh(cp)) {
+ /* Surrogate pair: two 16-bit codepoints become a 32-bit codepoint. */
+ jsondec_parselit(d, "\\u");
+ uint32_t low = jsondec_codepoint(d);
+ if (!upb_Unicode_IsLow(low)) jsondec_err(d, "Invalid low surrogate");
+ cp = upb_Unicode_FromPair(cp, low);
+ } else if (upb_Unicode_IsLow(cp)) {
+ jsondec_err(d, "Unpaired low surrogate");
+ }
+
+ /* Write to UTF-8 */
+ int bytes = upb_Unicode_ToUTF8(cp, out);
+ if (bytes == 0) jsondec_err(d, "Invalid codepoint");
+ return bytes;
+}
+
+static void jsondec_resize(jsondec* d, char** buf, char** end, char** buf_end) {
+ size_t oldsize = *buf_end - *buf;
+ size_t len = *end - *buf;
+ size_t size = UPB_MAX(8, 2 * oldsize);
+
+ *buf = upb_Arena_Realloc(d->arena, *buf, len, size);
+ if (!*buf) jsondec_err(d, "Out of memory");
+
+ *end = *buf + len;
+ *buf_end = *buf + size;
+}
+
+static upb_StringView jsondec_string(jsondec* d) {
+ char* buf = NULL;
+ char* end = NULL;
+ char* buf_end = NULL;
+
+ jsondec_skipws(d);
+
+ if (*d->ptr++ != '"') {
+ jsondec_err(d, "Expected string");
+ }
+
+ while (d->ptr < d->end) {
+ char ch = *d->ptr++;
+
+ if (end == buf_end) {
+ jsondec_resize(d, &buf, &end, &buf_end);
+ }
+
+ switch (ch) {
+ case '"': {
+ upb_StringView ret;
+ ret.data = buf;
+ ret.size = end - buf;
+ *end = '\0'; /* Needed for possible strtod(). */
+ return ret;
+ }
+ case '\\':
+ if (d->ptr == d->end) goto eof;
+ if (*d->ptr == 'u') {
+ d->ptr++;
+ if (buf_end - end < 4) {
+ /* Allow space for maximum-sized codepoint (4 bytes). */
+ jsondec_resize(d, &buf, &end, &buf_end);
+ }
+ end += jsondec_unicode(d, end);
+ } else {
+ *end++ = jsondec_escape(d);
+ }
+ break;
+ default:
+ if ((unsigned char)*d->ptr < 0x20) {
+ jsondec_err(d, "Invalid char in JSON string");
+ }
+ *end++ = ch;
+ break;
+ }
+ }
+
+eof:
+ jsondec_err(d, "EOF inside string");
+}
+
+static void jsondec_skipval(jsondec* d) {
+ switch (jsondec_peek(d)) {
+ case JD_OBJECT:
+ jsondec_objstart(d);
+ while (jsondec_objnext(d)) {
+ jsondec_string(d);
+ jsondec_entrysep(d);
+ jsondec_skipval(d);
+ }
+ jsondec_objend(d);
+ break;
+ case JD_ARRAY:
+ jsondec_arrstart(d);
+ while (jsondec_arrnext(d)) {
+ jsondec_skipval(d);
+ }
+ jsondec_arrend(d);
+ break;
+ case JD_TRUE:
+ jsondec_true(d);
+ break;
+ case JD_FALSE:
+ jsondec_false(d);
+ break;
+ case JD_NULL:
+ jsondec_null(d);
+ break;
+ case JD_STRING:
+ jsondec_string(d);
+ break;
+ case JD_NUMBER:
+ jsondec_number(d);
+ break;
+ }
+}
+
+/* Base64 decoding for bytes fields. ******************************************/
+
+static unsigned int jsondec_base64_tablelookup(const char ch) {
+ /* Table includes the normal base64 chars plus the URL-safe variant. */
+ const signed char table[256] = {
+ -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1,
+ -1, 62 /*+*/, -1, 62 /*-*/, -1, 63 /*/ */, 52 /*0*/,
+ 53 /*1*/, 54 /*2*/, 55 /*3*/, 56 /*4*/, 57 /*5*/, 58 /*6*/, 59 /*7*/,
+ 60 /*8*/, 61 /*9*/, -1, -1, -1, -1, -1,
+ -1, -1, 0 /*A*/, 1 /*B*/, 2 /*C*/, 3 /*D*/, 4 /*E*/,
+ 5 /*F*/, 6 /*G*/, 07 /*H*/, 8 /*I*/, 9 /*J*/, 10 /*K*/, 11 /*L*/,
+ 12 /*M*/, 13 /*N*/, 14 /*O*/, 15 /*P*/, 16 /*Q*/, 17 /*R*/, 18 /*S*/,
+ 19 /*T*/, 20 /*U*/, 21 /*V*/, 22 /*W*/, 23 /*X*/, 24 /*Y*/, 25 /*Z*/,
+ -1, -1, -1, -1, 63 /*_*/, -1, 26 /*a*/,
+ 27 /*b*/, 28 /*c*/, 29 /*d*/, 30 /*e*/, 31 /*f*/, 32 /*g*/, 33 /*h*/,
+ 34 /*i*/, 35 /*j*/, 36 /*k*/, 37 /*l*/, 38 /*m*/, 39 /*n*/, 40 /*o*/,
+ 41 /*p*/, 42 /*q*/, 43 /*r*/, 44 /*s*/, 45 /*t*/, 46 /*u*/, 47 /*v*/,
+ 48 /*w*/, 49 /*x*/, 50 /*y*/, 51 /*z*/, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1};
+
+ /* Sign-extend return value so high bit will be set on any unexpected char. */
+ return table[(unsigned)ch];
+}
+
+static char* jsondec_partialbase64(jsondec* d, const char* ptr, const char* end,
+ char* out) {
+ int32_t val = -1;
+
+ switch (end - ptr) {
+ case 2:
+ val = jsondec_base64_tablelookup(ptr[0]) << 18 |
+ jsondec_base64_tablelookup(ptr[1]) << 12;
+ out[0] = val >> 16;
+ out += 1;
+ break;
+ case 3:
+ val = jsondec_base64_tablelookup(ptr[0]) << 18 |
+ jsondec_base64_tablelookup(ptr[1]) << 12 |
+ jsondec_base64_tablelookup(ptr[2]) << 6;
+ out[0] = val >> 16;
+ out[1] = (val >> 8) & 0xff;
+ out += 2;
+ break;
+ }
+
+ if (val < 0) {
+ jsondec_err(d, "Corrupt base64");
+ }
+
+ return out;
+}
+
+static size_t jsondec_base64(jsondec* d, upb_StringView str) {
+ /* We decode in place. This is safe because this is a new buffer (not
+ * aliasing the input) and because base64 decoding shrinks 4 bytes into 3. */
+ char* out = (char*)str.data;
+ const char* ptr = str.data;
+ const char* end = ptr + str.size;
+ const char* end4 = ptr + (str.size & -4); /* Round down to multiple of 4. */
+
+ for (; ptr < end4; ptr += 4, out += 3) {
+ int val = jsondec_base64_tablelookup(ptr[0]) << 18 |
+ jsondec_base64_tablelookup(ptr[1]) << 12 |
+ jsondec_base64_tablelookup(ptr[2]) << 6 |
+ jsondec_base64_tablelookup(ptr[3]) << 0;
+
+ if (val < 0) {
+ /* Junk chars or padding. Remove trailing padding, if any. */
+ if (end - ptr == 4 && ptr[3] == '=') {
+ if (ptr[2] == '=') {
+ end -= 2;
+ } else {
+ end -= 1;
+ }
+ }
+ break;
+ }
+
+ out[0] = val >> 16;
+ out[1] = (val >> 8) & 0xff;
+ out[2] = val & 0xff;
+ }
+
+ if (ptr < end) {
+ /* Process remaining chars. We do not require padding. */
+ out = jsondec_partialbase64(d, ptr, end, out);
+ }
+
+ return out - str.data;
+}
+
+/* Low-level integer parsing **************************************************/
+
+static const char* jsondec_buftouint64(jsondec* d, const char* ptr,
+ const char* end, uint64_t* val) {
+ const char* out = upb_BufToUint64(ptr, end, val);
+ if (!out) jsondec_err(d, "Integer overflow");
+ return out;
+}
+
+static const char* jsondec_buftoint64(jsondec* d, const char* ptr,
+ const char* end, int64_t* val,
+ bool* is_neg) {
+ const char* out = upb_BufToInt64(ptr, end, val, is_neg);
+ if (!out) jsondec_err(d, "Integer overflow");
+ return out;
+}
+
+static uint64_t jsondec_strtouint64(jsondec* d, upb_StringView str) {
+ const char* end = str.data + str.size;
+ uint64_t ret;
+ if (jsondec_buftouint64(d, str.data, end, &ret) != end) {
+ jsondec_err(d, "Non-number characters in quoted integer");
+ }
+ return ret;
+}
+
+static int64_t jsondec_strtoint64(jsondec* d, upb_StringView str) {
+ const char* end = str.data + str.size;
+ int64_t ret;
+ if (jsondec_buftoint64(d, str.data, end, &ret, NULL) != end) {
+ jsondec_err(d, "Non-number characters in quoted integer");
+ }
+ return ret;
+}
+
+/* Primitive value types ******************************************************/
+
+/* Parse INT32 or INT64 value. */
+static upb_MessageValue jsondec_int(jsondec* d, const upb_FieldDef* f) {
+ upb_MessageValue val;
+
+ switch (jsondec_peek(d)) {
+ case JD_NUMBER: {
+ double dbl = jsondec_number(d);
+ if (dbl > 9223372036854774784.0 || dbl < -9223372036854775808.0) {
+ jsondec_err(d, "JSON number is out of range.");
+ }
+ val.int64_val = dbl; /* must be guarded, overflow here is UB */
+ if (val.int64_val != dbl) {
+ jsondec_errf(d, "JSON number was not integral (%f != %" PRId64 ")", dbl,
+ val.int64_val);
+ }
+ break;
+ }
+ case JD_STRING: {
+ upb_StringView str = jsondec_string(d);
+ val.int64_val = jsondec_strtoint64(d, str);
+ break;
+ }
+ default:
+ jsondec_err(d, "Expected number or string");
+ }
+
+ if (upb_FieldDef_CType(f) == kUpb_CType_Int32 ||
+ upb_FieldDef_CType(f) == kUpb_CType_Enum) {
+ if (val.int64_val > INT32_MAX || val.int64_val < INT32_MIN) {
+ jsondec_err(d, "Integer out of range.");
+ }
+ val.int32_val = (int32_t)val.int64_val;
+ }
+
+ return val;
+}
+
+/* Parse UINT32 or UINT64 value. */
+static upb_MessageValue jsondec_uint(jsondec* d, const upb_FieldDef* f) {
+ upb_MessageValue val = {0};
+
+ switch (jsondec_peek(d)) {
+ case JD_NUMBER: {
+ double dbl = jsondec_number(d);
+ if (dbl > 18446744073709549568.0 || dbl < 0) {
+ jsondec_err(d, "JSON number is out of range.");
+ }
+ val.uint64_val = dbl; /* must be guarded, overflow here is UB */
+ if (val.uint64_val != dbl) {
+ jsondec_errf(d, "JSON number was not integral (%f != %" PRIu64 ")", dbl,
+ val.uint64_val);
+ }
+ break;
+ }
+ case JD_STRING: {
+ upb_StringView str = jsondec_string(d);
+ val.uint64_val = jsondec_strtouint64(d, str);
+ break;
+ }
+ default:
+ jsondec_err(d, "Expected number or string");
+ }
+
+ if (upb_FieldDef_CType(f) == kUpb_CType_UInt32) {
+ if (val.uint64_val > UINT32_MAX) {
+ jsondec_err(d, "Integer out of range.");
+ }
+ val.uint32_val = (uint32_t)val.uint64_val;
+ }
+
+ return val;
+}
+
+/* Parse DOUBLE or FLOAT value. */
+static upb_MessageValue jsondec_double(jsondec* d, const upb_FieldDef* f) {
+ upb_StringView str;
+ upb_MessageValue val = {0};
+
+ switch (jsondec_peek(d)) {
+ case JD_NUMBER:
+ val.double_val = jsondec_number(d);
+ break;
+ case JD_STRING:
+ str = jsondec_string(d);
+ if (jsondec_streql(str, "NaN")) {
+ val.double_val = NAN;
+ } else if (jsondec_streql(str, "Infinity")) {
+ val.double_val = INFINITY;
+ } else if (jsondec_streql(str, "-Infinity")) {
+ val.double_val = -INFINITY;
+ } else {
+ val.double_val = strtod(str.data, NULL);
+ }
+ break;
+ default:
+ jsondec_err(d, "Expected number or string");
+ }
+
+ if (upb_FieldDef_CType(f) == kUpb_CType_Float) {
+ float f = val.double_val;
+ if (val.double_val != INFINITY && val.double_val != -INFINITY) {
+ if (f == INFINITY || f == -INFINITY) jsondec_err(d, "Float out of range");
+ }
+ val.float_val = f;
+ }
+
+ return val;
+}
+
+/* Parse STRING or BYTES value. */
+static upb_MessageValue jsondec_strfield(jsondec* d, const upb_FieldDef* f) {
+ upb_MessageValue val;
+ val.str_val = jsondec_string(d);
+ if (upb_FieldDef_CType(f) == kUpb_CType_Bytes) {
+ val.str_val.size = jsondec_base64(d, val.str_val);
+ }
+ return val;
+}
+
+static upb_MessageValue jsondec_enum(jsondec* d, const upb_FieldDef* f) {
+ switch (jsondec_peek(d)) {
+ case JD_STRING: {
+ upb_StringView str = jsondec_string(d);
+ const upb_EnumDef* e = upb_FieldDef_EnumSubDef(f);
+ const upb_EnumValueDef* ev =
+ upb_EnumDef_FindValueByNameWithSize(e, str.data, str.size);
+ upb_MessageValue val;
+ if (ev) {
+ val.int32_val = upb_EnumValueDef_Number(ev);
+ } else {
+ if (d->options & upb_JsonDecode_IgnoreUnknown) {
+ val.int32_val = 0;
+ } else {
+ jsondec_errf(d, "Unknown enumerator: '" UPB_STRINGVIEW_FORMAT "'",
+ UPB_STRINGVIEW_ARGS(str));
+ }
+ }
+ return val;
+ }
+ case JD_NULL: {
+ if (jsondec_isnullvalue(f)) {
+ upb_MessageValue val;
+ jsondec_null(d);
+ val.int32_val = 0;
+ return val;
+ }
+ }
+ /* Fallthrough. */
+ default:
+ return jsondec_int(d, f);
+ }
+}
+
+static upb_MessageValue jsondec_bool(jsondec* d, const upb_FieldDef* f) {
+ bool is_map_key = upb_FieldDef_Number(f) == 1 &&
+ upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f));
+ upb_MessageValue val;
+
+ if (is_map_key) {
+ upb_StringView str = jsondec_string(d);
+ if (jsondec_streql(str, "true")) {
+ val.bool_val = true;
+ } else if (jsondec_streql(str, "false")) {
+ val.bool_val = false;
+ } else {
+ jsondec_err(d, "Invalid boolean map key");
+ }
+ } else {
+ switch (jsondec_peek(d)) {
+ case JD_TRUE:
+ val.bool_val = true;
+ jsondec_true(d);
+ break;
+ case JD_FALSE:
+ val.bool_val = false;
+ jsondec_false(d);
+ break;
+ default:
+ jsondec_err(d, "Expected true or false");
+ }
+ }
+
+ return val;
+}
+
+/* Composite types (array/message/map) ****************************************/
+
+static void jsondec_array(jsondec* d, upb_Message* msg, const upb_FieldDef* f) {
+ upb_Array* arr = upb_Message_Mutable(msg, f, d->arena).array;
+
+ jsondec_arrstart(d);
+ while (jsondec_arrnext(d)) {
+ upb_MessageValue elem = jsondec_value(d, f);
+ upb_Array_Append(arr, elem, d->arena);
+ }
+ jsondec_arrend(d);
+}
+
+static void jsondec_map(jsondec* d, upb_Message* msg, const upb_FieldDef* f) {
+ upb_Map* map = upb_Message_Mutable(msg, f, d->arena).map;
+ const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f);
+ const upb_FieldDef* key_f = upb_MessageDef_FindFieldByNumber(entry, 1);
+ const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(entry, 2);
+
+ jsondec_objstart(d);
+ while (jsondec_objnext(d)) {
+ upb_MessageValue key, val;
+ key = jsondec_value(d, key_f);
+ jsondec_entrysep(d);
+ val = jsondec_value(d, val_f);
+ upb_Map_Set(map, key, val, d->arena);
+ }
+ jsondec_objend(d);
+}
+
+static void jsondec_tomsg(jsondec* d, upb_Message* msg,
+ const upb_MessageDef* m) {
+ if (upb_MessageDef_WellKnownType(m) == kUpb_WellKnown_Unspecified) {
+ jsondec_object(d, msg, m);
+ } else {
+ jsondec_wellknown(d, msg, m);
+ }
+}
+
+static upb_MessageValue jsondec_msg(jsondec* d, const upb_FieldDef* f) {
+ const upb_MessageDef* m = upb_FieldDef_MessageSubDef(f);
+ const upb_MiniTable* layout = upb_MessageDef_MiniTable(m);
+ upb_Message* msg = upb_Message_New(layout, d->arena);
+ upb_MessageValue val;
+
+ jsondec_tomsg(d, msg, m);
+ val.msg_val = msg;
+ return val;
+}
+
+static void jsondec_field(jsondec* d, upb_Message* msg,
+ const upb_MessageDef* m) {
+ upb_StringView name;
+ const upb_FieldDef* f;
+ const upb_FieldDef* preserved;
+
+ name = jsondec_string(d);
+ jsondec_entrysep(d);
+
+ if (name.size >= 2 && name.data[0] == '[' &&
+ name.data[name.size - 1] == ']') {
+ f = upb_DefPool_FindExtensionByNameWithSize(d->symtab, name.data + 1,
+ name.size - 2);
+ if (f && upb_FieldDef_ContainingType(f) != m) {
+ jsondec_errf(
+ d, "Extension %s extends message %s, but was seen in message %s",
+ upb_FieldDef_FullName(f),
+ upb_MessageDef_FullName(upb_FieldDef_ContainingType(f)),
+ upb_MessageDef_FullName(m));
+ }
+ } else {
+ f = upb_MessageDef_FindByJsonNameWithSize(m, name.data, name.size);
+ }
+
+ if (!f) {
+ if ((d->options & upb_JsonDecode_IgnoreUnknown) == 0) {
+ jsondec_errf(d, "No such field: " UPB_STRINGVIEW_FORMAT,
+ UPB_STRINGVIEW_ARGS(name));
+ }
+ jsondec_skipval(d);
+ return;
+ }
+
+ if (jsondec_peek(d) == JD_NULL && !jsondec_isvalue(f)) {
+ /* JSON "null" indicates a default value, so no need to set anything. */
+ jsondec_null(d);
+ return;
+ }
+
+ if (upb_FieldDef_RealContainingOneof(f) &&
+ upb_Message_WhichOneof(msg, upb_FieldDef_ContainingOneof(f))) {
+ jsondec_err(d, "More than one field for this oneof.");
+ }
+
+ preserved = d->debug_field;
+ d->debug_field = f;
+
+ if (upb_FieldDef_IsMap(f)) {
+ jsondec_map(d, msg, f);
+ } else if (upb_FieldDef_IsRepeated(f)) {
+ jsondec_array(d, msg, f);
+ } else if (upb_FieldDef_IsSubMessage(f)) {
+ upb_Message* submsg = upb_Message_Mutable(msg, f, d->arena).msg;
+ const upb_MessageDef* subm = upb_FieldDef_MessageSubDef(f);
+ jsondec_tomsg(d, submsg, subm);
+ } else {
+ upb_MessageValue val = jsondec_value(d, f);
+ upb_Message_SetFieldByDef(msg, f, val, d->arena);
+ }
+
+ d->debug_field = preserved;
+}
+
+static void jsondec_object(jsondec* d, upb_Message* msg,
+ const upb_MessageDef* m) {
+ jsondec_objstart(d);
+ while (jsondec_objnext(d)) {
+ jsondec_field(d, msg, m);
+ }
+ jsondec_objend(d);
+}
+
+static upb_MessageValue jsondec_value(jsondec* d, const upb_FieldDef* f) {
+ switch (upb_FieldDef_CType(f)) {
+ case kUpb_CType_Bool:
+ return jsondec_bool(d, f);
+ case kUpb_CType_Float:
+ case kUpb_CType_Double:
+ return jsondec_double(d, f);
+ case kUpb_CType_UInt32:
+ case kUpb_CType_UInt64:
+ return jsondec_uint(d, f);
+ case kUpb_CType_Int32:
+ case kUpb_CType_Int64:
+ return jsondec_int(d, f);
+ case kUpb_CType_String:
+ case kUpb_CType_Bytes:
+ return jsondec_strfield(d, f);
+ case kUpb_CType_Enum:
+ return jsondec_enum(d, f);
+ case kUpb_CType_Message:
+ return jsondec_msg(d, f);
+ default:
+ UPB_UNREACHABLE();
+ }
+}
+
+/* Well-known types ***********************************************************/
+
+static int jsondec_tsdigits(jsondec* d, const char** ptr, size_t digits,
+ const char* after) {
+ uint64_t val;
+ const char* p = *ptr;
+ const char* end = p + digits;
+ size_t after_len = after ? strlen(after) : 0;
+
+ UPB_ASSERT(digits <= 9); /* int can't overflow. */
+
+ if (jsondec_buftouint64(d, p, end, &val) != end ||
+ (after_len && memcmp(end, after, after_len) != 0)) {
+ jsondec_err(d, "Malformed timestamp");
+ }
+
+ UPB_ASSERT(val < INT_MAX);
+
+ *ptr = end + after_len;
+ return (int)val;
+}
+
+static int jsondec_nanos(jsondec* d, const char** ptr, const char* end) {
+ uint64_t nanos = 0;
+ const char* p = *ptr;
+
+ if (p != end && *p == '.') {
+ const char* nano_end = jsondec_buftouint64(d, p + 1, end, &nanos);
+ int digits = (int)(nano_end - p - 1);
+ int exp_lg10 = 9 - digits;
+ if (digits > 9) {
+ jsondec_err(d, "Too many digits for partial seconds");
+ }
+ while (exp_lg10--) nanos *= 10;
+ *ptr = nano_end;
+ }
+
+ UPB_ASSERT(nanos < INT_MAX);
+
+ return (int)nanos;
+}
+
+/* jsondec_epochdays(1970, 1, 1) == 1970-01-01 == 0. */
+int jsondec_epochdays(int y, int m, int d) {
+ const uint32_t year_base = 4800; /* Before min year, multiple of 400. */
+ const uint32_t m_adj = m - 3; /* March-based month. */
+ const uint32_t carry = m_adj > (uint32_t)m ? 1 : 0;
+ const uint32_t adjust = carry ? 12 : 0;
+ const uint32_t y_adj = y + year_base - carry;
+ const uint32_t month_days = ((m_adj + adjust) * 62719 + 769) / 2048;
+ const uint32_t leap_days = y_adj / 4 - y_adj / 100 + y_adj / 400;
+ return y_adj * 365 + leap_days + month_days + (d - 1) - 2472632;
+}
+
+static int64_t jsondec_unixtime(int y, int m, int d, int h, int min, int s) {
+ return (int64_t)jsondec_epochdays(y, m, d) * 86400 + h * 3600 + min * 60 + s;
+}
+
+static void jsondec_timestamp(jsondec* d, upb_Message* msg,
+ const upb_MessageDef* m) {
+ upb_MessageValue seconds;
+ upb_MessageValue nanos;
+ upb_StringView str = jsondec_string(d);
+ const char* ptr = str.data;
+ const char* end = ptr + str.size;
+
+ if (str.size < 20) goto malformed;
+
+ {
+ /* 1972-01-01T01:00:00 */
+ int year = jsondec_tsdigits(d, &ptr, 4, "-");
+ int mon = jsondec_tsdigits(d, &ptr, 2, "-");
+ int day = jsondec_tsdigits(d, &ptr, 2, "T");
+ int hour = jsondec_tsdigits(d, &ptr, 2, ":");
+ int min = jsondec_tsdigits(d, &ptr, 2, ":");
+ int sec = jsondec_tsdigits(d, &ptr, 2, NULL);
+
+ seconds.int64_val = jsondec_unixtime(year, mon, day, hour, min, sec);
+ }
+
+ nanos.int32_val = jsondec_nanos(d, &ptr, end);
+
+ {
+ /* [+-]08:00 or Z */
+ int ofs_hour = 0;
+ int ofs_min = 0;
+ bool neg = false;
+
+ if (ptr == end) goto malformed;
+
+ switch (*ptr++) {
+ case '-':
+ neg = true;
+ /* fallthrough */
+ case '+':
+ if ((end - ptr) != 5) goto malformed;
+ ofs_hour = jsondec_tsdigits(d, &ptr, 2, ":");
+ ofs_min = jsondec_tsdigits(d, &ptr, 2, NULL);
+ ofs_min = ((ofs_hour * 60) + ofs_min) * 60;
+ seconds.int64_val += (neg ? ofs_min : -ofs_min);
+ break;
+ case 'Z':
+ if (ptr != end) goto malformed;
+ break;
+ default:
+ goto malformed;
+ }
+ }
+
+ if (seconds.int64_val < -62135596800) {
+ jsondec_err(d, "Timestamp out of range");
+ }
+
+ upb_Message_SetFieldByDef(msg, upb_MessageDef_FindFieldByNumber(m, 1),
+ seconds, d->arena);
+ upb_Message_SetFieldByDef(msg, upb_MessageDef_FindFieldByNumber(m, 2), nanos,
+ d->arena);
+ return;
+
+malformed:
+ jsondec_err(d, "Malformed timestamp");
+}
+
+static void jsondec_duration(jsondec* d, upb_Message* msg,
+ const upb_MessageDef* m) {
+ upb_MessageValue seconds;
+ upb_MessageValue nanos;
+ upb_StringView str = jsondec_string(d);
+ const char* ptr = str.data;
+ const char* end = ptr + str.size;
+ const int64_t max = (uint64_t)3652500 * 86400;
+ bool neg = false;
+
+ /* "3.000000001s", "3s", etc. */
+ ptr = jsondec_buftoint64(d, ptr, end, &seconds.int64_val, &neg);
+ nanos.int32_val = jsondec_nanos(d, &ptr, end);
+
+ if (end - ptr != 1 || *ptr != 's') {
+ jsondec_err(d, "Malformed duration");
+ }
+
+ if (seconds.int64_val < -max || seconds.int64_val > max) {
+ jsondec_err(d, "Duration out of range");
+ }
+
+ if (neg) {
+ nanos.int32_val = -nanos.int32_val;
+ }
+
+ upb_Message_SetFieldByDef(msg, upb_MessageDef_FindFieldByNumber(m, 1),
+ seconds, d->arena);
+ upb_Message_SetFieldByDef(msg, upb_MessageDef_FindFieldByNumber(m, 2), nanos,
+ d->arena);
+}
+
+static void jsondec_listvalue(jsondec* d, upb_Message* msg,
+ const upb_MessageDef* m) {
+ const upb_FieldDef* values_f = upb_MessageDef_FindFieldByNumber(m, 1);
+ const upb_MessageDef* value_m = upb_FieldDef_MessageSubDef(values_f);
+ const upb_MiniTable* value_layout = upb_MessageDef_MiniTable(value_m);
+ upb_Array* values = upb_Message_Mutable(msg, values_f, d->arena).array;
+
+ jsondec_arrstart(d);
+ while (jsondec_arrnext(d)) {
+ upb_Message* value_msg = upb_Message_New(value_layout, d->arena);
+ upb_MessageValue value;
+ value.msg_val = value_msg;
+ upb_Array_Append(values, value, d->arena);
+ jsondec_wellknownvalue(d, value_msg, value_m);
+ }
+ jsondec_arrend(d);
+}
+
+static void jsondec_struct(jsondec* d, upb_Message* msg,
+ const upb_MessageDef* m) {
+ const upb_FieldDef* fields_f = upb_MessageDef_FindFieldByNumber(m, 1);
+ const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(fields_f);
+ const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(entry_m, 2);
+ const upb_MessageDef* value_m = upb_FieldDef_MessageSubDef(value_f);
+ const upb_MiniTable* value_layout = upb_MessageDef_MiniTable(value_m);
+ upb_Map* fields = upb_Message_Mutable(msg, fields_f, d->arena).map;
+
+ jsondec_objstart(d);
+ while (jsondec_objnext(d)) {
+ upb_MessageValue key, value;
+ upb_Message* value_msg = upb_Message_New(value_layout, d->arena);
+ key.str_val = jsondec_string(d);
+ value.msg_val = value_msg;
+ upb_Map_Set(fields, key, value, d->arena);
+ jsondec_entrysep(d);
+ jsondec_wellknownvalue(d, value_msg, value_m);
+ }
+ jsondec_objend(d);
+}
+
+static void jsondec_wellknownvalue(jsondec* d, upb_Message* msg,
+ const upb_MessageDef* m) {
+ upb_MessageValue val;
+ const upb_FieldDef* f;
+ upb_Message* submsg;
+
+ switch (jsondec_peek(d)) {
+ case JD_NUMBER:
+ /* double number_value = 2; */
+ f = upb_MessageDef_FindFieldByNumber(m, 2);
+ val.double_val = jsondec_number(d);
+ break;
+ case JD_STRING:
+ /* string string_value = 3; */
+ f = upb_MessageDef_FindFieldByNumber(m, 3);
+ val.str_val = jsondec_string(d);
+ break;
+ case JD_FALSE:
+ /* bool bool_value = 4; */
+ f = upb_MessageDef_FindFieldByNumber(m, 4);
+ val.bool_val = false;
+ jsondec_false(d);
+ break;
+ case JD_TRUE:
+ /* bool bool_value = 4; */
+ f = upb_MessageDef_FindFieldByNumber(m, 4);
+ val.bool_val = true;
+ jsondec_true(d);
+ break;
+ case JD_NULL:
+ /* NullValue null_value = 1; */
+ f = upb_MessageDef_FindFieldByNumber(m, 1);
+ val.int32_val = 0;
+ jsondec_null(d);
+ break;
+ /* Note: these cases return, because upb_Message_Mutable() is enough. */
+ case JD_OBJECT:
+ /* Struct struct_value = 5; */
+ f = upb_MessageDef_FindFieldByNumber(m, 5);
+ submsg = upb_Message_Mutable(msg, f, d->arena).msg;
+ jsondec_struct(d, submsg, upb_FieldDef_MessageSubDef(f));
+ return;
+ case JD_ARRAY:
+ /* ListValue list_value = 6; */
+ f = upb_MessageDef_FindFieldByNumber(m, 6);
+ submsg = upb_Message_Mutable(msg, f, d->arena).msg;
+ jsondec_listvalue(d, submsg, upb_FieldDef_MessageSubDef(f));
+ return;
+ default:
+ UPB_UNREACHABLE();
+ }
+
+ upb_Message_SetFieldByDef(msg, f, val, d->arena);
+}
+
+static upb_StringView jsondec_mask(jsondec* d, const char* buf,
+ const char* end) {
+ /* FieldMask fields grow due to inserted '_' characters, so we can't do the
+ * transform in place. */
+ const char* ptr = buf;
+ upb_StringView ret;
+ char* out;
+
+ ret.size = end - ptr;
+ while (ptr < end) {
+ ret.size += (*ptr >= 'A' && *ptr <= 'Z');
+ ptr++;
+ }
+
+ out = upb_Arena_Malloc(d->arena, ret.size);
+ ptr = buf;
+ ret.data = out;
+
+ while (ptr < end) {
+ char ch = *ptr++;
+ if (ch >= 'A' && ch <= 'Z') {
+ *out++ = '_';
+ *out++ = ch + 32;
+ } else if (ch == '_') {
+ jsondec_err(d, "field mask may not contain '_'");
+ } else {
+ *out++ = ch;
+ }
+ }
+
+ return ret;
+}
+
+static void jsondec_fieldmask(jsondec* d, upb_Message* msg,
+ const upb_MessageDef* m) {
+ /* repeated string paths = 1; */
+ const upb_FieldDef* paths_f = upb_MessageDef_FindFieldByNumber(m, 1);
+ upb_Array* arr = upb_Message_Mutable(msg, paths_f, d->arena).array;
+ upb_StringView str = jsondec_string(d);
+ const char* ptr = str.data;
+ const char* end = ptr + str.size;
+ upb_MessageValue val;
+
+ while (ptr < end) {
+ const char* elem_end = memchr(ptr, ',', end - ptr);
+ if (elem_end) {
+ val.str_val = jsondec_mask(d, ptr, elem_end);
+ ptr = elem_end + 1;
+ } else {
+ val.str_val = jsondec_mask(d, ptr, end);
+ ptr = end;
+ }
+ upb_Array_Append(arr, val, d->arena);
+ }
+}
+
+static void jsondec_anyfield(jsondec* d, upb_Message* msg,
+ const upb_MessageDef* m) {
+ if (upb_MessageDef_WellKnownType(m) == kUpb_WellKnown_Unspecified) {
+ /* For regular types: {"@type": "[user type]", "f1": <V1>, "f2": <V2>}
+ * where f1, f2, etc. are the normal fields of this type. */
+ jsondec_field(d, msg, m);
+ } else {
+ /* For well-known types: {"@type": "[well-known type]", "value": <X>}
+ * where <X> is whatever encoding the WKT normally uses. */
+ upb_StringView str = jsondec_string(d);
+ jsondec_entrysep(d);
+ if (!jsondec_streql(str, "value")) {
+ jsondec_err(d, "Key for well-known type must be 'value'");
+ }
+ jsondec_wellknown(d, msg, m);
+ }
+}
+
+static const upb_MessageDef* jsondec_typeurl(jsondec* d, upb_Message* msg,
+ const upb_MessageDef* m) {
+ const upb_FieldDef* type_url_f = upb_MessageDef_FindFieldByNumber(m, 1);
+ const upb_MessageDef* type_m;
+ upb_StringView type_url = jsondec_string(d);
+ const char* end = type_url.data + type_url.size;
+ const char* ptr = end;
+ upb_MessageValue val;
+
+ val.str_val = type_url;
+ upb_Message_SetFieldByDef(msg, type_url_f, val, d->arena);
+
+ /* Find message name after the last '/' */
+ while (ptr > type_url.data && *--ptr != '/') {
+ }
+
+ if (ptr == type_url.data || ptr == end) {
+ jsondec_err(d, "Type url must have at least one '/' and non-empty host");
+ }
+
+ ptr++;
+ type_m = upb_DefPool_FindMessageByNameWithSize(d->symtab, ptr, end - ptr);
+
+ if (!type_m) {
+ jsondec_err(d, "Type was not found");
+ }
+
+ return type_m;
+}
+
+static void jsondec_any(jsondec* d, upb_Message* msg, const upb_MessageDef* m) {
+ /* string type_url = 1;
+ * bytes value = 2; */
+ const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(m, 2);
+ upb_Message* any_msg;
+ const upb_MessageDef* any_m = NULL;
+ const char* pre_type_data = NULL;
+ const char* pre_type_end = NULL;
+ upb_MessageValue encoded;
+
+ jsondec_objstart(d);
+
+ /* Scan looking for "@type", which is not necessarily first. */
+ while (!any_m && jsondec_objnext(d)) {
+ const char* start = d->ptr;
+ upb_StringView name = jsondec_string(d);
+ jsondec_entrysep(d);
+ if (jsondec_streql(name, "@type")) {
+ any_m = jsondec_typeurl(d, msg, m);
+ if (pre_type_data) {
+ pre_type_end = start;
+ while (*pre_type_end != ',') pre_type_end--;
+ }
+ } else {
+ if (!pre_type_data) pre_type_data = start;
+ jsondec_skipval(d);
+ }
+ }
+
+ if (!any_m) {
+ jsondec_err(d, "Any object didn't contain a '@type' field");
+ }
+
+ const upb_MiniTable* any_layout = upb_MessageDef_MiniTable(any_m);
+ any_msg = upb_Message_New(any_layout, d->arena);
+
+ if (pre_type_data) {
+ size_t len = pre_type_end - pre_type_data + 1;
+ char* tmp = upb_Arena_Malloc(d->arena, len);
+ const char* saved_ptr = d->ptr;
+ const char* saved_end = d->end;
+ memcpy(tmp, pre_type_data, len - 1);
+ tmp[len - 1] = '}';
+ d->ptr = tmp;
+ d->end = tmp + len;
+ d->is_first = true;
+ while (jsondec_objnext(d)) {
+ jsondec_anyfield(d, any_msg, any_m);
+ }
+ d->ptr = saved_ptr;
+ d->end = saved_end;
+ }
+
+ while (jsondec_objnext(d)) {
+ jsondec_anyfield(d, any_msg, any_m);
+ }
+
+ jsondec_objend(d);
+
+ upb_EncodeStatus status =
+ upb_Encode(any_msg, upb_MessageDef_MiniTable(any_m), 0, d->arena,
+ (char**)&encoded.str_val.data, &encoded.str_val.size);
+ // TODO(b/235839510): We should fail gracefully here on a bad return status.
+ UPB_ASSERT(status == kUpb_EncodeStatus_Ok);
+ upb_Message_SetFieldByDef(msg, value_f, encoded, d->arena);
+}
+
+static void jsondec_wrapper(jsondec* d, upb_Message* msg,
+ const upb_MessageDef* m) {
+ const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(m, 1);
+ upb_MessageValue val = jsondec_value(d, value_f);
+ upb_Message_SetFieldByDef(msg, value_f, val, d->arena);
+}
+
+static void jsondec_wellknown(jsondec* d, upb_Message* msg,
+ const upb_MessageDef* m) {
+ switch (upb_MessageDef_WellKnownType(m)) {
+ case kUpb_WellKnown_Any:
+ jsondec_any(d, msg, m);
+ break;
+ case kUpb_WellKnown_FieldMask:
+ jsondec_fieldmask(d, msg, m);
+ break;
+ case kUpb_WellKnown_Duration:
+ jsondec_duration(d, msg, m);
+ break;
+ case kUpb_WellKnown_Timestamp:
+ jsondec_timestamp(d, msg, m);
+ break;
+ case kUpb_WellKnown_Value:
+ jsondec_wellknownvalue(d, msg, m);
+ break;
+ case kUpb_WellKnown_ListValue:
+ jsondec_listvalue(d, msg, m);
+ break;
+ case kUpb_WellKnown_Struct:
+ jsondec_struct(d, msg, m);
+ break;
+ case kUpb_WellKnown_DoubleValue:
+ case kUpb_WellKnown_FloatValue:
+ case kUpb_WellKnown_Int64Value:
+ case kUpb_WellKnown_UInt64Value:
+ case kUpb_WellKnown_Int32Value:
+ case kUpb_WellKnown_UInt32Value:
+ case kUpb_WellKnown_StringValue:
+ case kUpb_WellKnown_BytesValue:
+ case kUpb_WellKnown_BoolValue:
+ jsondec_wrapper(d, msg, m);
+ break;
+ default:
+ UPB_UNREACHABLE();
+ }
+}
+
+static bool upb_JsonDecoder_Decode(jsondec* const d, upb_Message* const msg,
+ const upb_MessageDef* const m) {
+ if (UPB_SETJMP(d->err)) return false;
+
+ jsondec_tomsg(d, msg, m);
+ return true;
+}
+
+bool upb_JsonDecode(const char* buf, size_t size, upb_Message* msg,
+ const upb_MessageDef* m, const upb_DefPool* symtab,
+ int options, upb_Arena* arena, upb_Status* status) {
+ jsondec d;
+
+ if (size == 0) return true;
+
+ d.ptr = buf;
+ d.end = buf + size;
+ d.arena = arena;
+ d.symtab = symtab;
+ d.status = status;
+ d.options = options;
+ d.depth = 64;
+ d.line = 1;
+ d.line_begin = d.ptr;
+ d.debug_field = NULL;
+ d.is_first = false;
+
+ return upb_JsonDecoder_Decode(&d, msg, m);
+}
diff --git a/upb/upb/json/decode.h b/upb/upb/json/decode.h
new file mode 100644
index 0000000..8f58797
--- /dev/null
+++ b/upb/upb/json/decode.h
@@ -0,0 +1,55 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_JSON_DECODE_H_
+#define UPB_JSON_DECODE_H_
+
+#include "upb/reflection/def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum { upb_JsonDecode_IgnoreUnknown = 1 };
+
+UPB_API bool upb_JsonDecode(const char* buf, size_t size, upb_Message* msg,
+ const upb_MessageDef* m, const upb_DefPool* symtab,
+ int options, upb_Arena* arena, upb_Status* status);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_JSONDECODE_H_ */
diff --git a/upb/upb/json/decode_test.cc b/upb/upb/json/decode_test.cc
new file mode 100644
index 0000000..b1e6c96
--- /dev/null
+++ b/upb/upb/json/decode_test.cc
@@ -0,0 +1,94 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/json/decode.h"
+
+#include "google/protobuf/struct.upb.h"
+#include "gtest/gtest.h"
+#include "upb/json/test.upb.h"
+#include "upb/json/test.upbdefs.h"
+#include "upb/mem/arena.hpp"
+#include "upb/reflection/def.hpp"
+
+static upb_test_Box* JsonDecode(const char* json, upb_Arena* a) {
+ upb::Status status;
+ upb::DefPool defpool;
+ upb::MessageDefPtr m(upb_test_Box_getmsgdef(defpool.ptr()));
+ EXPECT_TRUE(m.ptr() != nullptr);
+
+ upb_test_Box* box = upb_test_Box_new(a);
+ int options = 0;
+ bool ok = upb_JsonDecode(json, strlen(json), box, m.ptr(), defpool.ptr(),
+ options, a, status.ptr());
+ return ok ? box : nullptr;
+}
+
+struct FloatTest {
+ const std::string json;
+ float f;
+};
+
+static const std::vector<FloatTest> FloatTestsPass = {
+ {R"({"f": 0})", 0},
+ {R"({"f": 1})", 1},
+ {R"({"f": 1.000000})", 1},
+ {R"({"f": 1.5e1})", 15},
+ {R"({"f": 15e-1})", 1.5},
+ {R"({"f": -3.5})", -3.5},
+ {R"({"f": 3.402823e38})", 3.402823e38},
+ {R"({"f": -3.402823e38})", -3.402823e38},
+ {R"({"f": 340282346638528859811704183484516925440.0})",
+ 340282346638528859811704183484516925440.0},
+ {R"({"f": -340282346638528859811704183484516925440.0})",
+ -340282346638528859811704183484516925440.0},
+};
+
+static const std::vector<FloatTest> FloatTestsFail = {
+ {R"({"f": 1z})", 0},
+ {R"({"f": 3.4028236e+38})", 0},
+ {R"({"f": -3.4028236e+38})", 0},
+};
+
+// Decode some floats.
+TEST(JsonTest, DecodeFloats) {
+ upb::Arena a;
+
+ for (const auto& test : FloatTestsPass) {
+ upb_test_Box* box = JsonDecode(test.json.c_str(), a.ptr());
+ EXPECT_NE(box, nullptr);
+ float f = upb_test_Box_f(box);
+ EXPECT_EQ(f, test.f);
+ }
+
+ for (const auto& test : FloatTestsFail) {
+ upb_test_Box* box = JsonDecode(test.json.c_str(), a.ptr());
+ EXPECT_EQ(box, nullptr);
+ }
+}
diff --git a/upb/upb/json/encode.c b/upb/upb/json/encode.c
new file mode 100644
index 0000000..28440b3
--- /dev/null
+++ b/upb/upb/json/encode.c
@@ -0,0 +1,807 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/json/encode.h"
+
+#include <ctype.h>
+#include <float.h>
+#include <inttypes.h>
+#include <math.h>
+#include <stdarg.h>
+#include <string.h>
+
+#include "upb/collections/map.h"
+#include "upb/lex/round_trip.h"
+#include "upb/port/vsnprintf_compat.h"
+#include "upb/reflection/message.h"
+#include "upb/wire/decode.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+typedef struct {
+ char *buf, *ptr, *end;
+ size_t overflow;
+ int indent_depth;
+ int options;
+ const upb_DefPool* ext_pool;
+ jmp_buf err;
+ upb_Status* status;
+ upb_Arena* arena;
+} jsonenc;
+
+static void jsonenc_msg(jsonenc* e, const upb_Message* msg,
+ const upb_MessageDef* m);
+static void jsonenc_scalar(jsonenc* e, upb_MessageValue val,
+ const upb_FieldDef* f);
+static void jsonenc_msgfield(jsonenc* e, const upb_Message* msg,
+ const upb_MessageDef* m);
+static void jsonenc_msgfields(jsonenc* e, const upb_Message* msg,
+ const upb_MessageDef* m, bool first);
+static void jsonenc_value(jsonenc* e, const upb_Message* msg,
+ const upb_MessageDef* m);
+
+UPB_NORETURN static void jsonenc_err(jsonenc* e, const char* msg) {
+ upb_Status_SetErrorMessage(e->status, msg);
+ longjmp(e->err, 1);
+}
+
+UPB_PRINTF(2, 3)
+UPB_NORETURN static void jsonenc_errf(jsonenc* e, const char* fmt, ...) {
+ va_list argp;
+ va_start(argp, fmt);
+ upb_Status_VSetErrorFormat(e->status, fmt, argp);
+ va_end(argp);
+ longjmp(e->err, 1);
+}
+
+static upb_Arena* jsonenc_arena(jsonenc* e) {
+ /* Create lazily, since it's only needed for Any */
+ if (!e->arena) {
+ e->arena = upb_Arena_New();
+ }
+ return e->arena;
+}
+
+static void jsonenc_putbytes(jsonenc* e, const void* data, size_t len) {
+ size_t have = e->end - e->ptr;
+ if (UPB_LIKELY(have >= len)) {
+ memcpy(e->ptr, data, len);
+ e->ptr += len;
+ } else {
+ if (have) {
+ memcpy(e->ptr, data, have);
+ e->ptr += have;
+ }
+ e->overflow += (len - have);
+ }
+}
+
+static void jsonenc_putstr(jsonenc* e, const char* str) {
+ jsonenc_putbytes(e, str, strlen(str));
+}
+
+UPB_PRINTF(2, 3)
+static void jsonenc_printf(jsonenc* e, const char* fmt, ...) {
+ size_t n;
+ size_t have = e->end - e->ptr;
+ va_list args;
+
+ va_start(args, fmt);
+ n = _upb_vsnprintf(e->ptr, have, fmt, args);
+ va_end(args);
+
+ if (UPB_LIKELY(have > n)) {
+ e->ptr += n;
+ } else {
+ e->ptr = UPB_PTRADD(e->ptr, have);
+ e->overflow += (n - have);
+ }
+}
+
+static void jsonenc_nanos(jsonenc* e, int32_t nanos) {
+ int digits = 9;
+
+ if (nanos == 0) return;
+ if (nanos < 0 || nanos >= 1000000000) {
+ jsonenc_err(e, "error formatting timestamp as JSON: invalid nanos");
+ }
+
+ while (nanos % 1000 == 0) {
+ nanos /= 1000;
+ digits -= 3;
+ }
+
+ jsonenc_printf(e, ".%.*" PRId32, digits, nanos);
+}
+
+static void jsonenc_timestamp(jsonenc* e, const upb_Message* msg,
+ const upb_MessageDef* m) {
+ const upb_FieldDef* seconds_f = upb_MessageDef_FindFieldByNumber(m, 1);
+ const upb_FieldDef* nanos_f = upb_MessageDef_FindFieldByNumber(m, 2);
+ int64_t seconds = upb_Message_GetFieldByDef(msg, seconds_f).int64_val;
+ int32_t nanos = upb_Message_GetFieldByDef(msg, nanos_f).int32_val;
+ int L, N, I, J, K, hour, min, sec;
+
+ if (seconds < -62135596800) {
+ jsonenc_err(e,
+ "error formatting timestamp as JSON: minimum acceptable value "
+ "is 0001-01-01T00:00:00Z");
+ } else if (seconds > 253402300799) {
+ jsonenc_err(e,
+ "error formatting timestamp as JSON: maximum acceptable value "
+ "is 9999-12-31T23:59:59Z");
+ }
+
+ /* Julian Day -> Y/M/D, Algorithm from:
+ * Fliegel, H. F., and Van Flandern, T. C., "A Machine Algorithm for
+ * Processing Calendar Dates," Communications of the Association of
+ * Computing Machines, vol. 11 (1968), p. 657. */
+ seconds += 62135596800; // Ensure seconds is positive.
+ L = (int)(seconds / 86400) - 719162 + 68569 + 2440588;
+ N = 4 * L / 146097;
+ L = L - (146097 * N + 3) / 4;
+ I = 4000 * (L + 1) / 1461001;
+ L = L - 1461 * I / 4 + 31;
+ J = 80 * L / 2447;
+ K = L - 2447 * J / 80;
+ L = J / 11;
+ J = J + 2 - 12 * L;
+ I = 100 * (N - 49) + I + L;
+
+ sec = seconds % 60;
+ min = (seconds / 60) % 60;
+ hour = (seconds / 3600) % 24;
+
+ jsonenc_printf(e, "\"%04d-%02d-%02dT%02d:%02d:%02d", I, J, K, hour, min, sec);
+ jsonenc_nanos(e, nanos);
+ jsonenc_putstr(e, "Z\"");
+}
+
+static void jsonenc_duration(jsonenc* e, const upb_Message* msg,
+ const upb_MessageDef* m) {
+ const upb_FieldDef* seconds_f = upb_MessageDef_FindFieldByNumber(m, 1);
+ const upb_FieldDef* nanos_f = upb_MessageDef_FindFieldByNumber(m, 2);
+ int64_t seconds = upb_Message_GetFieldByDef(msg, seconds_f).int64_val;
+ int32_t nanos = upb_Message_GetFieldByDef(msg, nanos_f).int32_val;
+ bool negative = false;
+
+ if (seconds > 315576000000 || seconds < -315576000000 ||
+ (seconds != 0 && nanos != 0 && (seconds < 0) != (nanos < 0))) {
+ jsonenc_err(e, "bad duration");
+ }
+
+ if (seconds < 0) {
+ negative = true;
+ seconds = -seconds;
+ }
+ if (nanos < 0) {
+ negative = true;
+ nanos = -nanos;
+ }
+
+ jsonenc_putstr(e, "\"");
+ if (negative) {
+ jsonenc_putstr(e, "-");
+ }
+ jsonenc_printf(e, "%" PRId64, seconds);
+ jsonenc_nanos(e, nanos);
+ jsonenc_putstr(e, "s\"");
+}
+
+static void jsonenc_enum(int32_t val, const upb_FieldDef* f, jsonenc* e) {
+ const upb_EnumDef* e_def = upb_FieldDef_EnumSubDef(f);
+
+ if (strcmp(upb_EnumDef_FullName(e_def), "google.protobuf.NullValue") == 0) {
+ jsonenc_putstr(e, "null");
+ } else {
+ const upb_EnumValueDef* ev =
+ (e->options & upb_JsonEncode_FormatEnumsAsIntegers)
+ ? NULL
+ : upb_EnumDef_FindValueByNumber(e_def, val);
+
+ if (ev) {
+ jsonenc_printf(e, "\"%s\"", upb_EnumValueDef_Name(ev));
+ } else {
+ jsonenc_printf(e, "%" PRId32, val);
+ }
+ }
+}
+
+static void jsonenc_bytes(jsonenc* e, upb_StringView str) {
+ /* This is the regular base64, not the "web-safe" version. */
+ static const char base64[] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+ const unsigned char* ptr = (unsigned char*)str.data;
+ const unsigned char* end = UPB_PTRADD(ptr, str.size);
+ char buf[4];
+
+ jsonenc_putstr(e, "\"");
+
+ while (end - ptr >= 3) {
+ buf[0] = base64[ptr[0] >> 2];
+ buf[1] = base64[((ptr[0] & 0x3) << 4) | (ptr[1] >> 4)];
+ buf[2] = base64[((ptr[1] & 0xf) << 2) | (ptr[2] >> 6)];
+ buf[3] = base64[ptr[2] & 0x3f];
+ jsonenc_putbytes(e, buf, 4);
+ ptr += 3;
+ }
+
+ switch (end - ptr) {
+ case 2:
+ buf[0] = base64[ptr[0] >> 2];
+ buf[1] = base64[((ptr[0] & 0x3) << 4) | (ptr[1] >> 4)];
+ buf[2] = base64[(ptr[1] & 0xf) << 2];
+ buf[3] = '=';
+ jsonenc_putbytes(e, buf, 4);
+ break;
+ case 1:
+ buf[0] = base64[ptr[0] >> 2];
+ buf[1] = base64[((ptr[0] & 0x3) << 4)];
+ buf[2] = '=';
+ buf[3] = '=';
+ jsonenc_putbytes(e, buf, 4);
+ break;
+ }
+
+ jsonenc_putstr(e, "\"");
+}
+
+static void jsonenc_stringbody(jsonenc* e, upb_StringView str) {
+ const char* ptr = str.data;
+ const char* end = UPB_PTRADD(ptr, str.size);
+
+ while (ptr < end) {
+ switch (*ptr) {
+ case '\n':
+ jsonenc_putstr(e, "\\n");
+ break;
+ case '\r':
+ jsonenc_putstr(e, "\\r");
+ break;
+ case '\t':
+ jsonenc_putstr(e, "\\t");
+ break;
+ case '\"':
+ jsonenc_putstr(e, "\\\"");
+ break;
+ case '\f':
+ jsonenc_putstr(e, "\\f");
+ break;
+ case '\b':
+ jsonenc_putstr(e, "\\b");
+ break;
+ case '\\':
+ jsonenc_putstr(e, "\\\\");
+ break;
+ default:
+ if ((uint8_t)*ptr < 0x20) {
+ jsonenc_printf(e, "\\u%04x", (int)(uint8_t)*ptr);
+ } else {
+ /* This could be a non-ASCII byte. We rely on the string being valid
+ * UTF-8. */
+ jsonenc_putbytes(e, ptr, 1);
+ }
+ break;
+ }
+ ptr++;
+ }
+}
+
+static void jsonenc_string(jsonenc* e, upb_StringView str) {
+ jsonenc_putstr(e, "\"");
+ jsonenc_stringbody(e, str);
+ jsonenc_putstr(e, "\"");
+}
+
+static bool upb_JsonEncode_HandleSpecialDoubles(jsonenc* e, double val) {
+ if (val == INFINITY) {
+ jsonenc_putstr(e, "\"Infinity\"");
+ } else if (val == -INFINITY) {
+ jsonenc_putstr(e, "\"-Infinity\"");
+ } else if (val != val) {
+ jsonenc_putstr(e, "\"NaN\"");
+ } else {
+ return false;
+ }
+ return true;
+}
+
+static void upb_JsonEncode_Double(jsonenc* e, double val) {
+ if (upb_JsonEncode_HandleSpecialDoubles(e, val)) return;
+ char buf[32];
+ _upb_EncodeRoundTripDouble(val, buf, sizeof(buf));
+ jsonenc_putstr(e, buf);
+}
+
+static void upb_JsonEncode_Float(jsonenc* e, float val) {
+ if (upb_JsonEncode_HandleSpecialDoubles(e, val)) return;
+ char buf[32];
+ _upb_EncodeRoundTripFloat(val, buf, sizeof(buf));
+ jsonenc_putstr(e, buf);
+}
+
+static void jsonenc_wrapper(jsonenc* e, const upb_Message* msg,
+ const upb_MessageDef* m) {
+ const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(m, 1);
+ upb_MessageValue val = upb_Message_GetFieldByDef(msg, val_f);
+ jsonenc_scalar(e, val, val_f);
+}
+
+static const upb_MessageDef* jsonenc_getanymsg(jsonenc* e,
+ upb_StringView type_url) {
+ /* Find last '/', if any. */
+ const char* end = type_url.data + type_url.size;
+ const char* ptr = end;
+ const upb_MessageDef* ret;
+
+ if (!e->ext_pool) {
+ jsonenc_err(e, "Tried to encode Any, but no symtab was provided");
+ }
+
+ if (type_url.size == 0) goto badurl;
+
+ while (true) {
+ if (--ptr == type_url.data) {
+ /* Type URL must contain at least one '/', with host before. */
+ goto badurl;
+ }
+ if (*ptr == '/') {
+ ptr++;
+ break;
+ }
+ }
+
+ ret = upb_DefPool_FindMessageByNameWithSize(e->ext_pool, ptr, end - ptr);
+
+ if (!ret) {
+ jsonenc_errf(e, "Couldn't find Any type: %.*s", (int)(end - ptr), ptr);
+ }
+
+ return ret;
+
+badurl:
+ jsonenc_errf(e, "Bad type URL: " UPB_STRINGVIEW_FORMAT,
+ UPB_STRINGVIEW_ARGS(type_url));
+}
+
+static void jsonenc_any(jsonenc* e, const upb_Message* msg,
+ const upb_MessageDef* m) {
+ const upb_FieldDef* type_url_f = upb_MessageDef_FindFieldByNumber(m, 1);
+ const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(m, 2);
+ upb_StringView type_url = upb_Message_GetFieldByDef(msg, type_url_f).str_val;
+ upb_StringView value = upb_Message_GetFieldByDef(msg, value_f).str_val;
+ const upb_MessageDef* any_m = jsonenc_getanymsg(e, type_url);
+ const upb_MiniTable* any_layout = upb_MessageDef_MiniTable(any_m);
+ upb_Arena* arena = jsonenc_arena(e);
+ upb_Message* any = upb_Message_New(any_layout, arena);
+
+ if (upb_Decode(value.data, value.size, any, any_layout, NULL, 0, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ jsonenc_err(e, "Error decoding message in Any");
+ }
+
+ jsonenc_putstr(e, "{\"@type\":");
+ jsonenc_string(e, type_url);
+
+ if (upb_MessageDef_WellKnownType(any_m) == kUpb_WellKnown_Unspecified) {
+ /* Regular messages: {"@type": "...","foo": 1, "bar": 2} */
+ jsonenc_msgfields(e, any, any_m, false);
+ } else {
+ /* Well-known type: {"@type": "...","value": <well-known encoding>} */
+ jsonenc_putstr(e, ",\"value\":");
+ jsonenc_msgfield(e, any, any_m);
+ }
+
+ jsonenc_putstr(e, "}");
+}
+
+static void jsonenc_putsep(jsonenc* e, const char* str, bool* first) {
+ if (*first) {
+ *first = false;
+ } else {
+ jsonenc_putstr(e, str);
+ }
+}
+
+static void jsonenc_fieldpath(jsonenc* e, upb_StringView path) {
+ const char* ptr = path.data;
+ const char* end = ptr + path.size;
+
+ while (ptr < end) {
+ char ch = *ptr;
+
+ if (ch >= 'A' && ch <= 'Z') {
+ jsonenc_err(e, "Field mask element may not have upper-case letter.");
+ } else if (ch == '_') {
+ if (ptr == end - 1 || *(ptr + 1) < 'a' || *(ptr + 1) > 'z') {
+ jsonenc_err(e, "Underscore must be followed by a lowercase letter.");
+ }
+ ch = *++ptr - 32;
+ }
+
+ jsonenc_putbytes(e, &ch, 1);
+ ptr++;
+ }
+}
+
+static void jsonenc_fieldmask(jsonenc* e, const upb_Message* msg,
+ const upb_MessageDef* m) {
+ const upb_FieldDef* paths_f = upb_MessageDef_FindFieldByNumber(m, 1);
+ const upb_Array* paths = upb_Message_GetFieldByDef(msg, paths_f).array_val;
+ bool first = true;
+ size_t i, n = 0;
+
+ if (paths) n = upb_Array_Size(paths);
+
+ jsonenc_putstr(e, "\"");
+
+ for (i = 0; i < n; i++) {
+ jsonenc_putsep(e, ",", &first);
+ jsonenc_fieldpath(e, upb_Array_Get(paths, i).str_val);
+ }
+
+ jsonenc_putstr(e, "\"");
+}
+
+static void jsonenc_struct(jsonenc* e, const upb_Message* msg,
+ const upb_MessageDef* m) {
+ jsonenc_putstr(e, "{");
+
+ const upb_FieldDef* fields_f = upb_MessageDef_FindFieldByNumber(m, 1);
+ const upb_Map* fields = upb_Message_GetFieldByDef(msg, fields_f).map_val;
+
+ if (fields) {
+ const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(fields_f);
+ const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(entry_m, 2);
+
+ size_t iter = kUpb_Map_Begin;
+ bool first = true;
+
+ upb_MessageValue key, val;
+ while (upb_Map_Next(fields, &key, &val, &iter)) {
+ jsonenc_putsep(e, ",", &first);
+ jsonenc_string(e, key.str_val);
+ jsonenc_putstr(e, ":");
+ jsonenc_value(e, val.msg_val, upb_FieldDef_MessageSubDef(value_f));
+ }
+ }
+
+ jsonenc_putstr(e, "}");
+}
+
+static void jsonenc_listvalue(jsonenc* e, const upb_Message* msg,
+ const upb_MessageDef* m) {
+ const upb_FieldDef* values_f = upb_MessageDef_FindFieldByNumber(m, 1);
+ const upb_MessageDef* values_m = upb_FieldDef_MessageSubDef(values_f);
+ const upb_Array* values = upb_Message_GetFieldByDef(msg, values_f).array_val;
+ size_t i;
+ bool first = true;
+
+ jsonenc_putstr(e, "[");
+
+ if (values) {
+ const size_t size = upb_Array_Size(values);
+ for (i = 0; i < size; i++) {
+ upb_MessageValue elem = upb_Array_Get(values, i);
+
+ jsonenc_putsep(e, ",", &first);
+ jsonenc_value(e, elem.msg_val, values_m);
+ }
+ }
+
+ jsonenc_putstr(e, "]");
+}
+
+static void jsonenc_value(jsonenc* e, const upb_Message* msg,
+ const upb_MessageDef* m) {
+ /* TODO(haberman): do we want a reflection method to get oneof case? */
+ size_t iter = kUpb_Message_Begin;
+ const upb_FieldDef* f;
+ upb_MessageValue val;
+
+ if (!upb_Message_Next(msg, m, NULL, &f, &val, &iter)) {
+ jsonenc_err(e, "No value set in Value proto");
+ }
+
+ switch (upb_FieldDef_Number(f)) {
+ case 1:
+ jsonenc_putstr(e, "null");
+ break;
+ case 2:
+ if (upb_JsonEncode_HandleSpecialDoubles(e, val.double_val)) {
+ jsonenc_err(
+ e,
+ "google.protobuf.Value cannot encode double values for "
+ "infinity or nan, because they would be parsed as a string");
+ }
+ upb_JsonEncode_Double(e, val.double_val);
+ break;
+ case 3:
+ jsonenc_string(e, val.str_val);
+ break;
+ case 4:
+ jsonenc_putstr(e, val.bool_val ? "true" : "false");
+ break;
+ case 5:
+ jsonenc_struct(e, val.msg_val, upb_FieldDef_MessageSubDef(f));
+ break;
+ case 6:
+ jsonenc_listvalue(e, val.msg_val, upb_FieldDef_MessageSubDef(f));
+ break;
+ }
+}
+
+static void jsonenc_msgfield(jsonenc* e, const upb_Message* msg,
+ const upb_MessageDef* m) {
+ switch (upb_MessageDef_WellKnownType(m)) {
+ case kUpb_WellKnown_Unspecified:
+ jsonenc_msg(e, msg, m);
+ break;
+ case kUpb_WellKnown_Any:
+ jsonenc_any(e, msg, m);
+ break;
+ case kUpb_WellKnown_FieldMask:
+ jsonenc_fieldmask(e, msg, m);
+ break;
+ case kUpb_WellKnown_Duration:
+ jsonenc_duration(e, msg, m);
+ break;
+ case kUpb_WellKnown_Timestamp:
+ jsonenc_timestamp(e, msg, m);
+ break;
+ case kUpb_WellKnown_DoubleValue:
+ case kUpb_WellKnown_FloatValue:
+ case kUpb_WellKnown_Int64Value:
+ case kUpb_WellKnown_UInt64Value:
+ case kUpb_WellKnown_Int32Value:
+ case kUpb_WellKnown_UInt32Value:
+ case kUpb_WellKnown_StringValue:
+ case kUpb_WellKnown_BytesValue:
+ case kUpb_WellKnown_BoolValue:
+ jsonenc_wrapper(e, msg, m);
+ break;
+ case kUpb_WellKnown_Value:
+ jsonenc_value(e, msg, m);
+ break;
+ case kUpb_WellKnown_ListValue:
+ jsonenc_listvalue(e, msg, m);
+ break;
+ case kUpb_WellKnown_Struct:
+ jsonenc_struct(e, msg, m);
+ break;
+ }
+}
+
+static void jsonenc_scalar(jsonenc* e, upb_MessageValue val,
+ const upb_FieldDef* f) {
+ switch (upb_FieldDef_CType(f)) {
+ case kUpb_CType_Bool:
+ jsonenc_putstr(e, val.bool_val ? "true" : "false");
+ break;
+ case kUpb_CType_Float:
+ upb_JsonEncode_Float(e, val.float_val);
+ break;
+ case kUpb_CType_Double:
+ upb_JsonEncode_Double(e, val.double_val);
+ break;
+ case kUpb_CType_Int32:
+ jsonenc_printf(e, "%" PRId32, val.int32_val);
+ break;
+ case kUpb_CType_UInt32:
+ jsonenc_printf(e, "%" PRIu32, val.uint32_val);
+ break;
+ case kUpb_CType_Int64:
+ jsonenc_printf(e, "\"%" PRId64 "\"", val.int64_val);
+ break;
+ case kUpb_CType_UInt64:
+ jsonenc_printf(e, "\"%" PRIu64 "\"", val.uint64_val);
+ break;
+ case kUpb_CType_String:
+ jsonenc_string(e, val.str_val);
+ break;
+ case kUpb_CType_Bytes:
+ jsonenc_bytes(e, val.str_val);
+ break;
+ case kUpb_CType_Enum:
+ jsonenc_enum(val.int32_val, f, e);
+ break;
+ case kUpb_CType_Message:
+ jsonenc_msgfield(e, val.msg_val, upb_FieldDef_MessageSubDef(f));
+ break;
+ }
+}
+
+static void jsonenc_mapkey(jsonenc* e, upb_MessageValue val,
+ const upb_FieldDef* f) {
+ jsonenc_putstr(e, "\"");
+
+ switch (upb_FieldDef_CType(f)) {
+ case kUpb_CType_Bool:
+ jsonenc_putstr(e, val.bool_val ? "true" : "false");
+ break;
+ case kUpb_CType_Int32:
+ jsonenc_printf(e, "%" PRId32, val.int32_val);
+ break;
+ case kUpb_CType_UInt32:
+ jsonenc_printf(e, "%" PRIu32, val.uint32_val);
+ break;
+ case kUpb_CType_Int64:
+ jsonenc_printf(e, "%" PRId64, val.int64_val);
+ break;
+ case kUpb_CType_UInt64:
+ jsonenc_printf(e, "%" PRIu64, val.uint64_val);
+ break;
+ case kUpb_CType_String:
+ jsonenc_stringbody(e, val.str_val);
+ break;
+ default:
+ UPB_UNREACHABLE();
+ }
+
+ jsonenc_putstr(e, "\":");
+}
+
+static void jsonenc_array(jsonenc* e, const upb_Array* arr,
+ const upb_FieldDef* f) {
+ size_t i;
+ size_t size = arr ? upb_Array_Size(arr) : 0;
+ bool first = true;
+
+ jsonenc_putstr(e, "[");
+
+ for (i = 0; i < size; i++) {
+ jsonenc_putsep(e, ",", &first);
+ jsonenc_scalar(e, upb_Array_Get(arr, i), f);
+ }
+
+ jsonenc_putstr(e, "]");
+}
+
+static void jsonenc_map(jsonenc* e, const upb_Map* map, const upb_FieldDef* f) {
+ jsonenc_putstr(e, "{");
+
+ const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f);
+ const upb_FieldDef* key_f = upb_MessageDef_FindFieldByNumber(entry, 1);
+ const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(entry, 2);
+
+ if (map) {
+ size_t iter = kUpb_Map_Begin;
+ bool first = true;
+
+ upb_MessageValue key, val;
+ while (upb_Map_Next(map, &key, &val, &iter)) {
+ jsonenc_putsep(e, ",", &first);
+ jsonenc_mapkey(e, key, key_f);
+ jsonenc_scalar(e, val, val_f);
+ }
+ }
+
+ jsonenc_putstr(e, "}");
+}
+
+static void jsonenc_fieldval(jsonenc* e, const upb_FieldDef* f,
+ upb_MessageValue val, bool* first) {
+ const char* name;
+
+ jsonenc_putsep(e, ",", first);
+
+ if (upb_FieldDef_IsExtension(f)) {
+ // TODO: For MessageSet, I would have expected this to print the message
+ // name here, but Python doesn't appear to do this. We should do more
+ // research here about what various implementations do.
+ jsonenc_printf(e, "\"[%s]\":", upb_FieldDef_FullName(f));
+ } else {
+ if (e->options & upb_JsonEncode_UseProtoNames) {
+ name = upb_FieldDef_Name(f);
+ } else {
+ name = upb_FieldDef_JsonName(f);
+ }
+ jsonenc_printf(e, "\"%s\":", name);
+ }
+
+ if (upb_FieldDef_IsMap(f)) {
+ jsonenc_map(e, val.map_val, f);
+ } else if (upb_FieldDef_IsRepeated(f)) {
+ jsonenc_array(e, val.array_val, f);
+ } else {
+ jsonenc_scalar(e, val, f);
+ }
+}
+
+static void jsonenc_msgfields(jsonenc* e, const upb_Message* msg,
+ const upb_MessageDef* m, bool first) {
+ upb_MessageValue val;
+ const upb_FieldDef* f;
+
+ if (e->options & upb_JsonEncode_EmitDefaults) {
+ /* Iterate over all fields. */
+ int i = 0;
+ int n = upb_MessageDef_FieldCount(m);
+ for (i = 0; i < n; i++) {
+ f = upb_MessageDef_Field(m, i);
+ if (!upb_FieldDef_HasPresence(f) || upb_Message_HasFieldByDef(msg, f)) {
+ jsonenc_fieldval(e, f, upb_Message_GetFieldByDef(msg, f), &first);
+ }
+ }
+ } else {
+ /* Iterate over non-empty fields. */
+ size_t iter = kUpb_Message_Begin;
+ while (upb_Message_Next(msg, m, e->ext_pool, &f, &val, &iter)) {
+ jsonenc_fieldval(e, f, val, &first);
+ }
+ }
+}
+
+static void jsonenc_msg(jsonenc* e, const upb_Message* msg,
+ const upb_MessageDef* m) {
+ jsonenc_putstr(e, "{");
+ jsonenc_msgfields(e, msg, m, true);
+ jsonenc_putstr(e, "}");
+}
+
+static size_t jsonenc_nullz(jsonenc* e, size_t size) {
+ size_t ret = e->ptr - e->buf + e->overflow;
+
+ if (size > 0) {
+ if (e->ptr == e->end) e->ptr--;
+ *e->ptr = '\0';
+ }
+
+ return ret;
+}
+
+static size_t upb_JsonEncoder_Encode(jsonenc* const e,
+ const upb_Message* const msg,
+ const upb_MessageDef* const m,
+ const size_t size) {
+ if (UPB_SETJMP(e->err) != 0) return -1;
+
+ jsonenc_msgfield(e, msg, m);
+ if (e->arena) upb_Arena_Free(e->arena);
+ return jsonenc_nullz(e, size);
+}
+
+size_t upb_JsonEncode(const upb_Message* msg, const upb_MessageDef* m,
+ const upb_DefPool* ext_pool, int options, char* buf,
+ size_t size, upb_Status* status) {
+ jsonenc e;
+
+ e.buf = buf;
+ e.ptr = buf;
+ e.end = UPB_PTRADD(buf, size);
+ e.overflow = 0;
+ e.options = options;
+ e.ext_pool = ext_pool;
+ e.status = status;
+ e.arena = NULL;
+
+ return upb_JsonEncoder_Encode(&e, msg, m, size);
+}
diff --git a/upb/upb/json/encode.h b/upb/upb/json/encode.h
new file mode 100644
index 0000000..f8f112b
--- /dev/null
+++ b/upb/upb/json/encode.h
@@ -0,0 +1,73 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_JSON_ENCODE_H_
+#define UPB_JSON_ENCODE_H_
+
+#include "upb/reflection/def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum {
+ /* When set, emits 0/default values. TODO(haberman): proto3 only? */
+ upb_JsonEncode_EmitDefaults = 1 << 0,
+
+ /* When set, use normal (snake_case) field names instead of JSON (camelCase)
+ names. */
+ upb_JsonEncode_UseProtoNames = 1 << 1,
+
+ /* When set, emits enums as their integer values instead of as their names. */
+ upb_JsonEncode_FormatEnumsAsIntegers = 1 << 2
+};
+
+/* Encodes the given |msg| to JSON format. The message's reflection is given in
+ * |m|. The symtab in |symtab| is used to find extensions (if NULL, extensions
+ * will not be printed).
+ *
+ * Output is placed in the given buffer, and always NULL-terminated. The output
+ * size (excluding NULL) is returned. This means that a return value >= |size|
+ * implies that the output was truncated. (These are the same semantics as
+ * snprintf()). */
+UPB_API size_t upb_JsonEncode(const upb_Message* msg, const upb_MessageDef* m,
+ const upb_DefPool* ext_pool, int options,
+ char* buf, size_t size, upb_Status* status);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_JSONENCODE_H_ */
diff --git a/upb/upb/json/encode_test.cc b/upb/upb/json/encode_test.cc
new file mode 100644
index 0000000..ab21400
--- /dev/null
+++ b/upb/upb/json/encode_test.cc
@@ -0,0 +1,105 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/json/encode.h"
+
+#include "google/protobuf/struct.upb.h"
+#include "gtest/gtest.h"
+#include "upb/base/status.hpp"
+#include "upb/json/test.upb.h"
+#include "upb/json/test.upbdefs.h"
+#include "upb/mem/arena.hpp"
+#include "upb/reflection/def.hpp"
+
+static std::string JsonEncode(const upb_test_Box* msg, int options) {
+ upb::Arena a;
+ upb::Status status;
+ upb::DefPool defpool;
+ upb::MessageDefPtr m(upb_test_Box_getmsgdef(defpool.ptr()));
+ EXPECT_TRUE(m.ptr() != nullptr);
+
+ size_t json_size = upb_JsonEncode(msg, m.ptr(), defpool.ptr(), options, NULL,
+ 0, status.ptr());
+ char* json_buf = (char*)upb_Arena_Malloc(a.ptr(), json_size + 1);
+
+ size_t size = upb_JsonEncode(msg, m.ptr(), defpool.ptr(), options, json_buf,
+ json_size + 1, status.ptr());
+ EXPECT_EQ(size, json_size);
+ return std::string(json_buf, json_size);
+}
+
+// Encode a single optional enum.
+TEST(JsonTest, EncodeEnum) {
+ upb::Arena a;
+
+ upb_test_Box* foo = upb_test_Box_new(a.ptr());
+ upb_test_Box_set_first_tag(foo, upb_test_Z_BAR);
+
+ EXPECT_EQ(R"({"firstTag":"Z_BAR"})", JsonEncode(foo, 0));
+ EXPECT_EQ(R"({"firstTag":1})",
+ JsonEncode(foo, upb_JsonEncode_FormatEnumsAsIntegers));
+}
+
+// Encode a single optional negative enum.
+TEST(JsonTest, EncodeNegativeEnum) {
+ upb::Arena a;
+
+ upb_test_Box* foo = upb_test_Box_new(a.ptr());
+ upb_test_Box_set_last_tag(foo, upb_test_Z_BAZ);
+
+ EXPECT_EQ(R"({"lastTag":"Z_BAZ"})", JsonEncode(foo, 0));
+ EXPECT_EQ(R"({"lastTag":-2})",
+ JsonEncode(foo, upb_JsonEncode_FormatEnumsAsIntegers));
+}
+
+// Encode a single repeated enum.
+TEST(JsonTest, EncodeRepeatedEnum) {
+ upb::Arena a;
+
+ upb_test_Box* foo = upb_test_Box_new(a.ptr());
+ upb_test_Box_add_more_tags(foo, upb_test_Z_BAT, a.ptr());
+
+ EXPECT_EQ(R"({"moreTags":["Z_BAT"]})", JsonEncode(foo, 0));
+ EXPECT_EQ(R"({"moreTags":[13]})",
+ JsonEncode(foo, upb_JsonEncode_FormatEnumsAsIntegers));
+}
+
+// Special case: encode null enum.
+TEST(JsonTest, EncodeNullEnum) {
+ upb::Arena a;
+
+ upb_test_Box* foo = upb_test_Box_new(a.ptr());
+ google_protobuf_Value_set_null_value(upb_test_Box_mutable_val(foo, a.ptr()),
+ google_protobuf_NULL_VALUE);
+
+ EXPECT_EQ(R"({"val":null})", JsonEncode(foo, 0));
+ EXPECT_EQ(R"({"val":null})",
+ JsonEncode(foo, upb_JsonEncode_FormatEnumsAsIntegers));
+}
diff --git a/upb/upb/json/test.proto b/upb/upb/json/test.proto
new file mode 100644
index 0000000..7517bda
--- /dev/null
+++ b/upb/upb/json/test.proto
@@ -0,0 +1,52 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto2";
+
+package upb_test;
+
+import "google/protobuf/struct.proto";
+
+enum Tag {
+ Z_NONE = 0;
+ Z_BAR = 1;
+ Z_BAZ = -2;
+ Z_BAT = 13;
+}
+
+message Box {
+ optional Tag first_tag = 1;
+ repeated Tag more_tags = 2;
+ optional Tag last_tag = 5;
+ optional string name = 4;
+ optional google.protobuf.Value val = 6;
+ optional float f = 7;
+ optional double d = 8;
+}
diff --git a/upb/upb/lex/BUILD b/upb/upb/lex/BUILD
new file mode 100644
index 0000000..f7a57a1
--- /dev/null
+++ b/upb/upb/lex/BUILD
@@ -0,0 +1,71 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load("//bazel:build_defs.bzl", "UPB_DEFAULT_COPTS")
+
+cc_library(
+ name = "lex",
+ srcs = [
+ "atoi.c",
+ "round_trip.c",
+ "strtod.c",
+ "unicode.c",
+ ],
+ hdrs = [
+ "atoi.h",
+ "round_trip.h",
+ "strtod.h",
+ "unicode.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = ["//:port"],
+)
+
+cc_test(
+ name = "atoi_test",
+ srcs = ["atoi_test.cc"],
+ deps = [
+ ":lex",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+# begin:github_only
+filegroup(
+ name = "source_files",
+ srcs = glob(
+ [
+ "**/*.c",
+ "**/*.h",
+ ],
+ ),
+ visibility = [
+ "//cmake:__pkg__",
+ "//python/dist:__pkg__",
+ ]
+)
+# end:github_only
diff --git a/upb/upb/lex/atoi.c b/upb/upb/lex/atoi.c
new file mode 100644
index 0000000..aecb467
--- /dev/null
+++ b/upb/upb/lex/atoi.c
@@ -0,0 +1,71 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/lex/atoi.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+const char* upb_BufToUint64(const char* ptr, const char* end, uint64_t* val) {
+ uint64_t u64 = 0;
+ while (ptr < end) {
+ unsigned ch = *ptr - '0';
+ if (ch >= 10) break;
+ if (u64 > UINT64_MAX / 10 || u64 * 10 > UINT64_MAX - ch) {
+ return NULL; // integer overflow
+ }
+ u64 *= 10;
+ u64 += ch;
+ ptr++;
+ }
+
+ *val = u64;
+ return ptr;
+}
+
+const char* upb_BufToInt64(const char* ptr, const char* end, int64_t* val,
+ bool* is_neg) {
+ bool neg = false;
+ uint64_t u64;
+
+ if (ptr != end && *ptr == '-') {
+ ptr++;
+ neg = true;
+ }
+
+ ptr = upb_BufToUint64(ptr, end, &u64);
+ if (!ptr || u64 > (uint64_t)INT64_MAX + neg) {
+ return NULL; // integer overflow
+ }
+
+ *val = neg ? -u64 : u64;
+ if (is_neg) *is_neg = neg;
+ return ptr;
+}
diff --git a/upb/upb/lex/atoi.h b/upb/upb/lex/atoi.h
new file mode 100644
index 0000000..808bca8
--- /dev/null
+++ b/upb/upb/lex/atoi.h
@@ -0,0 +1,58 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_LEX_ATOI_H_
+#define UPB_LEX_ATOI_H_
+
+#include <stdint.h>
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// We use these hand-written routines instead of strto[u]l() because the "long
+// long" variants aren't in c89. Also our version allows setting a ptr limit.
+// Return the new position of the pointer after parsing the int, or NULL on
+// integer overflow.
+
+const char* upb_BufToUint64(const char* ptr, const char* end, uint64_t* val);
+const char* upb_BufToInt64(const char* ptr, const char* end, int64_t* val,
+ bool* is_neg);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_LEX_ATOI_H_ */
diff --git a/upb/upb/lex/atoi_test.cc b/upb/upb/lex/atoi_test.cc
new file mode 100644
index 0000000..ed35219
--- /dev/null
+++ b/upb/upb/lex/atoi_test.cc
@@ -0,0 +1,112 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/lex/atoi.h"
+
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+
+TEST(AtoiTest, Uint64) {
+ uint64_t val;
+
+ const char* s = "1234z";
+ EXPECT_EQ(s + 1, upb_BufToUint64(s, s + 1, &val));
+ EXPECT_EQ(val, 1);
+ EXPECT_EQ(s + 4, upb_BufToUint64(s, s + 4, &val));
+ EXPECT_EQ(val, 1234);
+ EXPECT_EQ(s + 4, upb_BufToUint64(s, s + 5, &val));
+ EXPECT_EQ(val, 1234);
+
+ const char* t = "42.6";
+ EXPECT_EQ(t + 1, upb_BufToUint64(t, t + 1, &val));
+ EXPECT_EQ(val, 4);
+ EXPECT_EQ(t + 2, upb_BufToUint64(t, t + 2, &val));
+ EXPECT_EQ(val, 42);
+ EXPECT_EQ(t + 2, upb_BufToUint64(t, t + 3, &val));
+ EXPECT_EQ(val, 42);
+
+ // Integer overflow
+ const char* u = "1000000000000000000000000000000";
+ EXPECT_EQ(NULL, upb_BufToUint64(u, u + strlen(u), &val));
+
+ // Not an integer
+ const char* v = "foobar";
+ EXPECT_EQ(v, upb_BufToUint64(v, v + strlen(v), &val));
+
+ const uint64_t values[] = {
+ std::numeric_limits<uint64_t>::max(),
+ std::numeric_limits<uint64_t>::min(),
+ };
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(values); i++) {
+ std::string v = absl::StrCat(values[i]);
+ const char* ptr = v.c_str();
+ const char* end = ptr + strlen(ptr);
+ EXPECT_EQ(end, upb_BufToUint64(ptr, end, &val));
+ EXPECT_EQ(val, values[i]);
+ }
+}
+
+TEST(AtoiTest, Int64) {
+ int64_t val;
+ bool neg;
+
+ const char* s = "1234z";
+ EXPECT_EQ(s + 1, upb_BufToInt64(s, s + 1, &val, &neg));
+ EXPECT_EQ(val, 1);
+ EXPECT_EQ(neg, false);
+ EXPECT_EQ(s + 4, upb_BufToInt64(s, s + 4, &val, NULL));
+ EXPECT_EQ(val, 1234);
+ EXPECT_EQ(s + 4, upb_BufToInt64(s, s + 5, &val, NULL));
+ EXPECT_EQ(val, 1234);
+
+ const char* t = "-42.6";
+ EXPECT_EQ(t + 2, upb_BufToInt64(t, t + 2, &val, &neg));
+ EXPECT_EQ(val, -4);
+ EXPECT_EQ(neg, true);
+ EXPECT_EQ(t + 3, upb_BufToInt64(t, t + 3, &val, NULL));
+ EXPECT_EQ(val, -42);
+ EXPECT_EQ(neg, true);
+ EXPECT_EQ(t + 3, upb_BufToInt64(t, t + 5, &val, NULL));
+ EXPECT_EQ(val, -42);
+
+ const int64_t values[] = {
+ std::numeric_limits<int32_t>::max(),
+ std::numeric_limits<int32_t>::min(),
+ std::numeric_limits<int64_t>::max(),
+ std::numeric_limits<int64_t>::min(),
+ };
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(values); i++) {
+ std::string v = absl::StrCat(values[i]);
+ const char* ptr = v.c_str();
+ const char* end = ptr + strlen(ptr);
+ EXPECT_EQ(end, upb_BufToInt64(ptr, end, &val, NULL));
+ EXPECT_EQ(val, values[i]);
+ }
+}
diff --git a/upb/upb/lex/round_trip.c b/upb/upb/lex/round_trip.c
new file mode 100644
index 0000000..6402dc6
--- /dev/null
+++ b/upb/upb/lex/round_trip.c
@@ -0,0 +1,70 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/lex/round_trip.h"
+
+#include <float.h>
+#include <stdlib.h>
+
+// Must be last.
+#include "upb/port/def.inc"
+
+/* Miscellaneous utilities ****************************************************/
+
+static void upb_FixLocale(char* p) {
+ /* printf() is dependent on locales; sadly there is no easy and portable way
+ * to avoid this. This little post-processing step will translate 1,2 -> 1.2
+ * since JSON needs the latter. Arguably a hack, but it is simple and the
+ * alternatives are far more complicated, platform-dependent, and/or larger
+ * in code size. */
+ for (; *p; p++) {
+ if (*p == ',') *p = '.';
+ }
+}
+
+void _upb_EncodeRoundTripDouble(double val, char* buf, size_t size) {
+ assert(size >= kUpb_RoundTripBufferSize);
+ snprintf(buf, size, "%.*g", DBL_DIG, val);
+ if (strtod(buf, NULL) != val) {
+ snprintf(buf, size, "%.*g", DBL_DIG + 2, val);
+ assert(strtod(buf, NULL) == val);
+ }
+ upb_FixLocale(buf);
+}
+
+void _upb_EncodeRoundTripFloat(float val, char* buf, size_t size) {
+ assert(size >= kUpb_RoundTripBufferSize);
+ snprintf(buf, size, "%.*g", FLT_DIG, val);
+ if (strtof(buf, NULL) != val) {
+ snprintf(buf, size, "%.*g", FLT_DIG + 3, val);
+ assert(strtof(buf, NULL) == val);
+ }
+ upb_FixLocale(buf);
+}
diff --git a/upb/upb/lex/round_trip.h b/upb/upb/lex/round_trip.h
new file mode 100644
index 0000000..d00a662
--- /dev/null
+++ b/upb/upb/lex/round_trip.h
@@ -0,0 +1,58 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_LEX_ROUND_TRIP_H_
+#define UPB_LEX_ROUND_TRIP_H_
+
+// Must be last.
+#include "upb/port/def.inc"
+
+// Encodes a float or double that is round-trippable, but as short as possible.
+// These routines are not fully optimal (not guaranteed to be shortest), but are
+// short-ish and match the implementation that has been used in protobuf since
+// the beginning.
+
+// The given buffer size must be at least kUpb_RoundTripBufferSize.
+enum { kUpb_RoundTripBufferSize = 32 };
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void _upb_EncodeRoundTripDouble(double val, char* buf, size_t size);
+void _upb_EncodeRoundTripFloat(float val, char* buf, size_t size);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_LEX_ROUND_TRIP_H_ */
diff --git a/upb/upb/lex/strtod.c b/upb/upb/lex/strtod.c
new file mode 100644
index 0000000..50a7c89
--- /dev/null
+++ b/upb/upb/lex/strtod.c
@@ -0,0 +1,100 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/lex/strtod.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+// Must be last.
+#include "upb/port/def.inc"
+
+// Determine the locale-specific radix character by calling sprintf() to print
+// the number 1.5, then stripping off the digits. As far as I can tell, this
+// is the only portable, thread-safe way to get the C library to divulge the
+// locale's radix character. No, localeconv() is NOT thread-safe.
+
+static int GetLocaleRadix(char *data, size_t capacity) {
+ char temp[16];
+ const int size = snprintf(temp, sizeof(temp), "%.1f", 1.5);
+ UPB_ASSERT(temp[0] == '1');
+ UPB_ASSERT(temp[size - 1] == '5');
+ UPB_ASSERT(size < capacity);
+ temp[size - 1] = '\0';
+ strcpy(data, temp + 1);
+ return size - 2;
+}
+
+// Populates a string identical to *input except that the character pointed to
+// by pos (which should be '.') is replaced with the locale-specific radix.
+
+static void LocalizeRadix(const char *input, const char *pos, char *output) {
+ const int len1 = pos - input;
+
+ char radix[8];
+ const int len2 = GetLocaleRadix(radix, sizeof(radix));
+
+ memcpy(output, input, len1);
+ memcpy(output + len1, radix, len2);
+ strcpy(output + len1 + len2, input + len1 + 1);
+}
+
+double _upb_NoLocaleStrtod(const char *str, char **endptr) {
+ // We cannot simply set the locale to "C" temporarily with setlocale()
+ // as this is not thread-safe. Instead, we try to parse in the current
+ // locale first. If parsing stops at a '.' character, then this is a
+ // pretty good hint that we're actually in some other locale in which
+ // '.' is not the radix character.
+
+ char *temp_endptr;
+ double result = strtod(str, &temp_endptr);
+ if (endptr != NULL) *endptr = temp_endptr;
+ if (*temp_endptr != '.') return result;
+
+ // Parsing halted on a '.'. Perhaps we're in a different locale? Let's
+ // try to replace the '.' with a locale-specific radix character and
+ // try again.
+
+ char localized[80];
+ LocalizeRadix(str, temp_endptr, localized);
+ char *localized_endptr;
+ result = strtod(localized, &localized_endptr);
+ if ((localized_endptr - &localized[0]) > (temp_endptr - str)) {
+ // This attempt got further, so replacing the decimal must have helped.
+ // Update endptr to point at the right location.
+ if (endptr != NULL) {
+ // size_diff is non-zero if the localized radix has multiple bytes.
+ int size_diff = strlen(localized) - strlen(str);
+ *endptr = (char *)str + (localized_endptr - &localized[0] - size_diff);
+ }
+ }
+
+ return result;
+}
diff --git a/upb/upb/lex/strtod.h b/upb/upb/lex/strtod.h
new file mode 100644
index 0000000..c7b8777
--- /dev/null
+++ b/upb/upb/lex/strtod.h
@@ -0,0 +1,49 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_LEX_STRTOD_H_
+#define UPB_LEX_STRTOD_H_
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+double _upb_NoLocaleStrtod(const char *str, char **endptr);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_LEX_STRTOD_H_ */
diff --git a/upb/upb/lex/unicode.c b/upb/upb/lex/unicode.c
new file mode 100644
index 0000000..885a061
--- /dev/null
+++ b/upb/upb/lex/unicode.c
@@ -0,0 +1,60 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/lex/unicode.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+int upb_Unicode_ToUTF8(uint32_t cp, char* out) {
+ if (cp <= 0x7f) {
+ out[0] = cp;
+ return 1;
+ }
+ if (cp <= 0x07ff) {
+ out[0] = (cp >> 6) | 0xc0;
+ out[1] = (cp & 0x3f) | 0x80;
+ return 2;
+ }
+ if (cp <= 0xffff) {
+ out[0] = (cp >> 12) | 0xe0;
+ out[1] = ((cp >> 6) & 0x3f) | 0x80;
+ out[2] = (cp & 0x3f) | 0x80;
+ return 3;
+ }
+ if (cp <= 0x10ffff) {
+ out[0] = (cp >> 18) | 0xf0;
+ out[1] = ((cp >> 12) & 0x3f) | 0x80;
+ out[2] = ((cp >> 6) & 0x3f) | 0x80;
+ out[3] = (cp & 0x3f) | 0x80;
+ return 4;
+ }
+ return 0;
+}
diff --git a/upb/upb/lex/unicode.h b/upb/upb/lex/unicode.h
new file mode 100644
index 0000000..ed5615e
--- /dev/null
+++ b/upb/upb/lex/unicode.h
@@ -0,0 +1,82 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_LEX_UNICODE_H_
+#define UPB_LEX_UNICODE_H_
+
+#include <stdint.h>
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Returns true iff a codepoint is the value for a high surrogate.
+UPB_INLINE bool upb_Unicode_IsHigh(uint32_t cp) {
+ return (cp >= 0xd800 && cp <= 0xdbff);
+}
+
+// Returns true iff a codepoint is the value for a low surrogate.
+UPB_INLINE bool upb_Unicode_IsLow(uint32_t cp) {
+ return (cp >= 0xdc00 && cp <= 0xdfff);
+}
+
+// Returns the high 16-bit surrogate value for a supplementary codepoint.
+// Does not sanity-check the input.
+UPB_INLINE uint16_t upb_Unicode_ToHigh(uint32_t cp) {
+ return (cp >> 10) + 0xd7c0;
+}
+
+// Returns the low 16-bit surrogate value for a supplementary codepoint.
+// Does not sanity-check the input.
+UPB_INLINE uint16_t upb_Unicode_ToLow(uint32_t cp) {
+ return (cp & 0x3ff) | 0xdc00;
+}
+
+// Returns the 32-bit value corresponding to a pair of 16-bit surrogates.
+// Does not sanity-check the input.
+UPB_INLINE uint32_t upb_Unicode_FromPair(uint32_t high, uint32_t low) {
+ return ((high & 0x3ff) << 10) + (low & 0x3ff) + 0x10000;
+}
+
+// Outputs a codepoint as UTF8.
+// Returns the number of bytes written (1-4 on success, 0 on error).
+// Does not sanity-check the input. Specifically does not check for surrogates.
+int upb_Unicode_ToUTF8(uint32_t cp, char* out);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_LEX_UNICODE_H_ */
diff --git a/upb/upb/mem/BUILD b/upb/upb/mem/BUILD
new file mode 100644
index 0000000..c1b877d
--- /dev/null
+++ b/upb/upb/mem/BUILD
@@ -0,0 +1,89 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load("//bazel:build_defs.bzl", "UPB_DEFAULT_COPTS")
+
+cc_library(
+ name = "mem",
+ hdrs = [
+ "alloc.h",
+ "arena.h",
+ "arena.hpp",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":internal",
+ "//:port",
+ ],
+)
+
+cc_library(
+ name = "internal",
+ srcs = [
+ "alloc.c",
+ "alloc.h",
+ "arena.c",
+ "arena.h",
+ ],
+ hdrs = [
+ "internal/arena.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//:__pkg__"],
+ deps = [
+ "//:port",
+ ],
+)
+
+cc_test(
+ name = "arena_test",
+ srcs = ["arena_test.cc"],
+ deps = [
+ "//:mem",
+ "//:port",
+ "@com_google_absl//absl/random",
+ "@com_google_absl//absl/random:distributions",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+# begin:github_only
+filegroup(
+ name = "source_files",
+ srcs = glob(
+ [
+ "**/*.c",
+ "**/*.h",
+ "**/*.hpp",
+ ],
+ ),
+ visibility = [
+ "//cmake:__pkg__",
+ "//python/dist:__pkg__",
+ ]
+)
+# end:github_only
diff --git a/upb/upb/mem/alloc.c b/upb/upb/mem/alloc.c
new file mode 100644
index 0000000..1d10f10
--- /dev/null
+++ b/upb/upb/mem/alloc.c
@@ -0,0 +1,50 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/mem/alloc.h"
+
+#include <stdlib.h>
+
+// Must be last.
+#include "upb/port/def.inc"
+
+static void* upb_global_allocfunc(upb_alloc* alloc, void* ptr, size_t oldsize,
+ size_t size) {
+ UPB_UNUSED(alloc);
+ UPB_UNUSED(oldsize);
+ if (size == 0) {
+ free(ptr);
+ return NULL;
+ } else {
+ return realloc(ptr, size);
+ }
+}
+
+upb_alloc upb_alloc_global = {&upb_global_allocfunc};
diff --git a/upb/upb/mem/alloc.h b/upb/upb/mem/alloc.h
new file mode 100644
index 0000000..c278cbb
--- /dev/null
+++ b/upb/upb/mem/alloc.h
@@ -0,0 +1,101 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MEM_ALLOC_H_
+#define UPB_MEM_ALLOC_H_
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct upb_alloc upb_alloc;
+
+/* A combined `malloc()`/`free()` function.
+ * If `size` is 0 then the function acts like `free()`, otherwise it acts like
+ * `realloc()`. Only `oldsize` bytes from a previous allocation are
+ * preserved. */
+typedef void* upb_alloc_func(upb_alloc* alloc, void* ptr, size_t oldsize,
+ size_t size);
+
+/* A upb_alloc is a possibly-stateful allocator object.
+ *
+ * It could either be an arena allocator (which doesn't require individual
+ * `free()` calls) or a regular `malloc()` (which does). The client must
+ * therefore free memory unless it knows that the allocator is an arena
+ * allocator. */
+struct upb_alloc {
+ upb_alloc_func* func;
+};
+
+UPB_INLINE void* upb_malloc(upb_alloc* alloc, size_t size) {
+ UPB_ASSERT(alloc);
+ return alloc->func(alloc, NULL, 0, size);
+}
+
+UPB_INLINE void* upb_realloc(upb_alloc* alloc, void* ptr, size_t oldsize,
+ size_t size) {
+ UPB_ASSERT(alloc);
+ return alloc->func(alloc, ptr, oldsize, size);
+}
+
+UPB_INLINE void upb_free(upb_alloc* alloc, void* ptr) {
+ UPB_ASSERT(alloc);
+ alloc->func(alloc, ptr, 0, 0);
+}
+
+// The global allocator used by upb. Uses the standard malloc()/free().
+
+extern upb_alloc upb_alloc_global;
+
+/* Functions that hard-code the global malloc.
+ *
+ * We still get benefit because we can put custom logic into our global
+ * allocator, like injecting out-of-memory faults in debug/testing builds. */
+
+UPB_INLINE void* upb_gmalloc(size_t size) {
+ return upb_malloc(&upb_alloc_global, size);
+}
+
+UPB_INLINE void* upb_grealloc(void* ptr, size_t oldsize, size_t size) {
+ return upb_realloc(&upb_alloc_global, ptr, oldsize, size);
+}
+
+UPB_INLINE void upb_gfree(void* ptr) { upb_free(&upb_alloc_global, ptr); }
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_MEM_ALLOC_H_ */
diff --git a/upb/upb/mem/arena.c b/upb/upb/mem/arena.c
new file mode 100644
index 0000000..569986c
--- /dev/null
+++ b/upb/upb/mem/arena.c
@@ -0,0 +1,371 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/mem/internal/arena.h"
+
+#include "upb/port/atomic.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+struct _upb_MemBlock {
+ // Atomic only for the benefit of SpaceAllocated().
+ UPB_ATOMIC(_upb_MemBlock*) next;
+ uint32_t size;
+ // Data follows.
+};
+
+static const size_t memblock_reserve =
+ UPB_ALIGN_UP(sizeof(_upb_MemBlock), UPB_MALLOC_ALIGN);
+
+typedef struct _upb_ArenaRoot {
+ upb_Arena* root;
+ uintptr_t tagged_count;
+} _upb_ArenaRoot;
+
+static _upb_ArenaRoot _upb_Arena_FindRoot(upb_Arena* a) {
+ uintptr_t poc = upb_Atomic_Load(&a->parent_or_count, memory_order_acquire);
+ while (_upb_Arena_IsTaggedPointer(poc)) {
+ upb_Arena* next = _upb_Arena_PointerFromTagged(poc);
+ UPB_ASSERT(a != next);
+ uintptr_t next_poc =
+ upb_Atomic_Load(&next->parent_or_count, memory_order_acquire);
+
+ if (_upb_Arena_IsTaggedPointer(next_poc)) {
+ // To keep complexity down, we lazily collapse levels of the tree. This
+ // keeps it flat in the final case, but doesn't cost much incrementally.
+ //
+ // Path splitting keeps time complexity down, see:
+ // https://en.wikipedia.org/wiki/Disjoint-set_data_structure
+ //
+ // We can safely use a relaxed atomic here because all threads doing this
+ // will converge on the same value and we don't need memory orderings to
+ // be visible.
+ //
+ // This is true because:
+ // - If no fuses occur, this will eventually become the root.
+ // - If fuses are actively occurring, the root may change, but the
+ // invariant is that `parent_or_count` merely points to *a* parent.
+ //
+ // In other words, it is moving towards "the" root, and that root may move
+ // further away over time, but the path towards that root will continue to
+ // be valid and the creation of the path carries all the memory orderings
+ // required.
+ UPB_ASSERT(a != _upb_Arena_PointerFromTagged(next_poc));
+ upb_Atomic_Store(&a->parent_or_count, next_poc, memory_order_relaxed);
+ }
+ a = next;
+ poc = next_poc;
+ }
+ return (_upb_ArenaRoot){.root = a, .tagged_count = poc};
+}
+
+size_t upb_Arena_SpaceAllocated(upb_Arena* arena) {
+ arena = _upb_Arena_FindRoot(arena).root;
+ size_t memsize = 0;
+
+ while (arena != NULL) {
+ _upb_MemBlock* block =
+ upb_Atomic_Load(&arena->blocks, memory_order_relaxed);
+ while (block != NULL) {
+ memsize += sizeof(_upb_MemBlock) + block->size;
+ block = upb_Atomic_Load(&block->next, memory_order_relaxed);
+ }
+ arena = upb_Atomic_Load(&arena->next, memory_order_relaxed);
+ }
+
+ return memsize;
+}
+
+uint32_t upb_Arena_DebugRefCount(upb_Arena* a) {
+ // These loads could probably be relaxed, but given that this is debug-only,
+ // it's not worth introducing a new variant for it.
+ uintptr_t poc = upb_Atomic_Load(&a->parent_or_count, memory_order_acquire);
+ while (_upb_Arena_IsTaggedPointer(poc)) {
+ a = _upb_Arena_PointerFromTagged(poc);
+ poc = upb_Atomic_Load(&a->parent_or_count, memory_order_acquire);
+ }
+ return _upb_Arena_RefCountFromTagged(poc);
+}
+
+static void upb_Arena_AddBlock(upb_Arena* a, void* ptr, size_t size) {
+ _upb_MemBlock* block = ptr;
+
+ // Insert into linked list.
+ block->size = (uint32_t)size;
+ upb_Atomic_Init(&block->next, a->blocks);
+ upb_Atomic_Store(&a->blocks, block, memory_order_release);
+
+ a->head.ptr = UPB_PTR_AT(block, memblock_reserve, char);
+ a->head.end = UPB_PTR_AT(block, size, char);
+
+ UPB_POISON_MEMORY_REGION(a->head.ptr, a->head.end - a->head.ptr);
+}
+
+static bool upb_Arena_AllocBlock(upb_Arena* a, size_t size) {
+ if (!a->block_alloc) return false;
+ _upb_MemBlock* last_block = upb_Atomic_Load(&a->blocks, memory_order_acquire);
+ size_t last_size = last_block != NULL ? last_block->size : 128;
+ size_t block_size = UPB_MAX(size, last_size * 2) + memblock_reserve;
+ _upb_MemBlock* block = upb_malloc(upb_Arena_BlockAlloc(a), block_size);
+
+ if (!block) return false;
+ upb_Arena_AddBlock(a, block, block_size);
+ return true;
+}
+
+void* _upb_Arena_SlowMalloc(upb_Arena* a, size_t size) {
+ if (!upb_Arena_AllocBlock(a, size)) return NULL; /* Out of memory. */
+ UPB_ASSERT(_upb_ArenaHas(a) >= size);
+ return upb_Arena_Malloc(a, size);
+}
+
+/* Public Arena API ***********************************************************/
+
+static upb_Arena* upb_Arena_InitSlow(upb_alloc* alloc) {
+ const size_t first_block_overhead = sizeof(upb_Arena) + memblock_reserve;
+ upb_Arena* a;
+
+ /* We need to malloc the initial block. */
+ char* mem;
+ size_t n = first_block_overhead + 256;
+ if (!alloc || !(mem = upb_malloc(alloc, n))) {
+ return NULL;
+ }
+
+ a = UPB_PTR_AT(mem, n - sizeof(*a), upb_Arena);
+ n -= sizeof(*a);
+
+ a->block_alloc = upb_Arena_MakeBlockAlloc(alloc, 0);
+ upb_Atomic_Init(&a->parent_or_count, _upb_Arena_TaggedFromRefcount(1));
+ upb_Atomic_Init(&a->next, NULL);
+ upb_Atomic_Init(&a->tail, a);
+ upb_Atomic_Init(&a->blocks, NULL);
+
+ upb_Arena_AddBlock(a, mem, n);
+
+ return a;
+}
+
+upb_Arena* upb_Arena_Init(void* mem, size_t n, upb_alloc* alloc) {
+ upb_Arena* a;
+
+ if (n) {
+ /* Align initial pointer up so that we return properly-aligned pointers. */
+ void* aligned = (void*)UPB_ALIGN_UP((uintptr_t)mem, UPB_MALLOC_ALIGN);
+ size_t delta = (uintptr_t)aligned - (uintptr_t)mem;
+ n = delta <= n ? n - delta : 0;
+ mem = aligned;
+ }
+
+ /* Round block size down to alignof(*a) since we will allocate the arena
+ * itself at the end. */
+ n = UPB_ALIGN_DOWN(n, UPB_ALIGN_OF(upb_Arena));
+
+ if (UPB_UNLIKELY(n < sizeof(upb_Arena))) {
+ return upb_Arena_InitSlow(alloc);
+ }
+
+ a = UPB_PTR_AT(mem, n - sizeof(*a), upb_Arena);
+
+ upb_Atomic_Init(&a->parent_or_count, _upb_Arena_TaggedFromRefcount(1));
+ upb_Atomic_Init(&a->next, NULL);
+ upb_Atomic_Init(&a->tail, a);
+ upb_Atomic_Init(&a->blocks, NULL);
+ a->block_alloc = upb_Arena_MakeBlockAlloc(alloc, 1);
+ a->head.ptr = mem;
+ a->head.end = UPB_PTR_AT(mem, n - sizeof(*a), char);
+
+ return a;
+}
+
+static void arena_dofree(upb_Arena* a) {
+ UPB_ASSERT(_upb_Arena_RefCountFromTagged(a->parent_or_count) == 1);
+
+ while (a != NULL) {
+ // Load first since arena itself is likely from one of its blocks.
+ upb_Arena* next_arena =
+ (upb_Arena*)upb_Atomic_Load(&a->next, memory_order_acquire);
+ upb_alloc* block_alloc = upb_Arena_BlockAlloc(a);
+ _upb_MemBlock* block = upb_Atomic_Load(&a->blocks, memory_order_acquire);
+ while (block != NULL) {
+ // Load first since we are deleting block.
+ _upb_MemBlock* next_block =
+ upb_Atomic_Load(&block->next, memory_order_acquire);
+ upb_free(block_alloc, block);
+ block = next_block;
+ }
+ a = next_arena;
+ }
+}
+
+void upb_Arena_Free(upb_Arena* a) {
+ uintptr_t poc = upb_Atomic_Load(&a->parent_or_count, memory_order_acquire);
+retry:
+ while (_upb_Arena_IsTaggedPointer(poc)) {
+ a = _upb_Arena_PointerFromTagged(poc);
+ poc = upb_Atomic_Load(&a->parent_or_count, memory_order_acquire);
+ }
+
+ // compare_exchange or fetch_sub are RMW operations, which are more
+ // expensive then direct loads. As an optimization, we only do RMW ops
+ // when we need to update things for other threads to see.
+ if (poc == _upb_Arena_TaggedFromRefcount(1)) {
+ arena_dofree(a);
+ return;
+ }
+
+ if (upb_Atomic_CompareExchangeWeak(
+ &a->parent_or_count, &poc,
+ _upb_Arena_TaggedFromRefcount(_upb_Arena_RefCountFromTagged(poc) - 1),
+ memory_order_release, memory_order_acquire)) {
+ // We were >1 and we decremented it successfully, so we are done.
+ return;
+ }
+
+ // We failed our update, so someone has done something, retry the whole
+ // process, but the failed exchange reloaded `poc` for us.
+ goto retry;
+}
+
+static void _upb_Arena_DoFuseArenaLists(upb_Arena* const parent,
+ upb_Arena* child) {
+ upb_Arena* parent_tail = upb_Atomic_Load(&parent->tail, memory_order_relaxed);
+ do {
+ // Our tail might be stale, but it will always converge to the true tail.
+ upb_Arena* parent_tail_next =
+ upb_Atomic_Load(&parent_tail->next, memory_order_relaxed);
+ while (parent_tail_next != NULL) {
+ parent_tail = parent_tail_next;
+ parent_tail_next =
+ upb_Atomic_Load(&parent_tail->next, memory_order_relaxed);
+ }
+
+ upb_Arena* displaced =
+ upb_Atomic_Exchange(&parent_tail->next, child, memory_order_relaxed);
+ parent_tail = upb_Atomic_Load(&child->tail, memory_order_relaxed);
+
+ // If we displaced something that got installed racily, we can simply
+ // reinstall it on our new tail.
+ child = displaced;
+ } while (child != NULL);
+
+ upb_Atomic_Store(&parent->tail, parent_tail, memory_order_relaxed);
+}
+
+static upb_Arena* _upb_Arena_DoFuse(upb_Arena* a1, upb_Arena* a2,
+ uintptr_t* ref_delta) {
+ // `parent_or_count` has two disctint modes
+ // - parent pointer mode
+ // - refcount mode
+ //
+ // In parent pointer mode, it may change what pointer it refers to in the
+ // tree, but it will always approach a root. Any operation that walks the
+ // tree to the root may collapse levels of the tree concurrently.
+ _upb_ArenaRoot r1 = _upb_Arena_FindRoot(a1);
+ _upb_ArenaRoot r2 = _upb_Arena_FindRoot(a2);
+
+ if (r1.root == r2.root) return r1.root; // Already fused.
+
+ // Avoid cycles by always fusing into the root with the lower address.
+ if ((uintptr_t)r1.root > (uintptr_t)r2.root) {
+ _upb_ArenaRoot tmp = r1;
+ r1 = r2;
+ r2 = tmp;
+ }
+
+ // The moment we install `r1` as the parent for `r2` all racing frees may
+ // immediately begin decrementing `r1`'s refcount (including pending
+ // increments to that refcount and their frees!). We need to add `r2`'s refs
+ // now, so that `r1` can withstand any unrefs that come from r2.
+ //
+ // Note that while it is possible for `r2`'s refcount to increase
+ // asynchronously, we will not actually do the reparenting operation below
+ // unless `r2`'s refcount is unchanged from when we read it.
+ //
+ // Note that we may have done this previously, either to this node or a
+ // different node, during a previous and failed DoFuse() attempt. But we will
+ // not lose track of these refs because we always add them to our overall
+ // delta.
+ uintptr_t r2_untagged_count = r2.tagged_count & ~1;
+ uintptr_t with_r2_refs = r1.tagged_count + r2_untagged_count;
+ if (!upb_Atomic_CompareExchangeStrong(
+ &r1.root->parent_or_count, &r1.tagged_count, with_r2_refs,
+ memory_order_release, memory_order_acquire)) {
+ return NULL;
+ }
+
+ // Perform the actual fuse by removing the refs from `r2` and swapping in the
+ // parent pointer.
+ if (!upb_Atomic_CompareExchangeStrong(
+ &r2.root->parent_or_count, &r2.tagged_count,
+ _upb_Arena_TaggedFromPointer(r1.root), memory_order_release,
+ memory_order_acquire)) {
+ // We'll need to remove the excess refs we added to r1 previously.
+ *ref_delta += r2_untagged_count;
+ return NULL;
+ }
+
+ // Now that the fuse has been performed (and can no longer fail) we need to
+ // append `r2` to `r1`'s linked list.
+ _upb_Arena_DoFuseArenaLists(r1.root, r2.root);
+ return r1.root;
+}
+
+static bool _upb_Arena_FixupRefs(upb_Arena* new_root, uintptr_t ref_delta) {
+ if (ref_delta == 0) return true; // No fixup required.
+ uintptr_t poc =
+ upb_Atomic_Load(&new_root->parent_or_count, memory_order_relaxed);
+ if (_upb_Arena_IsTaggedPointer(poc)) return false;
+ uintptr_t with_refs = poc - ref_delta;
+ UPB_ASSERT(!_upb_Arena_IsTaggedPointer(with_refs));
+ return upb_Atomic_CompareExchangeStrong(&new_root->parent_or_count, &poc,
+ with_refs, memory_order_relaxed,
+ memory_order_relaxed);
+}
+
+bool upb_Arena_Fuse(upb_Arena* a1, upb_Arena* a2) {
+ if (a1 == a2) return true; // trivial fuse
+
+ // Do not fuse initial blocks since we cannot lifetime extend them.
+ // Any other fuse scenario is allowed.
+ if (upb_Arena_HasInitialBlock(a1) || upb_Arena_HasInitialBlock(a2)) {
+ return false;
+ }
+
+ // The number of refs we ultimately need to transfer to the new root.
+ uintptr_t ref_delta = 0;
+ while (true) {
+ upb_Arena* new_root = _upb_Arena_DoFuse(a1, a2, &ref_delta);
+ if (new_root != NULL && _upb_Arena_FixupRefs(new_root, ref_delta)) {
+ return true;
+ }
+ }
+}
diff --git a/upb/upb/mem/arena.h b/upb/upb/mem/arena.h
new file mode 100644
index 0000000..72e8ddb
--- /dev/null
+++ b/upb/upb/mem/arena.h
@@ -0,0 +1,152 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+/* upb_Arena is a specific allocator implementation that uses arena allocation.
+ * The user provides an allocator that will be used to allocate the underlying
+ * arena blocks. Arenas by nature do not require the individual allocations
+ * to be freed. However the Arena does allow users to register cleanup
+ * functions that will run when the arena is destroyed.
+ *
+ * A upb_Arena is *not* thread-safe.
+ *
+ * You could write a thread-safe arena allocator that satisfies the
+ * upb_alloc interface, but it would not be as efficient for the
+ * single-threaded case. */
+
+#ifndef UPB_MEM_ARENA_H_
+#define UPB_MEM_ARENA_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "upb/mem/alloc.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+typedef struct upb_Arena upb_Arena;
+
+typedef struct {
+ char *ptr, *end;
+} _upb_ArenaHead;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Creates an arena from the given initial block (if any -- n may be 0).
+// Additional blocks will be allocated from |alloc|. If |alloc| is NULL, this
+// is a fixed-size arena and cannot grow.
+UPB_API upb_Arena* upb_Arena_Init(void* mem, size_t n, upb_alloc* alloc);
+
+UPB_API void upb_Arena_Free(upb_Arena* a);
+UPB_API bool upb_Arena_Fuse(upb_Arena* a, upb_Arena* b);
+
+void* _upb_Arena_SlowMalloc(upb_Arena* a, size_t size);
+size_t upb_Arena_SpaceAllocated(upb_Arena* arena);
+uint32_t upb_Arena_DebugRefCount(upb_Arena* arena);
+
+UPB_INLINE size_t _upb_ArenaHas(upb_Arena* a) {
+ _upb_ArenaHead* h = (_upb_ArenaHead*)a;
+ return (size_t)(h->end - h->ptr);
+}
+
+UPB_API_INLINE void* upb_Arena_Malloc(upb_Arena* a, size_t size) {
+ size = UPB_ALIGN_MALLOC(size);
+ size_t span = size + UPB_ASAN_GUARD_SIZE;
+ if (UPB_UNLIKELY(_upb_ArenaHas(a) < span)) {
+ return _upb_Arena_SlowMalloc(a, size);
+ }
+
+ // We have enough space to do a fast malloc.
+ _upb_ArenaHead* h = (_upb_ArenaHead*)a;
+ void* ret = h->ptr;
+ UPB_ASSERT(UPB_ALIGN_MALLOC((uintptr_t)ret) == (uintptr_t)ret);
+ UPB_ASSERT(UPB_ALIGN_MALLOC(size) == size);
+ UPB_UNPOISON_MEMORY_REGION(ret, size);
+
+ h->ptr += span;
+
+ return ret;
+}
+
+// Shrinks the last alloc from arena.
+// REQUIRES: (ptr, oldsize) was the last malloc/realloc from this arena.
+// We could also add a upb_Arena_TryShrinkLast() which is simply a no-op if
+// this was not the last alloc.
+UPB_API_INLINE void upb_Arena_ShrinkLast(upb_Arena* a, void* ptr,
+ size_t oldsize, size_t size) {
+ _upb_ArenaHead* h = (_upb_ArenaHead*)a;
+ oldsize = UPB_ALIGN_MALLOC(oldsize);
+ size = UPB_ALIGN_MALLOC(size);
+ // Must be the last alloc.
+ UPB_ASSERT((char*)ptr + oldsize == h->ptr - UPB_ASAN_GUARD_SIZE);
+ UPB_ASSERT(size <= oldsize);
+ h->ptr = (char*)ptr + size;
+}
+
+UPB_API_INLINE void* upb_Arena_Realloc(upb_Arena* a, void* ptr, size_t oldsize,
+ size_t size) {
+ _upb_ArenaHead* h = (_upb_ArenaHead*)a;
+ oldsize = UPB_ALIGN_MALLOC(oldsize);
+ size = UPB_ALIGN_MALLOC(size);
+ bool is_most_recent_alloc = (uintptr_t)ptr + oldsize == (uintptr_t)h->ptr;
+
+ if (is_most_recent_alloc) {
+ ptrdiff_t diff = size - oldsize;
+ if ((ptrdiff_t)_upb_ArenaHas(a) >= diff) {
+ h->ptr += diff;
+ return ptr;
+ }
+ } else if (size <= oldsize) {
+ return ptr;
+ }
+
+ void* ret = upb_Arena_Malloc(a, size);
+
+ if (ret && oldsize > 0) {
+ memcpy(ret, ptr, UPB_MIN(oldsize, size));
+ }
+
+ return ret;
+}
+
+UPB_API_INLINE upb_Arena* upb_Arena_New(void) {
+ return upb_Arena_Init(NULL, 0, &upb_alloc_global);
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_MEM_ARENA_H_ */
diff --git a/upb/upb/mem/arena.hpp b/upb/upb/mem/arena.hpp
new file mode 100644
index 0000000..cd86d0d
--- /dev/null
+++ b/upb/upb/mem/arena.hpp
@@ -0,0 +1,77 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MEM_ARENA_HPP_
+#define UPB_MEM_ARENA_HPP_
+
+#include <memory>
+
+#include "upb/mem/arena.h"
+
+namespace upb {
+
+class Arena {
+ public:
+ // A simple arena with no initial memory block and the default allocator.
+ Arena() : ptr_(upb_Arena_New(), upb_Arena_Free) {}
+ Arena(char* initial_block, size_t size)
+ : ptr_(upb_Arena_Init(initial_block, size, &upb_alloc_global),
+ upb_Arena_Free) {}
+
+ upb_Arena* ptr() const { return ptr_.get(); }
+
+ void Fuse(Arena& other) { upb_Arena_Fuse(ptr(), other.ptr()); }
+
+ protected:
+ std::unique_ptr<upb_Arena, decltype(&upb_Arena_Free)> ptr_;
+};
+
+// InlinedArena seeds the arenas with a predefined amount of memory. No
+// heap memory will be allocated until the initial block is exceeded.
+template <int N>
+class InlinedArena : public Arena {
+ public:
+ InlinedArena() : Arena(initial_block_, N) {}
+ ~InlinedArena() {
+ // Explicitly destroy the arena now so that it does not outlive
+ // initial_block_.
+ ptr_.reset();
+ }
+
+ private:
+ InlinedArena(const InlinedArena*) = delete;
+ InlinedArena& operator=(const InlinedArena*) = delete;
+
+ char initial_block_[N];
+};
+
+} // namespace upb
+
+#endif // UPB_MEM_ARENA_HPP_
diff --git a/upb/upb/mem/arena_test.cc b/upb/upb/mem/arena_test.cc
new file mode 100644
index 0000000..8bcf671
--- /dev/null
+++ b/upb/upb/mem/arena_test.cc
@@ -0,0 +1,196 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/mem/arena.h"
+
+#include <array>
+#include <atomic>
+#include <thread>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/random/distributions.h"
+#include "absl/random/random.h"
+#include "absl/synchronization/notification.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+namespace {
+
+TEST(ArenaTest, ArenaFuse) {
+ upb_Arena* arena1 = upb_Arena_New();
+ upb_Arena* arena2 = upb_Arena_New();
+
+ EXPECT_TRUE(upb_Arena_Fuse(arena1, arena2));
+
+ upb_Arena_Free(arena1);
+ upb_Arena_Free(arena2);
+}
+
+/* Do nothing allocator for testing */
+extern "C" void* TestAllocFunc(upb_alloc* alloc, void* ptr, size_t oldsize,
+ size_t size) {
+ return upb_alloc_global.func(alloc, ptr, oldsize, size);
+}
+
+TEST(ArenaTest, FuseWithInitialBlock) {
+ char buf1[1024];
+ char buf2[1024];
+ upb_Arena* arenas[] = {upb_Arena_Init(buf1, 1024, &upb_alloc_global),
+ upb_Arena_Init(buf2, 1024, &upb_alloc_global),
+ upb_Arena_Init(NULL, 0, &upb_alloc_global)};
+ int size = sizeof(arenas) / sizeof(arenas[0]);
+ for (int i = 0; i < size; ++i) {
+ for (int j = 0; j < size; ++j) {
+ if (i == j) {
+ // Fuse to self is always allowed.
+ EXPECT_TRUE(upb_Arena_Fuse(arenas[i], arenas[j]));
+ } else {
+ EXPECT_FALSE(upb_Arena_Fuse(arenas[i], arenas[j]));
+ }
+ }
+ }
+
+ for (int i = 0; i < size; ++i) upb_Arena_Free(arenas[i]);
+}
+
+class Environment {
+ public:
+ ~Environment() {
+ for (auto& atom : arenas_) {
+ auto* a = atom.load(std::memory_order_relaxed);
+ if (a != nullptr) upb_Arena_Free(a);
+ }
+ }
+
+ void RandomNewFree(absl::BitGen& gen) {
+ auto* old = SwapRandomly(gen, upb_Arena_New());
+ if (old != nullptr) upb_Arena_Free(old);
+ }
+
+ void RandomFuse(absl::BitGen& gen) {
+ std::array<upb_Arena*, 2> old;
+ for (auto& o : old) {
+ o = SwapRandomly(gen, nullptr);
+ if (o == nullptr) o = upb_Arena_New();
+ }
+
+ EXPECT_TRUE(upb_Arena_Fuse(old[0], old[1]));
+ for (auto& o : old) {
+ o = SwapRandomly(gen, o);
+ if (o != nullptr) upb_Arena_Free(o);
+ }
+ }
+
+ void RandomPoke(absl::BitGen& gen) {
+ switch (absl::Uniform(gen, 0, 2)) {
+ case 0:
+ RandomNewFree(gen);
+ break;
+ case 1:
+ RandomFuse(gen);
+ break;
+ default:
+ break;
+ }
+ }
+
+ private:
+ upb_Arena* SwapRandomly(absl::BitGen& gen, upb_Arena* a) {
+ return arenas_[absl::Uniform<size_t>(gen, 0, arenas_.size())].exchange(
+ a, std::memory_order_acq_rel);
+ }
+
+ std::array<std::atomic<upb_Arena*>, 100> arenas_ = {};
+};
+
+TEST(ArenaTest, FuzzSingleThreaded) {
+ Environment env;
+
+ absl::BitGen gen;
+ auto end = absl::Now() + absl::Seconds(0.5);
+ while (absl::Now() < end) {
+ env.RandomPoke(gen);
+ }
+}
+
+#ifdef UPB_USE_C11_ATOMICS
+
+TEST(ArenaTest, FuzzFuseFreeRace) {
+ Environment env;
+
+ absl::Notification done;
+ std::vector<std::thread> threads;
+ for (int i = 0; i < 10; ++i) {
+ threads.emplace_back([&]() {
+ absl::BitGen gen;
+ while (!done.HasBeenNotified()) {
+ env.RandomNewFree(gen);
+ }
+ });
+ }
+
+ absl::BitGen gen;
+ auto end = absl::Now() + absl::Seconds(2);
+ while (absl::Now() < end) {
+ env.RandomFuse(gen);
+ }
+ done.Notify();
+ for (auto& t : threads) t.join();
+}
+
+TEST(ArenaTest, FuzzFuseFuseRace) {
+ Environment env;
+
+ absl::Notification done;
+ std::vector<std::thread> threads;
+ for (int i = 0; i < 10; ++i) {
+ threads.emplace_back([&]() {
+ absl::BitGen gen;
+ while (!done.HasBeenNotified()) {
+ env.RandomFuse(gen);
+ }
+ });
+ }
+
+ absl::BitGen gen;
+ auto end = absl::Now() + absl::Seconds(2);
+ while (absl::Now() < end) {
+ env.RandomFuse(gen);
+ }
+ done.Notify();
+ for (auto& t : threads) t.join();
+}
+
+#endif
+
+} // namespace
diff --git a/upb/upb/mem/internal/arena.h b/upb/upb/mem/internal/arena.h
new file mode 100644
index 0000000..3fb434a
--- /dev/null
+++ b/upb/upb/mem/internal/arena.h
@@ -0,0 +1,117 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MEM_INTERNAL_ARENA_H_
+#define UPB_MEM_INTERNAL_ARENA_H_
+
+#include "upb/mem/arena.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+typedef struct _upb_MemBlock _upb_MemBlock;
+
+struct upb_Arena {
+ _upb_ArenaHead head;
+
+ // upb_alloc* together with a low bit which signals if there is an initial
+ // block.
+ uintptr_t block_alloc;
+
+ // When multiple arenas are fused together, each arena points to a parent
+ // arena (root points to itself). The root tracks how many live arenas
+ // reference it.
+
+ // The low bit is tagged:
+ // 0: pointer to parent
+ // 1: count, left shifted by one
+ UPB_ATOMIC(uintptr_t) parent_or_count;
+
+ // All nodes that are fused together are in a singly-linked list.
+ UPB_ATOMIC(upb_Arena*) next; // NULL at end of list.
+
+ // The last element of the linked list. This is present only as an
+ // optimization, so that we do not have to iterate over all members for every
+ // fuse. Only significant for an arena root. In other cases it is ignored.
+ UPB_ATOMIC(upb_Arena*) tail; // == self when no other list members.
+
+ // Linked list of blocks to free/cleanup. Atomic only for the benefit of
+ // upb_Arena_SpaceAllocated().
+ UPB_ATOMIC(_upb_MemBlock*) blocks;
+};
+
+UPB_INLINE bool _upb_Arena_IsTaggedRefcount(uintptr_t parent_or_count) {
+ return (parent_or_count & 1) == 1;
+}
+
+UPB_INLINE bool _upb_Arena_IsTaggedPointer(uintptr_t parent_or_count) {
+ return (parent_or_count & 1) == 0;
+}
+
+UPB_INLINE uintptr_t _upb_Arena_RefCountFromTagged(uintptr_t parent_or_count) {
+ UPB_ASSERT(_upb_Arena_IsTaggedRefcount(parent_or_count));
+ return parent_or_count >> 1;
+}
+
+UPB_INLINE uintptr_t _upb_Arena_TaggedFromRefcount(uintptr_t refcount) {
+ uintptr_t parent_or_count = (refcount << 1) | 1;
+ UPB_ASSERT(_upb_Arena_IsTaggedRefcount(parent_or_count));
+ return parent_or_count;
+}
+
+UPB_INLINE upb_Arena* _upb_Arena_PointerFromTagged(uintptr_t parent_or_count) {
+ UPB_ASSERT(_upb_Arena_IsTaggedPointer(parent_or_count));
+ return (upb_Arena*)parent_or_count;
+}
+
+UPB_INLINE uintptr_t _upb_Arena_TaggedFromPointer(upb_Arena* a) {
+ uintptr_t parent_or_count = (uintptr_t)a;
+ UPB_ASSERT(_upb_Arena_IsTaggedPointer(parent_or_count));
+ return parent_or_count;
+}
+
+UPB_INLINE upb_alloc* upb_Arena_BlockAlloc(upb_Arena* arena) {
+ return (upb_alloc*)(arena->block_alloc & ~0x1);
+}
+
+UPB_INLINE uintptr_t upb_Arena_MakeBlockAlloc(upb_alloc* alloc,
+ bool has_initial) {
+ uintptr_t alloc_uint = (uintptr_t)alloc;
+ UPB_ASSERT((alloc_uint & 1) == 0);
+ return alloc_uint | (has_initial ? 1 : 0);
+}
+
+UPB_INLINE bool upb_Arena_HasInitialBlock(upb_Arena* arena) {
+ return arena->block_alloc & 0x1;
+}
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_MEM_INTERNAL_ARENA_H_ */
diff --git a/upb/upb/message/BUILD b/upb/upb/message/BUILD
new file mode 100644
index 0000000..b9f5d15
--- /dev/null
+++ b/upb/upb/message/BUILD
@@ -0,0 +1,333 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load("//bazel:build_defs.bzl", "UPB_DEFAULT_COPTS")
+load(
+ "//bazel:upb_proto_library.bzl",
+ "upb_proto_library",
+ "upb_proto_reflection_library",
+)
+
+cc_library(
+ name = "accessors",
+ srcs = [
+ "accessors.c",
+ "internal/accessors.h",
+ ],
+ hdrs = [
+ "accessors.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":internal",
+ ":message",
+ "//:base",
+ "//:collections",
+ "//:collections_internal",
+ "//:eps_copy_input_stream",
+ "//:mini_table",
+ "//:mini_table_internal",
+ "//:port",
+ "//:wire",
+ "//:wire_reader",
+ ],
+)
+
+cc_library(
+ name = "accessors_internal",
+ hdrs = [
+ "internal/accessors.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":internal",
+ "//:collections_internal",
+ "//:mini_table_internal",
+ "//:port",
+ ],
+)
+
+cc_library(
+ name = "copy",
+ srcs = [
+ "copy.c",
+ ],
+ hdrs = [
+ "copy.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":accessors",
+ ":internal",
+ ":message",
+ ":types",
+ "//:base",
+ "//:collections_internal",
+ "//:mem",
+ "//:mini_table",
+ "//:mini_table_internal",
+ "//:port",
+ ],
+)
+
+cc_library(
+ name = "internal",
+ srcs = [
+ "message.c",
+ "message.h",
+ ],
+ hdrs = [
+ "internal/extension.h",
+ "internal/message.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":types",
+ "//:base",
+ "//:base_internal",
+ "//:hash",
+ "//:mem",
+ "//:mini_table",
+ "//:port",
+ ],
+)
+
+cc_library(
+ name = "message",
+ hdrs = [
+ "message.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":internal",
+ ":types",
+ "//:base",
+ "//:mem",
+ "//:mini_table",
+ "//:port",
+ ],
+)
+
+cc_library(
+ name = "promote",
+ srcs = [
+ "promote.c",
+ ],
+ hdrs = [
+ "promote.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":accessors",
+ ":internal",
+ ":message",
+ ":types",
+ "//:base",
+ "//:collections",
+ "//:collections_internal",
+ "//:eps_copy_input_stream",
+ "//:hash",
+ "//:mem",
+ "//:mini_table",
+ "//:port",
+ "//:wire",
+ "//:wire_reader",
+ ],
+)
+
+cc_library(
+ name = "rep_internal",
+ hdrs = [
+ "internal/map_entry.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ "//:base",
+ "//:hash",
+ ],
+)
+
+cc_library(
+ name = "split64",
+ hdrs = [
+ "accessors_split64.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":accessors",
+ "//:port",
+ ],
+)
+
+cc_library(
+ name = "tagged_ptr",
+ hdrs = ["tagged_ptr.h"],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":types",
+ "//:port",
+ ],
+)
+
+cc_library(
+ name = "types",
+ hdrs = [
+ "types.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [],
+)
+
+# TODO(salo): Move these proto library targets to //third_party/protobuf/BUILD
+# after we have the monorepo.
+proto_library(
+ name = "message_test_proto",
+ testonly = 1,
+ srcs = ["test.proto"],
+ deps = ["@com_google_protobuf//src/google/protobuf:test_messages_proto3_proto"],
+)
+
+upb_proto_library(
+ name = "message_test_upb_proto",
+ testonly = 1,
+ deps = [":message_test_proto"],
+)
+
+upb_proto_reflection_library(
+ name = "message_test_upb_proto_reflection",
+ testonly = 1,
+ deps = [":message_test_proto"],
+)
+
+cc_test(
+ name = "accessors_test",
+ srcs = ["accessors_test.cc"],
+ deps = [
+ ":accessors",
+ "//:base",
+ "//:collections",
+ "//:mini_descriptor",
+ "//:mini_descriptor_internal",
+ "//:mini_table",
+ "//:port",
+ "//:wire",
+ "//upb/test:test_messages_proto2_upb_proto",
+ "//upb/test:test_messages_proto3_upb_proto",
+ "//upb/test:test_upb_proto",
+ "@com_google_absl//absl/container:flat_hash_set",
+ "@com_google_googletest//:gtest_main",
+ "@com_google_protobuf//:protobuf",
+ ],
+)
+
+cc_test(
+ name = "copy_test",
+ srcs = ["copy_test.cc"],
+ deps = [
+ ":accessors",
+ ":copy",
+ ":internal",
+ ":message",
+ "//:base",
+ "//:collections",
+ "//:mem",
+ "//:mini_table",
+ "//:wire",
+ "//upb/test:test_messages_proto2_upb_proto",
+ "//upb/test:test_messages_proto3_upb_proto",
+ "//upb/test:test_upb_proto",
+ "@com_google_absl//absl/container:flat_hash_set",
+ "@com_google_googletest//:gtest_main",
+ "@com_google_protobuf//:protobuf",
+ ],
+)
+
+cc_test(
+ name = "promote_test",
+ srcs = ["promote_test.cc"],
+ deps = [
+ ":accessors",
+ ":copy",
+ ":promote",
+ "//:base",
+ "//:collections",
+ "//:mem",
+ "//:mini_descriptor_internal",
+ "//:mini_table",
+ "//:port",
+ "//:wire",
+ "//upb/test:test_messages_proto2_upb_proto",
+ "//upb/test:test_messages_proto3_upb_proto",
+ "//upb/test:test_upb_proto",
+ "@com_google_absl//absl/container:flat_hash_set",
+ "@com_google_googletest//:gtest_main",
+ "@com_google_protobuf//:protobuf",
+ ],
+)
+
+# This test doesn't directly include any files from this subdir so it probably
+# should live elsewhere.
+cc_test(
+ name = "test",
+ srcs = ["test.cc"],
+ deps = [
+ ":message_test_upb_proto",
+ ":message_test_upb_proto_reflection",
+ "//:base",
+ "//:json",
+ "//:mem",
+ "//:reflection",
+ "//:wire",
+ "//upb/test:fuzz_util",
+ "//upb/test:test_messages_proto3_upb_proto",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+# begin:github_only
+filegroup(
+ name = "source_files",
+ srcs = glob(
+ [
+ "**/*.c",
+ "**/*.h",
+ ],
+ ),
+ visibility = [
+ "//cmake:__pkg__",
+ "//python/dist:__pkg__",
+ ]
+)
+# end:github_only
diff --git a/upb/upb/message/accessors.c b/upb/upb/message/accessors.c
new file mode 100644
index 0000000..687259f
--- /dev/null
+++ b/upb/upb/message/accessors.c
@@ -0,0 +1,94 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/message/accessors.h"
+
+#include "upb/collections/array.h"
+#include "upb/collections/internal/array.h"
+#include "upb/collections/map.h"
+#include "upb/message/message.h"
+#include "upb/mini_table/field.h"
+#include "upb/wire/decode.h"
+#include "upb/wire/encode.h"
+#include "upb/wire/eps_copy_input_stream.h"
+#include "upb/wire/reader.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+upb_MapInsertStatus upb_Message_InsertMapEntry(upb_Map* map,
+ const upb_MiniTable* mini_table,
+ const upb_MiniTableField* field,
+ upb_Message* map_entry_message,
+ upb_Arena* arena) {
+ const upb_MiniTable* map_entry_mini_table =
+ mini_table->subs[field->UPB_PRIVATE(submsg_index)].submsg;
+ UPB_ASSERT(map_entry_mini_table);
+ UPB_ASSERT(map_entry_mini_table->field_count == 2);
+ const upb_MiniTableField* map_entry_key_field =
+ &map_entry_mini_table->fields[0];
+ const upb_MiniTableField* map_entry_value_field =
+ &map_entry_mini_table->fields[1];
+ // Map key/value cannot have explicit defaults,
+ // hence assuming a zero default is valid.
+ upb_MessageValue default_val;
+ memset(&default_val, 0, sizeof(upb_MessageValue));
+ upb_MessageValue map_entry_key;
+ upb_MessageValue map_entry_value;
+ _upb_Message_GetField(map_entry_message, map_entry_key_field, &default_val,
+ &map_entry_key);
+ _upb_Message_GetField(map_entry_message, map_entry_value_field, &default_val,
+ &map_entry_value);
+ return upb_Map_Insert(map, map_entry_key, map_entry_value, arena);
+}
+
+bool upb_Message_IsExactlyEqual(const upb_Message* m1, const upb_Message* m2,
+ const upb_MiniTable* layout) {
+ if (m1 == m2) return true;
+
+ int opts = kUpb_EncodeOption_SkipUnknown | kUpb_EncodeOption_Deterministic;
+ upb_Arena* a = upb_Arena_New();
+
+ // Compare deterministically serialized payloads with no unknown fields.
+ size_t size1, size2;
+ char *data1, *data2;
+ upb_EncodeStatus status1 = upb_Encode(m1, layout, opts, a, &data1, &size1);
+ upb_EncodeStatus status2 = upb_Encode(m2, layout, opts, a, &data2, &size2);
+
+ if (status1 != kUpb_EncodeStatus_Ok || status2 != kUpb_EncodeStatus_Ok) {
+ // TODO(salo): How should we fail here? (In Ruby we throw an exception.)
+ upb_Arena_Free(a);
+ return false;
+ }
+
+ const bool ret = (size1 == size2) && (memcmp(data1, data2, size1) == 0);
+ upb_Arena_Free(a);
+ return ret;
+}
diff --git a/upb/upb/message/accessors.h b/upb/upb/message/accessors.h
new file mode 100644
index 0000000..0305977
--- /dev/null
+++ b/upb/upb/message/accessors.h
@@ -0,0 +1,403 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MESSAGE_ACCESSORS_H_
+#define UPB_MESSAGE_ACCESSORS_H_
+
+#include "upb/base/descriptor_constants.h"
+#include "upb/collections/array.h"
+#include "upb/collections/internal/array.h"
+#include "upb/collections/internal/map.h"
+#include "upb/collections/map.h"
+#include "upb/message/internal/accessors.h"
+#include "upb/message/internal/message.h"
+#include "upb/mini_table/enum.h"
+#include "upb/mini_table/field.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+UPB_API_INLINE void upb_Message_ClearField(upb_Message* msg,
+ const upb_MiniTableField* field) {
+ if (upb_MiniTableField_IsExtension(field)) {
+ const upb_MiniTableExtension* ext = (const upb_MiniTableExtension*)field;
+ _upb_Message_ClearExtensionField(msg, ext);
+ } else {
+ _upb_Message_ClearNonExtensionField(msg, field);
+ }
+}
+
+UPB_API_INLINE void upb_Message_Clear(upb_Message* msg,
+ const upb_MiniTable* l) {
+ // Note: Can't use UPB_PTR_AT() here because we are doing pointer subtraction.
+ char* mem = (char*)msg - sizeof(upb_Message_Internal);
+ memset(mem, 0, upb_msg_sizeof(l));
+}
+
+UPB_API_INLINE bool upb_Message_HasField(const upb_Message* msg,
+ const upb_MiniTableField* field) {
+ if (upb_MiniTableField_IsExtension(field)) {
+ const upb_MiniTableExtension* ext = (const upb_MiniTableExtension*)field;
+ return _upb_Message_HasExtensionField(msg, ext);
+ } else {
+ return _upb_Message_HasNonExtensionField(msg, field);
+ }
+}
+
+UPB_API_INLINE uint32_t upb_Message_WhichOneofFieldNumber(
+ const upb_Message* message, const upb_MiniTableField* oneof_field) {
+ UPB_ASSUME(_upb_MiniTableField_InOneOf(oneof_field));
+ return _upb_getoneofcase_field(message, oneof_field);
+}
+
+UPB_API_INLINE bool upb_Message_GetBool(const upb_Message* msg,
+ const upb_MiniTableField* field,
+ bool default_val) {
+ UPB_ASSUME(upb_MiniTableField_CType(field) == kUpb_CType_Bool);
+ UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_1Byte);
+ UPB_ASSUME(!upb_IsRepeatedOrMap(field));
+ bool ret;
+ _upb_Message_GetField(msg, field, &default_val, &ret);
+ return ret;
+}
+
+UPB_API_INLINE bool upb_Message_SetBool(upb_Message* msg,
+ const upb_MiniTableField* field,
+ bool value, upb_Arena* a) {
+ UPB_ASSUME(upb_MiniTableField_CType(field) == kUpb_CType_Bool);
+ UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_1Byte);
+ UPB_ASSUME(!upb_IsRepeatedOrMap(field));
+ return _upb_Message_SetField(msg, field, &value, a);
+}
+
+UPB_API_INLINE int32_t upb_Message_GetInt32(const upb_Message* msg,
+ const upb_MiniTableField* field,
+ int32_t default_val) {
+ UPB_ASSUME(upb_MiniTableField_CType(field) == kUpb_CType_Int32 ||
+ upb_MiniTableField_CType(field) == kUpb_CType_Enum);
+ UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_4Byte);
+ UPB_ASSUME(!upb_IsRepeatedOrMap(field));
+ int32_t ret;
+ _upb_Message_GetField(msg, field, &default_val, &ret);
+ return ret;
+}
+
+UPB_API_INLINE bool upb_Message_SetInt32(upb_Message* msg,
+ const upb_MiniTableField* field,
+ int32_t value, upb_Arena* a) {
+ UPB_ASSUME(upb_MiniTableField_CType(field) == kUpb_CType_Int32 ||
+ upb_MiniTableField_CType(field) == kUpb_CType_Enum);
+ UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_4Byte);
+ UPB_ASSUME(!upb_IsRepeatedOrMap(field));
+ return _upb_Message_SetField(msg, field, &value, a);
+}
+
+UPB_API_INLINE uint32_t upb_Message_GetUInt32(const upb_Message* msg,
+ const upb_MiniTableField* field,
+ uint32_t default_val) {
+ UPB_ASSUME(upb_MiniTableField_CType(field) == kUpb_CType_UInt32);
+ UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_4Byte);
+ UPB_ASSUME(!upb_IsRepeatedOrMap(field));
+ uint32_t ret;
+ _upb_Message_GetField(msg, field, &default_val, &ret);
+ return ret;
+}
+
+UPB_API_INLINE bool upb_Message_SetUInt32(upb_Message* msg,
+ const upb_MiniTableField* field,
+ uint32_t value, upb_Arena* a) {
+ UPB_ASSUME(upb_MiniTableField_CType(field) == kUpb_CType_UInt32);
+ UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_4Byte);
+ UPB_ASSUME(!upb_IsRepeatedOrMap(field));
+ return _upb_Message_SetField(msg, field, &value, a);
+}
+
+UPB_API_INLINE void upb_Message_SetClosedEnum(
+ upb_Message* msg, const upb_MiniTable* msg_mini_table,
+ const upb_MiniTableField* field, int32_t value) {
+ UPB_ASSERT(upb_MiniTableField_IsClosedEnum(field));
+ UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_4Byte);
+ UPB_ASSUME(!upb_IsRepeatedOrMap(field));
+ UPB_ASSERT(upb_MiniTableEnum_CheckValue(
+ upb_MiniTable_GetSubEnumTable(msg_mini_table, field), value));
+ _upb_Message_SetNonExtensionField(msg, field, &value);
+}
+
+UPB_API_INLINE int64_t upb_Message_GetInt64(const upb_Message* msg,
+ const upb_MiniTableField* field,
+ uint64_t default_val) {
+ UPB_ASSUME(upb_MiniTableField_CType(field) == kUpb_CType_Int64);
+ UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_8Byte);
+ UPB_ASSUME(!upb_IsRepeatedOrMap(field));
+ int64_t ret;
+ _upb_Message_GetField(msg, field, &default_val, &ret);
+ return ret;
+}
+
+UPB_API_INLINE bool upb_Message_SetInt64(upb_Message* msg,
+ const upb_MiniTableField* field,
+ int64_t value, upb_Arena* a) {
+ UPB_ASSUME(upb_MiniTableField_CType(field) == kUpb_CType_Int64);
+ UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_8Byte);
+ UPB_ASSUME(!upb_IsRepeatedOrMap(field));
+ return _upb_Message_SetField(msg, field, &value, a);
+}
+
+UPB_API_INLINE uint64_t upb_Message_GetUInt64(const upb_Message* msg,
+ const upb_MiniTableField* field,
+ uint64_t default_val) {
+ UPB_ASSUME(upb_MiniTableField_CType(field) == kUpb_CType_UInt64);
+ UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_8Byte);
+ UPB_ASSUME(!upb_IsRepeatedOrMap(field));
+ uint64_t ret;
+ _upb_Message_GetField(msg, field, &default_val, &ret);
+ return ret;
+}
+
+UPB_API_INLINE bool upb_Message_SetUInt64(upb_Message* msg,
+ const upb_MiniTableField* field,
+ uint64_t value, upb_Arena* a) {
+ UPB_ASSUME(upb_MiniTableField_CType(field) == kUpb_CType_UInt64);
+ UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_8Byte);
+ UPB_ASSUME(!upb_IsRepeatedOrMap(field));
+ return _upb_Message_SetField(msg, field, &value, a);
+}
+
+UPB_API_INLINE float upb_Message_GetFloat(const upb_Message* msg,
+ const upb_MiniTableField* field,
+ float default_val) {
+ UPB_ASSUME(upb_MiniTableField_CType(field) == kUpb_CType_Float);
+ UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_4Byte);
+ UPB_ASSUME(!upb_IsRepeatedOrMap(field));
+ float ret;
+ _upb_Message_GetField(msg, field, &default_val, &ret);
+ return ret;
+}
+
+UPB_API_INLINE bool upb_Message_SetFloat(upb_Message* msg,
+ const upb_MiniTableField* field,
+ float value, upb_Arena* a) {
+ UPB_ASSUME(upb_MiniTableField_CType(field) == kUpb_CType_Float);
+ UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_4Byte);
+ UPB_ASSUME(!upb_IsRepeatedOrMap(field));
+ return _upb_Message_SetField(msg, field, &value, a);
+}
+
+UPB_API_INLINE double upb_Message_GetDouble(const upb_Message* msg,
+ const upb_MiniTableField* field,
+ double default_val) {
+ UPB_ASSUME(upb_MiniTableField_CType(field) == kUpb_CType_Double);
+ UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_8Byte);
+ UPB_ASSUME(!upb_IsRepeatedOrMap(field));
+ double ret;
+ _upb_Message_GetField(msg, field, &default_val, &ret);
+ return ret;
+}
+
+UPB_API_INLINE bool upb_Message_SetDouble(upb_Message* msg,
+ const upb_MiniTableField* field,
+ double value, upb_Arena* a) {
+ UPB_ASSUME(upb_MiniTableField_CType(field) == kUpb_CType_Double);
+ UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_8Byte);
+ UPB_ASSUME(!upb_IsRepeatedOrMap(field));
+ return _upb_Message_SetField(msg, field, &value, a);
+}
+
+UPB_API_INLINE upb_StringView
+upb_Message_GetString(const upb_Message* msg, const upb_MiniTableField* field,
+ upb_StringView def_val) {
+ UPB_ASSUME(upb_MiniTableField_CType(field) == kUpb_CType_String ||
+ upb_MiniTableField_CType(field) == kUpb_CType_Bytes);
+ UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_StringView);
+ UPB_ASSUME(!upb_IsRepeatedOrMap(field));
+ upb_StringView ret;
+ _upb_Message_GetField(msg, field, &def_val, &ret);
+ return ret;
+}
+
+UPB_API_INLINE bool upb_Message_SetString(upb_Message* msg,
+ const upb_MiniTableField* field,
+ upb_StringView value, upb_Arena* a) {
+ UPB_ASSUME(upb_MiniTableField_CType(field) == kUpb_CType_String ||
+ upb_MiniTableField_CType(field) == kUpb_CType_Bytes);
+ UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_StringView);
+ UPB_ASSUME(!upb_IsRepeatedOrMap(field));
+ return _upb_Message_SetField(msg, field, &value, a);
+}
+
+UPB_API_INLINE upb_TaggedMessagePtr upb_Message_GetTaggedMessagePtr(
+ const upb_Message* msg, const upb_MiniTableField* field,
+ upb_Message* default_val) {
+ UPB_ASSUME(upb_MiniTableField_CType(field) == kUpb_CType_Message);
+ UPB_ASSUME(_upb_MiniTableField_GetRep(field) ==
+ UPB_SIZE(kUpb_FieldRep_4Byte, kUpb_FieldRep_8Byte));
+ UPB_ASSUME(!upb_IsRepeatedOrMap(field));
+ upb_TaggedMessagePtr tagged;
+ _upb_Message_GetNonExtensionField(msg, field, &default_val, &tagged);
+ return tagged;
+}
+
+UPB_API_INLINE const upb_Message* upb_Message_GetMessage(
+ const upb_Message* msg, const upb_MiniTableField* field,
+ upb_Message* default_val) {
+ upb_TaggedMessagePtr tagged =
+ upb_Message_GetTaggedMessagePtr(msg, field, default_val);
+ return upb_TaggedMessagePtr_GetNonEmptyMessage(tagged);
+}
+
+// For internal use only; users cannot set tagged messages because only the
+// parser and the message copier are allowed to directly create an empty
+// message.
+UPB_API_INLINE void _upb_Message_SetTaggedMessagePtr(
+ upb_Message* msg, const upb_MiniTable* mini_table,
+ const upb_MiniTableField* field, upb_TaggedMessagePtr sub_message) {
+ UPB_ASSUME(upb_MiniTableField_CType(field) == kUpb_CType_Message);
+ UPB_ASSUME(_upb_MiniTableField_GetRep(field) ==
+ UPB_SIZE(kUpb_FieldRep_4Byte, kUpb_FieldRep_8Byte));
+ UPB_ASSUME(!upb_IsRepeatedOrMap(field));
+ UPB_ASSERT(mini_table->subs[field->UPB_PRIVATE(submsg_index)].submsg);
+ _upb_Message_SetNonExtensionField(msg, field, &sub_message);
+}
+
+UPB_API_INLINE void upb_Message_SetMessage(upb_Message* msg,
+ const upb_MiniTable* mini_table,
+ const upb_MiniTableField* field,
+ upb_Message* sub_message) {
+ _upb_Message_SetTaggedMessagePtr(
+ msg, mini_table, field, _upb_TaggedMessagePtr_Pack(sub_message, false));
+}
+
+UPB_API_INLINE upb_Message* upb_Message_GetOrCreateMutableMessage(
+ upb_Message* msg, const upb_MiniTable* mini_table,
+ const upb_MiniTableField* field, upb_Arena* arena) {
+ UPB_ASSERT(arena);
+ UPB_ASSUME(upb_MiniTableField_CType(field) == kUpb_CType_Message);
+ upb_Message* sub_message = *UPB_PTR_AT(msg, field->offset, upb_Message*);
+ if (!sub_message) {
+ const upb_MiniTable* sub_mini_table =
+ mini_table->subs[field->UPB_PRIVATE(submsg_index)].submsg;
+ UPB_ASSERT(sub_mini_table);
+ sub_message = _upb_Message_New(sub_mini_table, arena);
+ *UPB_PTR_AT(msg, field->offset, upb_Message*) = sub_message;
+ _upb_Message_SetPresence(msg, field);
+ }
+ return sub_message;
+}
+
+UPB_API_INLINE const upb_Array* upb_Message_GetArray(
+ const upb_Message* msg, const upb_MiniTableField* field) {
+ _upb_MiniTableField_CheckIsArray(field);
+ upb_Array* ret;
+ const upb_Array* default_val = NULL;
+ _upb_Message_GetNonExtensionField(msg, field, &default_val, &ret);
+ return ret;
+}
+
+UPB_API_INLINE upb_Array* upb_Message_GetMutableArray(
+ upb_Message* msg, const upb_MiniTableField* field) {
+ _upb_MiniTableField_CheckIsArray(field);
+ return (upb_Array*)upb_Message_GetArray(msg, field);
+}
+
+UPB_API_INLINE upb_Array* upb_Message_GetOrCreateMutableArray(
+ upb_Message* msg, const upb_MiniTableField* field, upb_Arena* arena) {
+ UPB_ASSERT(arena);
+ _upb_MiniTableField_CheckIsArray(field);
+ upb_Array* array = upb_Message_GetMutableArray(msg, field);
+ if (!array) {
+ array = _upb_Array_New(arena, 4, _upb_MiniTable_ElementSizeLg2(field));
+ // Check again due to: https://godbolt.org/z/7WfaoKG1r
+ _upb_MiniTableField_CheckIsArray(field);
+ _upb_Message_SetField(msg, field, &array, arena);
+ }
+ return array;
+}
+
+UPB_API_INLINE void* upb_Message_ResizeArrayUninitialized(
+ upb_Message* msg, const upb_MiniTableField* field, size_t size,
+ upb_Arena* arena) {
+ _upb_MiniTableField_CheckIsArray(field);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, size, arena)) return NULL;
+ return _upb_array_ptr(arr);
+}
+
+UPB_API_INLINE const upb_Map* upb_Message_GetMap(
+ const upb_Message* msg, const upb_MiniTableField* field) {
+ _upb_MiniTableField_CheckIsMap(field);
+ _upb_Message_AssertMapIsUntagged(msg, field);
+ upb_Map* ret;
+ const upb_Map* default_val = NULL;
+ _upb_Message_GetNonExtensionField(msg, field, &default_val, &ret);
+ return ret;
+}
+
+UPB_API_INLINE upb_Map* upb_Message_GetMutableMap(
+ upb_Message* msg, const upb_MiniTableField* field) {
+ return (upb_Map*)upb_Message_GetMap(msg, field);
+}
+
+UPB_API_INLINE upb_Map* upb_Message_GetOrCreateMutableMap(
+ upb_Message* msg, const upb_MiniTable* map_entry_mini_table,
+ const upb_MiniTableField* field, upb_Arena* arena) {
+ UPB_ASSUME(upb_MiniTableField_CType(field) == kUpb_CType_Message);
+ const upb_MiniTableField* map_entry_key_field =
+ &map_entry_mini_table->fields[0];
+ const upb_MiniTableField* map_entry_value_field =
+ &map_entry_mini_table->fields[1];
+ return _upb_Message_GetOrCreateMutableMap(
+ msg, field,
+ _upb_Map_CTypeSize(upb_MiniTableField_CType(map_entry_key_field)),
+ _upb_Map_CTypeSize(upb_MiniTableField_CType(map_entry_value_field)),
+ arena);
+}
+
+// Updates a map entry given an entry message.
+upb_MapInsertStatus upb_Message_InsertMapEntry(upb_Map* map,
+ const upb_MiniTable* mini_table,
+ const upb_MiniTableField* field,
+ upb_Message* map_entry_message,
+ upb_Arena* arena);
+
+// Compares two messages by serializing them and calling memcmp().
+bool upb_Message_IsExactlyEqual(const upb_Message* m1, const upb_Message* m2,
+ const upb_MiniTable* layout);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif // UPB_MESSAGE_ACCESSORS_H_
diff --git a/upb/upb/message/accessors_split64.h b/upb/upb/message/accessors_split64.h
new file mode 100644
index 0000000..4308c8c
--- /dev/null
+++ b/upb/upb/message/accessors_split64.h
@@ -0,0 +1,89 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MESSAGE_ACCESSORS_SPLIT64_H_
+#define UPB_MESSAGE_ACCESSORS_SPLIT64_H_
+
+#include "upb/message/accessors.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// JavaScript doesn't directly support 64-bit ints so we must split them.
+
+UPB_API_INLINE uint32_t upb_Message_GetInt64Hi(const upb_Message* msg,
+ const upb_MiniTableField* field,
+ uint32_t default_value) {
+ return (uint32_t)(upb_Message_GetInt64(msg, field, default_value) >> 32);
+}
+
+UPB_API_INLINE uint32_t upb_Message_GetInt64Lo(const upb_Message* msg,
+ const upb_MiniTableField* field,
+ uint32_t default_value) {
+ return (uint32_t)upb_Message_GetInt64(msg, field, default_value);
+}
+
+UPB_API_INLINE bool upb_Message_SetInt64Split(upb_Message* msg,
+ const upb_MiniTableField* field,
+ uint32_t hi, uint32_t lo,
+ upb_Arena* arena) {
+ return upb_Message_SetInt64(msg, field, ((int64_t)hi << 32) | lo, arena);
+}
+
+UPB_API_INLINE uint32_t upb_Message_GetUInt64Hi(const upb_Message* msg,
+ const upb_MiniTableField* field,
+ uint32_t default_value) {
+ return (uint32_t)(upb_Message_GetUInt64(msg, field, default_value) >> 32);
+}
+
+UPB_API_INLINE uint32_t upb_Message_GetUInt64Lo(const upb_Message* msg,
+ const upb_MiniTableField* field,
+ uint32_t default_value) {
+ return (uint32_t)upb_Message_GetUInt64(msg, field, default_value);
+}
+
+UPB_API_INLINE bool upb_Message_SetUInt64Split(upb_Message* msg,
+ const upb_MiniTableField* field,
+ uint32_t hi, uint32_t lo,
+ upb_Arena* arena) {
+ return upb_Message_SetUInt64(msg, field, ((uint64_t)hi << 32) | lo, arena);
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif // UPB_MESSAGE_ACCESSORS_SPLIT64_H_
diff --git a/upb/upb/message/accessors_test.cc b/upb/upb/message/accessors_test.cc
new file mode 100644
index 0000000..8ac54dd
--- /dev/null
+++ b/upb/upb/message/accessors_test.cc
@@ -0,0 +1,427 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+/* Test of mini table accessors.
+ *
+ * Messages are created and mutated using generated code, and then
+ * accessed through reflective APIs exposed through mini table accessors.
+ */
+
+#include "upb/message/accessors.h"
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "google/protobuf/test_messages_proto2.upb.h"
+#include "google/protobuf/test_messages_proto3.upb.h"
+#include "upb/base/string_view.h"
+#include "upb/collections/array.h"
+#include "upb/mini_descriptor/decode.h"
+#include "upb/mini_descriptor/internal/encode.hpp"
+#include "upb/mini_descriptor/internal/modifiers.h"
+#include "upb/test/test.upb.h"
+#include "upb/wire/decode.h"
+
+// Must be last
+#include "upb/port/def.inc"
+
+namespace {
+
+// Proto2 test messages field numbers used for reflective access.
+const uint32_t kFieldOptionalInt32 = 1;
+const uint32_t kFieldOptionalUInt32 = 3;
+const uint32_t kFieldOptionalBool = 13;
+const uint32_t kFieldOptionalString = 14;
+const uint32_t kFieldOptionalNestedMessage = 18;
+const uint32_t kFieldOptionalRepeatedInt32 = 31;
+const uint32_t kFieldOptionalNestedMessageA = 1;
+const uint32_t kFieldOptionalOneOfUInt32 = 111;
+const uint32_t kFieldOptionalOneOfString = 113;
+
+const uint32_t kFieldProto3OptionalInt64 = 2;
+const uint32_t kFieldProto3OptionalUInt64 = 4;
+
+const char kTestStr1[] = "Hello1";
+const char kTestStr2[] = "Hello2";
+const int32_t kTestInt32 = 567;
+const int32_t kTestUInt32 = 0xF1234567;
+const uint64_t kTestUInt64 = 0xFEDCBAFF87654321;
+
+const upb_MiniTableField* find_proto3_field(int field_number) {
+ return upb_MiniTable_FindFieldByNumber(
+ &protobuf_test_messages_proto3_TestAllTypesProto3_msg_init, field_number);
+}
+
+const upb_MiniTableField* find_proto2_field(int field_number) {
+ return upb_MiniTable_FindFieldByNumber(
+ &protobuf_test_messages_proto2_TestAllTypesProto2_msg_init, field_number);
+}
+
+TEST(GeneratedCode, HazzersProto2) {
+ upb_Arena* arena = upb_Arena_New();
+ protobuf_test_messages_proto2_TestAllTypesProto2* msg =
+ protobuf_test_messages_proto2_TestAllTypesProto2_new(arena);
+
+ // Scalar/Boolean.
+ const upb_MiniTableField* optional_bool_field =
+ find_proto2_field(kFieldOptionalBool);
+ EXPECT_EQ(false, upb_Message_HasField(msg, optional_bool_field));
+ protobuf_test_messages_proto2_TestAllTypesProto2_set_optional_bool(msg, true);
+ EXPECT_EQ(true, upb_Message_HasField(msg, optional_bool_field));
+ upb_Message_ClearField(msg, optional_bool_field);
+ EXPECT_EQ(false, upb_Message_HasField(msg, optional_bool_field));
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_bool(msg));
+
+ // String.
+ const upb_MiniTableField* optional_string_field =
+ find_proto2_field(kFieldOptionalString);
+ EXPECT_EQ(false, upb_Message_HasField(msg, optional_string_field));
+ protobuf_test_messages_proto2_TestAllTypesProto2_set_optional_string(
+ msg, upb_StringView_FromString(kTestStr1));
+ EXPECT_EQ(true, upb_Message_HasField(msg, optional_string_field));
+ EXPECT_EQ(
+ strlen(kTestStr1),
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_string(msg)
+ .size);
+ upb_Message_ClearField(msg, optional_string_field);
+ EXPECT_EQ(false, upb_Message_HasField(msg, optional_string_field));
+ EXPECT_EQ(
+ 0, protobuf_test_messages_proto2_TestAllTypesProto2_optional_string(msg)
+ .size);
+
+ // Message.
+ const upb_MiniTableField* optional_message_field =
+ find_proto2_field(kFieldOptionalNestedMessage);
+ EXPECT_EQ(false, upb_Message_HasField(msg, optional_message_field));
+ protobuf_test_messages_proto2_TestAllTypesProto2_mutable_optional_nested_message(
+ msg, arena);
+ EXPECT_EQ(true, upb_Message_HasField(msg, optional_message_field));
+ upb_Message_ClearField(msg, optional_message_field);
+ EXPECT_EQ(false, upb_Message_HasField(msg, optional_message_field));
+ EXPECT_EQ(
+ true,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_nested_message(
+ msg) == nullptr);
+
+ // One of.
+ const upb_MiniTableField* optional_oneof_uint32_field =
+ find_proto2_field(kFieldOptionalOneOfUInt32);
+ const upb_MiniTableField* optional_oneof_string_field =
+ find_proto2_field(kFieldOptionalOneOfString);
+
+ EXPECT_EQ(false, upb_Message_HasField(msg, optional_oneof_uint32_field));
+ EXPECT_EQ(false, upb_Message_HasField(msg, optional_oneof_string_field));
+ protobuf_test_messages_proto2_TestAllTypesProto2_set_oneof_uint32(msg, 123);
+ EXPECT_EQ(true, upb_Message_HasField(msg, optional_oneof_uint32_field));
+ EXPECT_EQ(false, upb_Message_HasField(msg, optional_oneof_string_field));
+ protobuf_test_messages_proto2_TestAllTypesProto2_set_oneof_string(
+ msg, upb_StringView_FromString(kTestStr1));
+ EXPECT_EQ(false, upb_Message_HasField(msg, optional_oneof_uint32_field));
+ EXPECT_EQ(true, upb_Message_HasField(msg, optional_oneof_string_field));
+ upb_Message_ClearField(msg, optional_oneof_uint32_field);
+ EXPECT_EQ(false, upb_Message_HasField(msg, optional_oneof_uint32_field));
+ EXPECT_EQ(true, upb_Message_HasField(msg, optional_oneof_string_field));
+ upb_Message_ClearField(msg, optional_oneof_string_field);
+ EXPECT_EQ(false, upb_Message_HasField(msg, optional_oneof_uint32_field));
+ EXPECT_EQ(false, upb_Message_HasField(msg, optional_oneof_string_field));
+
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, ScalarsProto2) {
+ upb_Arena* arena = upb_Arena_New();
+ protobuf_test_messages_proto2_TestAllTypesProto2* msg =
+ protobuf_test_messages_proto2_TestAllTypesProto2_new(arena);
+
+ const upb_MiniTableField* optional_int32_field =
+ find_proto2_field(kFieldOptionalInt32);
+
+ EXPECT_EQ(
+ 0, protobuf_test_messages_proto2_TestAllTypesProto2_optional_int32(msg));
+
+ EXPECT_EQ(0, upb_Message_GetInt32(msg, optional_int32_field, 0));
+ upb_Message_SetInt32(msg, optional_int32_field, kTestInt32, nullptr);
+ EXPECT_EQ(true, upb_Message_HasField(msg, optional_int32_field));
+ EXPECT_EQ(kTestInt32, upb_Message_GetInt32(msg, optional_int32_field, 0));
+ EXPECT_EQ(
+ kTestInt32,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_int32(msg));
+
+ const upb_MiniTableField* optional_uint32_field =
+ find_proto2_field(kFieldOptionalUInt32);
+
+ EXPECT_EQ(
+ 0, protobuf_test_messages_proto2_TestAllTypesProto2_optional_uint32(msg));
+ EXPECT_EQ(0, upb_Message_GetUInt32(msg, optional_uint32_field, 0));
+ upb_Message_SetUInt32(msg, optional_uint32_field, kTestUInt32, nullptr);
+ EXPECT_EQ(kTestUInt32, upb_Message_GetUInt32(msg, optional_uint32_field, 0));
+ EXPECT_EQ(
+ kTestUInt32,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_uint32(msg));
+
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, ScalarProto3) {
+ upb_Arena* arena = upb_Arena_New();
+ protobuf_test_messages_proto3_TestAllTypesProto3* msg =
+ protobuf_test_messages_proto3_TestAllTypesProto3_new(arena);
+
+ const upb_MiniTableField* optional_int64_field =
+ find_proto3_field(kFieldProto3OptionalInt64);
+ const upb_MiniTableField* optional_uint64_field =
+ find_proto3_field(kFieldProto3OptionalUInt64);
+
+ EXPECT_EQ(
+ 0, protobuf_test_messages_proto3_TestAllTypesProto3_optional_int64(msg));
+ upb_Message_SetInt64(msg, optional_int64_field, -1, nullptr);
+ EXPECT_EQ(
+ -1, protobuf_test_messages_proto3_TestAllTypesProto3_optional_int64(msg));
+ EXPECT_EQ(-1, upb_Message_GetInt64(msg, optional_int64_field, 0));
+
+ EXPECT_EQ(
+ 0, protobuf_test_messages_proto3_TestAllTypesProto3_optional_uint64(msg));
+ upb_Message_SetUInt64(msg, optional_uint64_field, kTestUInt64, nullptr);
+ EXPECT_EQ(
+ kTestUInt64,
+ protobuf_test_messages_proto3_TestAllTypesProto3_optional_uint64(msg));
+ EXPECT_EQ(kTestUInt64, upb_Message_GetUInt64(msg, optional_uint64_field, 0));
+
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, Strings) {
+ upb_Arena* arena = upb_Arena_New();
+ protobuf_test_messages_proto2_TestAllTypesProto2* msg =
+ protobuf_test_messages_proto2_TestAllTypesProto2_new(arena);
+
+ const upb_MiniTableField* optional_string_field =
+ find_proto2_field(kFieldOptionalString);
+
+ // Test default.
+ EXPECT_EQ(false, upb_Message_HasField(msg, optional_string_field));
+ // Test read after write using C.
+ protobuf_test_messages_proto2_TestAllTypesProto2_set_optional_string(
+ msg, upb_StringView_FromString(kTestStr1));
+ EXPECT_EQ(true, upb_Message_HasField(msg, optional_string_field));
+ upb_StringView value = upb_Message_GetString(msg, optional_string_field,
+ upb_StringView{NULL, 0});
+ std::string read_value = std::string(value.data, value.size);
+ EXPECT_EQ(kTestStr1, read_value);
+ // Clear.
+ upb_Message_ClearField(msg, optional_string_field);
+ EXPECT_EQ(false, upb_Message_HasField(msg, optional_string_field));
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_string(
+ msg));
+ upb_Message_SetString(msg, optional_string_field,
+ upb_StringView_FromString(kTestStr2), nullptr);
+ EXPECT_EQ(true, upb_Message_HasField(msg, optional_string_field));
+ EXPECT_EQ(
+ true,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_string(
+ msg));
+ value = protobuf_test_messages_proto2_TestAllTypesProto2_optional_string(msg);
+ read_value = std::string(value.data, value.size);
+ EXPECT_EQ(kTestStr2, read_value);
+
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, SubMessage) {
+ upb_Arena* arena = upb_Arena_New();
+ protobuf_test_messages_proto2_TestAllTypesProto2* msg =
+ protobuf_test_messages_proto2_TestAllTypesProto2_new(arena);
+
+ const upb_MiniTableField* optional_message_field =
+ find_proto2_field(kFieldOptionalNestedMessage);
+
+ const upb_Message* test_message =
+ upb_Message_GetMessage(msg, optional_message_field, nullptr);
+ EXPECT_EQ(nullptr, test_message);
+
+ EXPECT_EQ(false, upb_Message_HasField(msg, optional_message_field));
+
+ // Get mutable using C API.
+ protobuf_test_messages_proto2_TestAllTypesProto2_NestedMessage* nested_message =
+ protobuf_test_messages_proto2_TestAllTypesProto2_mutable_optional_nested_message(
+ msg, arena);
+ EXPECT_EQ(true, nested_message != nullptr);
+ EXPECT_EQ(true, upb_Message_HasField(msg, optional_message_field));
+ protobuf_test_messages_proto2_TestAllTypesProto2_NestedMessage_set_a(
+ nested_message, 5);
+
+ // Read back using mini table API.
+ const upb_Message* sub_message =
+ upb_Message_GetMessage(msg, optional_message_field, nullptr);
+ EXPECT_EQ(true, sub_message != nullptr);
+
+ const upb_MiniTableField* nested_message_a_field =
+ upb_MiniTable_FindFieldByNumber(
+ &protobuf_test_messages_proto2_TestAllTypesProto2_NestedMessage_msg_init,
+ kFieldOptionalNestedMessageA);
+ EXPECT_EQ(5, upb_Message_GetInt32(sub_message, nested_message_a_field, 0));
+
+ upb_Message_ClearField(msg, optional_message_field);
+ EXPECT_EQ(
+ nullptr,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_nested_message(
+ msg));
+ EXPECT_EQ(false, upb_Message_HasField(msg, optional_message_field));
+
+ upb_Message* new_nested_message =
+ protobuf_test_messages_proto2_TestAllTypesProto2_NestedMessage_new(arena);
+ upb_Message_SetInt32(new_nested_message, nested_message_a_field, 123,
+ nullptr);
+ upb_Message_SetMessage(
+ msg, &protobuf_test_messages_proto2_TestAllTypesProto2_msg_init,
+ optional_message_field, new_nested_message);
+
+ upb_Message* mutable_message = upb_Message_GetOrCreateMutableMessage(
+ msg, &protobuf_test_messages_proto2_TestAllTypesProto2_msg_init,
+ optional_message_field, arena);
+ EXPECT_EQ(
+ true,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_nested_message(
+ msg) != nullptr);
+ EXPECT_EQ(true, upb_Message_HasField(msg, optional_message_field));
+ EXPECT_EQ(123,
+ upb_Message_GetInt32(mutable_message, nested_message_a_field, 0));
+
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, RepeatedScalar) {
+ upb_Arena* arena = upb_Arena_New();
+ protobuf_test_messages_proto2_TestAllTypesProto2* msg =
+ protobuf_test_messages_proto2_TestAllTypesProto2_new(arena);
+
+ const upb_MiniTableField* repeated_int32_field =
+ find_proto2_field(kFieldOptionalRepeatedInt32);
+
+ size_t len;
+ const int32_t* arr =
+ protobuf_test_messages_proto2_TestAllTypesProto2_repeated_int32(msg,
+ &len);
+ // Test Get/Set Array values, validate with C API.
+ EXPECT_EQ(0, len);
+ EXPECT_EQ(nullptr, arr);
+ EXPECT_EQ(nullptr, upb_Message_GetArray(msg, repeated_int32_field));
+ protobuf_test_messages_proto2_TestAllTypesProto2_resize_repeated_int32(
+ msg, 10, arena);
+ int32_t* mutable_values =
+ protobuf_test_messages_proto2_TestAllTypesProto2_mutable_repeated_int32(
+ msg, &len);
+ mutable_values[5] = 123;
+ const upb_Array* readonly_arr =
+ upb_Message_GetArray(msg, repeated_int32_field);
+ EXPECT_EQ(123, upb_Array_Get(readonly_arr, 5).int32_val);
+
+ upb_MessageValue new_value;
+ new_value.int32_val = 567;
+ upb_Array* mutable_array =
+ upb_Message_GetMutableArray(msg, repeated_int32_field);
+ upb_Array_Set(mutable_array, 5, new_value);
+ EXPECT_EQ(new_value.int32_val,
+ protobuf_test_messages_proto2_TestAllTypesProto2_repeated_int32(
+ msg, &len)[5]);
+
+ // Test resize.
+ bool result = upb_Array_Resize(mutable_array, 20, arena);
+ EXPECT_EQ(true, result);
+ upb_Array_Set(mutable_array, 19, new_value);
+ EXPECT_EQ(new_value.int32_val,
+ protobuf_test_messages_proto2_TestAllTypesProto2_repeated_int32(
+ msg, &len)[19]);
+ upb_Array_Resize(mutable_array, 0, arena);
+ const int32_t* zero_length_array =
+ protobuf_test_messages_proto2_TestAllTypesProto2_repeated_int32(msg,
+ &len);
+ EXPECT_EQ(0, len);
+ EXPECT_EQ(true, zero_length_array != nullptr);
+
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, GetMutableMessage) {
+ upb_Arena* arena = upb_Arena_New();
+ protobuf_test_messages_proto2_TestAllTypesProto2* msg =
+ protobuf_test_messages_proto2_TestAllTypesProto2_new(arena);
+ // Message.
+ const upb_MiniTableField* optional_message_field =
+ find_proto2_field(kFieldOptionalNestedMessage);
+ upb_Message* msg1 = upb_Message_GetOrCreateMutableMessage(
+ msg, &protobuf_test_messages_proto2_TestAllTypesProto2_msg_init,
+ optional_message_field, arena);
+ upb_Message* msg2 = upb_Message_GetOrCreateMutableMessage(
+ msg, &protobuf_test_messages_proto2_TestAllTypesProto2_msg_init,
+ optional_message_field, arena);
+ // Verify that newly constructed sub message is stored in msg.
+ EXPECT_EQ(msg1, msg2);
+
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, EnumClosedCheck) {
+ upb_Arena* arena = upb_Arena_New();
+
+ upb::MtDataEncoder e;
+ e.StartMessage(0);
+ e.PutField(kUpb_FieldType_Int32, 4, 0);
+ e.PutField(kUpb_FieldType_Enum, 5, 0);
+
+ upb_Status status;
+ upb_Status_Clear(&status);
+ upb_MiniTable* table =
+ upb_MiniTable_Build(e.data().data(), e.data().size(), arena, &status);
+
+ const upb_MiniTableField* enumField = &table->fields[1];
+ EXPECT_EQ(upb_MiniTableField_Type(enumField), kUpb_FieldType_Enum);
+ EXPECT_FALSE(upb_MiniTableField_IsClosedEnum(enumField));
+
+ upb::MtDataEncoder e2;
+ e2.StartMessage(0);
+ e2.PutField(kUpb_FieldType_Int32, 4, 0);
+ e2.PutField(kUpb_FieldType_Enum, 6, kUpb_FieldModifier_IsClosedEnum);
+
+ upb_Status_Clear(&status);
+ table =
+ upb_MiniTable_Build(e2.data().data(), e2.data().size(), arena, &status);
+
+ const upb_MiniTableField* closedEnumField = &table->fields[1];
+ EXPECT_EQ(upb_MiniTableField_Type(closedEnumField), kUpb_FieldType_Enum);
+ EXPECT_TRUE(upb_MiniTableField_IsClosedEnum(closedEnumField));
+ upb_Arena_Free(arena);
+}
+
+} // namespace
diff --git a/upb/upb/message/copy.c b/upb/upb/message/copy.c
new file mode 100644
index 0000000..de528c5
--- /dev/null
+++ b/upb/upb/message/copy.c
@@ -0,0 +1,322 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/message/copy.h"
+
+#include <stdbool.h>
+#include <string.h>
+
+#include "upb/base/descriptor_constants.h"
+#include "upb/base/string_view.h"
+#include "upb/mem/arena.h"
+#include "upb/message/accessors.h"
+#include "upb/message/internal/message.h"
+#include "upb/message/message.h"
+#include "upb/mini_table/field.h"
+#include "upb/mini_table/internal/field.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+static bool upb_MessageField_IsMap(const upb_MiniTableField* field) {
+ return upb_FieldMode_Get(field) == kUpb_FieldMode_Map;
+}
+
+static upb_StringView upb_Clone_StringView(upb_StringView str,
+ upb_Arena* arena) {
+ if (str.size == 0) {
+ return upb_StringView_FromDataAndSize(NULL, 0);
+ }
+ void* cloned_data = upb_Arena_Malloc(arena, str.size);
+ upb_StringView cloned_str =
+ upb_StringView_FromDataAndSize(cloned_data, str.size);
+ memcpy(cloned_data, str.data, str.size);
+ return cloned_str;
+}
+
+static bool upb_Clone_MessageValue(void* value, upb_CType value_type,
+ const upb_MiniTable* sub, upb_Arena* arena) {
+ switch (value_type) {
+ case kUpb_CType_Bool:
+ case kUpb_CType_Float:
+ case kUpb_CType_Int32:
+ case kUpb_CType_UInt32:
+ case kUpb_CType_Enum:
+ case kUpb_CType_Double:
+ case kUpb_CType_Int64:
+ case kUpb_CType_UInt64:
+ return true;
+ case kUpb_CType_String:
+ case kUpb_CType_Bytes: {
+ upb_StringView source = *(upb_StringView*)value;
+ int size = source.size;
+ void* cloned_data = upb_Arena_Malloc(arena, size);
+ if (cloned_data == NULL) {
+ return false;
+ }
+ *(upb_StringView*)value =
+ upb_StringView_FromDataAndSize(cloned_data, size);
+ memcpy(cloned_data, source.data, size);
+ return true;
+ } break;
+ case kUpb_CType_Message: {
+ const upb_TaggedMessagePtr source = *(upb_TaggedMessagePtr*)value;
+ bool is_empty = upb_TaggedMessagePtr_IsEmpty(source);
+ if (is_empty) sub = &_kUpb_MiniTable_Empty;
+ UPB_ASSERT(source);
+ upb_Message* clone = upb_Message_DeepClone(
+ _upb_TaggedMessagePtr_GetMessage(source), sub, arena);
+ *(upb_TaggedMessagePtr*)value =
+ _upb_TaggedMessagePtr_Pack(clone, is_empty);
+ return clone != NULL;
+ } break;
+ }
+ UPB_UNREACHABLE();
+}
+
+upb_Map* upb_Map_DeepClone(const upb_Map* map, upb_CType key_type,
+ upb_CType value_type,
+ const upb_MiniTable* map_entry_table,
+ upb_Arena* arena) {
+ upb_Map* cloned_map = _upb_Map_New(arena, map->key_size, map->val_size);
+ if (cloned_map == NULL) {
+ return NULL;
+ }
+ upb_MessageValue key, val;
+ size_t iter = kUpb_Map_Begin;
+ while (upb_Map_Next(map, &key, &val, &iter)) {
+ const upb_MiniTableField* value_field = &map_entry_table->fields[1];
+ const upb_MiniTable* value_sub =
+ (value_field->UPB_PRIVATE(submsg_index) != kUpb_NoSub)
+ ? upb_MiniTable_GetSubMessageTable(map_entry_table, value_field)
+ : NULL;
+ upb_CType value_field_type = upb_MiniTableField_CType(value_field);
+ if (!upb_Clone_MessageValue(&val, value_field_type, value_sub, arena)) {
+ return NULL;
+ }
+ if (upb_Map_Insert(cloned_map, key, val, arena) ==
+ kUpb_MapInsertStatus_OutOfMemory) {
+ return NULL;
+ }
+ }
+ return cloned_map;
+}
+
+static upb_Map* upb_Message_Map_DeepClone(const upb_Map* map,
+ const upb_MiniTable* mini_table,
+ const upb_MiniTableField* field,
+ upb_Message* clone,
+ upb_Arena* arena) {
+ const upb_MiniTable* map_entry_table =
+ mini_table->subs[field->UPB_PRIVATE(submsg_index)].submsg;
+ UPB_ASSERT(map_entry_table);
+
+ const upb_MiniTableField* key_field = &map_entry_table->fields[0];
+ const upb_MiniTableField* value_field = &map_entry_table->fields[1];
+
+ upb_Map* cloned_map = upb_Map_DeepClone(
+ map, upb_MiniTableField_CType(key_field),
+ upb_MiniTableField_CType(value_field), map_entry_table, arena);
+ if (!cloned_map) {
+ return NULL;
+ }
+ _upb_Message_SetNonExtensionField(clone, field, &cloned_map);
+ return cloned_map;
+}
+
+upb_Array* upb_Array_DeepClone(const upb_Array* array, upb_CType value_type,
+ const upb_MiniTable* sub, upb_Arena* arena) {
+ size_t size = array->size;
+ upb_Array* cloned_array =
+ _upb_Array_New(arena, size, _upb_Array_CTypeSizeLg2(value_type));
+ if (!cloned_array) {
+ return NULL;
+ }
+ if (!_upb_Array_ResizeUninitialized(cloned_array, size, arena)) {
+ return NULL;
+ }
+ for (size_t i = 0; i < size; ++i) {
+ upb_MessageValue val = upb_Array_Get(array, i);
+ if (!upb_Clone_MessageValue(&val, value_type, sub, arena)) {
+ return false;
+ }
+ upb_Array_Set(cloned_array, i, val);
+ }
+ return cloned_array;
+}
+
+static bool upb_Message_Array_DeepClone(const upb_Array* array,
+ const upb_MiniTable* mini_table,
+ const upb_MiniTableField* field,
+ upb_Message* clone, upb_Arena* arena) {
+ _upb_MiniTableField_CheckIsArray(field);
+ upb_Array* cloned_array = upb_Array_DeepClone(
+ array, upb_MiniTableField_CType(field),
+ upb_MiniTableField_CType(field) == kUpb_CType_Message &&
+ field->UPB_PRIVATE(submsg_index) != kUpb_NoSub
+ ? upb_MiniTable_GetSubMessageTable(mini_table, field)
+ : NULL,
+ arena);
+
+ // Clear out upb_Array* due to parent memcpy.
+ _upb_Message_SetNonExtensionField(clone, field, &cloned_array);
+ return true;
+}
+
+static bool upb_Clone_ExtensionValue(
+ const upb_MiniTableExtension* mini_table_ext,
+ const upb_Message_Extension* source, upb_Message_Extension* dest,
+ upb_Arena* arena) {
+ dest->data = source->data;
+ return upb_Clone_MessageValue(
+ &dest->data, upb_MiniTableField_CType(&mini_table_ext->field),
+ mini_table_ext->sub.submsg, arena);
+}
+
+upb_Message* _upb_Message_Copy(upb_Message* dst, const upb_Message* src,
+ const upb_MiniTable* mini_table,
+ upb_Arena* arena) {
+ upb_StringView empty_string = upb_StringView_FromDataAndSize(NULL, 0);
+ // Only copy message area skipping upb_Message_Internal.
+ memcpy(dst, src, mini_table->size);
+ for (size_t i = 0; i < mini_table->field_count; ++i) {
+ const upb_MiniTableField* field = &mini_table->fields[i];
+ if (!upb_IsRepeatedOrMap(field)) {
+ switch (upb_MiniTableField_CType(field)) {
+ case kUpb_CType_Message: {
+ upb_TaggedMessagePtr tagged =
+ upb_Message_GetTaggedMessagePtr(src, field, NULL);
+ const upb_Message* sub_message =
+ _upb_TaggedMessagePtr_GetMessage(tagged);
+ if (sub_message != NULL) {
+ // If the message is currently in an unlinked, "empty" state we keep
+ // it that way, because we don't want to deal with decode options,
+ // decode status, or possible parse failure here.
+ bool is_empty = upb_TaggedMessagePtr_IsEmpty(tagged);
+ const upb_MiniTable* sub_message_table =
+ is_empty ? &_kUpb_MiniTable_Empty
+ : upb_MiniTable_GetSubMessageTable(mini_table, field);
+ upb_Message* dst_sub_message =
+ upb_Message_DeepClone(sub_message, sub_message_table, arena);
+ if (dst_sub_message == NULL) {
+ return NULL;
+ }
+ _upb_Message_SetTaggedMessagePtr(
+ dst, mini_table, field,
+ _upb_TaggedMessagePtr_Pack(dst_sub_message, is_empty));
+ }
+ } break;
+ case kUpb_CType_String:
+ case kUpb_CType_Bytes: {
+ upb_StringView str = upb_Message_GetString(src, field, empty_string);
+ if (str.size != 0) {
+ if (!upb_Message_SetString(
+ dst, field, upb_Clone_StringView(str, arena), arena)) {
+ return NULL;
+ }
+ }
+ } break;
+ default:
+ // Scalar, already copied.
+ break;
+ }
+ } else {
+ if (upb_MessageField_IsMap(field)) {
+ const upb_Map* map = upb_Message_GetMap(src, field);
+ if (map != NULL) {
+ if (!upb_Message_Map_DeepClone(map, mini_table, field, dst, arena)) {
+ return NULL;
+ }
+ }
+ } else {
+ const upb_Array* array = upb_Message_GetArray(src, field);
+ if (array != NULL) {
+ if (!upb_Message_Array_DeepClone(array, mini_table, field, dst,
+ arena)) {
+ return NULL;
+ }
+ }
+ }
+ }
+ }
+ // Clone extensions.
+ size_t ext_count;
+ const upb_Message_Extension* ext = _upb_Message_Getexts(src, &ext_count);
+ for (size_t i = 0; i < ext_count; ++i) {
+ const upb_Message_Extension* msg_ext = &ext[i];
+ const upb_MiniTableField* field = &msg_ext->ext->field;
+ upb_Message_Extension* dst_ext =
+ _upb_Message_GetOrCreateExtension(dst, msg_ext->ext, arena);
+ if (!dst_ext) return NULL;
+ if (!upb_IsRepeatedOrMap(field)) {
+ if (!upb_Clone_ExtensionValue(msg_ext->ext, msg_ext, dst_ext, arena)) {
+ return NULL;
+ }
+ } else {
+ upb_Array* msg_array = (upb_Array*)msg_ext->data.ptr;
+ UPB_ASSERT(msg_array);
+ upb_Array* cloned_array =
+ upb_Array_DeepClone(msg_array, upb_MiniTableField_CType(field),
+ msg_ext->ext->sub.submsg, arena);
+ if (!cloned_array) {
+ return NULL;
+ }
+ dst_ext->data.ptr = (void*)cloned_array;
+ }
+ }
+
+ // Clone unknowns.
+ size_t unknown_size = 0;
+ const char* ptr = upb_Message_GetUnknown(src, &unknown_size);
+ if (unknown_size != 0) {
+ UPB_ASSERT(ptr);
+ // Make a copy into destination arena.
+ if (!_upb_Message_AddUnknown(dst, ptr, unknown_size, arena)) {
+ return NULL;
+ }
+ }
+ return dst;
+}
+
+bool upb_Message_DeepCopy(upb_Message* dst, const upb_Message* src,
+ const upb_MiniTable* mini_table, upb_Arena* arena) {
+ upb_Message_Clear(dst, mini_table);
+ return _upb_Message_Copy(dst, src, mini_table, arena) != NULL;
+}
+
+// Deep clones a message using the provided target arena.
+//
+// Returns NULL on failure.
+upb_Message* upb_Message_DeepClone(const upb_Message* message,
+ const upb_MiniTable* mini_table,
+ upb_Arena* arena) {
+ upb_Message* clone = upb_Message_New(mini_table, arena);
+ return _upb_Message_Copy(clone, message, mini_table, arena);
+}
diff --git a/upb/upb/message/copy.h b/upb/upb/message/copy.h
new file mode 100644
index 0000000..dea292d
--- /dev/null
+++ b/upb/upb/message/copy.h
@@ -0,0 +1,69 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MESSAGE_COPY_H_
+#define UPB_MESSAGE_COPY_H_
+
+#include "upb/collections/message_value.h"
+#include "upb/message/internal/message.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Deep clones a message using the provided target arena.
+upb_Message* upb_Message_DeepClone(const upb_Message* message,
+ const upb_MiniTable* mini_table,
+ upb_Arena* arena);
+
+// Deep clones array contents.
+upb_Array* upb_Array_DeepClone(const upb_Array* array, upb_CType value_type,
+ const upb_MiniTable* sub, upb_Arena* arena);
+
+// Deep clones map contents.
+upb_Map* upb_Map_DeepClone(const upb_Map* map, upb_CType key_type,
+ upb_CType value_type,
+ const upb_MiniTable* map_entry_table,
+ upb_Arena* arena);
+
+// Deep copies the message from src to dst.
+bool upb_Message_DeepCopy(upb_Message* dst, const upb_Message* src,
+ const upb_MiniTable* mini_table, upb_Arena* arena);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif // UPB_MESSAGE_COPY_H_
diff --git a/upb/upb/message/copy_test.cc b/upb/upb/message/copy_test.cc
new file mode 100644
index 0000000..ff3a6f0
--- /dev/null
+++ b/upb/upb/message/copy_test.cc
@@ -0,0 +1,344 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+/* Test of mini table accessors.
+ *
+ * Messages are created and mutated using generated code, and then
+ * accessed through reflective APIs exposed through mini table accessors.
+ */
+
+#include "upb/message/copy.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "google/protobuf/test_messages_proto2.upb.h"
+#include "upb/base/string_view.h"
+#include "upb/collections/map.h"
+#include "upb/mem/arena.h"
+#include "upb/message/accessors.h"
+#include "upb/message/internal/message.h"
+#include "upb/message/message.h"
+#include "upb/mini_table/message.h"
+#include "upb/wire/encode.h"
+
+namespace {
+
+// Proto2 test messages field numbers used for reflective access.
+const uint32_t kFieldOptionalInt32 = 1;
+const uint32_t kFieldOptionalString = 14;
+const uint32_t kFieldOptionalNestedMessage = 18;
+
+const char kTestStr1[] = "Hello1";
+const char kTestStr2[] = "HelloWorld2";
+const int32_t kTestInt32 = 567;
+const int32_t kTestNestedInt32 = 123;
+
+const upb_MiniTableField* find_proto2_field(int field_number) {
+ return upb_MiniTable_FindFieldByNumber(
+ &protobuf_test_messages_proto2_TestAllTypesProto2_msg_init, field_number);
+}
+
+TEST(GeneratedCode, DeepCloneMessageScalarAndString) {
+ upb_Arena* source_arena = upb_Arena_New();
+ protobuf_test_messages_proto2_TestAllTypesProto2* msg =
+ protobuf_test_messages_proto2_TestAllTypesProto2_new(source_arena);
+ const upb_MiniTableField* optional_int32_field =
+ find_proto2_field(kFieldOptionalInt32);
+ const upb_MiniTableField* optional_string_field =
+ find_proto2_field(kFieldOptionalString);
+ upb_Message_SetInt32(msg, optional_int32_field, kTestInt32, nullptr);
+ char* string_in_arena =
+ (char*)upb_Arena_Malloc(source_arena, sizeof(kTestStr1));
+ memcpy(string_in_arena, kTestStr1, sizeof(kTestStr1));
+ upb_Message_SetString(
+ msg, optional_string_field,
+ upb_StringView_FromDataAndSize(string_in_arena, sizeof(kTestStr1) - 1),
+ source_arena);
+ upb_Arena* arena = upb_Arena_New();
+ protobuf_test_messages_proto2_TestAllTypesProto2* clone =
+ (protobuf_test_messages_proto2_TestAllTypesProto2*)upb_Message_DeepClone(
+ msg, &protobuf_test_messages_proto2_TestAllTypesProto2_msg_init,
+ arena);
+ // After cloning overwrite values and destroy source arena for MSAN.
+ memset(string_in_arena, 0, sizeof(kTestStr1));
+ upb_Arena_Free(source_arena);
+ EXPECT_TRUE(upb_Message_HasField(clone, optional_int32_field));
+ EXPECT_EQ(upb_Message_GetInt32(clone, optional_int32_field, 0), kTestInt32);
+ EXPECT_TRUE(upb_Message_HasField(clone, optional_string_field));
+ EXPECT_EQ(upb_Message_GetString(clone, optional_string_field,
+ upb_StringView_FromDataAndSize(nullptr, 0))
+ .size,
+ sizeof(kTestStr1) - 1);
+ EXPECT_TRUE(upb_StringView_IsEqual(
+ upb_Message_GetString(clone, optional_string_field,
+ upb_StringView_FromDataAndSize(nullptr, 0)),
+ upb_StringView_FromString(kTestStr1)));
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, DeepCloneMessageSubMessage) {
+ upb_Arena* source_arena = upb_Arena_New();
+ protobuf_test_messages_proto2_TestAllTypesProto2* msg =
+ protobuf_test_messages_proto2_TestAllTypesProto2_new(source_arena);
+ const upb_MiniTableField* nested_message_field =
+ find_proto2_field(kFieldOptionalNestedMessage);
+ protobuf_test_messages_proto2_TestAllTypesProto2_NestedMessage* nested =
+ protobuf_test_messages_proto2_TestAllTypesProto2_NestedMessage_new(
+ source_arena);
+ protobuf_test_messages_proto2_TestAllTypesProto2_NestedMessage_set_a(
+ nested, kTestNestedInt32);
+ upb_Message_SetMessage(
+ msg, &protobuf_test_messages_proto2_TestAllTypesProto2_msg_init,
+ nested_message_field, nested);
+ upb_Arena* arena = upb_Arena_New();
+ protobuf_test_messages_proto2_TestAllTypesProto2* clone =
+ (protobuf_test_messages_proto2_TestAllTypesProto2*)upb_Message_DeepClone(
+ msg, &protobuf_test_messages_proto2_TestAllTypesProto2_msg_init,
+ arena);
+ // After cloning overwrite values and destroy source arena for MSAN.
+ protobuf_test_messages_proto2_TestAllTypesProto2_NestedMessage_set_a(nested,
+ 0);
+ upb_Arena_Free(source_arena);
+ EXPECT_TRUE(upb_Message_HasField(clone, nested_message_field));
+ protobuf_test_messages_proto2_TestAllTypesProto2_NestedMessage*
+ cloned_nested =
+ (protobuf_test_messages_proto2_TestAllTypesProto2_NestedMessage*)
+ upb_Message_GetMessage(clone, nested_message_field, nullptr);
+ EXPECT_EQ(protobuf_test_messages_proto2_TestAllTypesProto2_NestedMessage_a(
+ cloned_nested),
+ kTestNestedInt32);
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, DeepCloneMessageArrayField) {
+ upb_Arena* source_arena = upb_Arena_New();
+ protobuf_test_messages_proto2_TestAllTypesProto2* msg =
+ protobuf_test_messages_proto2_TestAllTypesProto2_new(source_arena);
+ std::vector<int32_t> array_test_values = {3, 4, 5};
+ for (int32_t value : array_test_values) {
+ ASSERT_TRUE(
+ protobuf_test_messages_proto2_TestAllTypesProto2_add_repeated_int32(
+ msg, value, source_arena));
+ }
+ upb_Arena* arena = upb_Arena_New();
+ protobuf_test_messages_proto2_TestAllTypesProto2* clone =
+ (protobuf_test_messages_proto2_TestAllTypesProto2*)upb_Message_DeepClone(
+ msg, &protobuf_test_messages_proto2_TestAllTypesProto2_msg_init,
+ arena);
+ protobuf_test_messages_proto2_TestAllTypesProto2_clear_repeated_sint32(msg);
+ upb_Arena_Free(source_arena);
+ size_t cloned_size = 0;
+ const int32_t* cloned_values =
+ protobuf_test_messages_proto2_TestAllTypesProto2_repeated_int32(
+ clone, &cloned_size);
+ EXPECT_EQ(cloned_size, array_test_values.size());
+ int index = 0;
+ for (int32_t value : array_test_values) {
+ EXPECT_EQ(cloned_values[index++], value);
+ }
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, DeepCloneMessageMapField) {
+ upb_Arena* source_arena = upb_Arena_New();
+ protobuf_test_messages_proto2_TestAllTypesProto2* msg =
+ protobuf_test_messages_proto2_TestAllTypesProto2_new(source_arena);
+ ASSERT_TRUE(
+ protobuf_test_messages_proto2_TestAllTypesProto2_map_int32_double_set(
+ msg, 12, 1200.5, source_arena));
+ ASSERT_TRUE(
+ protobuf_test_messages_proto2_TestAllTypesProto2_map_string_string_set(
+ msg, upb_StringView_FromString("key1"),
+ upb_StringView_FromString("value1"), source_arena));
+ protobuf_test_messages_proto2_TestAllTypesProto2_NestedMessage* nested =
+ protobuf_test_messages_proto2_TestAllTypesProto2_NestedMessage_new(
+ source_arena);
+ protobuf_test_messages_proto2_TestAllTypesProto2_NestedMessage_set_a(
+ nested, kTestNestedInt32);
+ ASSERT_TRUE(
+ protobuf_test_messages_proto2_TestAllTypesProto2_map_string_nested_message_set(
+ msg, upb_StringView_FromString("nestedkey1"), nested, source_arena));
+
+ upb_Arena* arena = upb_Arena_New();
+ protobuf_test_messages_proto2_TestAllTypesProto2* clone =
+ (protobuf_test_messages_proto2_TestAllTypesProto2*)upb_Message_DeepClone(
+ msg, &protobuf_test_messages_proto2_TestAllTypesProto2_msg_init,
+ arena);
+ protobuf_test_messages_proto2_TestAllTypesProto2_NestedMessage_set_a(nested,
+ 0);
+ upb_Arena_Free(source_arena);
+ size_t iter = kUpb_Map_Begin;
+ // Test map<int32, int32>.
+ const protobuf_test_messages_proto2_TestAllTypesProto2_MapInt32DoubleEntry*
+ int32_double_entry =
+ protobuf_test_messages_proto2_TestAllTypesProto2_map_int32_double_next(
+ clone, &iter);
+ ASSERT_NE(int32_double_entry, nullptr);
+ EXPECT_EQ(
+ protobuf_test_messages_proto2_TestAllTypesProto2_MapInt32DoubleEntry_key(
+ int32_double_entry),
+ 12);
+ EXPECT_EQ(
+ protobuf_test_messages_proto2_TestAllTypesProto2_MapInt32DoubleEntry_value(
+ int32_double_entry),
+ 1200.5);
+ // Test map<string, string>.
+ iter = kUpb_Map_Begin;
+ const protobuf_test_messages_proto2_TestAllTypesProto2_MapStringStringEntry*
+ string_string_entry =
+ protobuf_test_messages_proto2_TestAllTypesProto2_map_string_string_next(
+ clone, &iter);
+ ASSERT_NE(string_string_entry, nullptr);
+ EXPECT_TRUE(upb_StringView_IsEqual(
+ protobuf_test_messages_proto2_TestAllTypesProto2_MapStringStringEntry_key(
+ string_string_entry),
+ upb_StringView_FromString("key1")));
+ EXPECT_TRUE(upb_StringView_IsEqual(
+ protobuf_test_messages_proto2_TestAllTypesProto2_MapStringStringEntry_value(
+ string_string_entry),
+ upb_StringView_FromString("value1")));
+ // Test map<string, NestedMessage>.
+ iter = kUpb_Map_Begin;
+ const protobuf_test_messages_proto2_TestAllTypesProto2_MapStringNestedMessageEntry*
+ nested_message_entry =
+ protobuf_test_messages_proto2_TestAllTypesProto2_map_string_nested_message_next(
+ clone, &iter);
+ ASSERT_NE(nested_message_entry, nullptr);
+ EXPECT_TRUE(upb_StringView_IsEqual(
+ protobuf_test_messages_proto2_TestAllTypesProto2_MapStringNestedMessageEntry_key(
+ nested_message_entry),
+ upb_StringView_FromString("nestedkey1")));
+ const protobuf_test_messages_proto2_TestAllTypesProto2_NestedMessage*
+ cloned_nested =
+ protobuf_test_messages_proto2_TestAllTypesProto2_MapStringNestedMessageEntry_value(
+ nested_message_entry);
+ ASSERT_NE(cloned_nested, nullptr);
+ EXPECT_EQ(protobuf_test_messages_proto2_TestAllTypesProto2_NestedMessage_a(
+ cloned_nested),
+ kTestNestedInt32);
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, DeepCloneMessageExtensions) {
+ // Alloc and fill in test message with extension.
+ upb_Arena* source_arena = upb_Arena_New();
+ protobuf_test_messages_proto2_TestAllTypesProto2_MessageSetCorrect* msg =
+ protobuf_test_messages_proto2_TestAllTypesProto2_MessageSetCorrect_new(
+ source_arena);
+ protobuf_test_messages_proto2_TestAllTypesProto2_MessageSetCorrectExtension1*
+ ext1 =
+ protobuf_test_messages_proto2_TestAllTypesProto2_MessageSetCorrectExtension1_new(
+ source_arena);
+ protobuf_test_messages_proto2_TestAllTypesProto2_MessageSetCorrectExtension1_set_str(
+ ext1, upb_StringView_FromString(kTestStr1));
+ protobuf_test_messages_proto2_TestAllTypesProto2_MessageSetCorrectExtension1_set_message_set_extension(
+ msg, ext1, source_arena);
+ // Create clone.
+ upb_Arena* arena = upb_Arena_New();
+ protobuf_test_messages_proto2_TestAllTypesProto2_MessageSetCorrect* clone =
+ (protobuf_test_messages_proto2_TestAllTypesProto2_MessageSetCorrect*)
+ upb_Message_DeepClone(
+ msg,
+ &protobuf_test_messages_proto2_TestAllTypesProto2_MessageSetCorrect_msg_init,
+ arena);
+
+ // Mutate original extension.
+ protobuf_test_messages_proto2_TestAllTypesProto2_MessageSetCorrectExtension1_set_str(
+ ext1, upb_StringView_FromString(kTestStr2));
+ upb_Arena_Free(source_arena);
+
+ const protobuf_test_messages_proto2_TestAllTypesProto2_MessageSetCorrectExtension1*
+ cloned_ext =
+ protobuf_test_messages_proto2_TestAllTypesProto2_MessageSetCorrectExtension1_message_set_extension(
+ clone);
+ ASSERT_NE(cloned_ext, nullptr);
+ EXPECT_TRUE(upb_StringView_IsEqual(
+ protobuf_test_messages_proto2_TestAllTypesProto2_MessageSetCorrectExtension1_str(
+ cloned_ext),
+ upb_StringView_FromString(kTestStr1)));
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, DeepCloneMessageWithUnknowns) {
+ upb_Arena* source_arena = upb_Arena_New();
+ upb_Arena* unknown_arena = upb_Arena_New();
+ protobuf_test_messages_proto2_TestAllTypesProto2* msg =
+ protobuf_test_messages_proto2_TestAllTypesProto2_new(source_arena);
+ ASSERT_TRUE(
+ protobuf_test_messages_proto2_TestAllTypesProto2_map_int32_double_set(
+ msg, 12, 1200.5, source_arena));
+ ASSERT_TRUE(
+ protobuf_test_messages_proto2_TestAllTypesProto2_map_string_string_set(
+ msg, upb_StringView_FromString("key1"),
+ upb_StringView_FromString("value1"), source_arena));
+ // Create unknown data.
+ protobuf_test_messages_proto2_UnknownToTestAllTypes* unknown_source =
+ protobuf_test_messages_proto2_UnknownToTestAllTypes_new(unknown_arena);
+ protobuf_test_messages_proto2_UnknownToTestAllTypes_set_optional_bool(
+ unknown_source, true);
+ protobuf_test_messages_proto2_UnknownToTestAllTypes_set_optional_int32(
+ unknown_source, 123);
+ // Encode unknown message to bytes.
+ size_t len;
+ char* data;
+ upb_Arena* encode_arena = upb_Arena_New();
+ upb_EncodeStatus status =
+ upb_Encode(unknown_source,
+ &protobuf_test_messages_proto2_UnknownToTestAllTypes_msg_init,
+ kUpb_EncodeOption_CheckRequired, encode_arena, &data, &len);
+ ASSERT_EQ(status, kUpb_EncodeStatus_Ok);
+ std::string unknown_data(data, len);
+ // Add unknown data.
+ _upb_Message_AddUnknown(msg, data, len, source_arena);
+ // Create clone.
+ upb_Arena* clone_arena = upb_Arena_New();
+ protobuf_test_messages_proto2_TestAllTypesProto2* clone =
+ (protobuf_test_messages_proto2_TestAllTypesProto2*)upb_Message_DeepClone(
+ msg, &protobuf_test_messages_proto2_TestAllTypesProto2_msg_init,
+ clone_arena);
+ upb_Arena_Free(source_arena);
+ upb_Arena_Free(unknown_arena);
+ upb_Arena_Free(encode_arena);
+ // Read unknown data from clone and verify.
+ size_t cloned_length;
+ const char* cloned_unknown_data =
+ upb_Message_GetUnknown(clone, &cloned_length);
+ EXPECT_EQ(cloned_length, len);
+ EXPECT_EQ(memcmp(cloned_unknown_data, unknown_data.c_str(), cloned_length),
+ 0);
+ upb_Arena_Free(clone_arena);
+}
+
+} // namespace
diff --git a/upb/upb/message/internal/accessors.h b/upb/upb/message/internal/accessors.h
new file mode 100644
index 0000000..713e40e
--- /dev/null
+++ b/upb/upb/message/internal/accessors.h
@@ -0,0 +1,390 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MESSAGE_INTERNAL_ACCESSORS_H_
+#define UPB_MESSAGE_INTERNAL_ACCESSORS_H_
+
+#include "upb/collections/internal/map.h"
+#include "upb/message/internal/extension.h"
+#include "upb/message/internal/message.h"
+#include "upb/mini_table/internal/field.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#if defined(__GNUC__) && !defined(__clang__)
+// GCC raises incorrect warnings in these functions. It thinks that we are
+// overrunning buffers, but we carefully write the functions in this file to
+// guarantee that this is impossible. GCC gets this wrong due it its failure
+// to perform constant propagation as we expect:
+// - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108217
+// - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108226
+//
+// Unfortunately this also indicates that GCC is not optimizing away the
+// switch() in cases where it should be, compromising the performance.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Warray-bounds"
+#pragma GCC diagnostic ignored "-Wstringop-overflow"
+#if __GNUC__ >= 11
+#pragma GCC diagnostic ignored "-Wstringop-overread"
+#endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// LINT.IfChange(presence_logic)
+
+// Hasbit access ///////////////////////////////////////////////////////////////
+
+UPB_INLINE size_t _upb_hasbit_ofs(size_t idx) { return idx / 8; }
+
+UPB_INLINE char _upb_hasbit_mask(size_t idx) { return 1 << (idx % 8); }
+
+UPB_INLINE bool _upb_hasbit(const upb_Message* msg, size_t idx) {
+ return (*UPB_PTR_AT(msg, _upb_hasbit_ofs(idx), const char) &
+ _upb_hasbit_mask(idx)) != 0;
+}
+
+UPB_INLINE void _upb_sethas(const upb_Message* msg, size_t idx) {
+ (*UPB_PTR_AT(msg, _upb_hasbit_ofs(idx), char)) |= _upb_hasbit_mask(idx);
+}
+
+UPB_INLINE void _upb_clearhas(const upb_Message* msg, size_t idx) {
+ (*UPB_PTR_AT(msg, _upb_hasbit_ofs(idx), char)) &= ~_upb_hasbit_mask(idx);
+}
+
+UPB_INLINE size_t _upb_Message_Hasidx(const upb_MiniTableField* f) {
+ UPB_ASSERT(f->presence > 0);
+ return f->presence;
+}
+
+UPB_INLINE bool _upb_hasbit_field(const upb_Message* msg,
+ const upb_MiniTableField* f) {
+ return _upb_hasbit(msg, _upb_Message_Hasidx(f));
+}
+
+UPB_INLINE void _upb_sethas_field(const upb_Message* msg,
+ const upb_MiniTableField* f) {
+ _upb_sethas(msg, _upb_Message_Hasidx(f));
+}
+
+// Oneof case access ///////////////////////////////////////////////////////////
+
+UPB_INLINE size_t _upb_oneofcase_ofs(const upb_MiniTableField* f) {
+ UPB_ASSERT(f->presence < 0);
+ return ~(ptrdiff_t)f->presence;
+}
+
+UPB_INLINE uint32_t* _upb_oneofcase_field(upb_Message* msg,
+ const upb_MiniTableField* f) {
+ return UPB_PTR_AT(msg, _upb_oneofcase_ofs(f), uint32_t);
+}
+
+UPB_INLINE uint32_t _upb_getoneofcase_field(const upb_Message* msg,
+ const upb_MiniTableField* f) {
+ return *_upb_oneofcase_field((upb_Message*)msg, f);
+}
+
+// LINT.ThenChange(GoogleInternalName2)
+
+UPB_INLINE bool _upb_MiniTableField_InOneOf(const upb_MiniTableField* field) {
+ return field->presence < 0;
+}
+
+UPB_INLINE void* _upb_MiniTableField_GetPtr(upb_Message* msg,
+ const upb_MiniTableField* field) {
+ return (char*)msg + field->offset;
+}
+
+UPB_INLINE const void* _upb_MiniTableField_GetConstPtr(
+ const upb_Message* msg, const upb_MiniTableField* field) {
+ return (char*)msg + field->offset;
+}
+
+UPB_INLINE void _upb_Message_SetPresence(upb_Message* msg,
+ const upb_MiniTableField* field) {
+ if (field->presence > 0) {
+ _upb_sethas_field(msg, field);
+ } else if (_upb_MiniTableField_InOneOf(field)) {
+ *_upb_oneofcase_field(msg, field) = field->number;
+ }
+}
+
+UPB_INLINE bool _upb_MiniTable_ValueIsNonZero(const void* default_val,
+ const upb_MiniTableField* field) {
+ char zero[16] = {0};
+ switch (_upb_MiniTableField_GetRep(field)) {
+ case kUpb_FieldRep_1Byte:
+ return memcmp(&zero, default_val, 1) != 0;
+ case kUpb_FieldRep_4Byte:
+ return memcmp(&zero, default_val, 4) != 0;
+ case kUpb_FieldRep_8Byte:
+ return memcmp(&zero, default_val, 8) != 0;
+ case kUpb_FieldRep_StringView: {
+ const upb_StringView* sv = (const upb_StringView*)default_val;
+ return sv->size != 0;
+ }
+ }
+ UPB_UNREACHABLE();
+}
+
+UPB_INLINE void _upb_MiniTable_CopyFieldData(void* to, const void* from,
+ const upb_MiniTableField* field) {
+ switch (_upb_MiniTableField_GetRep(field)) {
+ case kUpb_FieldRep_1Byte:
+ memcpy(to, from, 1);
+ return;
+ case kUpb_FieldRep_4Byte:
+ memcpy(to, from, 4);
+ return;
+ case kUpb_FieldRep_8Byte:
+ memcpy(to, from, 8);
+ return;
+ case kUpb_FieldRep_StringView: {
+ memcpy(to, from, sizeof(upb_StringView));
+ return;
+ }
+ }
+ UPB_UNREACHABLE();
+}
+
+UPB_INLINE size_t
+_upb_MiniTable_ElementSizeLg2(const upb_MiniTableField* field) {
+ const unsigned char table[] = {
+ 0,
+ 3, // kUpb_FieldType_Double = 1,
+ 2, // kUpb_FieldType_Float = 2,
+ 3, // kUpb_FieldType_Int64 = 3,
+ 3, // kUpb_FieldType_UInt64 = 4,
+ 2, // kUpb_FieldType_Int32 = 5,
+ 3, // kUpb_FieldType_Fixed64 = 6,
+ 2, // kUpb_FieldType_Fixed32 = 7,
+ 0, // kUpb_FieldType_Bool = 8,
+ UPB_SIZE(3, 4), // kUpb_FieldType_String = 9,
+ UPB_SIZE(2, 3), // kUpb_FieldType_Group = 10,
+ UPB_SIZE(2, 3), // kUpb_FieldType_Message = 11,
+ UPB_SIZE(3, 4), // kUpb_FieldType_Bytes = 12,
+ 2, // kUpb_FieldType_UInt32 = 13,
+ 2, // kUpb_FieldType_Enum = 14,
+ 2, // kUpb_FieldType_SFixed32 = 15,
+ 3, // kUpb_FieldType_SFixed64 = 16,
+ 2, // kUpb_FieldType_SInt32 = 17,
+ 3, // kUpb_FieldType_SInt64 = 18,
+ };
+ return table[field->UPB_PRIVATE(descriptortype)];
+}
+
+// Here we define universal getter/setter functions for message fields.
+// These look very branchy and inefficient, but as long as the MiniTableField
+// values are known at compile time, all the branches are optimized away and
+// we are left with ideal code. This can happen either through through
+// literals or UPB_ASSUME():
+//
+// // Via struct literals.
+// bool FooMessage_set_bool_field(const upb_Message* msg, bool val) {
+// const upb_MiniTableField field = {1, 0, 0, /* etc... */};
+// // All value in "field" are compile-time known.
+// _upb_Message_SetNonExtensionField(msg, &field, &value);
+// }
+//
+// // Via UPB_ASSUME().
+// UPB_INLINE bool upb_Message_SetBool(upb_Message* msg,
+// const upb_MiniTableField* field,
+// bool value, upb_Arena* a) {
+// UPB_ASSUME(field->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Bool);
+// UPB_ASSUME(!upb_IsRepeatedOrMap(field));
+// UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_1Byte);
+// _upb_Message_SetField(msg, field, &value, a);
+// }
+//
+// As a result, we can use these universal getters/setters for *all* message
+// accessors: generated code, MiniTable accessors, and reflection. The only
+// exception is the binary encoder/decoder, which need to be a bit more clever
+// about how they read/write the message data, for efficiency.
+//
+// These functions work on both extensions and non-extensions. If the field
+// of a setter is known to be a non-extension, the arena may be NULL and the
+// returned bool value may be ignored since it will always succeed.
+
+UPB_INLINE bool _upb_Message_HasExtensionField(
+ const upb_Message* msg, const upb_MiniTableExtension* ext) {
+ UPB_ASSERT(upb_MiniTableField_HasPresence(&ext->field));
+ return _upb_Message_Getext(msg, ext) != NULL;
+}
+
+UPB_INLINE bool _upb_Message_HasNonExtensionField(
+ const upb_Message* msg, const upb_MiniTableField* field) {
+ UPB_ASSERT(upb_MiniTableField_HasPresence(field));
+ UPB_ASSUME(!upb_MiniTableField_IsExtension(field));
+ if (_upb_MiniTableField_InOneOf(field)) {
+ return _upb_getoneofcase_field(msg, field) == field->number;
+ } else {
+ return _upb_hasbit_field(msg, field);
+ }
+}
+
+static UPB_FORCEINLINE void _upb_Message_GetNonExtensionField(
+ const upb_Message* msg, const upb_MiniTableField* field,
+ const void* default_val, void* val) {
+ UPB_ASSUME(!upb_MiniTableField_IsExtension(field));
+ if ((_upb_MiniTableField_InOneOf(field) ||
+ _upb_MiniTable_ValueIsNonZero(default_val, field)) &&
+ !_upb_Message_HasNonExtensionField(msg, field)) {
+ _upb_MiniTable_CopyFieldData(val, default_val, field);
+ return;
+ }
+ _upb_MiniTable_CopyFieldData(val, _upb_MiniTableField_GetConstPtr(msg, field),
+ field);
+}
+
+UPB_INLINE void _upb_Message_GetExtensionField(
+ const upb_Message* msg, const upb_MiniTableExtension* mt_ext,
+ const void* default_val, void* val) {
+ UPB_ASSUME(upb_MiniTableField_IsExtension(&mt_ext->field));
+ const upb_Message_Extension* ext = _upb_Message_Getext(msg, mt_ext);
+ if (ext) {
+ _upb_MiniTable_CopyFieldData(val, &ext->data, &mt_ext->field);
+ } else {
+ _upb_MiniTable_CopyFieldData(val, default_val, &mt_ext->field);
+ }
+}
+
+UPB_INLINE void _upb_Message_GetField(const upb_Message* msg,
+ const upb_MiniTableField* field,
+ const void* default_val, void* val) {
+ if (upb_MiniTableField_IsExtension(field)) {
+ _upb_Message_GetExtensionField(msg, (upb_MiniTableExtension*)field,
+ default_val, val);
+ } else {
+ _upb_Message_GetNonExtensionField(msg, field, default_val, val);
+ }
+}
+
+UPB_INLINE void _upb_Message_SetNonExtensionField(
+ upb_Message* msg, const upb_MiniTableField* field, const void* val) {
+ UPB_ASSUME(!upb_MiniTableField_IsExtension(field));
+ _upb_Message_SetPresence(msg, field);
+ _upb_MiniTable_CopyFieldData(_upb_MiniTableField_GetPtr(msg, field), val,
+ field);
+}
+
+UPB_INLINE bool _upb_Message_SetExtensionField(
+ upb_Message* msg, const upb_MiniTableExtension* mt_ext, const void* val,
+ upb_Arena* a) {
+ UPB_ASSERT(a);
+ upb_Message_Extension* ext =
+ _upb_Message_GetOrCreateExtension(msg, mt_ext, a);
+ if (!ext) return false;
+ _upb_MiniTable_CopyFieldData(&ext->data, val, &mt_ext->field);
+ return true;
+}
+
+UPB_INLINE bool _upb_Message_SetField(upb_Message* msg,
+ const upb_MiniTableField* field,
+ const void* val, upb_Arena* a) {
+ if (upb_MiniTableField_IsExtension(field)) {
+ const upb_MiniTableExtension* ext = (const upb_MiniTableExtension*)field;
+ return _upb_Message_SetExtensionField(msg, ext, val, a);
+ } else {
+ _upb_Message_SetNonExtensionField(msg, field, val);
+ return true;
+ }
+}
+
+UPB_INLINE void _upb_Message_ClearExtensionField(
+ upb_Message* msg, const upb_MiniTableExtension* ext_l) {
+ upb_Message_Internal* in = upb_Message_Getinternal(msg);
+ if (!in->internal) return;
+ const upb_Message_Extension* base =
+ UPB_PTR_AT(in->internal, in->internal->ext_begin, upb_Message_Extension);
+ upb_Message_Extension* ext =
+ (upb_Message_Extension*)_upb_Message_Getext(msg, ext_l);
+ if (ext) {
+ *ext = *base;
+ in->internal->ext_begin += sizeof(upb_Message_Extension);
+ }
+}
+
+UPB_INLINE void _upb_Message_ClearNonExtensionField(
+ upb_Message* msg, const upb_MiniTableField* field) {
+ if (field->presence > 0) {
+ _upb_clearhas(msg, _upb_Message_Hasidx(field));
+ } else if (_upb_MiniTableField_InOneOf(field)) {
+ uint32_t* oneof_case = _upb_oneofcase_field(msg, field);
+ if (*oneof_case != field->number) return;
+ *oneof_case = 0;
+ }
+ const char zeros[16] = {0};
+ _upb_MiniTable_CopyFieldData(_upb_MiniTableField_GetPtr(msg, field), zeros,
+ field);
+}
+
+UPB_INLINE void _upb_Message_AssertMapIsUntagged(
+ const upb_Message* msg, const upb_MiniTableField* field) {
+ UPB_UNUSED(msg);
+ _upb_MiniTableField_CheckIsMap(field);
+#ifndef NDEBUG
+ upb_TaggedMessagePtr default_val = 0;
+ upb_TaggedMessagePtr tagged;
+ _upb_Message_GetNonExtensionField(msg, field, &default_val, &tagged);
+ UPB_ASSERT(!upb_TaggedMessagePtr_IsEmpty(tagged));
+#endif
+}
+
+UPB_INLINE upb_Map* _upb_Message_GetOrCreateMutableMap(
+ upb_Message* msg, const upb_MiniTableField* field, size_t key_size,
+ size_t val_size, upb_Arena* arena) {
+ _upb_MiniTableField_CheckIsMap(field);
+ _upb_Message_AssertMapIsUntagged(msg, field);
+ upb_Map* map = NULL;
+ upb_Map* default_map_value = NULL;
+ _upb_Message_GetNonExtensionField(msg, field, &default_map_value, &map);
+ if (!map) {
+ map = _upb_Map_New(arena, key_size, val_size);
+ // Check again due to: https://godbolt.org/z/7WfaoKG1r
+ _upb_MiniTableField_CheckIsMap(field);
+ _upb_Message_SetNonExtensionField(msg, field, &map);
+ }
+ return map;
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif // UPB_MESSAGE_INTERNAL_ACCESSORS_H_
diff --git a/upb/upb/message/internal/extension.h b/upb/upb/message/internal/extension.h
new file mode 100644
index 0000000..01329c1
--- /dev/null
+++ b/upb/upb/message/internal/extension.h
@@ -0,0 +1,86 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MESSAGE_INTERNAL_EXTENSION_H_
+#define UPB_MESSAGE_INTERNAL_EXTENSION_H_
+
+#include "upb/base/descriptor_constants.h"
+#include "upb/base/string_view.h"
+#include "upb/mem/arena.h"
+#include "upb/message/message.h"
+#include "upb/mini_table/extension.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+// The internal representation of an extension is self-describing: it contains
+// enough information that we can serialize it to binary format without needing
+// to look it up in a upb_ExtensionRegistry.
+//
+// This representation allocates 16 bytes to data on 64-bit platforms.
+// This is rather wasteful for scalars (in the extreme case of bool,
+// it wastes 15 bytes). We accept this because we expect messages to be
+// the most common extension type.
+typedef struct {
+ const upb_MiniTableExtension* ext;
+ union {
+ upb_StringView str;
+ void* ptr;
+ char scalar_data[8];
+ } data;
+} upb_Message_Extension;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Adds the given extension data to the given message.
+// |ext| is copied into the message instance.
+// This logically replaces any previously-added extension with this number.
+upb_Message_Extension* _upb_Message_GetOrCreateExtension(
+ upb_Message* msg, const upb_MiniTableExtension* ext, upb_Arena* arena);
+
+// Returns an array of extensions for this message.
+// Note: the array is ordered in reverse relative to the order of creation.
+const upb_Message_Extension* _upb_Message_Getexts(const upb_Message* msg,
+ size_t* count);
+
+// Returns an extension for the given field number, or NULL if no extension
+// exists for this field number.
+const upb_Message_Extension* _upb_Message_Getext(
+ const upb_Message* msg, const upb_MiniTableExtension* ext);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_MESSAGE_INTERNAL_EXTENSION_H_ */
diff --git a/upb/upb/message/internal/map_entry.h b/upb/upb/message/internal/map_entry.h
new file mode 100644
index 0000000..b7d6014
--- /dev/null
+++ b/upb/upb/message/internal/map_entry.h
@@ -0,0 +1,75 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MINI_TABLE_INTERNAL_MAP_ENTRY_DATA_H_
+#define UPB_MINI_TABLE_INTERNAL_MAP_ENTRY_DATA_H_
+
+#include <stdint.h>
+
+#include "upb/base/string_view.h"
+#include "upb/hash/common.h"
+
+// Map entries aren't actually stored for map fields, they are only used during
+// parsing. For parsing, it helps a lot if all map entry messages have the same
+// layout. The layout code in mini_table/decode.c will ensure that all map
+// entries have this layout.
+//
+// Note that users can and do create map entries directly, which will also use
+// this layout.
+//
+// NOTE: sync with mini_table/decode.c.
+typedef struct {
+ // We only need 2 hasbits max, but due to alignment we'll use 8 bytes here,
+ // and the uint64_t helps make this clear.
+ uint64_t hasbits;
+ union {
+ upb_StringView str; // For str/bytes.
+ upb_value val; // For all other types.
+ } k;
+ union {
+ upb_StringView str; // For str/bytes.
+ upb_value val; // For all other types.
+ } v;
+} upb_MapEntryData;
+
+typedef struct {
+ // LINT.IfChange(internal_layout)
+ union {
+ void* internal_data;
+
+ // Force 8-byte alignment, since the data members may contain members that
+ // require 8-byte alignment.
+ double d;
+ };
+ // LINT.ThenChange(//depot/google3/third_party/upb/upb/message/internal/message.h:internal_layout)
+ upb_MapEntryData data;
+} upb_MapEntry;
+
+#endif // UPB_MINI_TABLE_INTERNAL_MAP_ENTRY_DATA_H_
diff --git a/upb/upb/message/internal/message.h b/upb/upb/message/internal/message.h
new file mode 100644
index 0000000..f7e7f39
--- /dev/null
+++ b/upb/upb/message/internal/message.h
@@ -0,0 +1,142 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+/*
+** Our memory representation for parsing tables and messages themselves.
+** Functions in this file are used by generated code and possibly reflection.
+**
+** The definitions in this file are internal to upb.
+**/
+
+#ifndef UPB_MESSAGE_INTERNAL_H_
+#define UPB_MESSAGE_INTERNAL_H_
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "upb/hash/common.h"
+#include "upb/message/internal/extension.h"
+#include "upb/message/message.h"
+#include "upb/mini_table/extension.h"
+#include "upb/mini_table/extension_registry.h"
+#include "upb/mini_table/message.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern const float kUpb_FltInfinity;
+extern const double kUpb_Infinity;
+extern const double kUpb_NaN;
+
+/* Internal members of a upb_Message that track unknown fields and/or
+ * extensions. We can change this without breaking binary compatibility. We put
+ * these before the user's data. The user's upb_Message* points after the
+ * upb_Message_Internal. */
+
+typedef struct {
+ /* Total size of this structure, including the data that follows.
+ * Must be aligned to 8, which is alignof(upb_Message_Extension) */
+ uint32_t size;
+
+ /* Offsets relative to the beginning of this structure.
+ *
+ * Unknown data grows forward from the beginning to unknown_end.
+ * Extension data grows backward from size to ext_begin.
+ * When the two meet, we're out of data and have to realloc.
+ *
+ * If we imagine that the final member of this struct is:
+ * char data[size - overhead]; // overhead =
+ * sizeof(upb_Message_InternalData)
+ *
+ * Then we have:
+ * unknown data: data[0 .. (unknown_end - overhead)]
+ * extensions data: data[(ext_begin - overhead) .. (size - overhead)] */
+ uint32_t unknown_end;
+ uint32_t ext_begin;
+ /* Data follows, as if there were an array:
+ * char data[size - sizeof(upb_Message_InternalData)]; */
+} upb_Message_InternalData;
+
+typedef struct {
+ // LINT.IfChange(internal_layout)
+ union {
+ upb_Message_InternalData* internal;
+
+ // Force 8-byte alignment, since the data members may contain members that
+ // require 8-byte alignment.
+ double d;
+ };
+ // LINT.ThenChange(//depot/google3/third_party/upb/upb/message/internal/map_entry.h:internal_layout)
+ /* Message data follows. */
+} upb_Message_Internal;
+
+/* Maps upb_CType -> memory size. */
+extern char _upb_CTypeo_size[12];
+
+UPB_INLINE size_t upb_msg_sizeof(const upb_MiniTable* t) {
+ return t->size + sizeof(upb_Message_Internal);
+}
+
+// Inline version upb_Message_New(), for internal use.
+UPB_INLINE upb_Message* _upb_Message_New(const upb_MiniTable* mini_table,
+ upb_Arena* arena) {
+ size_t size = upb_msg_sizeof(mini_table);
+ void* mem = upb_Arena_Malloc(arena, size + sizeof(upb_Message_Internal));
+ if (UPB_UNLIKELY(!mem)) return NULL;
+ upb_Message* msg = UPB_PTR_AT(mem, sizeof(upb_Message_Internal), upb_Message);
+ memset(mem, 0, size);
+ return msg;
+}
+
+UPB_INLINE upb_Message_Internal* upb_Message_Getinternal(
+ const upb_Message* msg) {
+ ptrdiff_t size = sizeof(upb_Message_Internal);
+ return (upb_Message_Internal*)((char*)msg - size);
+}
+
+// Discards the unknown fields for this message only.
+void _upb_Message_DiscardUnknown_shallow(upb_Message* msg);
+
+// Adds unknown data (serialized protobuf data) to the given message.
+// The data is copied into the message instance.
+bool _upb_Message_AddUnknown(upb_Message* msg, const char* data, size_t len,
+ upb_Arena* arena);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_MESSAGE_INTERNAL_H_ */
diff --git a/upb/upb/message/message.c b/upb/upb/message/message.c
new file mode 100644
index 0000000..30a16dc
--- /dev/null
+++ b/upb/upb/message/message.c
@@ -0,0 +1,177 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/message/message.h"
+
+#include <math.h>
+
+#include "upb/base/internal/log2.h"
+#include "upb/message/internal/message.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+const float kUpb_FltInfinity = INFINITY;
+const double kUpb_Infinity = INFINITY;
+const double kUpb_NaN = NAN;
+
+static const size_t overhead = sizeof(upb_Message_InternalData);
+
+upb_Message* upb_Message_New(const upb_MiniTable* mini_table,
+ upb_Arena* arena) {
+ return _upb_Message_New(mini_table, arena);
+}
+
+static bool realloc_internal(upb_Message* msg, size_t need, upb_Arena* arena) {
+ upb_Message_Internal* in = upb_Message_Getinternal(msg);
+ if (!in->internal) {
+ /* No internal data, allocate from scratch. */
+ size_t size = UPB_MAX(128, upb_Log2CeilingSize(need + overhead));
+ upb_Message_InternalData* internal = upb_Arena_Malloc(arena, size);
+ if (!internal) return false;
+ internal->size = size;
+ internal->unknown_end = overhead;
+ internal->ext_begin = size;
+ in->internal = internal;
+ } else if (in->internal->ext_begin - in->internal->unknown_end < need) {
+ /* Internal data is too small, reallocate. */
+ size_t new_size = upb_Log2CeilingSize(in->internal->size + need);
+ size_t ext_bytes = in->internal->size - in->internal->ext_begin;
+ size_t new_ext_begin = new_size - ext_bytes;
+ upb_Message_InternalData* internal =
+ upb_Arena_Realloc(arena, in->internal, in->internal->size, new_size);
+ if (!internal) return false;
+ if (ext_bytes) {
+ /* Need to move extension data to the end. */
+ char* ptr = (char*)internal;
+ memmove(ptr + new_ext_begin, ptr + internal->ext_begin, ext_bytes);
+ }
+ internal->ext_begin = new_ext_begin;
+ internal->size = new_size;
+ in->internal = internal;
+ }
+ UPB_ASSERT(in->internal->ext_begin - in->internal->unknown_end >= need);
+ return true;
+}
+
+bool _upb_Message_AddUnknown(upb_Message* msg, const char* data, size_t len,
+ upb_Arena* arena) {
+ if (!realloc_internal(msg, len, arena)) return false;
+ upb_Message_Internal* in = upb_Message_Getinternal(msg);
+ memcpy(UPB_PTR_AT(in->internal, in->internal->unknown_end, char), data, len);
+ in->internal->unknown_end += len;
+ return true;
+}
+
+void _upb_Message_DiscardUnknown_shallow(upb_Message* msg) {
+ upb_Message_Internal* in = upb_Message_Getinternal(msg);
+ if (in->internal) {
+ in->internal->unknown_end = overhead;
+ }
+}
+
+const char* upb_Message_GetUnknown(const upb_Message* msg, size_t* len) {
+ const upb_Message_Internal* in = upb_Message_Getinternal(msg);
+ if (in->internal) {
+ *len = in->internal->unknown_end - overhead;
+ return (char*)(in->internal + 1);
+ } else {
+ *len = 0;
+ return NULL;
+ }
+}
+
+void upb_Message_DeleteUnknown(upb_Message* msg, const char* data, size_t len) {
+ upb_Message_Internal* in = upb_Message_Getinternal(msg);
+ const char* internal_unknown_end =
+ UPB_PTR_AT(in->internal, in->internal->unknown_end, char);
+#ifndef NDEBUG
+ size_t full_unknown_size;
+ const char* full_unknown = upb_Message_GetUnknown(msg, &full_unknown_size);
+ UPB_ASSERT((uintptr_t)data >= (uintptr_t)full_unknown);
+ UPB_ASSERT((uintptr_t)data < (uintptr_t)(full_unknown + full_unknown_size));
+ UPB_ASSERT((uintptr_t)(data + len) > (uintptr_t)data);
+ UPB_ASSERT((uintptr_t)(data + len) <= (uintptr_t)internal_unknown_end);
+#endif
+ if ((data + len) != internal_unknown_end) {
+ memmove((char*)data, data + len, internal_unknown_end - data - len);
+ }
+ in->internal->unknown_end -= len;
+}
+
+const upb_Message_Extension* _upb_Message_Getexts(const upb_Message* msg,
+ size_t* count) {
+ const upb_Message_Internal* in = upb_Message_Getinternal(msg);
+ if (in->internal) {
+ *count = (in->internal->size - in->internal->ext_begin) /
+ sizeof(upb_Message_Extension);
+ return UPB_PTR_AT(in->internal, in->internal->ext_begin, void);
+ } else {
+ *count = 0;
+ return NULL;
+ }
+}
+
+const upb_Message_Extension* _upb_Message_Getext(
+ const upb_Message* msg, const upb_MiniTableExtension* e) {
+ size_t n;
+ const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &n);
+
+ /* For now we use linear search exclusively to find extensions. If this
+ * becomes an issue due to messages with lots of extensions, we can introduce
+ * a table of some sort. */
+ for (size_t i = 0; i < n; i++) {
+ if (ext[i].ext == e) {
+ return &ext[i];
+ }
+ }
+
+ return NULL;
+}
+
+upb_Message_Extension* _upb_Message_GetOrCreateExtension(
+ upb_Message* msg, const upb_MiniTableExtension* e, upb_Arena* arena) {
+ upb_Message_Extension* ext =
+ (upb_Message_Extension*)_upb_Message_Getext(msg, e);
+ if (ext) return ext;
+ if (!realloc_internal(msg, sizeof(upb_Message_Extension), arena)) return NULL;
+ upb_Message_Internal* in = upb_Message_Getinternal(msg);
+ in->internal->ext_begin -= sizeof(upb_Message_Extension);
+ ext = UPB_PTR_AT(in->internal, in->internal->ext_begin, void);
+ memset(ext, 0, sizeof(upb_Message_Extension));
+ ext->ext = e;
+ return ext;
+}
+
+size_t upb_Message_ExtensionCount(const upb_Message* msg) {
+ size_t count;
+ _upb_Message_Getexts(msg, &count);
+ return count;
+}
diff --git a/upb/upb/message/message.h b/upb/upb/message/message.h
new file mode 100644
index 0000000..d04b1e5
--- /dev/null
+++ b/upb/upb/message/message.h
@@ -0,0 +1,73 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Public APIs for message operations that do not depend on the schema.
+//
+// MiniTable-based accessors live in accessors.h.
+
+#ifndef UPB_MESSAGE_MESSAGE_H_
+#define UPB_MESSAGE_MESSAGE_H_
+
+#include "upb/mem/arena.h"
+#include "upb/message/types.h"
+#include "upb/mini_table/message.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Creates a new message with the given mini_table on the given arena.
+UPB_API upb_Message* upb_Message_New(const upb_MiniTable* mini_table,
+ upb_Arena* arena);
+
+// Adds unknown data (serialized protobuf data) to the given message.
+// The data is copied into the message instance.
+void upb_Message_AddUnknown(upb_Message* msg, const char* data, size_t len,
+ upb_Arena* arena);
+
+// Returns a reference to the message's unknown data.
+const char* upb_Message_GetUnknown(const upb_Message* msg, size_t* len);
+
+// Removes partial unknown data from message.
+void upb_Message_DeleteUnknown(upb_Message* msg, const char* data, size_t len);
+
+// Returns the number of extensions present in this message.
+size_t upb_Message_ExtensionCount(const upb_Message* msg);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_MESSAGE_MESSAGE_H_ */
diff --git a/upb/upb/message/promote.c b/upb/upb/message/promote.c
new file mode 100644
index 0000000..99ec101
--- /dev/null
+++ b/upb/upb/message/promote.c
@@ -0,0 +1,364 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/message/promote.h"
+
+#include "upb/collections/array.h"
+#include "upb/collections/internal/array.h"
+#include "upb/collections/map.h"
+#include "upb/message/accessors.h"
+#include "upb/message/message.h"
+#include "upb/mini_table/field.h"
+#include "upb/wire/decode.h"
+#include "upb/wire/encode.h"
+#include "upb/wire/eps_copy_input_stream.h"
+#include "upb/wire/reader.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+// Parses unknown data by merging into existing base_message or creating a
+// new message usingg mini_table.
+static upb_UnknownToMessageRet upb_MiniTable_ParseUnknownMessage(
+ const char* unknown_data, size_t unknown_size,
+ const upb_MiniTable* mini_table, upb_Message* base_message,
+ int decode_options, upb_Arena* arena) {
+ upb_UnknownToMessageRet ret;
+ ret.message =
+ base_message ? base_message : _upb_Message_New(mini_table, arena);
+ if (!ret.message) {
+ ret.status = kUpb_UnknownToMessage_OutOfMemory;
+ return ret;
+ }
+ // Decode sub message using unknown field contents.
+ const char* data = unknown_data;
+ uint32_t tag;
+ uint64_t message_len = 0;
+ data = upb_WireReader_ReadTag(data, &tag);
+ data = upb_WireReader_ReadVarint(data, &message_len);
+ upb_DecodeStatus status = upb_Decode(data, message_len, ret.message,
+ mini_table, NULL, decode_options, arena);
+ if (status == kUpb_DecodeStatus_OutOfMemory) {
+ ret.status = kUpb_UnknownToMessage_OutOfMemory;
+ } else if (status == kUpb_DecodeStatus_Ok) {
+ ret.status = kUpb_UnknownToMessage_Ok;
+ } else {
+ ret.status = kUpb_UnknownToMessage_ParseError;
+ }
+ return ret;
+}
+
+upb_GetExtension_Status upb_MiniTable_GetOrPromoteExtension(
+ upb_Message* msg, const upb_MiniTableExtension* ext_table,
+ int decode_options, upb_Arena* arena,
+ const upb_Message_Extension** extension) {
+ UPB_ASSERT(upb_MiniTableField_CType(&ext_table->field) == kUpb_CType_Message);
+ *extension = _upb_Message_Getext(msg, ext_table);
+ if (*extension) {
+ return kUpb_GetExtension_Ok;
+ }
+
+ // Check unknown fields, if available promote.
+ int field_number = ext_table->field.number;
+ upb_FindUnknownRet result = upb_MiniTable_FindUnknown(
+ msg, field_number, kUpb_WireFormat_DefaultDepthLimit);
+ if (result.status != kUpb_FindUnknown_Ok) {
+ return kUpb_GetExtension_NotPresent;
+ }
+ size_t len;
+ size_t ofs = result.ptr - upb_Message_GetUnknown(msg, &len);
+ // Decode and promote from unknown.
+ const upb_MiniTable* extension_table = ext_table->sub.submsg;
+ upb_UnknownToMessageRet parse_result = upb_MiniTable_ParseUnknownMessage(
+ result.ptr, result.len, extension_table,
+ /* base_message= */ NULL, decode_options, arena);
+ switch (parse_result.status) {
+ case kUpb_UnknownToMessage_OutOfMemory:
+ return kUpb_GetExtension_OutOfMemory;
+ case kUpb_UnknownToMessage_ParseError:
+ return kUpb_GetExtension_ParseError;
+ case kUpb_UnknownToMessage_NotFound:
+ return kUpb_GetExtension_NotPresent;
+ case kUpb_UnknownToMessage_Ok:
+ break;
+ }
+ upb_Message* extension_msg = parse_result.message;
+ // Add to extensions.
+ upb_Message_Extension* ext =
+ _upb_Message_GetOrCreateExtension(msg, ext_table, arena);
+ if (!ext) {
+ return kUpb_GetExtension_OutOfMemory;
+ }
+ memcpy(&ext->data, &extension_msg, sizeof(extension_msg));
+ *extension = ext;
+ const char* delete_ptr = upb_Message_GetUnknown(msg, &len) + ofs;
+ upb_Message_DeleteUnknown(msg, delete_ptr, result.len);
+ return kUpb_GetExtension_Ok;
+}
+
+static upb_FindUnknownRet upb_FindUnknownRet_ParseError(void) {
+ return (upb_FindUnknownRet){.status = kUpb_FindUnknown_ParseError};
+}
+
+upb_FindUnknownRet upb_MiniTable_FindUnknown(const upb_Message* msg,
+ uint32_t field_number,
+ int depth_limit) {
+ size_t size;
+ upb_FindUnknownRet ret;
+
+ const char* ptr = upb_Message_GetUnknown(msg, &size);
+ upb_EpsCopyInputStream stream;
+ upb_EpsCopyInputStream_Init(&stream, &ptr, size, true);
+
+ while (!upb_EpsCopyInputStream_IsDone(&stream, &ptr)) {
+ uint32_t tag;
+ const char* unknown_begin = ptr;
+ ptr = upb_WireReader_ReadTag(ptr, &tag);
+ if (!ptr) return upb_FindUnknownRet_ParseError();
+ if (field_number == upb_WireReader_GetFieldNumber(tag)) {
+ ret.status = kUpb_FindUnknown_Ok;
+ ret.ptr = upb_EpsCopyInputStream_GetAliasedPtr(&stream, unknown_begin);
+ ptr = _upb_WireReader_SkipValue(ptr, tag, depth_limit, &stream);
+ // Because we know that the input is a flat buffer, it is safe to perform
+ // pointer arithmetic on aliased pointers.
+ ret.len = upb_EpsCopyInputStream_GetAliasedPtr(&stream, ptr) - ret.ptr;
+ return ret;
+ }
+
+ ptr = _upb_WireReader_SkipValue(ptr, tag, depth_limit, &stream);
+ if (!ptr) return upb_FindUnknownRet_ParseError();
+ }
+ ret.status = kUpb_FindUnknown_NotPresent;
+ ret.ptr = NULL;
+ ret.len = 0;
+ return ret;
+}
+
+static upb_DecodeStatus upb_Message_PromoteOne(upb_TaggedMessagePtr* tagged,
+ const upb_MiniTable* mini_table,
+ int decode_options,
+ upb_Arena* arena) {
+ upb_Message* empty = _upb_TaggedMessagePtr_GetEmptyMessage(*tagged);
+ size_t unknown_size;
+ const char* unknown_data = upb_Message_GetUnknown(empty, &unknown_size);
+ upb_Message* promoted = upb_Message_New(mini_table, arena);
+ if (!promoted) return kUpb_DecodeStatus_OutOfMemory;
+ upb_DecodeStatus status = upb_Decode(unknown_data, unknown_size, promoted,
+ mini_table, NULL, decode_options, arena);
+ if (status == kUpb_DecodeStatus_Ok) {
+ *tagged = _upb_TaggedMessagePtr_Pack(promoted, false);
+ }
+ return status;
+}
+
+upb_DecodeStatus upb_Message_PromoteMessage(upb_Message* parent,
+ const upb_MiniTable* mini_table,
+ const upb_MiniTableField* field,
+ int decode_options,
+ upb_Arena* arena,
+ upb_Message** promoted) {
+ const upb_MiniTable* sub_table =
+ upb_MiniTable_GetSubMessageTable(mini_table, field);
+ UPB_ASSERT(sub_table);
+ upb_TaggedMessagePtr tagged =
+ upb_Message_GetTaggedMessagePtr(parent, field, NULL);
+ upb_DecodeStatus ret =
+ upb_Message_PromoteOne(&tagged, sub_table, decode_options, arena);
+ if (ret == kUpb_DecodeStatus_Ok) {
+ *promoted = upb_TaggedMessagePtr_GetNonEmptyMessage(tagged);
+ upb_Message_SetMessage(parent, mini_table, field, *promoted);
+ }
+ return ret;
+}
+
+upb_DecodeStatus upb_Array_PromoteMessages(upb_Array* arr,
+ const upb_MiniTable* mini_table,
+ int decode_options,
+ upb_Arena* arena) {
+ void** data = _upb_array_ptr(arr);
+ size_t size = arr->size;
+ for (size_t i = 0; i < size; i++) {
+ upb_TaggedMessagePtr tagged;
+ memcpy(&tagged, &data[i], sizeof(tagged));
+ if (!upb_TaggedMessagePtr_IsEmpty(tagged)) continue;
+ upb_DecodeStatus status =
+ upb_Message_PromoteOne(&tagged, mini_table, decode_options, arena);
+ if (status != kUpb_DecodeStatus_Ok) return status;
+ memcpy(&data[i], &tagged, sizeof(tagged));
+ }
+ return kUpb_DecodeStatus_Ok;
+}
+
+upb_DecodeStatus upb_Map_PromoteMessages(upb_Map* map,
+ const upb_MiniTable* mini_table,
+ int decode_options, upb_Arena* arena) {
+ size_t iter = kUpb_Map_Begin;
+ upb_MessageValue key, val;
+ while (upb_Map_Next(map, &key, &val, &iter)) {
+ if (!upb_TaggedMessagePtr_IsEmpty(val.tagged_msg_val)) continue;
+ upb_DecodeStatus status = upb_Message_PromoteOne(
+ &val.tagged_msg_val, mini_table, decode_options, arena);
+ if (status != kUpb_DecodeStatus_Ok) return status;
+ upb_Map_SetEntryValue(map, iter, val);
+ }
+ return kUpb_DecodeStatus_Ok;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// OLD promotion functions, will be removed!
+////////////////////////////////////////////////////////////////////////////////
+
+// Warning: See TODO(b/267655898)
+upb_UnknownToMessageRet upb_MiniTable_PromoteUnknownToMessage(
+ upb_Message* msg, const upb_MiniTable* mini_table,
+ const upb_MiniTableField* field, const upb_MiniTable* sub_mini_table,
+ int decode_options, upb_Arena* arena) {
+ upb_FindUnknownRet unknown;
+ // We need to loop and merge unknowns that have matching tag field->number.
+ upb_Message* message = NULL;
+ // Callers should check that message is not set first before calling
+ // PromotoUnknownToMessage.
+ UPB_ASSERT(upb_MiniTable_GetSubMessageTable(mini_table, field) ==
+ sub_mini_table);
+ bool is_oneof = _upb_MiniTableField_InOneOf(field);
+ if (!is_oneof || _upb_getoneofcase_field(msg, field) == field->number) {
+ UPB_ASSERT(upb_Message_GetMessage(msg, field, NULL) == NULL);
+ }
+ upb_UnknownToMessageRet ret;
+ ret.status = kUpb_UnknownToMessage_Ok;
+ do {
+ unknown = upb_MiniTable_FindUnknown(
+ msg, field->number, upb_DecodeOptions_GetMaxDepth(decode_options));
+ switch (unknown.status) {
+ case kUpb_FindUnknown_Ok: {
+ const char* unknown_data = unknown.ptr;
+ size_t unknown_size = unknown.len;
+ ret = upb_MiniTable_ParseUnknownMessage(unknown_data, unknown_size,
+ sub_mini_table, message,
+ decode_options, arena);
+ if (ret.status == kUpb_UnknownToMessage_Ok) {
+ message = ret.message;
+ upb_Message_DeleteUnknown(msg, unknown_data, unknown_size);
+ }
+ } break;
+ case kUpb_FindUnknown_ParseError:
+ ret.status = kUpb_UnknownToMessage_ParseError;
+ break;
+ case kUpb_FindUnknown_NotPresent:
+ // If we parsed at least one unknown, we are done.
+ ret.status =
+ message ? kUpb_UnknownToMessage_Ok : kUpb_UnknownToMessage_NotFound;
+ break;
+ }
+ } while (unknown.status == kUpb_FindUnknown_Ok);
+ if (message) {
+ if (is_oneof) {
+ *_upb_oneofcase_field(msg, field) = field->number;
+ }
+ upb_Message_SetMessage(msg, mini_table, field, message);
+ ret.message = message;
+ }
+ return ret;
+}
+
+// Moves repeated messages in unknowns to a upb_Array.
+//
+// Since the repeated field is not a scalar type we don't check for
+// kUpb_LabelFlags_IsPacked.
+// TODO(b/251007554): Optimize. Instead of converting messages one at a time,
+// scan all unknown data once and compact.
+upb_UnknownToMessage_Status upb_MiniTable_PromoteUnknownToMessageArray(
+ upb_Message* msg, const upb_MiniTableField* field,
+ const upb_MiniTable* mini_table, int decode_options, upb_Arena* arena) {
+ upb_Array* repeated_messages = upb_Message_GetMutableArray(msg, field);
+ // Find all unknowns with given field number and parse.
+ upb_FindUnknownRet unknown;
+ do {
+ unknown = upb_MiniTable_FindUnknown(
+ msg, field->number, upb_DecodeOptions_GetMaxDepth(decode_options));
+ if (unknown.status == kUpb_FindUnknown_Ok) {
+ upb_UnknownToMessageRet ret = upb_MiniTable_ParseUnknownMessage(
+ unknown.ptr, unknown.len, mini_table,
+ /* base_message= */ NULL, decode_options, arena);
+ if (ret.status == kUpb_UnknownToMessage_Ok) {
+ upb_MessageValue value;
+ value.msg_val = ret.message;
+ // Allocate array on demand before append.
+ if (!repeated_messages) {
+ upb_Message_ResizeArrayUninitialized(msg, field, 0, arena);
+ repeated_messages = upb_Message_GetMutableArray(msg, field);
+ }
+ if (!upb_Array_Append(repeated_messages, value, arena)) {
+ return kUpb_UnknownToMessage_OutOfMemory;
+ }
+ upb_Message_DeleteUnknown(msg, unknown.ptr, unknown.len);
+ } else {
+ return ret.status;
+ }
+ }
+ } while (unknown.status == kUpb_FindUnknown_Ok);
+ return kUpb_UnknownToMessage_Ok;
+}
+
+// Moves repeated messages in unknowns to a upb_Map.
+upb_UnknownToMessage_Status upb_MiniTable_PromoteUnknownToMap(
+ upb_Message* msg, const upb_MiniTable* mini_table,
+ const upb_MiniTableField* field, int decode_options, upb_Arena* arena) {
+ const upb_MiniTable* map_entry_mini_table =
+ mini_table->subs[field->UPB_PRIVATE(submsg_index)].submsg;
+ UPB_ASSERT(map_entry_mini_table);
+ UPB_ASSERT(map_entry_mini_table);
+ UPB_ASSERT(map_entry_mini_table->field_count == 2);
+ UPB_ASSERT(upb_FieldMode_Get(field) == kUpb_FieldMode_Map);
+ // Find all unknowns with given field number and parse.
+ upb_FindUnknownRet unknown;
+ while (1) {
+ unknown = upb_MiniTable_FindUnknown(
+ msg, field->number, upb_DecodeOptions_GetMaxDepth(decode_options));
+ if (unknown.status != kUpb_FindUnknown_Ok) break;
+ upb_UnknownToMessageRet ret = upb_MiniTable_ParseUnknownMessage(
+ unknown.ptr, unknown.len, map_entry_mini_table,
+ /* base_message= */ NULL, decode_options, arena);
+ if (ret.status != kUpb_UnknownToMessage_Ok) return ret.status;
+ // Allocate map on demand before append.
+ upb_Map* map = upb_Message_GetOrCreateMutableMap(msg, map_entry_mini_table,
+ field, arena);
+ upb_Message* map_entry_message = ret.message;
+ upb_MapInsertStatus insert_status = upb_Message_InsertMapEntry(
+ map, mini_table, field, map_entry_message, arena);
+ if (insert_status == kUpb_MapInsertStatus_OutOfMemory) {
+ return kUpb_UnknownToMessage_OutOfMemory;
+ }
+ UPB_ASSUME(insert_status == kUpb_MapInsertStatus_Inserted ||
+ insert_status == kUpb_MapInsertStatus_Replaced);
+ upb_Message_DeleteUnknown(msg, unknown.ptr, unknown.len);
+ }
+ return kUpb_UnknownToMessage_Ok;
+}
diff --git a/upb/upb/message/promote.h b/upb/upb/message/promote.h
new file mode 100644
index 0000000..48737ee
--- /dev/null
+++ b/upb/upb/message/promote.h
@@ -0,0 +1,182 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MESSAGE_PROMOTE_H_
+#define UPB_MESSAGE_PROMOTE_H_
+
+#include "upb/collections/array.h"
+#include "upb/message/internal/extension.h"
+#include "upb/wire/decode.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum {
+ kUpb_GetExtension_Ok,
+ kUpb_GetExtension_NotPresent,
+ kUpb_GetExtension_ParseError,
+ kUpb_GetExtension_OutOfMemory,
+} upb_GetExtension_Status;
+
+typedef enum {
+ kUpb_GetExtensionAsBytes_Ok,
+ kUpb_GetExtensionAsBytes_NotPresent,
+ kUpb_GetExtensionAsBytes_EncodeError,
+} upb_GetExtensionAsBytes_Status;
+
+// Returns a message extension or promotes an unknown field to
+// an extension.
+//
+// TODO(ferhat): Only supports extension fields that are messages,
+// expand support to include non-message types.
+upb_GetExtension_Status upb_MiniTable_GetOrPromoteExtension(
+ upb_Message* msg, const upb_MiniTableExtension* ext_table,
+ int decode_options, upb_Arena* arena,
+ const upb_Message_Extension** extension);
+
+typedef enum {
+ kUpb_FindUnknown_Ok,
+ kUpb_FindUnknown_NotPresent,
+ kUpb_FindUnknown_ParseError,
+} upb_FindUnknown_Status;
+
+typedef struct {
+ upb_FindUnknown_Status status;
+ // Start of unknown field data in message arena.
+ const char* ptr;
+ // Size of unknown field data.
+ size_t len;
+} upb_FindUnknownRet;
+
+// Finds first occurrence of unknown data by tag id in message.
+upb_FindUnknownRet upb_MiniTable_FindUnknown(const upb_Message* msg,
+ uint32_t field_number,
+ int depth_limit);
+
+typedef enum {
+ kUpb_UnknownToMessage_Ok,
+ kUpb_UnknownToMessage_ParseError,
+ kUpb_UnknownToMessage_OutOfMemory,
+ kUpb_UnknownToMessage_NotFound,
+} upb_UnknownToMessage_Status;
+
+typedef struct {
+ upb_UnknownToMessage_Status status;
+ upb_Message* message;
+} upb_UnknownToMessageRet;
+
+// Promotes an "empty" non-repeated message field in `parent` to a message of
+// the correct type.
+//
+// Preconditions:
+//
+// 1. The message field must currently be in the "empty" state (this must have
+// been previously verified by the caller by calling
+// `upb_Message_GetTaggedMessagePtr()` and observing that the message is
+// indeed empty).
+//
+// 2. This `field` must have previously been linked.
+//
+// If the promotion succeeds, `parent` will have its data for `field` replaced
+// by the promoted message, which is also returned in `*promoted`. If the
+// return value indicates an error status, `parent` and `promoted` are
+// unchanged.
+upb_DecodeStatus upb_Message_PromoteMessage(upb_Message* parent,
+ const upb_MiniTable* mini_table,
+ const upb_MiniTableField* field,
+ int decode_options,
+ upb_Arena* arena,
+ upb_Message** promoted);
+
+// Promotes any "empty" messages in this array to a message of the correct type
+// `mini_table`. This function should only be called for arrays of messages.
+//
+// If the return value indicates an error status, some but not all elements may
+// have been promoted, but the array itself will not be corrupted.
+upb_DecodeStatus upb_Array_PromoteMessages(upb_Array* arr,
+ const upb_MiniTable* mini_table,
+ int decode_options,
+ upb_Arena* arena);
+
+// Promotes any "empty" entries in this map to a message of the correct type
+// `mini_table`. This function should only be called for maps that have a
+// message type as the map value.
+//
+// If the return value indicates an error status, some but not all elements may
+// have been promoted, but the map itself will not be corrupted.
+upb_DecodeStatus upb_Map_PromoteMessages(upb_Map* map,
+ const upb_MiniTable* mini_table,
+ int decode_options, upb_Arena* arena);
+
+////////////////////////////////////////////////////////////////////////////////
+// OLD promotion interfaces, will be removed!
+////////////////////////////////////////////////////////////////////////////////
+
+// Promotes unknown data inside message to a upb_Message parsing the unknown.
+//
+// The unknown data is removed from message after field value is set
+// using upb_Message_SetMessage.
+//
+// WARNING!: See b/267655898
+upb_UnknownToMessageRet upb_MiniTable_PromoteUnknownToMessage(
+ upb_Message* msg, const upb_MiniTable* mini_table,
+ const upb_MiniTableField* field, const upb_MiniTable* sub_mini_table,
+ int decode_options, upb_Arena* arena);
+
+// Promotes all unknown data that matches field tag id to repeated messages
+// in upb_Array.
+//
+// The unknown data is removed from message after upb_Array is populated.
+// Since repeated messages can't be packed we remove each unknown that
+// contains the target tag id.
+upb_UnknownToMessage_Status upb_MiniTable_PromoteUnknownToMessageArray(
+ upb_Message* msg, const upb_MiniTableField* field,
+ const upb_MiniTable* mini_table, int decode_options, upb_Arena* arena);
+
+// Promotes all unknown data that matches field tag id to upb_Map.
+//
+// The unknown data is removed from message after upb_Map is populated.
+// Since repeated messages can't be packed we remove each unknown that
+// contains the target tag id.
+upb_UnknownToMessage_Status upb_MiniTable_PromoteUnknownToMap(
+ upb_Message* msg, const upb_MiniTable* mini_table,
+ const upb_MiniTableField* field, int decode_options, upb_Arena* arena);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif // UPB_MESSAGE_PROMOTE_H_
diff --git a/upb/upb/message/promote_test.cc b/upb/upb/message/promote_test.cc
new file mode 100644
index 0000000..225e1ce
--- /dev/null
+++ b/upb/upb/message/promote_test.cc
@@ -0,0 +1,863 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+/* Test of mini table accessors.
+ *
+ * Messages are created and mutated using generated code, and then
+ * accessed through reflective APIs exposed through mini table accessors.
+ */
+
+#include "upb/message/promote.h"
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "google/protobuf/test_messages_proto2.upb.h"
+#include "google/protobuf/test_messages_proto3.upb.h"
+#include "upb/base/string_view.h"
+#include "upb/collections/array.h"
+#include "upb/mem/arena.hpp"
+#include "upb/message/accessors.h"
+#include "upb/message/copy.h"
+#include "upb/mini_descriptor/internal/encode.hpp"
+#include "upb/mini_descriptor/internal/modifiers.h"
+#include "upb/test/test.upb.h"
+#include "upb/wire/decode.h"
+
+// Must be last
+#include "upb/port/def.inc"
+
+namespace {
+
+TEST(GeneratedCode, FindUnknown) {
+ upb_Arena* arena = upb_Arena_New();
+ upb_test_ModelWithExtensions* msg = upb_test_ModelWithExtensions_new(arena);
+ upb_test_ModelWithExtensions_set_random_int32(msg, 10);
+ upb_test_ModelWithExtensions_set_random_name(
+ msg, upb_StringView_FromString("Hello"));
+
+ upb_test_ModelExtension1* extension1 = upb_test_ModelExtension1_new(arena);
+ upb_test_ModelExtension1_set_str(extension1,
+ upb_StringView_FromString("World"));
+
+ upb_test_ModelExtension1_set_model_ext(msg, extension1, arena);
+
+ size_t serialized_size;
+ char* serialized =
+ upb_test_ModelWithExtensions_serialize(msg, arena, &serialized_size);
+
+ upb_test_EmptyMessageWithExtensions* base_msg =
+ upb_test_EmptyMessageWithExtensions_parse(serialized, serialized_size,
+ arena);
+
+ upb_FindUnknownRet result = upb_MiniTable_FindUnknown(
+ base_msg, upb_test_ModelExtension1_model_ext_ext.field.number,
+ kUpb_WireFormat_DefaultDepthLimit);
+ EXPECT_EQ(kUpb_FindUnknown_Ok, result.status);
+
+ result = upb_MiniTable_FindUnknown(
+ base_msg, upb_test_ModelExtension2_model_ext_ext.field.number,
+ kUpb_WireFormat_DefaultDepthLimit);
+ EXPECT_EQ(kUpb_FindUnknown_NotPresent, result.status);
+
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, Extensions) {
+ upb_Arena* arena = upb_Arena_New();
+ upb_test_ModelWithExtensions* msg = upb_test_ModelWithExtensions_new(arena);
+ upb_test_ModelWithExtensions_set_random_int32(msg, 10);
+ upb_test_ModelWithExtensions_set_random_name(
+ msg, upb_StringView_FromString("Hello"));
+
+ upb_test_ModelExtension1* extension1 = upb_test_ModelExtension1_new(arena);
+ upb_test_ModelExtension1_set_str(extension1,
+ upb_StringView_FromString("World"));
+
+ upb_test_ModelExtension2* extension2 = upb_test_ModelExtension2_new(arena);
+ upb_test_ModelExtension2_set_i(extension2, 5);
+
+ upb_test_ModelExtension2* extension3 = upb_test_ModelExtension2_new(arena);
+ upb_test_ModelExtension2_set_i(extension3, 6);
+
+ upb_test_ModelExtension2* extension4 = upb_test_ModelExtension2_new(arena);
+ upb_test_ModelExtension2_set_i(extension4, 7);
+
+ upb_test_ModelExtension2* extension5 = upb_test_ModelExtension2_new(arena);
+ upb_test_ModelExtension2_set_i(extension5, 8);
+
+ upb_test_ModelExtension2* extension6 = upb_test_ModelExtension2_new(arena);
+ upb_test_ModelExtension2_set_i(extension6, 9);
+
+ // Set many extensions, to exercise code paths that involve reallocating the
+ // extensions and unknown fields array.
+ upb_test_ModelExtension1_set_model_ext(msg, extension1, arena);
+ upb_test_ModelExtension2_set_model_ext(msg, extension2, arena);
+ upb_test_ModelExtension2_set_model_ext_2(msg, extension3, arena);
+ upb_test_ModelExtension2_set_model_ext_3(msg, extension4, arena);
+ upb_test_ModelExtension2_set_model_ext_4(msg, extension5, arena);
+ upb_test_ModelExtension2_set_model_ext_5(msg, extension6, arena);
+
+ size_t serialized_size;
+ char* serialized =
+ upb_test_ModelWithExtensions_serialize(msg, arena, &serialized_size);
+
+ const upb_Message_Extension* upb_ext2;
+ upb_test_ModelExtension1* ext1;
+ upb_test_ModelExtension2* ext2;
+ upb_GetExtension_Status promote_status;
+
+ // Test known GetExtension 1
+ promote_status = upb_MiniTable_GetOrPromoteExtension(
+ msg, &upb_test_ModelExtension1_model_ext_ext, 0, arena, &upb_ext2);
+ ext1 = (upb_test_ModelExtension1*)upb_ext2->data.ptr;
+ EXPECT_EQ(kUpb_GetExtension_Ok, promote_status);
+ EXPECT_TRUE(upb_StringView_IsEqual(upb_StringView_FromString("World"),
+ upb_test_ModelExtension1_str(ext1)));
+
+ // Test known GetExtension 2
+ promote_status = upb_MiniTable_GetOrPromoteExtension(
+ msg, &upb_test_ModelExtension2_model_ext_ext, 0, arena, &upb_ext2);
+ ext2 = (upb_test_ModelExtension2*)upb_ext2->data.ptr;
+ EXPECT_EQ(kUpb_GetExtension_Ok, promote_status);
+ EXPECT_EQ(5, upb_test_ModelExtension2_i(ext2));
+
+ // Test known GetExtension 3
+ promote_status = upb_MiniTable_GetOrPromoteExtension(
+ msg, &upb_test_ModelExtension2_model_ext_2_ext, 0, arena, &upb_ext2);
+ ext2 = (upb_test_ModelExtension2*)upb_ext2->data.ptr;
+ EXPECT_EQ(kUpb_GetExtension_Ok, promote_status);
+ EXPECT_EQ(6, upb_test_ModelExtension2_i(ext2));
+
+ // Test known GetExtension 4
+ promote_status = upb_MiniTable_GetOrPromoteExtension(
+ msg, &upb_test_ModelExtension2_model_ext_3_ext, 0, arena, &upb_ext2);
+ ext2 = (upb_test_ModelExtension2*)upb_ext2->data.ptr;
+ EXPECT_EQ(kUpb_GetExtension_Ok, promote_status);
+ EXPECT_EQ(7, upb_test_ModelExtension2_i(ext2));
+
+ // Test known GetExtension 5
+ promote_status = upb_MiniTable_GetOrPromoteExtension(
+ msg, &upb_test_ModelExtension2_model_ext_4_ext, 0, arena, &upb_ext2);
+ ext2 = (upb_test_ModelExtension2*)upb_ext2->data.ptr;
+ EXPECT_EQ(kUpb_GetExtension_Ok, promote_status);
+ EXPECT_EQ(8, upb_test_ModelExtension2_i(ext2));
+
+ // Test known GetExtension 6
+ promote_status = upb_MiniTable_GetOrPromoteExtension(
+ msg, &upb_test_ModelExtension2_model_ext_5_ext, 0, arena, &upb_ext2);
+ ext2 = (upb_test_ModelExtension2*)upb_ext2->data.ptr;
+ EXPECT_EQ(kUpb_GetExtension_Ok, promote_status);
+ EXPECT_EQ(9, upb_test_ModelExtension2_i(ext2));
+
+ upb_test_EmptyMessageWithExtensions* base_msg =
+ upb_test_EmptyMessageWithExtensions_parse(serialized, serialized_size,
+ arena);
+
+ // Get unknown extension bytes before promotion.
+ size_t start_len;
+ upb_Message_GetUnknown(base_msg, &start_len);
+ EXPECT_GT(start_len, 0);
+ EXPECT_EQ(0, upb_Message_ExtensionCount(base_msg));
+
+ // Test unknown GetExtension.
+ promote_status = upb_MiniTable_GetOrPromoteExtension(
+ base_msg, &upb_test_ModelExtension1_model_ext_ext, 0, arena, &upb_ext2);
+ ext1 = (upb_test_ModelExtension1*)upb_ext2->data.ptr;
+ EXPECT_EQ(kUpb_GetExtension_Ok, promote_status);
+ EXPECT_TRUE(upb_StringView_IsEqual(upb_StringView_FromString("World"),
+ upb_test_ModelExtension1_str(ext1)));
+
+ // Test unknown GetExtension.
+ promote_status = upb_MiniTable_GetOrPromoteExtension(
+ base_msg, &upb_test_ModelExtension2_model_ext_ext, 0, arena, &upb_ext2);
+ ext2 = (upb_test_ModelExtension2*)upb_ext2->data.ptr;
+ EXPECT_EQ(kUpb_GetExtension_Ok, promote_status);
+ EXPECT_EQ(5, upb_test_ModelExtension2_i(ext2));
+
+ // Test unknown GetExtension.
+ promote_status = upb_MiniTable_GetOrPromoteExtension(
+ base_msg, &upb_test_ModelExtension2_model_ext_2_ext, 0, arena, &upb_ext2);
+ ext2 = (upb_test_ModelExtension2*)upb_ext2->data.ptr;
+ EXPECT_EQ(kUpb_GetExtension_Ok, promote_status);
+ EXPECT_EQ(6, upb_test_ModelExtension2_i(ext2));
+
+ // Test unknown GetExtension.
+ promote_status = upb_MiniTable_GetOrPromoteExtension(
+ base_msg, &upb_test_ModelExtension2_model_ext_3_ext, 0, arena, &upb_ext2);
+ ext2 = (upb_test_ModelExtension2*)upb_ext2->data.ptr;
+ EXPECT_EQ(kUpb_GetExtension_Ok, promote_status);
+ EXPECT_EQ(7, upb_test_ModelExtension2_i(ext2));
+
+ // Test unknown GetExtension.
+ promote_status = upb_MiniTable_GetOrPromoteExtension(
+ base_msg, &upb_test_ModelExtension2_model_ext_4_ext, 0, arena, &upb_ext2);
+ ext2 = (upb_test_ModelExtension2*)upb_ext2->data.ptr;
+ EXPECT_EQ(kUpb_GetExtension_Ok, promote_status);
+ EXPECT_EQ(8, upb_test_ModelExtension2_i(ext2));
+
+ // Test unknown GetExtension.
+ promote_status = upb_MiniTable_GetOrPromoteExtension(
+ base_msg, &upb_test_ModelExtension2_model_ext_5_ext, 0, arena, &upb_ext2);
+ ext2 = (upb_test_ModelExtension2*)upb_ext2->data.ptr;
+ EXPECT_EQ(kUpb_GetExtension_Ok, promote_status);
+ EXPECT_EQ(9, upb_test_ModelExtension2_i(ext2));
+
+ size_t end_len;
+ upb_Message_GetUnknown(base_msg, &end_len);
+ EXPECT_LT(end_len, start_len);
+ EXPECT_EQ(6, upb_Message_ExtensionCount(base_msg));
+
+ upb_Arena_Free(arena);
+}
+
+// Create a minitable to mimic ModelWithSubMessages with unlinked subs
+// to lazily promote unknowns after parsing.
+upb_MiniTable* CreateMiniTableWithEmptySubTables(upb_Arena* arena) {
+ upb::MtDataEncoder e;
+ e.StartMessage(0);
+ e.PutField(kUpb_FieldType_Int32, 4, 0);
+ e.PutField(kUpb_FieldType_Message, 5, 0);
+ e.PutField(kUpb_FieldType_Message, 6, kUpb_FieldModifier_IsRepeated);
+
+ upb_Status status;
+ upb_Status_Clear(&status);
+ upb_MiniTable* table =
+ upb_MiniTable_Build(e.data().data(), e.data().size(), arena, &status);
+ EXPECT_EQ(status.ok, true);
+ return table;
+}
+
+upb_MiniTable* CreateMapEntryMiniTable(upb_Arena* arena) {
+ upb::MtDataEncoder e;
+ e.EncodeMap(kUpb_FieldType_Int32, kUpb_FieldType_Message, 0, 0);
+ upb_Status status;
+ upb_Status_Clear(&status);
+ upb_MiniTable* table =
+ upb_MiniTable_Build(e.data().data(), e.data().size(), arena, &status);
+ EXPECT_EQ(status.ok, true);
+ return table;
+}
+
+// Create a minitable to mimic ModelWithMaps with unlinked subs
+// to lazily promote unknowns after parsing.
+upb_MiniTable* CreateMiniTableWithEmptySubTablesForMaps(upb_Arena* arena) {
+ upb::MtDataEncoder e;
+ e.StartMessage(0);
+ e.PutField(kUpb_FieldType_Int32, 1, 0);
+ e.PutField(kUpb_FieldType_Message, 3, kUpb_FieldModifier_IsRepeated);
+ e.PutField(kUpb_FieldType_Message, 5, kUpb_FieldModifier_IsRepeated);
+
+ upb_Status status;
+ upb_Status_Clear(&status);
+ upb_MiniTable* table =
+ upb_MiniTable_Build(e.data().data(), e.data().size(), arena, &status);
+
+ // Field 5 corresponds to ModelWithMaps.map_sm.
+ upb_MiniTableField* map_field = const_cast<upb_MiniTableField*>(
+ upb_MiniTable_FindFieldByNumber(table, 5));
+ EXPECT_NE(map_field, nullptr);
+ upb_MiniTable* sub_table = CreateMapEntryMiniTable(arena);
+ upb_MiniTable_SetSubMessage(table, map_field, sub_table);
+ EXPECT_EQ(status.ok, true);
+ return table;
+}
+
+void CheckReserialize(const upb_Message* msg, const upb_MiniTable* mini_table,
+ upb_Arena* arena, char* serialized,
+ size_t serialized_size) {
+ // We can safely encode the "empty" message. We expect to get the same bytes
+ // out as were parsed.
+ size_t reserialized_size;
+ char* reserialized;
+ upb_EncodeStatus encode_status =
+ upb_Encode(msg, mini_table, kUpb_EncodeOption_Deterministic, arena,
+ &reserialized, &reserialized_size);
+ EXPECT_EQ(encode_status, kUpb_EncodeStatus_Ok);
+ EXPECT_EQ(reserialized_size, serialized_size);
+ EXPECT_EQ(0, memcmp(reserialized, serialized, serialized_size));
+
+ // We should get the same result if we copy+reserialize.
+ upb_Message* clone = upb_Message_DeepClone(msg, mini_table, arena);
+ encode_status = upb_Encode(clone, mini_table, kUpb_EncodeOption_Deterministic,
+ arena, &reserialized, &reserialized_size);
+ EXPECT_EQ(encode_status, kUpb_EncodeStatus_Ok);
+ EXPECT_EQ(reserialized_size, serialized_size);
+ EXPECT_EQ(0, memcmp(reserialized, serialized, serialized_size));
+}
+
+TEST(GeneratedCode, PromoteUnknownMessage) {
+ upb::Arena arena;
+ upb_test_ModelWithSubMessages* input_msg =
+ upb_test_ModelWithSubMessages_new(arena.ptr());
+ upb_test_ModelWithExtensions* sub_message =
+ upb_test_ModelWithExtensions_new(arena.ptr());
+ upb_test_ModelWithSubMessages_set_id(input_msg, 11);
+ upb_test_ModelWithExtensions_set_random_int32(sub_message, 12);
+ upb_test_ModelWithSubMessages_set_optional_child(input_msg, sub_message);
+ size_t serialized_size;
+ char* serialized = upb_test_ModelWithSubMessages_serialize(
+ input_msg, arena.ptr(), &serialized_size);
+
+ upb_MiniTable* mini_table = CreateMiniTableWithEmptySubTables(arena.ptr());
+ upb_DecodeStatus decode_status;
+
+ // If we parse without allowing unlinked objects, the parse will fail.
+ // TODO(haberman): re-enable this test once the old method of tree shaking is
+ // removed
+ // upb_Message* fail_msg = _upb_Message_New(mini_table, arena.ptr());
+ // decode_status =
+ // upb_Decode(serialized, serialized_size, fail_msg, mini_table, nullptr,
+ // 0,
+ // arena.ptr());
+ // EXPECT_EQ(decode_status, kUpb_DecodeStatus_UnlinkedSubMessage);
+
+ // if we parse while allowing unlinked objects, the parse will succeed.
+ upb_Message* msg = _upb_Message_New(mini_table, arena.ptr());
+ decode_status =
+ upb_Decode(serialized, serialized_size, msg, mini_table, nullptr,
+ kUpb_DecodeOption_ExperimentalAllowUnlinked, arena.ptr());
+ EXPECT_EQ(decode_status, kUpb_DecodeStatus_Ok);
+
+ CheckReserialize(msg, mini_table, arena.ptr(), serialized, serialized_size);
+
+ // We can encode the "empty" message and get the same output bytes.
+ size_t reserialized_size;
+ char* reserialized;
+ upb_EncodeStatus encode_status = upb_Encode(
+ msg, mini_table, 0, arena.ptr(), &reserialized, &reserialized_size);
+ EXPECT_EQ(encode_status, kUpb_EncodeStatus_Ok);
+ EXPECT_EQ(reserialized_size, serialized_size);
+ EXPECT_EQ(0, memcmp(reserialized, serialized, serialized_size));
+
+ // Int32 field is present, as normal.
+ int32_t val = upb_Message_GetInt32(
+ msg, upb_MiniTable_FindFieldByNumber(mini_table, 4), 0);
+ EXPECT_EQ(val, 11);
+
+ // Unlinked sub-message is present, but getting the value returns NULL.
+ const upb_MiniTableField* submsg_field =
+ upb_MiniTable_FindFieldByNumber(mini_table, 5);
+ ASSERT_TRUE(submsg_field != nullptr);
+ EXPECT_TRUE(upb_Message_HasField(msg, submsg_field));
+ upb_TaggedMessagePtr tagged =
+ upb_Message_GetTaggedMessagePtr(msg, submsg_field, nullptr);
+ EXPECT_TRUE(upb_TaggedMessagePtr_IsEmpty(tagged));
+
+ // Update mini table and promote unknown to a message.
+ EXPECT_TRUE(
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)submsg_field,
+ &upb_test_ModelWithExtensions_msg_init));
+
+ const int decode_options = upb_DecodeOptions_MaxDepth(
+ kUpb_WireFormat_DefaultDepthLimit); // UPB_DECODE_ALIAS disabled.
+ upb_test_ModelWithExtensions* promoted;
+ upb_DecodeStatus promote_result =
+ upb_Message_PromoteMessage(msg, mini_table, submsg_field, decode_options,
+ arena.ptr(), (upb_Message**)&promoted);
+ EXPECT_EQ(promote_result, kUpb_DecodeStatus_Ok);
+ EXPECT_NE(nullptr, promoted);
+ EXPECT_EQ(promoted, upb_Message_GetMessage(msg, submsg_field, nullptr));
+ EXPECT_EQ(upb_test_ModelWithExtensions_random_int32(promoted), 12);
+}
+
+// Tests a second parse that reuses an empty/unlinked message while the message
+// is still unlinked.
+TEST(GeneratedCode, ReparseUnlinked) {
+ upb::Arena arena;
+ upb_test_ModelWithSubMessages* input_msg =
+ upb_test_ModelWithSubMessages_new(arena.ptr());
+ upb_test_ModelWithExtensions* sub_message =
+ upb_test_ModelWithExtensions_new(arena.ptr());
+ upb_test_ModelWithSubMessages_set_id(input_msg, 11);
+ upb_test_ModelWithExtensions_add_repeated_int32(sub_message, 12, arena.ptr());
+ upb_test_ModelWithSubMessages_set_optional_child(input_msg, sub_message);
+ size_t serialized_size;
+ char* serialized = upb_test_ModelWithSubMessages_serialize(
+ input_msg, arena.ptr(), &serialized_size);
+
+ upb_MiniTable* mini_table = CreateMiniTableWithEmptySubTables(arena.ptr());
+
+ // Parse twice without linking the MiniTable.
+ upb_Message* msg = _upb_Message_New(mini_table, arena.ptr());
+ upb_DecodeStatus decode_status =
+ upb_Decode(serialized, serialized_size, msg, mini_table, nullptr,
+ kUpb_DecodeOption_ExperimentalAllowUnlinked, arena.ptr());
+ EXPECT_EQ(decode_status, kUpb_DecodeStatus_Ok);
+
+ decode_status =
+ upb_Decode(serialized, serialized_size, msg, mini_table, nullptr,
+ kUpb_DecodeOption_ExperimentalAllowUnlinked, arena.ptr());
+ EXPECT_EQ(decode_status, kUpb_DecodeStatus_Ok);
+
+ // Update mini table and promote unknown to a message.
+ const upb_MiniTableField* submsg_field =
+ upb_MiniTable_FindFieldByNumber(mini_table, 5);
+ EXPECT_TRUE(
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)submsg_field,
+ &upb_test_ModelWithExtensions_msg_init));
+
+ const int decode_options = upb_DecodeOptions_MaxDepth(
+ kUpb_WireFormat_DefaultDepthLimit); // UPB_DECODE_ALIAS disabled.
+ upb_test_ModelWithExtensions* promoted;
+ upb_DecodeStatus promote_result =
+ upb_Message_PromoteMessage(msg, mini_table, submsg_field, decode_options,
+ arena.ptr(), (upb_Message**)&promoted);
+ EXPECT_EQ(promote_result, kUpb_DecodeStatus_Ok);
+ EXPECT_NE(nullptr, promoted);
+ EXPECT_EQ(promoted, upb_Message_GetMessage(msg, submsg_field, nullptr));
+
+ // The repeated field should have two entries for the two parses.
+ size_t repeated_size;
+ const int32_t* entries =
+ upb_test_ModelWithExtensions_repeated_int32(promoted, &repeated_size);
+ EXPECT_EQ(repeated_size, 2);
+ EXPECT_EQ(entries[0], 12);
+ EXPECT_EQ(entries[1], 12);
+}
+
+// Tests a second parse that promotes a message within the parser because we are
+// merging into an empty/unlinked message after the message has been linked.
+TEST(GeneratedCode, PromoteInParser) {
+ upb::Arena arena;
+ upb_test_ModelWithSubMessages* input_msg =
+ upb_test_ModelWithSubMessages_new(arena.ptr());
+ upb_test_ModelWithExtensions* sub_message =
+ upb_test_ModelWithExtensions_new(arena.ptr());
+ upb_test_ModelWithSubMessages_set_id(input_msg, 11);
+ upb_test_ModelWithExtensions_add_repeated_int32(sub_message, 12, arena.ptr());
+ upb_test_ModelWithSubMessages_set_optional_child(input_msg, sub_message);
+ size_t serialized_size;
+ char* serialized = upb_test_ModelWithSubMessages_serialize(
+ input_msg, arena.ptr(), &serialized_size);
+
+ upb_MiniTable* mini_table = CreateMiniTableWithEmptySubTables(arena.ptr());
+
+ // Parse once without linking the MiniTable.
+ upb_Message* msg = _upb_Message_New(mini_table, arena.ptr());
+ upb_DecodeStatus decode_status =
+ upb_Decode(serialized, serialized_size, msg, mini_table, nullptr,
+ kUpb_DecodeOption_ExperimentalAllowUnlinked, arena.ptr());
+ EXPECT_EQ(decode_status, kUpb_DecodeStatus_Ok);
+
+ // Link the MiniTable.
+ const upb_MiniTableField* submsg_field =
+ upb_MiniTable_FindFieldByNumber(mini_table, 5);
+ EXPECT_TRUE(
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)submsg_field,
+ &upb_test_ModelWithExtensions_msg_init));
+
+ // Parse again. This will promote the message. An explicit promote will not
+ // be required.
+ decode_status =
+ upb_Decode(serialized, serialized_size, msg, mini_table, nullptr,
+ kUpb_DecodeOption_ExperimentalAllowUnlinked, arena.ptr());
+ EXPECT_EQ(decode_status, kUpb_DecodeStatus_Ok);
+ upb_test_ModelWithExtensions* promoted =
+ (upb_test_ModelWithExtensions*)upb_Message_GetMessage(msg, submsg_field,
+ nullptr);
+
+ EXPECT_NE(nullptr, promoted);
+ EXPECT_EQ(promoted, upb_Message_GetMessage(msg, submsg_field, nullptr));
+
+ // The repeated field should have two entries for the two parses.
+ size_t repeated_size;
+ const int32_t* entries =
+ upb_test_ModelWithExtensions_repeated_int32(promoted, &repeated_size);
+ EXPECT_EQ(repeated_size, 2);
+ EXPECT_EQ(entries[0], 12);
+ EXPECT_EQ(entries[1], 12);
+}
+
+TEST(GeneratedCode, PromoteUnknownRepeatedMessage) {
+ upb::Arena arena;
+ upb_test_ModelWithSubMessages* input_msg =
+ upb_test_ModelWithSubMessages_new(arena.ptr());
+ upb_test_ModelWithSubMessages_set_id(input_msg, 123);
+
+ // Add 2 repeated messages to input_msg.
+ upb_test_ModelWithExtensions* item =
+ upb_test_ModelWithSubMessages_add_items(input_msg, arena.ptr());
+ upb_test_ModelWithExtensions_set_random_int32(item, 5);
+ item = upb_test_ModelWithSubMessages_add_items(input_msg, arena.ptr());
+ upb_test_ModelWithExtensions_set_random_int32(item, 6);
+
+ size_t serialized_size;
+ char* serialized = upb_test_ModelWithSubMessages_serialize(
+ input_msg, arena.ptr(), &serialized_size);
+
+ upb_MiniTable* mini_table = CreateMiniTableWithEmptySubTables(arena.ptr());
+ upb_DecodeStatus decode_status;
+
+ // If we parse without allowing unlinked objects, the parse will fail.
+ // TODO(haberman): re-enable this test once the old method of tree shaking is
+ // removed
+ // upb_Message* fail_msg = _upb_Message_New(mini_table, arena.ptr());
+ // decode_status =
+ // upb_Decode(serialized, serialized_size, fail_msg, mini_table, nullptr,
+ // 0,
+ // arena.ptr());
+ // EXPECT_EQ(decode_status, kUpb_DecodeStatus_UnlinkedSubMessage);
+
+ // if we parse while allowing unlinked objects, the parse will succeed.
+ upb_Message* msg = _upb_Message_New(mini_table, arena.ptr());
+ decode_status =
+ upb_Decode(serialized, serialized_size, msg, mini_table, nullptr,
+ kUpb_DecodeOption_ExperimentalAllowUnlinked, arena.ptr());
+
+ CheckReserialize(msg, mini_table, arena.ptr(), serialized, serialized_size);
+
+ // Int32 field is present, as normal.
+ EXPECT_EQ(decode_status, kUpb_DecodeStatus_Ok);
+ int32_t val = upb_Message_GetInt32(
+ msg, upb_MiniTable_FindFieldByNumber(mini_table, 4), 0);
+ EXPECT_EQ(val, 123);
+
+ const upb_MiniTableField* repeated_field =
+ upb_MiniTable_FindFieldByNumber(mini_table, 6);
+
+ upb_Array* array = upb_Message_GetMutableArray(msg, repeated_field);
+
+ // Array length is 2 even though the messages are empty.
+ EXPECT_EQ(2, upb_Array_Size(array));
+
+ // Update mini table and promote unknown to a message.
+ EXPECT_TRUE(upb_MiniTable_SetSubMessage(
+ mini_table, (upb_MiniTableField*)repeated_field,
+ &upb_test_ModelWithExtensions_msg_init));
+ const int decode_options = upb_DecodeOptions_MaxDepth(
+ kUpb_WireFormat_DefaultDepthLimit); // UPB_DECODE_ALIAS disabled.
+ upb_DecodeStatus promote_result =
+ upb_Array_PromoteMessages(array, &upb_test_ModelWithExtensions_msg_init,
+ decode_options, arena.ptr());
+ EXPECT_EQ(promote_result, kUpb_DecodeStatus_Ok);
+ const upb_Message* promoted_message = upb_Array_Get(array, 0).msg_val;
+ EXPECT_EQ(upb_test_ModelWithExtensions_random_int32(
+ (upb_test_ModelWithExtensions*)promoted_message),
+ 5);
+ promoted_message = upb_Array_Get(array, 1).msg_val;
+ EXPECT_EQ(upb_test_ModelWithExtensions_random_int32(
+ (upb_test_ModelWithExtensions*)promoted_message),
+ 6);
+}
+
+TEST(GeneratedCode, PromoteUnknownToMap) {
+ upb::Arena arena;
+ upb_test_ModelWithMaps* input_msg = upb_test_ModelWithMaps_new(arena.ptr());
+ upb_test_ModelWithMaps_set_id(input_msg, 123);
+
+ upb_test_ModelWithExtensions* submsg1 =
+ upb_test_ModelWithExtensions_new(arena.ptr());
+ upb_test_ModelWithExtensions_set_random_int32(submsg1, 123);
+ upb_test_ModelWithExtensions* submsg2 =
+ upb_test_ModelWithExtensions_new(arena.ptr());
+ upb_test_ModelWithExtensions_set_random_int32(submsg2, 456);
+
+ // Add 2 map entries.
+ upb_test_ModelWithMaps_map_im_set(input_msg, 111, submsg1, arena.ptr());
+ upb_test_ModelWithMaps_map_im_set(input_msg, 222, submsg2, arena.ptr());
+
+ size_t serialized_size;
+ char* serialized = upb_test_ModelWithMaps_serialize_ex(
+ input_msg, kUpb_EncodeOption_Deterministic, arena.ptr(),
+ &serialized_size);
+
+ upb_MiniTable* mini_table =
+ CreateMiniTableWithEmptySubTablesForMaps(arena.ptr());
+
+ // If we parse without allowing unlinked objects, the parse will fail.
+ upb_Message* fail_msg1 = _upb_Message_New(mini_table, arena.ptr());
+ upb_DecodeStatus decode_status =
+ upb_Decode(serialized, serialized_size, fail_msg1, mini_table, nullptr, 0,
+ arena.ptr());
+ EXPECT_EQ(decode_status, kUpb_DecodeStatus_UnlinkedSubMessage);
+
+ // if we parse while allowing unlinked objects, the parse will succeed.
+ upb_Message* msg = _upb_Message_New(mini_table, arena.ptr());
+ decode_status =
+ upb_Decode(serialized, serialized_size, msg, mini_table, nullptr,
+ kUpb_DecodeOption_ExperimentalAllowUnlinked, arena.ptr());
+ EXPECT_EQ(decode_status, kUpb_DecodeStatus_Ok);
+
+ CheckReserialize(msg, mini_table, arena.ptr(), serialized, serialized_size);
+
+ upb_MiniTableField* map_field = const_cast<upb_MiniTableField*>(
+ upb_MiniTable_FindFieldByNumber(mini_table, 5));
+
+ upb_Map* map = upb_Message_GetMutableMap(msg, map_field);
+
+ // Map size is 2 even though messages are unlinked.
+ EXPECT_EQ(2, upb_Map_Size(map));
+
+ // Update mini table and promote unknown to a message.
+ upb_MiniTable* entry = const_cast<upb_MiniTable*>(
+ upb_MiniTable_GetSubMessageTable(mini_table, map_field));
+ upb_MiniTableField* entry_value = const_cast<upb_MiniTableField*>(
+ upb_MiniTable_FindFieldByNumber(entry, 2));
+ upb_MiniTable_SetSubMessage(entry, entry_value,
+ &upb_test_ModelWithExtensions_msg_init);
+ upb_DecodeStatus promote_result = upb_Map_PromoteMessages(
+ map, &upb_test_ModelWithExtensions_msg_init, 0, arena.ptr());
+ EXPECT_EQ(promote_result, kUpb_DecodeStatus_Ok);
+
+ upb_MessageValue key;
+ upb_MessageValue val;
+ key.int32_val = 111;
+ EXPECT_TRUE(upb_Map_Get(map, key, &val));
+ EXPECT_EQ(123,
+ upb_test_ModelWithExtensions_random_int32(
+ static_cast<const upb_test_ModelWithExtensions*>(val.msg_val)));
+
+ key.int32_val = 222;
+ EXPECT_TRUE(upb_Map_Get(map, key, &val));
+ EXPECT_EQ(456,
+ upb_test_ModelWithExtensions_random_int32(
+ static_cast<const upb_test_ModelWithExtensions*>(val.msg_val)));
+}
+
+} // namespace
+
+// OLD tests, to be removed!
+
+namespace {
+
+// Create a minitable to mimic ModelWithSubMessages with unlinked subs
+// to lazily promote unknowns after parsing.
+upb_MiniTable* CreateMiniTableWithEmptySubTablesOld(upb_Arena* arena) {
+ upb::MtDataEncoder e;
+ e.StartMessage(0);
+ e.PutField(kUpb_FieldType_Int32, 4, 0);
+ e.PutField(kUpb_FieldType_Message, 5, 0);
+ e.PutField(kUpb_FieldType_Message, 6, kUpb_FieldModifier_IsRepeated);
+
+ upb_Status status;
+ upb_Status_Clear(&status);
+ upb_MiniTable* table =
+ upb_MiniTable_Build(e.data().data(), e.data().size(), arena, &status);
+ EXPECT_EQ(status.ok, true);
+ // Initialize sub table to null. Not using upb_MiniTable_SetSubMessage
+ // since it checks ->ext on parameter.
+ upb_MiniTableSub* sub = const_cast<upb_MiniTableSub*>(
+ &table->subs[table->fields[1].UPB_PRIVATE(submsg_index)]);
+ sub = const_cast<upb_MiniTableSub*>(
+ &table->subs[table->fields[2].UPB_PRIVATE(submsg_index)]);
+ return table;
+}
+
+// Create a minitable to mimic ModelWithMaps with unlinked subs
+// to lazily promote unknowns after parsing.
+upb_MiniTable* CreateMiniTableWithEmptySubTablesForMapsOld(upb_Arena* arena) {
+ upb::MtDataEncoder e;
+ e.StartMessage(0);
+ e.PutField(kUpb_FieldType_Int32, 1, 0);
+ e.PutField(kUpb_FieldType_Message, 3, kUpb_FieldModifier_IsRepeated);
+ e.PutField(kUpb_FieldType_Message, 4, kUpb_FieldModifier_IsRepeated);
+
+ upb_Status status;
+ upb_Status_Clear(&status);
+ upb_MiniTable* table =
+ upb_MiniTable_Build(e.data().data(), e.data().size(), arena, &status);
+ EXPECT_EQ(status.ok, true);
+ // Initialize sub table to null. Not using upb_MiniTable_SetSubMessage
+ // since it checks ->ext on parameter.
+ upb_MiniTableSub* sub = const_cast<upb_MiniTableSub*>(
+ &table->subs[table->fields[1].UPB_PRIVATE(submsg_index)]);
+ sub = const_cast<upb_MiniTableSub*>(
+ &table->subs[table->fields[2].UPB_PRIVATE(submsg_index)]);
+ return table;
+}
+
+upb_MiniTable* CreateMapEntryMiniTableOld(upb_Arena* arena) {
+ upb::MtDataEncoder e;
+ e.EncodeMap(kUpb_FieldType_String, kUpb_FieldType_String, 0, 0);
+ upb_Status status;
+ upb_Status_Clear(&status);
+ upb_MiniTable* table =
+ upb_MiniTable_Build(e.data().data(), e.data().size(), arena, &status);
+ EXPECT_EQ(status.ok, true);
+ return table;
+}
+
+TEST(GeneratedCode, PromoteUnknownMessageOld) {
+ upb_Arena* arena = upb_Arena_New();
+ upb_test_ModelWithSubMessages* input_msg =
+ upb_test_ModelWithSubMessages_new(arena);
+ upb_test_ModelWithExtensions* sub_message =
+ upb_test_ModelWithExtensions_new(arena);
+ upb_test_ModelWithSubMessages_set_id(input_msg, 11);
+ upb_test_ModelWithExtensions_set_random_int32(sub_message, 12);
+ upb_test_ModelWithSubMessages_set_optional_child(input_msg, sub_message);
+ size_t serialized_size;
+ char* serialized = upb_test_ModelWithSubMessages_serialize(input_msg, arena,
+ &serialized_size);
+
+ upb_MiniTable* mini_table = CreateMiniTableWithEmptySubTablesOld(arena);
+ upb_Message* msg = _upb_Message_New(mini_table, arena);
+ upb_DecodeStatus decode_status = upb_Decode(serialized, serialized_size, msg,
+ mini_table, nullptr, 0, arena);
+ EXPECT_EQ(decode_status, kUpb_DecodeStatus_Ok);
+ int32_t val = upb_Message_GetInt32(
+ msg, upb_MiniTable_FindFieldByNumber(mini_table, 4), 0);
+ EXPECT_EQ(val, 11);
+ upb_FindUnknownRet unknown =
+ upb_MiniTable_FindUnknown(msg, 5, kUpb_WireFormat_DefaultDepthLimit);
+ EXPECT_EQ(unknown.status, kUpb_FindUnknown_Ok);
+ // Update mini table and promote unknown to a message.
+ EXPECT_TRUE(upb_MiniTable_SetSubMessage(
+ mini_table, (upb_MiniTableField*)&mini_table->fields[1],
+ &upb_test_ModelWithExtensions_msg_init));
+ const int decode_options = upb_DecodeOptions_MaxDepth(
+ kUpb_WireFormat_DefaultDepthLimit); // UPB_DECODE_ALIAS disabled.
+ upb_UnknownToMessageRet promote_result =
+ upb_MiniTable_PromoteUnknownToMessage(
+ msg, mini_table, &mini_table->fields[1],
+ &upb_test_ModelWithExtensions_msg_init, decode_options, arena);
+ EXPECT_EQ(promote_result.status, kUpb_UnknownToMessage_Ok);
+ const upb_Message* promoted_message =
+ upb_Message_GetMessage(msg, &mini_table->fields[1], nullptr);
+ EXPECT_EQ(upb_test_ModelWithExtensions_random_int32(
+ (upb_test_ModelWithExtensions*)promoted_message),
+ 12);
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, PromoteUnknownRepeatedMessageOld) {
+ upb_Arena* arena = upb_Arena_New();
+ upb_test_ModelWithSubMessages* input_msg =
+ upb_test_ModelWithSubMessages_new(arena);
+ upb_test_ModelWithSubMessages_set_id(input_msg, 123);
+
+ // Add 2 repeated messages to input_msg.
+ upb_test_ModelWithExtensions* item =
+ upb_test_ModelWithSubMessages_add_items(input_msg, arena);
+ upb_test_ModelWithExtensions_set_random_int32(item, 5);
+ item = upb_test_ModelWithSubMessages_add_items(input_msg, arena);
+ upb_test_ModelWithExtensions_set_random_int32(item, 6);
+
+ size_t serialized_size;
+ char* serialized = upb_test_ModelWithSubMessages_serialize(input_msg, arena,
+ &serialized_size);
+
+ upb_MiniTable* mini_table = CreateMiniTableWithEmptySubTablesOld(arena);
+ upb_Message* msg = _upb_Message_New(mini_table, arena);
+ upb_DecodeStatus decode_status = upb_Decode(serialized, serialized_size, msg,
+ mini_table, nullptr, 0, arena);
+ EXPECT_EQ(decode_status, kUpb_DecodeStatus_Ok);
+ int32_t val = upb_Message_GetInt32(
+ msg, upb_MiniTable_FindFieldByNumber(mini_table, 4), 0);
+ EXPECT_EQ(val, 123);
+
+ // Check that we have repeated field data in an unknown.
+ upb_FindUnknownRet unknown =
+ upb_MiniTable_FindUnknown(msg, 6, kUpb_WireFormat_DefaultDepthLimit);
+ EXPECT_EQ(unknown.status, kUpb_FindUnknown_Ok);
+
+ // Update mini table and promote unknown to a message.
+ EXPECT_TRUE(upb_MiniTable_SetSubMessage(
+ mini_table, (upb_MiniTableField*)&mini_table->fields[2],
+ &upb_test_ModelWithExtensions_msg_init));
+ const int decode_options = upb_DecodeOptions_MaxDepth(
+ kUpb_WireFormat_DefaultDepthLimit); // UPB_DECODE_ALIAS disabled.
+ upb_UnknownToMessage_Status promote_result =
+ upb_MiniTable_PromoteUnknownToMessageArray(
+ msg, &mini_table->fields[2], &upb_test_ModelWithExtensions_msg_init,
+ decode_options, arena);
+ EXPECT_EQ(promote_result, kUpb_UnknownToMessage_Ok);
+
+ upb_Array* array = upb_Message_GetMutableArray(msg, &mini_table->fields[2]);
+ const upb_Message* promoted_message = upb_Array_Get(array, 0).msg_val;
+ EXPECT_EQ(upb_test_ModelWithExtensions_random_int32(
+ (upb_test_ModelWithExtensions*)promoted_message),
+ 5);
+ promoted_message = upb_Array_Get(array, 1).msg_val;
+ EXPECT_EQ(upb_test_ModelWithExtensions_random_int32(
+ (upb_test_ModelWithExtensions*)promoted_message),
+ 6);
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, PromoteUnknownToMapOld) {
+ upb_Arena* arena = upb_Arena_New();
+ upb_test_ModelWithMaps* input_msg = upb_test_ModelWithMaps_new(arena);
+ upb_test_ModelWithMaps_set_id(input_msg, 123);
+
+ // Add 2 map entries.
+ upb_test_ModelWithMaps_map_ss_set(input_msg,
+ upb_StringView_FromString("key1"),
+ upb_StringView_FromString("value1"), arena);
+ upb_test_ModelWithMaps_map_ss_set(input_msg,
+ upb_StringView_FromString("key2"),
+ upb_StringView_FromString("value2"), arena);
+
+ size_t serialized_size;
+ char* serialized =
+ upb_test_ModelWithMaps_serialize(input_msg, arena, &serialized_size);
+
+ upb_MiniTable* mini_table =
+ CreateMiniTableWithEmptySubTablesForMapsOld(arena);
+ upb_MiniTable* map_entry_mini_table = CreateMapEntryMiniTableOld(arena);
+ upb_Message* msg = _upb_Message_New(mini_table, arena);
+ const int decode_options =
+ upb_DecodeOptions_MaxDepth(kUpb_WireFormat_DefaultDepthLimit);
+ upb_DecodeStatus decode_status =
+ upb_Decode(serialized, serialized_size, msg, mini_table, nullptr,
+ decode_options, arena);
+ EXPECT_EQ(decode_status, kUpb_DecodeStatus_Ok);
+ int32_t val = upb_Message_GetInt32(
+ msg, upb_MiniTable_FindFieldByNumber(mini_table, 1), 0);
+ EXPECT_EQ(val, 123);
+
+ // Check that we have map data in an unknown.
+ upb_FindUnknownRet unknown =
+ upb_MiniTable_FindUnknown(msg, 3, kUpb_WireFormat_DefaultDepthLimit);
+ EXPECT_EQ(unknown.status, kUpb_FindUnknown_Ok);
+
+ // Update mini table and promote unknown to a message.
+ EXPECT_TRUE(upb_MiniTable_SetSubMessage(
+ mini_table, (upb_MiniTableField*)&mini_table->fields[1],
+ map_entry_mini_table));
+ upb_UnknownToMessage_Status promote_result =
+ upb_MiniTable_PromoteUnknownToMap(msg, mini_table, &mini_table->fields[1],
+ decode_options, arena);
+ EXPECT_EQ(promote_result, kUpb_UnknownToMessage_Ok);
+
+ upb_Map* map = upb_Message_GetOrCreateMutableMap(
+ msg, map_entry_mini_table, &mini_table->fields[1], arena);
+ EXPECT_NE(map, nullptr);
+ // Lookup in map.
+ upb_MessageValue key;
+ key.str_val = upb_StringView_FromString("key2");
+ upb_MessageValue value;
+ EXPECT_TRUE(upb_Map_Get(map, key, &value));
+ EXPECT_EQ(0, strncmp(value.str_val.data, "value2", 5));
+ upb_Arena_Free(arena);
+}
+
+} // namespace
diff --git a/upb/upb/message/tagged_ptr.h b/upb/upb/message/tagged_ptr.h
new file mode 100644
index 0000000..2ae3a09
--- /dev/null
+++ b/upb/upb/message/tagged_ptr.h
@@ -0,0 +1,92 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MINI_TABLE_TYPES_H_
+#define UPB_MINI_TABLE_TYPES_H_
+
+#include <stdint.h>
+
+#include "upb/message/types.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// When a upb_Message* is stored in a message, array, or map, it is stored in a
+// tagged form. If the tag bit is set, the referenced upb_Message is of type
+// _kUpb_MiniTable_Empty (a sentinel message type with no fields) instead of
+// that field's true message type. This forms the basis of what we call
+// "dynamic tree shaking."
+//
+// See the documentation for kUpb_DecodeOption_ExperimentalAllowUnlinked for
+// more information.
+typedef uintptr_t upb_TaggedMessagePtr;
+
+// Internal-only because empty messages cannot be created by the user.
+UPB_INLINE upb_TaggedMessagePtr _upb_TaggedMessagePtr_Pack(upb_Message* ptr,
+ bool empty) {
+ UPB_ASSERT(((uintptr_t)ptr & 1) == 0);
+ return (uintptr_t)ptr | (empty ? 1 : 0);
+}
+
+// Users who enable unlinked sub-messages must use this to test whether a
+// message is empty before accessing it. If a message is empty, it must be
+// first promoted using the interfaces in message/promote.h.
+UPB_INLINE bool upb_TaggedMessagePtr_IsEmpty(upb_TaggedMessagePtr ptr) {
+ return ptr & 1;
+}
+
+UPB_INLINE upb_Message* _upb_TaggedMessagePtr_GetMessage(
+ upb_TaggedMessagePtr ptr) {
+ return (upb_Message*)(ptr & ~(uintptr_t)1);
+}
+
+UPB_INLINE upb_Message* upb_TaggedMessagePtr_GetNonEmptyMessage(
+ upb_TaggedMessagePtr ptr) {
+ UPB_ASSERT(!upb_TaggedMessagePtr_IsEmpty(ptr));
+ return _upb_TaggedMessagePtr_GetMessage(ptr);
+}
+
+UPB_INLINE upb_Message* _upb_TaggedMessagePtr_GetEmptyMessage(
+ upb_TaggedMessagePtr ptr) {
+ UPB_ASSERT(upb_TaggedMessagePtr_IsEmpty(ptr));
+ return _upb_TaggedMessagePtr_GetMessage(ptr);
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_MINI_TABLE_TYPES_H_ */
diff --git a/upb/upb/message/test.cc b/upb/upb/message/test.cc
new file mode 100644
index 0000000..c016b24
--- /dev/null
+++ b/upb/upb/message/test.cc
@@ -0,0 +1,686 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <string>
+#include <string_view>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "google/protobuf/test_messages_proto3.upb.h"
+#include "upb/base/status.hpp"
+#include "upb/json/decode.h"
+#include "upb/json/encode.h"
+#include "upb/mem/arena.hpp"
+#include "upb/message/test.upb.h"
+#include "upb/message/test.upbdefs.h"
+#include "upb/reflection/def.hpp"
+#include "upb/test/fuzz_util.h"
+#include "upb/wire/decode.h"
+
+// begin:google_only
+// #include "testing/fuzzing/fuzztest.h"
+// end:google_only
+
+void VerifyMessage(const upb_test_TestExtensions* ext_msg) {
+ EXPECT_TRUE(upb_test_TestExtensions_has_optional_int32_ext(ext_msg));
+ // EXPECT_FALSE(upb_test_TestExtensions_Nested_has_optional_int32_ext(ext_msg));
+ EXPECT_TRUE(upb_test_has_optional_msg_ext(ext_msg));
+
+ EXPECT_EQ(123, upb_test_TestExtensions_optional_int32_ext(ext_msg));
+ const protobuf_test_messages_proto3_TestAllTypesProto3* ext_submsg =
+ upb_test_optional_msg_ext(ext_msg);
+ EXPECT_TRUE(ext_submsg != nullptr);
+ EXPECT_EQ(456,
+ protobuf_test_messages_proto3_TestAllTypesProto3_optional_int32(
+ ext_submsg));
+}
+
+TEST(MessageTest, Extensions) {
+ upb::Arena arena;
+ upb_test_TestExtensions* ext_msg = upb_test_TestExtensions_new(arena.ptr());
+
+ EXPECT_FALSE(upb_test_TestExtensions_has_optional_int32_ext(ext_msg));
+ // EXPECT_FALSE(upb_test_TestExtensions_Nested_has_optional_int32_ext(ext_msg));
+ EXPECT_FALSE(upb_test_has_optional_msg_ext(ext_msg));
+
+ upb::DefPool defpool;
+ upb::MessageDefPtr m(upb_test_TestExtensions_getmsgdef(defpool.ptr()));
+ EXPECT_TRUE(m.ptr() != nullptr);
+
+ std::string json = R"json(
+ {
+ "[upb_test.TestExtensions.optional_int32_ext]": 123,
+ "[upb_test.TestExtensions.Nested.repeated_int32_ext]": [2, 4, 6],
+ "[upb_test.optional_msg_ext]": {"optional_int32": 456}
+ }
+ )json";
+ upb::Status status;
+ EXPECT_TRUE(upb_JsonDecode(json.data(), json.size(), ext_msg, m.ptr(),
+ defpool.ptr(), 0, arena.ptr(), status.ptr()))
+ << status.error_message();
+
+ VerifyMessage(ext_msg);
+
+ // Test round-trip through binary format.
+ size_t size;
+ char* serialized =
+ upb_test_TestExtensions_serialize(ext_msg, arena.ptr(), &size);
+ ASSERT_TRUE(serialized != nullptr);
+ ASSERT_GE(size, 0);
+
+ upb_test_TestExtensions* ext_msg2 = upb_test_TestExtensions_parse_ex(
+ serialized, size, upb_DefPool_ExtensionRegistry(defpool.ptr()), 0,
+ arena.ptr());
+ VerifyMessage(ext_msg2);
+
+ // Test round-trip through JSON format.
+ size_t json_size = upb_JsonEncode(ext_msg, m.ptr(), defpool.ptr(), 0, nullptr,
+ 0, status.ptr());
+ char* json_buf =
+ static_cast<char*>(upb_Arena_Malloc(arena.ptr(), json_size + 1));
+ upb_JsonEncode(ext_msg, m.ptr(), defpool.ptr(), 0, json_buf, json_size + 1,
+ status.ptr());
+ upb_test_TestExtensions* ext_msg3 = upb_test_TestExtensions_new(arena.ptr());
+ EXPECT_TRUE(upb_JsonDecode(json_buf, json_size, ext_msg3, m.ptr(),
+ defpool.ptr(), 0, arena.ptr(), status.ptr()))
+ << status.error_message();
+ VerifyMessage(ext_msg3);
+}
+
+void VerifyMessageSet(const upb_test_TestMessageSet* mset_msg) {
+ ASSERT_TRUE(mset_msg != nullptr);
+ bool has = upb_test_MessageSetMember_has_message_set_extension(mset_msg);
+ EXPECT_TRUE(has);
+ if (!has) return;
+ const upb_test_MessageSetMember* member =
+ upb_test_MessageSetMember_message_set_extension(mset_msg);
+ EXPECT_TRUE(member != nullptr);
+ EXPECT_TRUE(upb_test_MessageSetMember_has_optional_int32(member));
+ EXPECT_EQ(234, upb_test_MessageSetMember_optional_int32(member));
+}
+
+TEST(MessageTest, MessageSet) {
+ upb::Arena arena;
+ upb_test_TestMessageSet* ext_msg = upb_test_TestMessageSet_new(arena.ptr());
+
+ EXPECT_FALSE(upb_test_MessageSetMember_has_message_set_extension(ext_msg));
+
+ upb::DefPool defpool;
+ upb::MessageDefPtr m(upb_test_TestMessageSet_getmsgdef(defpool.ptr()));
+ EXPECT_TRUE(m.ptr() != nullptr);
+
+ std::string json = R"json(
+ {
+ "[upb_test.MessageSetMember]": {"optional_int32": 234}
+ }
+ )json";
+ upb::Status status;
+ EXPECT_TRUE(upb_JsonDecode(json.data(), json.size(), ext_msg, m.ptr(),
+ defpool.ptr(), 0, arena.ptr(), status.ptr()))
+ << status.error_message();
+
+ VerifyMessageSet(ext_msg);
+
+ // Test round-trip through binary format.
+ size_t size;
+ char* serialized =
+ upb_test_TestMessageSet_serialize(ext_msg, arena.ptr(), &size);
+ ASSERT_TRUE(serialized != nullptr);
+ ASSERT_GE(size, 0);
+
+ upb_test_TestMessageSet* ext_msg2 = upb_test_TestMessageSet_parse_ex(
+ serialized, size, upb_DefPool_ExtensionRegistry(defpool.ptr()), 0,
+ arena.ptr());
+ VerifyMessageSet(ext_msg2);
+
+ // Test round-trip through JSON format.
+ size_t json_size = upb_JsonEncode(ext_msg, m.ptr(), defpool.ptr(), 0, nullptr,
+ 0, status.ptr());
+ char* json_buf =
+ static_cast<char*>(upb_Arena_Malloc(arena.ptr(), json_size + 1));
+ upb_JsonEncode(ext_msg, m.ptr(), defpool.ptr(), 0, json_buf, json_size + 1,
+ status.ptr());
+ upb_test_TestMessageSet* ext_msg3 = upb_test_TestMessageSet_new(arena.ptr());
+ EXPECT_TRUE(upb_JsonDecode(json_buf, json_size, ext_msg3, m.ptr(),
+ defpool.ptr(), 0, arena.ptr(), status.ptr()))
+ << status.error_message();
+ VerifyMessageSet(ext_msg3);
+}
+
+TEST(MessageTest, UnknownMessageSet) {
+ static const char data[] = "ABCDE";
+ upb_StringView data_view = upb_StringView_FromString(data);
+ upb::Arena arena;
+ upb_test_FakeMessageSet* fake = upb_test_FakeMessageSet_new(arena.ptr());
+
+ // Add a MessageSet item that is unknown (there is no matching extension in
+ // the .proto file)
+ upb_test_FakeMessageSet_Item* item =
+ upb_test_FakeMessageSet_add_item(fake, arena.ptr());
+ upb_test_FakeMessageSet_Item_set_type_id(item, 12345);
+ upb_test_FakeMessageSet_Item_set_message(item, data_view);
+
+ // Set unknown fields inside the message set to test that we can skip them.
+ upb_test_FakeMessageSet_Item_set_unknown_varint(item, 12345678);
+ upb_test_FakeMessageSet_Item_set_unknown_fixed32(item, 12345678);
+ upb_test_FakeMessageSet_Item_set_unknown_fixed64(item, 12345678);
+ upb_test_FakeMessageSet_Item_set_unknown_bytes(item, data_view);
+ upb_test_FakeMessageSet_Item_mutable_unknowngroup(item, arena.ptr());
+
+ // Round trip through a true MessageSet where this item_id is unknown.
+ size_t size;
+ char* serialized =
+ upb_test_FakeMessageSet_serialize(fake, arena.ptr(), &size);
+ ASSERT_TRUE(serialized != nullptr);
+ ASSERT_GE(size, 0);
+
+ upb::DefPool defpool;
+ upb::MessageDefPtr m(upb_test_TestMessageSet_getmsgdef(defpool.ptr()));
+ EXPECT_TRUE(m.ptr() != nullptr);
+ upb_test_TestMessageSet* message_set = upb_test_TestMessageSet_parse_ex(
+ serialized, size, upb_DefPool_ExtensionRegistry(defpool.ptr()), 0,
+ arena.ptr());
+ ASSERT_TRUE(message_set != nullptr);
+
+ char* serialized2 =
+ upb_test_TestMessageSet_serialize(message_set, arena.ptr(), &size);
+ ASSERT_TRUE(serialized2 != nullptr);
+ ASSERT_GE(size, 0);
+
+ // Parse back into a fake MessageSet and verify that the unknown MessageSet
+ // item was preserved in full (both type_id and message).
+ upb_test_FakeMessageSet* fake2 =
+ upb_test_FakeMessageSet_parse(serialized2, size, arena.ptr());
+ ASSERT_TRUE(fake2 != nullptr);
+
+ const upb_test_FakeMessageSet_Item* const* items =
+ upb_test_FakeMessageSet_item(fake2, &size);
+ ASSERT_EQ(1, size);
+ EXPECT_EQ(12345, upb_test_FakeMessageSet_Item_type_id(items[0]));
+ EXPECT_TRUE(upb_StringView_IsEqual(
+ data_view, upb_test_FakeMessageSet_Item_message(items[0])));
+
+ // The non-MessageSet unknown fields should have been discarded.
+ EXPECT_FALSE(upb_test_FakeMessageSet_Item_has_unknown_varint(items[0]));
+ EXPECT_FALSE(upb_test_FakeMessageSet_Item_has_unknown_fixed32(items[0]));
+ EXPECT_FALSE(upb_test_FakeMessageSet_Item_has_unknown_fixed64(items[0]));
+ EXPECT_FALSE(upb_test_FakeMessageSet_Item_has_unknown_bytes(items[0]));
+ EXPECT_FALSE(upb_test_FakeMessageSet_Item_has_unknowngroup(items[0]));
+}
+
+TEST(MessageTest, Proto2Enum) {
+ upb::Arena arena;
+ upb_test_Proto2FakeEnumMessage* fake_msg =
+ upb_test_Proto2FakeEnumMessage_new(arena.ptr());
+
+ upb_test_Proto2FakeEnumMessage_set_optional_enum(fake_msg, 999);
+
+ int32_t* vals = upb_test_Proto2FakeEnumMessage_resize_repeated_enum(
+ fake_msg, 6, arena.ptr());
+ vals[0] = upb_test_Proto2EnumMessage_ZERO;
+ vals[1] = 7; // Unknown small.
+ vals[2] = upb_test_Proto2EnumMessage_SMALL;
+ vals[3] = 888; // Unknown large.
+ vals[4] = upb_test_Proto2EnumMessage_LARGE;
+ vals[5] = upb_test_Proto2EnumMessage_NEGATIVE;
+
+ vals = upb_test_Proto2FakeEnumMessage_resize_packed_enum(fake_msg, 6,
+ arena.ptr());
+ vals[0] = upb_test_Proto2EnumMessage_ZERO;
+ vals[1] = 7; // Unknown small.
+ vals[2] = upb_test_Proto2EnumMessage_SMALL;
+ vals[3] = 888; // Unknown large.
+ vals[4] = upb_test_Proto2EnumMessage_LARGE;
+ vals[5] = upb_test_Proto2EnumMessage_NEGATIVE;
+
+ size_t size;
+ char* pb =
+ upb_test_Proto2FakeEnumMessage_serialize(fake_msg, arena.ptr(), &size);
+
+ // Parsing as enums puts unknown values into unknown fields.
+ upb_test_Proto2EnumMessage* enum_msg =
+ upb_test_Proto2EnumMessage_parse(pb, size, arena.ptr());
+ ASSERT_TRUE(enum_msg != nullptr);
+
+ EXPECT_EQ(false, upb_test_Proto2EnumMessage_has_optional_enum(enum_msg));
+ const int32_t* vals_const =
+ upb_test_Proto2EnumMessage_repeated_enum(enum_msg, &size);
+ EXPECT_EQ(4, size); // Two unknown values moved to the unknown field set.
+
+ // Parsing back into the fake message shows the original data, except the
+ // repeated enum is rearranged.
+ pb = upb_test_Proto2EnumMessage_serialize(enum_msg, arena.ptr(), &size);
+ upb_test_Proto2FakeEnumMessage* fake_msg2 =
+ upb_test_Proto2FakeEnumMessage_parse(pb, size, arena.ptr());
+ ASSERT_TRUE(fake_msg2 != nullptr);
+
+ EXPECT_EQ(true, upb_test_Proto2FakeEnumMessage_has_optional_enum(fake_msg2));
+ EXPECT_EQ(999, upb_test_Proto2FakeEnumMessage_optional_enum(fake_msg2));
+
+ int32_t expected[] = {
+ upb_test_Proto2EnumMessage_ZERO,
+ upb_test_Proto2EnumMessage_SMALL,
+ upb_test_Proto2EnumMessage_LARGE,
+ upb_test_Proto2EnumMessage_NEGATIVE,
+ 7,
+ 888,
+ };
+
+ vals_const = upb_test_Proto2FakeEnumMessage_repeated_enum(fake_msg2, &size);
+ EXPECT_EQ(6, size);
+ EXPECT_THAT(std::vector<int32_t>(vals_const, vals_const + size),
+ ::testing::ElementsAreArray(expected));
+
+ vals_const = upb_test_Proto2FakeEnumMessage_packed_enum(fake_msg2, &size);
+ EXPECT_EQ(6, size);
+ EXPECT_THAT(std::vector<int32_t>(vals_const, vals_const + size),
+ ::testing::ElementsAreArray(expected));
+}
+
+TEST(MessageTest, TestBadUTF8) {
+ upb::Arena arena;
+ std::string serialized("r\x03\xed\xa0\x81");
+ EXPECT_EQ(nullptr, protobuf_test_messages_proto3_TestAllTypesProto3_parse(
+ serialized.data(), serialized.size(), arena.ptr()));
+}
+
+TEST(MessageTest, DecodeRequiredFieldsTopLevelMessage) {
+ upb::Arena arena;
+ upb_test_TestRequiredFields* test_msg;
+ upb_test_EmptyMessage* empty_msg;
+
+ // Succeeds, because we did not request required field checks.
+ test_msg = upb_test_TestRequiredFields_parse(nullptr, 0, arena.ptr());
+ EXPECT_NE(nullptr, test_msg);
+
+ // Fails, because required fields are missing.
+ EXPECT_EQ(
+ kUpb_DecodeStatus_MissingRequired,
+ upb_Decode(nullptr, 0, test_msg, &upb_test_TestRequiredFields_msg_init,
+ nullptr, kUpb_DecodeOption_CheckRequired, arena.ptr()));
+
+ upb_test_TestRequiredFields_set_required_int32(test_msg, 1);
+ size_t size;
+ char* serialized =
+ upb_test_TestRequiredFields_serialize(test_msg, arena.ptr(), &size);
+ ASSERT_TRUE(serialized != nullptr);
+ EXPECT_NE(0, size);
+
+ // Fails, but the code path is slightly different because the serialized
+ // payload is not empty.
+ EXPECT_EQ(kUpb_DecodeStatus_MissingRequired,
+ upb_Decode(serialized, size, test_msg,
+ &upb_test_TestRequiredFields_msg_init, nullptr,
+ kUpb_DecodeOption_CheckRequired, arena.ptr()));
+
+ empty_msg = upb_test_EmptyMessage_new(arena.ptr());
+ upb_test_TestRequiredFields_set_required_int32(test_msg, 1);
+ upb_test_TestRequiredFields_set_required_int64(test_msg, 2);
+ upb_test_TestRequiredFields_set_required_message(test_msg, empty_msg);
+
+ // Succeeds, because required fields are present (though not in the input).
+ EXPECT_EQ(
+ kUpb_DecodeStatus_Ok,
+ upb_Decode(nullptr, 0, test_msg, &upb_test_TestRequiredFields_msg_init,
+ nullptr, kUpb_DecodeOption_CheckRequired, arena.ptr()));
+
+ // Serialize a complete payload.
+ serialized =
+ upb_test_TestRequiredFields_serialize(test_msg, arena.ptr(), &size);
+ ASSERT_TRUE(serialized != nullptr);
+ EXPECT_NE(0, size);
+
+ upb_test_TestRequiredFields* test_msg2 = upb_test_TestRequiredFields_parse_ex(
+ serialized, size, nullptr, kUpb_DecodeOption_CheckRequired, arena.ptr());
+ EXPECT_NE(nullptr, test_msg2);
+
+ // When we add an incomplete sub-message, this is not flagged by the parser.
+ // This makes parser checking unsuitable for MergeFrom().
+ upb_test_TestRequiredFields_set_optional_message(
+ test_msg2, upb_test_TestRequiredFields_new(arena.ptr()));
+ EXPECT_EQ(kUpb_DecodeStatus_Ok,
+ upb_Decode(serialized, size, test_msg2,
+ &upb_test_TestRequiredFields_msg_init, nullptr,
+ kUpb_DecodeOption_CheckRequired, arena.ptr()));
+}
+
+TEST(MessageTest, DecodeRequiredFieldsSubMessage) {
+ upb::Arena arena;
+ upb_test_TestRequiredFields* test_msg =
+ upb_test_TestRequiredFields_new(arena.ptr());
+ upb_test_SubMessageHasRequired* sub_msg =
+ upb_test_SubMessageHasRequired_new(arena.ptr());
+ upb_test_EmptyMessage* empty_msg = upb_test_EmptyMessage_new(arena.ptr());
+
+ upb_test_SubMessageHasRequired_set_optional_message(sub_msg, test_msg);
+ size_t size;
+ char* serialized =
+ upb_test_SubMessageHasRequired_serialize(sub_msg, arena.ptr(), &size);
+ EXPECT_NE(0, size);
+
+ // No parse error when parsing normally.
+ EXPECT_NE(nullptr, upb_test_SubMessageHasRequired_parse(serialized, size,
+ arena.ptr()));
+
+ // Parse error when verifying required fields, due to incomplete sub-message.
+ EXPECT_EQ(nullptr, upb_test_SubMessageHasRequired_parse_ex(
+ serialized, size, nullptr,
+ kUpb_DecodeOption_CheckRequired, arena.ptr()));
+
+ upb_test_TestRequiredFields_set_required_int32(test_msg, 1);
+ upb_test_TestRequiredFields_set_required_int64(test_msg, 2);
+ upb_test_TestRequiredFields_set_required_message(test_msg, empty_msg);
+
+ serialized =
+ upb_test_SubMessageHasRequired_serialize(sub_msg, arena.ptr(), &size);
+ EXPECT_NE(0, size);
+
+ // No parse error; sub-message now is complete.
+ EXPECT_NE(nullptr, upb_test_SubMessageHasRequired_parse_ex(
+ serialized, size, nullptr,
+ kUpb_DecodeOption_CheckRequired, arena.ptr()));
+}
+
+TEST(MessageTest, EncodeRequiredFields) {
+ upb::Arena arena;
+ upb_test_TestRequiredFields* test_msg =
+ upb_test_TestRequiredFields_new(arena.ptr());
+
+ // Succeeds, we didn't ask for required field checking.
+ size_t size;
+ char* serialized =
+ upb_test_TestRequiredFields_serialize_ex(test_msg, 0, arena.ptr(), &size);
+ ASSERT_TRUE(serialized != nullptr);
+ EXPECT_EQ(size, 0);
+
+ // Fails, we asked for required field checking but the required field is
+ // missing.
+ serialized = upb_test_TestRequiredFields_serialize_ex(
+ test_msg, kUpb_EncodeOption_CheckRequired, arena.ptr(), &size);
+ ASSERT_TRUE(serialized == nullptr);
+
+ // Fails, some required fields are present but not others.
+ upb_test_TestRequiredFields_set_required_int32(test_msg, 1);
+ serialized = upb_test_TestRequiredFields_serialize_ex(
+ test_msg, kUpb_EncodeOption_CheckRequired, arena.ptr(), &size);
+ ASSERT_TRUE(serialized == nullptr);
+
+ // Succeeds, all required fields are set.
+ upb_test_EmptyMessage* empty_msg = upb_test_EmptyMessage_new(arena.ptr());
+ upb_test_TestRequiredFields_set_required_int64(test_msg, 2);
+ upb_test_TestRequiredFields_set_required_message(test_msg, empty_msg);
+ serialized = upb_test_TestRequiredFields_serialize_ex(
+ test_msg, kUpb_EncodeOption_CheckRequired, arena.ptr(), &size);
+ ASSERT_TRUE(serialized != nullptr);
+}
+
+TEST(MessageTest, MaxRequiredFields) {
+ upb::Arena arena;
+ upb_test_TestMaxRequiredFields* test_msg =
+ upb_test_TestMaxRequiredFields_new(arena.ptr());
+
+ // Fails, we asked for required field checking but the required field is
+ // missing.
+ size_t size;
+ char* serialized = upb_test_TestMaxRequiredFields_serialize_ex(
+ test_msg, kUpb_EncodeOption_CheckRequired, arena.ptr(), &size);
+ ASSERT_TRUE(serialized == nullptr);
+
+ upb::DefPool defpool;
+ upb::MessageDefPtr m(upb_test_TestMaxRequiredFields_getmsgdef(defpool.ptr()));
+ upb_MessageValue val;
+ val.int32_val = 1;
+ for (int i = 1; i <= 61; i++) {
+ upb::FieldDefPtr f = m.FindFieldByNumber(i);
+ ASSERT_TRUE(f);
+ upb_Message_SetFieldByDef(test_msg, f.ptr(), val, arena.ptr());
+ }
+
+ // Fails, field 63 still isn't set.
+ serialized = upb_test_TestMaxRequiredFields_serialize_ex(
+ test_msg, kUpb_EncodeOption_CheckRequired, arena.ptr(), &size);
+ ASSERT_TRUE(serialized == nullptr);
+
+ // Succeeds, all required fields are set.
+ upb::FieldDefPtr f = m.FindFieldByNumber(62);
+ ASSERT_TRUE(f);
+ upb_Message_SetFieldByDef(test_msg, f.ptr(), val, arena.ptr());
+ serialized = upb_test_TestMaxRequiredFields_serialize_ex(
+ test_msg, kUpb_EncodeOption_CheckRequired, arena.ptr(), &size);
+ ASSERT_TRUE(serialized != nullptr);
+}
+
+TEST(MessageTest, MapField) {
+ upb::Arena arena;
+ upb_test_TestMapFieldExtra* test_msg_extra =
+ upb_test_TestMapFieldExtra_new(arena.ptr());
+
+ ASSERT_TRUE(upb_test_TestMapFieldExtra_map_field_set(
+ test_msg_extra, 0, upb_test_TestMapFieldExtra_THREE, arena.ptr()));
+
+ size_t size;
+ char* serialized = upb_test_TestMapFieldExtra_serialize_ex(
+ test_msg_extra, 0, arena.ptr(), &size);
+ ASSERT_NE(nullptr, serialized);
+ ASSERT_NE(0, size);
+
+ upb_test_TestMapField* test_msg =
+ upb_test_TestMapField_parse(serialized, size, arena.ptr());
+ ASSERT_NE(nullptr, test_msg);
+
+ ASSERT_FALSE(upb_test_TestMapField_map_field_get(test_msg, 0, nullptr));
+ serialized =
+ upb_test_TestMapField_serialize_ex(test_msg, 0, arena.ptr(), &size);
+ ASSERT_NE(0, size);
+ // parse into second instance
+ upb_test_TestMapFieldExtra* test_msg_extra2 =
+ upb_test_TestMapFieldExtra_parse(serialized, size, arena.ptr());
+ ASSERT_TRUE(
+ upb_test_TestMapFieldExtra_map_field_get(test_msg_extra2, 0, nullptr));
+}
+
+// begin:google_only
+//
+// static void DecodeEncodeArbitrarySchemaAndPayload(
+// const upb::fuzz::MiniTableFuzzInput& input, std::string_view proto_payload,
+// int decode_options, int encode_options) {
+// // Lexan does not have setenv
+// #ifndef _MSC_VER
+// setenv("FUZZTEST_STACK_LIMIT", "262144", 1);
+// #endif
+// // The value of 80 used here is empirical and intended to roughly represent
+// // the tiny 64K stack size used by the test framework. We still see the
+// // occasional stack overflow at 90, so far 80 has worked 100% of the time.
+// decode_options = upb_Decode_LimitDepth(decode_options, 80);
+// encode_options = upb_Encode_LimitDepth(encode_options, 80);
+//
+// upb::Arena arena;
+// upb_ExtensionRegistry* exts;
+// const upb_MiniTable* mini_table =
+// upb::fuzz::BuildMiniTable(input, &exts, arena.ptr());
+// if (!mini_table) return;
+// upb_Message* msg = upb_Message_New(mini_table, arena.ptr());
+// upb_Decode(proto_payload.data(), proto_payload.size(), msg, mini_table, exts,
+// decode_options, arena.ptr());
+// char* ptr;
+// size_t size;
+// upb_Encode(msg, mini_table, encode_options, arena.ptr(), &ptr, &size);
+// }
+// FUZZ_TEST(FuzzTest, DecodeEncodeArbitrarySchemaAndPayload);
+//
+// TEST(FuzzTest, DecodeUnknownProto2EnumExtension) {
+// DecodeEncodeArbitrarySchemaAndPayload(
+// {{"\256\354Rt\216\3271\234", "\243\243\267\207\336gV\366w"},
+// {"z"},
+// "}\212\304d\371\363\341\2329\325B\264\377?\215\223\201\201\226y\201%"
+// "\321\363\255;",
+// {}},
+// "\010", -724543908, -591643538);
+// }
+//
+// TEST(FuzzTest, DecodeExtensionEnsurePresenceInitialized) {
+// DecodeEncodeArbitrarySchemaAndPayload(
+// {{"\031", "S", "\364", "", "", "j", "\303", "", "\224", "\277"},
+// {},
+// "_C-\236$*)C0C>",
+// {4041515984, 2147483647, 1929379871, 0, 3715937258, 4294967295}},
+// "\010\002", 342248070, -806315555);
+// }
+//
+// TEST(FuzzTest, DecodeExtendMessageSetWithNonMessage) {
+// DecodeEncodeArbitrarySchemaAndPayload(
+// {{"\n"}, {""}, ".\244", {}}, "\013\032\005\212a#\365\336\020\001\226",
+// 14803219, 670718349);
+// }
+//
+// TEST(FuzzTest, DecodeExtendMessageSetWithNonMessage2) {
+// DecodeEncodeArbitrarySchemaAndPayload({{"\n", "G", "\n", "\274", ""},
+// {"", "\030"},
+// "_@",
+// {4294967295, 2147483647}},
+// std::string("\013\032\000\220", 4),
+// 279975758, 1647495141);
+// }
+//
+// TEST(FuzzTest, DecodeExtendMessageSetWithNonMessage3) {
+// DecodeEncodeArbitrarySchemaAndPayload(
+// {{"\n"}, {"B", ""}, "\212:b", {11141121}},
+// "\013\032\004\357;7\363\020\001\346\240\200\201\271", 399842149,
+// -452966025);
+// }
+//
+// TEST(FuzzTest, DecodeExtendMessageSetWithNonMessage4) {
+// DecodeEncodeArbitrarySchemaAndPayload(
+// {{"\n", "3\340", "\354"}, {}, "B}G", {4294967295, 4082331310}},
+// "\013\032\004\244B\331\255\020\001\220\224\243\350\t", -561523015,
+// 1683327312);
+// }
+//
+// TEST(FuzzTest, DecodeExtendMessageSetWithNonMessage5) {
+// DecodeEncodeArbitrarySchemaAndPayload(
+// {{"\n"}, {""}, "kB", {0}},
+// "x\203\251\006\013\032\002S\376\010\273\'\020\014\365\207\244\234",
+// -696925610, -654590577);
+// }
+//
+// TEST(FuzzTest, ExtendMessageSetWithEmptyExtension) {
+// DecodeEncodeArbitrarySchemaAndPayload({{"\n"}, {}, "_", {}}, std::string(), 0,
+// 0);
+// }
+//
+// TEST(FuzzTest, DecodeEncodeArbitrarySchemaAndPayloadRegression) {
+// DecodeEncodeArbitrarySchemaAndPayload(
+// {{"\320", "\320", "\320", "\320", "\320", "%2%%%%%"},
+// {"", "", "", "", "", "", "", "", "", "", "", "",
+// "", "", "", "", "", "", "", "", "", "", ""},
+// "\226\226\226\226\226\226\350\351\350\350\350\350\350\350\350\314",
+// {4026531839}},
+// std::string("\n\n\n\n\272\n======@@%%%%%%%%%%%%%%%@@@(("
+// "qqqqqqqq5555555555qqqqqffq((((((((((((\335@@>"
+// "\ru\360ncppppxxxxxxxxx\025\025\025xxxxxppppppp<="
+// "\2165\275\275\315\217\361\010\t\000\016\013in\n\n\n\256\263",
+// 130),
+// 901979906, 65537);
+// }
+//
+// // This test encodes a map field with extra cruft.
+// TEST(FuzzTest, DecodeEncodeArbitrarySchemaAndPayloadRegressionInvalidMap) {
+// DecodeEncodeArbitrarySchemaAndPayload({{"%%%%///////"}, {}, "", {}},
+// std::string("\035|", 2), 65536, 3);
+// }
+//
+// // This test found a case where presence was unset for a mini table field.
+// TEST(FuzzTest, DecodeEncodeArbitrarySchemaAndPayloadRegressionMsan) {
+// DecodeEncodeArbitrarySchemaAndPayload({{"%-#^#"}, {}, "", {}}, std::string(),
+// -1960166338, 16809991);
+// }
+//
+// // This test encodes a map containing a msg wrapping another, empty msg.
+// TEST(FuzzTest, DecodeEncodeArbitrarySchemaAndPayloadRegressionMapMap) {
+// DecodeEncodeArbitrarySchemaAndPayload(
+// {{"%#G"}, {}, "", {}}, std::string("\022\002\022\000", 4), 0, 0);
+// }
+//
+// TEST(FuzzTest, GroupMap) {
+// // Groups should not be allowed as maps, but we previously failed to prevent
+// // this.
+// DecodeEncodeArbitrarySchemaAndPayload(
+// {.mini_descriptors = {"$$FF$", "%-C"},
+// .enum_mini_descriptors = {},
+// .extensions = "",
+// .links = {1}},
+// std::string(
+// "\023\020\030\233\000\204\330\372#\000`"
+// "a\000\000\001\000\000\000ccccccc\030s\273sssssssss\030\030\030\030"
+// "\030\030\030\030\215\215\215\215\215\215\215\215\030\030\232\253\253"
+// "\232*\334\227\273\231\207\373\t\0051\305\265\335\224\226"),
+// 0, 0);
+// }
+//
+// TEST(FuzzTest, MapUnknownFieldSpanBuffers) {
+// DecodeEncodeArbitrarySchemaAndPayload(
+// {{"$ 3", "%# "}, {}, "", {1}},
+// std::string(
+// "\"\002\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000",
+// 17),
+// 0, 0);
+// }
+//
+// // Another test for mismatched submsg types.
+// TEST(FuzzTest, DecodeEncodeArbitrarySchemaAndPayloadRegression22) {
+// DecodeEncodeArbitrarySchemaAndPayload(
+// {{"$2222222222222222222222", "%,&"}, {}, "", {1}},
+// std::string("\035\170\170\170\051\263\001\030\000\035\357\357\340\021\035"
+// "\025\331\035\035\035\035\035\035\035\035",
+// 25),
+// 0, 0);
+// }
+//
+// TEST(FuzzTest, ExtensionWithoutExt) {
+// DecodeEncodeArbitrarySchemaAndPayload({{"$ 3", "", "%#F"}, {}, "", {2, 1}},
+// std::string("\022\002\010\000", 4), 0,
+// 0);
+// }
+//
+// TEST(FuzzTest, MapFieldVerify) {
+// DecodeEncodeArbitrarySchemaAndPayload({{"% ^!"}, {}, "", {}}, "", 0, 0);
+// }
+//
+// TEST(FuzzTest, TooManyRequiredFields) {
+// DecodeEncodeArbitrarySchemaAndPayload(
+// {{"$ N N N N N N N N N N N N N N N N N N N N N N N N N N N N N N N N N N "
+// "N N N N N N N N N N N N N N N N N N N N N N N N N N N N N N"},
+// {},
+// "",
+// {}},
+// "", 0, 4);
+// }
+//
+// end:google_only
diff --git a/upb/upb/message/test.proto b/upb/upb/message/test.proto
new file mode 100644
index 0000000..c870c28
--- /dev/null
+++ b/upb/upb/message/test.proto
@@ -0,0 +1,203 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// LINT: ALLOW_GROUPS
+
+syntax = "proto2";
+
+package upb_test;
+
+import "google/protobuf/test_messages_proto3.proto";
+
+message TestExtensions {
+ extensions 1000 to max;
+ extend TestExtensions {
+ optional int32 optional_int32_ext = 1000;
+ }
+ message Nested {
+ extend TestExtensions {
+ repeated int32 repeated_int32_ext = 1001;
+ }
+ }
+}
+
+extend TestExtensions {
+ optional protobuf_test_messages.proto3.TestAllTypesProto3 optional_msg_ext =
+ 1002;
+}
+
+message TestMessageSet {
+ option message_set_wire_format = true;
+
+ extensions 4 to max;
+}
+
+message MessageSetMember {
+ optional int32 optional_int32 = 1;
+ extend TestMessageSet {
+ optional MessageSetMember message_set_extension = 4;
+ }
+}
+
+message FakeMessageSet {
+ repeated group Item = 1 {
+ optional int32 type_id = 2;
+ optional bytes message = 3;
+ optional int32 unknown_varint = 4;
+ optional fixed32 unknown_fixed32 = 5;
+ optional fixed64 unknown_fixed64 = 6;
+ optional bytes unknown_bytes = 7;
+ optional group UnknownGroup = 8 {}
+ }
+}
+
+message Proto2EnumMessage {
+ enum Proto2TestEnum {
+ ZERO = 0;
+ NEGATIVE = -1;
+ SMALL = 15;
+ LARGE = 12345;
+ }
+
+ optional Proto2TestEnum optional_enum = 1;
+ repeated Proto2TestEnum repeated_enum = 2;
+ repeated Proto2TestEnum packed_enum = 3 [packed = true];
+}
+
+// The same fields as Proto2EnumMessage, but with int32 fields so we can fake
+// wire format.
+message Proto2FakeEnumMessage {
+ optional int32 optional_enum = 1;
+ repeated int32 repeated_enum = 2;
+ repeated int32 packed_enum = 3 [packed = true];
+}
+
+message EmptyMessage {}
+
+message TestRequiredFields {
+ required int32 required_int32 = 1;
+ optional int32 optional_int32 = 2;
+ required int64 required_int64 = 3;
+ optional TestRequiredFields optional_message = 4;
+ required EmptyMessage required_message = 5;
+}
+
+message SubMessageHasRequired {
+ optional TestRequiredFields optional_message = 1;
+}
+
+message TestMaxRequiredFields {
+ required int32 required_int32_1 = 1;
+ required int32 required_int32_2 = 2;
+ required int32 required_int32_3 = 3;
+ required int32 required_int32_4 = 4;
+ required int32 required_int32_5 = 5;
+ required int32 required_int32_6 = 6;
+ required int32 required_int32_7 = 7;
+ required int32 required_int32_8 = 8;
+ required int32 required_int32_9 = 9;
+ required int32 required_int32_10 = 10;
+ required int32 required_int32_11 = 11;
+ required int32 required_int32_12 = 12;
+ required int32 required_int32_13 = 13;
+ required int32 required_int32_14 = 14;
+ required int32 required_int32_15 = 15;
+ required int32 required_int32_16 = 16;
+ required int32 required_int32_17 = 17;
+ required int32 required_int32_18 = 18;
+ required int32 required_int32_19 = 19;
+ required int32 required_int32_20 = 20;
+ required int32 required_int32_21 = 21;
+ required int32 required_int32_22 = 22;
+ required int32 required_int32_23 = 23;
+ required int32 required_int32_24 = 24;
+ required int32 required_int32_25 = 25;
+ required int32 required_int32_26 = 26;
+ required int32 required_int32_27 = 27;
+ required int32 required_int32_28 = 28;
+ required int32 required_int32_29 = 29;
+ required int32 required_int32_30 = 30;
+ required int32 required_int32_31 = 31;
+ required int32 required_int32_32 = 32;
+ required int32 required_int32_33 = 33;
+ required int32 required_int32_34 = 34;
+ required int32 required_int32_35 = 35;
+ required int32 required_int32_36 = 36;
+ required int32 required_int32_37 = 37;
+ required int32 required_int32_38 = 38;
+ required int32 required_int32_39 = 39;
+ required int32 required_int32_40 = 40;
+ required int32 required_int32_41 = 41;
+ required int32 required_int32_42 = 42;
+ required int32 required_int32_43 = 43;
+ required int32 required_int32_44 = 44;
+ required int32 required_int32_45 = 45;
+ required int32 required_int32_46 = 46;
+ required int32 required_int32_47 = 47;
+ required int32 required_int32_48 = 48;
+ required int32 required_int32_49 = 49;
+ required int32 required_int32_50 = 50;
+ required int32 required_int32_51 = 51;
+ required int32 required_int32_52 = 52;
+ required int32 required_int32_53 = 53;
+ required int32 required_int32_54 = 54;
+ required int32 required_int32_55 = 55;
+ required int32 required_int32_56 = 56;
+ required int32 required_int32_57 = 57;
+ required int32 required_int32_58 = 58;
+ required int32 required_int32_59 = 59;
+ required int32 required_int32_60 = 60;
+ required int32 required_int32_61 = 61;
+ required int32 required_int32_62 = 62;
+}
+
+message TestMapField {
+ enum EnumMap {
+ ZERO = 0;
+ ONE = 1;
+ TWO = 2;
+ }
+ map<int32, EnumMap> map_field = 1;
+}
+
+message TestMapFieldExtra {
+ enum EnumMap {
+ ZERO = 0;
+ ONE = 1;
+ TWO = 2;
+ THREE = 3;
+ }
+ map<int32, EnumMap> map_field = 1;
+}
+
+message TestNameConflict {
+ map<string, string> map_field = 1;
+ optional bool clear_map_field = 2;
+}
diff --git a/upb/upb/message/types.h b/upb/upb/message/types.h
new file mode 100644
index 0000000..a014272
--- /dev/null
+++ b/upb/upb/message/types.h
@@ -0,0 +1,38 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MESSAGE_TYPES_H_
+#define UPB_MESSAGE_TYPES_H_
+
+// This typedef is in a leaf header to resolve a circular dependency between
+// messages and mini tables.
+typedef void upb_Message;
+
+#endif /* UPB_MESSAGE_TYPES_H_ */
diff --git a/upb/upb/mini_descriptor/BUILD b/upb/upb/mini_descriptor/BUILD
new file mode 100644
index 0000000..db70056
--- /dev/null
+++ b/upb/upb/mini_descriptor/BUILD
@@ -0,0 +1,86 @@
+load(
+ "//bazel:build_defs.bzl",
+ "UPB_DEFAULT_COPTS",
+ "UPB_DEFAULT_CPPOPTS",
+)
+
+cc_library(
+ name = "mini_descriptor",
+ srcs = [
+ "build_enum.c",
+ "decode.c",
+ "link.c",
+ ],
+ hdrs = [
+ "build_enum.h",
+ "decode.h",
+ "link.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":internal",
+ "//:base",
+ "//:mem",
+ "//:mini_table",
+ "//:mini_table_internal",
+ "//:port",
+ ],
+)
+
+cc_library(
+ name = "internal",
+ srcs = [
+ "internal/base92.c",
+ "internal/encode.c",
+ ],
+ hdrs = [
+ "internal/base92.h",
+ "internal/decoder.h",
+ "internal/encode.h",
+ "internal/encode.hpp",
+ "internal/modifiers.h",
+ "internal/wire_constants.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ "//:base",
+ "//:base_internal",
+ "//:port",
+ ],
+)
+
+cc_test(
+ name = "encode_test",
+ srcs = ["internal/encode_test.cc"],
+ copts = UPB_DEFAULT_CPPOPTS,
+ deps = [
+ ":internal",
+ ":mini_descriptor",
+ "//:base",
+ "//:mem",
+ "//:message_accessors_internal",
+ "//:mini_table",
+ "//:wire",
+ "@com_google_absl//absl/container:flat_hash_set",
+ "@com_google_googletest//:gtest_main",
+ "@com_google_protobuf//:protobuf",
+ ],
+)
+
+# begin:github_only
+filegroup(
+ name = "source_files",
+ srcs = glob(
+ [
+ "**/*.c",
+ "**/*.h",
+ ],
+ ),
+ visibility = [
+ "//cmake:__pkg__",
+ "//python/dist:__pkg__",
+ ]
+)
+# end:github_only
diff --git a/upb/upb/mini_descriptor/build_enum.c b/upb/upb/mini_descriptor/build_enum.c
new file mode 100644
index 0000000..152a7ce
--- /dev/null
+++ b/upb/upb/mini_descriptor/build_enum.c
@@ -0,0 +1,153 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/mini_descriptor/build_enum.h"
+
+#include "upb/mini_descriptor/internal/decoder.h"
+#include "upb/mini_descriptor/internal/wire_constants.h"
+#include "upb/mini_table/internal/enum.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+typedef struct {
+ upb_MdDecoder base;
+ upb_Arena* arena;
+ upb_MiniTableEnum* enum_table;
+ uint32_t enum_value_count;
+ uint32_t enum_data_count;
+ uint32_t enum_data_capacity;
+} upb_MdEnumDecoder;
+
+static size_t upb_MiniTableEnum_Size(size_t count) {
+ return sizeof(upb_MiniTableEnum) + count * sizeof(uint32_t);
+}
+
+static upb_MiniTableEnum* _upb_MiniTable_AddEnumDataMember(upb_MdEnumDecoder* d,
+ uint32_t val) {
+ if (d->enum_data_count == d->enum_data_capacity) {
+ size_t old_sz = upb_MiniTableEnum_Size(d->enum_data_capacity);
+ d->enum_data_capacity = UPB_MAX(2, d->enum_data_capacity * 2);
+ size_t new_sz = upb_MiniTableEnum_Size(d->enum_data_capacity);
+ d->enum_table = upb_Arena_Realloc(d->arena, d->enum_table, old_sz, new_sz);
+ upb_MdDecoder_CheckOutOfMemory(&d->base, d->enum_table);
+ }
+ d->enum_table->data[d->enum_data_count++] = val;
+ return d->enum_table;
+}
+
+static void upb_MiniTableEnum_BuildValue(upb_MdEnumDecoder* d, uint32_t val) {
+ upb_MiniTableEnum* table = d->enum_table;
+ d->enum_value_count++;
+ if (table->value_count || (val > 512 && d->enum_value_count < val / 32)) {
+ if (table->value_count == 0) {
+ assert(d->enum_data_count == table->mask_limit / 32);
+ }
+ table = _upb_MiniTable_AddEnumDataMember(d, val);
+ table->value_count++;
+ } else {
+ uint32_t new_mask_limit = ((val / 32) + 1) * 32;
+ while (table->mask_limit < new_mask_limit) {
+ table = _upb_MiniTable_AddEnumDataMember(d, 0);
+ table->mask_limit += 32;
+ }
+ table->data[val / 32] |= 1ULL << (val % 32);
+ }
+}
+
+static upb_MiniTableEnum* upb_MtDecoder_DoBuildMiniTableEnum(
+ upb_MdEnumDecoder* d, const char* data, size_t len) {
+ // If the string is non-empty then it must begin with a version tag.
+ if (len) {
+ if (*data != kUpb_EncodedVersion_EnumV1) {
+ upb_MdDecoder_ErrorJmp(&d->base, "Invalid enum version: %c", *data);
+ }
+ data++;
+ len--;
+ }
+
+ upb_MdDecoder_CheckOutOfMemory(&d->base, d->enum_table);
+
+ // Guarantee at least 64 bits of mask without checking mask size.
+ d->enum_table->mask_limit = 64;
+ d->enum_table = _upb_MiniTable_AddEnumDataMember(d, 0);
+ d->enum_table = _upb_MiniTable_AddEnumDataMember(d, 0);
+
+ d->enum_table->value_count = 0;
+
+ const char* ptr = data;
+ uint32_t base = 0;
+
+ while (ptr < d->base.end) {
+ char ch = *ptr++;
+ if (ch <= kUpb_EncodedValue_MaxEnumMask) {
+ uint32_t mask = _upb_FromBase92(ch);
+ for (int i = 0; i < 5; i++, base++, mask >>= 1) {
+ if (mask & 1) upb_MiniTableEnum_BuildValue(d, base);
+ }
+ } else if (kUpb_EncodedValue_MinSkip <= ch &&
+ ch <= kUpb_EncodedValue_MaxSkip) {
+ uint32_t skip;
+ ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, ch,
+ kUpb_EncodedValue_MinSkip,
+ kUpb_EncodedValue_MaxSkip, &skip);
+ base += skip;
+ } else {
+ upb_MdDecoder_ErrorJmp(&d->base, "Unexpected character: %c", ch);
+ }
+ }
+
+ return d->enum_table;
+}
+
+static upb_MiniTableEnum* upb_MtDecoder_BuildMiniTableEnum(
+ upb_MdEnumDecoder* const decoder, const char* const data, size_t const len) {
+ if (UPB_SETJMP(decoder->base.err) != 0) return NULL;
+ return upb_MtDecoder_DoBuildMiniTableEnum(decoder, data, len);
+}
+
+upb_MiniTableEnum* upb_MiniDescriptor_BuildEnum(const char* data, size_t len,
+ upb_Arena* arena,
+ upb_Status* status) {
+ upb_MdEnumDecoder decoder = {
+ .base =
+ {
+ .end = UPB_PTRADD(data, len),
+ .status = status,
+ },
+ .arena = arena,
+ .enum_table = upb_Arena_Malloc(arena, upb_MiniTableEnum_Size(2)),
+ .enum_value_count = 0,
+ .enum_data_count = 0,
+ .enum_data_capacity = 1,
+ };
+
+ return upb_MtDecoder_BuildMiniTableEnum(&decoder, data, len);
+}
diff --git a/upb/upb/mini_descriptor/build_enum.h b/upb/upb/mini_descriptor/build_enum.h
new file mode 100644
index 0000000..8b6e567
--- /dev/null
+++ b/upb/upb/mini_descriptor/build_enum.h
@@ -0,0 +1,66 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MINI_DESCRIPTOR_BUILD_ENUM_H_
+#define UPB_MINI_DESCRIPTOR_BUILD_ENUM_H_
+
+#include "upb/base/status.h"
+#include "upb/mem/arena.h"
+#include "upb/mini_table/enum.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Builds a upb_MiniTableEnum from an enum MiniDescriptor. The MiniDescriptor
+// must be for an enum, not a message.
+UPB_API upb_MiniTableEnum* upb_MiniDescriptor_BuildEnum(const char* data,
+ size_t len,
+ upb_Arena* arena,
+ upb_Status* status);
+
+// TODO(b/289057707): Deprecated name; update callers.
+UPB_API_INLINE upb_MiniTableEnum* upb_MiniTableEnum_Build(const char* data,
+ size_t len,
+ upb_Arena* arena,
+ upb_Status* status) {
+ return upb_MiniDescriptor_BuildEnum(data, len, arena, status);
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif // UPB_MINI_DESCRIPTOR_BUILD_ENUM_H_
diff --git a/upb/upb/mini_descriptor/decode.c b/upb/upb/mini_descriptor/decode.c
new file mode 100644
index 0000000..52019fe
--- /dev/null
+++ b/upb/upb/mini_descriptor/decode.c
@@ -0,0 +1,878 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/mini_descriptor/decode.h"
+
+#include <inttypes.h>
+#include <stdlib.h>
+
+#include "upb/base/string_view.h"
+#include "upb/mem/arena.h"
+#include "upb/mini_descriptor/internal/base92.h"
+#include "upb/mini_descriptor/internal/decoder.h"
+#include "upb/mini_descriptor/internal/modifiers.h"
+#include "upb/mini_descriptor/internal/wire_constants.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+// Note: we sort by this number when calculating layout order.
+typedef enum {
+ kUpb_LayoutItemType_OneofCase, // Oneof case.
+ kUpb_LayoutItemType_OneofField, // Oneof field data.
+ kUpb_LayoutItemType_Field, // Non-oneof field data.
+
+ kUpb_LayoutItemType_Max = kUpb_LayoutItemType_Field,
+} upb_LayoutItemType;
+
+#define kUpb_LayoutItem_IndexSentinel ((uint16_t)-1)
+
+typedef struct {
+ // Index of the corresponding field. When this is a oneof field, the field's
+ // offset will be the index of the next field in a linked list.
+ uint16_t field_index;
+ uint16_t offset;
+ upb_FieldRep rep;
+ upb_LayoutItemType type;
+} upb_LayoutItem;
+
+typedef struct {
+ upb_LayoutItem* data;
+ size_t size;
+ size_t capacity;
+} upb_LayoutItemVector;
+
+typedef struct {
+ upb_MdDecoder base;
+ upb_MiniTable* table;
+ upb_MiniTableField* fields;
+ upb_MiniTablePlatform platform;
+ upb_LayoutItemVector vec;
+ upb_Arena* arena;
+} upb_MtDecoder;
+
+// In each field's offset, we temporarily store a presence classifier:
+enum PresenceClass {
+ kNoPresence = 0,
+ kHasbitPresence = 1,
+ kRequiredPresence = 2,
+ kOneofBase = 3,
+ // Negative values refer to a specific oneof with that number. Positive
+ // values >= kOneofBase indicate that this field is in a oneof, and specify
+ // the next field in this oneof's linked list.
+};
+
+static bool upb_MtDecoder_FieldIsPackable(upb_MiniTableField* field) {
+ return (field->mode & kUpb_FieldMode_Array) &&
+ upb_FieldType_IsPackable(field->UPB_PRIVATE(descriptortype));
+}
+
+typedef struct {
+ uint16_t submsg_count;
+ uint16_t subenum_count;
+} upb_SubCounts;
+
+static void upb_MiniTable_SetTypeAndSub(upb_MiniTableField* field,
+ upb_FieldType type,
+ upb_SubCounts* sub_counts,
+ uint64_t msg_modifiers,
+ bool is_proto3_enum) {
+ if (is_proto3_enum) {
+ UPB_ASSERT(type == kUpb_FieldType_Enum);
+ type = kUpb_FieldType_Int32;
+ field->mode |= kUpb_LabelFlags_IsAlternate;
+ } else if (type == kUpb_FieldType_String &&
+ !(msg_modifiers & kUpb_MessageModifier_ValidateUtf8)) {
+ type = kUpb_FieldType_Bytes;
+ field->mode |= kUpb_LabelFlags_IsAlternate;
+ }
+
+ field->UPB_PRIVATE(descriptortype) = type;
+
+ if (upb_MtDecoder_FieldIsPackable(field) &&
+ (msg_modifiers & kUpb_MessageModifier_DefaultIsPacked)) {
+ field->mode |= kUpb_LabelFlags_IsPacked;
+ }
+
+ if (type == kUpb_FieldType_Message || type == kUpb_FieldType_Group) {
+ field->UPB_PRIVATE(submsg_index) = sub_counts->submsg_count++;
+ } else if (type == kUpb_FieldType_Enum) {
+ // We will need to update this later once we know the total number of
+ // submsg fields.
+ field->UPB_PRIVATE(submsg_index) = sub_counts->subenum_count++;
+ } else {
+ field->UPB_PRIVATE(submsg_index) = kUpb_NoSub;
+ }
+}
+
+static const char kUpb_EncodedToType[] = {
+ [kUpb_EncodedType_Double] = kUpb_FieldType_Double,
+ [kUpb_EncodedType_Float] = kUpb_FieldType_Float,
+ [kUpb_EncodedType_Int64] = kUpb_FieldType_Int64,
+ [kUpb_EncodedType_UInt64] = kUpb_FieldType_UInt64,
+ [kUpb_EncodedType_Int32] = kUpb_FieldType_Int32,
+ [kUpb_EncodedType_Fixed64] = kUpb_FieldType_Fixed64,
+ [kUpb_EncodedType_Fixed32] = kUpb_FieldType_Fixed32,
+ [kUpb_EncodedType_Bool] = kUpb_FieldType_Bool,
+ [kUpb_EncodedType_String] = kUpb_FieldType_String,
+ [kUpb_EncodedType_Group] = kUpb_FieldType_Group,
+ [kUpb_EncodedType_Message] = kUpb_FieldType_Message,
+ [kUpb_EncodedType_Bytes] = kUpb_FieldType_Bytes,
+ [kUpb_EncodedType_UInt32] = kUpb_FieldType_UInt32,
+ [kUpb_EncodedType_OpenEnum] = kUpb_FieldType_Enum,
+ [kUpb_EncodedType_SFixed32] = kUpb_FieldType_SFixed32,
+ [kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64,
+ [kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32,
+ [kUpb_EncodedType_SInt64] = kUpb_FieldType_SInt64,
+ [kUpb_EncodedType_ClosedEnum] = kUpb_FieldType_Enum,
+};
+
+static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch,
+ upb_MiniTableField* field,
+ uint64_t msg_modifiers,
+ upb_SubCounts* sub_counts) {
+ static const char kUpb_EncodedToFieldRep[] = {
+ [kUpb_EncodedType_Double] = kUpb_FieldRep_8Byte,
+ [kUpb_EncodedType_Float] = kUpb_FieldRep_4Byte,
+ [kUpb_EncodedType_Int64] = kUpb_FieldRep_8Byte,
+ [kUpb_EncodedType_UInt64] = kUpb_FieldRep_8Byte,
+ [kUpb_EncodedType_Int32] = kUpb_FieldRep_4Byte,
+ [kUpb_EncodedType_Fixed64] = kUpb_FieldRep_8Byte,
+ [kUpb_EncodedType_Fixed32] = kUpb_FieldRep_4Byte,
+ [kUpb_EncodedType_Bool] = kUpb_FieldRep_1Byte,
+ [kUpb_EncodedType_String] = kUpb_FieldRep_StringView,
+ [kUpb_EncodedType_Bytes] = kUpb_FieldRep_StringView,
+ [kUpb_EncodedType_UInt32] = kUpb_FieldRep_4Byte,
+ [kUpb_EncodedType_OpenEnum] = kUpb_FieldRep_4Byte,
+ [kUpb_EncodedType_SFixed32] = kUpb_FieldRep_4Byte,
+ [kUpb_EncodedType_SFixed64] = kUpb_FieldRep_8Byte,
+ [kUpb_EncodedType_SInt32] = kUpb_FieldRep_4Byte,
+ [kUpb_EncodedType_SInt64] = kUpb_FieldRep_8Byte,
+ [kUpb_EncodedType_ClosedEnum] = kUpb_FieldRep_4Byte,
+ };
+
+ char pointer_rep = d->platform == kUpb_MiniTablePlatform_32Bit
+ ? kUpb_FieldRep_4Byte
+ : kUpb_FieldRep_8Byte;
+
+ int8_t type = _upb_FromBase92(ch);
+ if (ch >= _upb_ToBase92(kUpb_EncodedType_RepeatedBase)) {
+ type -= kUpb_EncodedType_RepeatedBase;
+ field->mode = kUpb_FieldMode_Array;
+ field->mode |= pointer_rep << kUpb_FieldRep_Shift;
+ field->offset = kNoPresence;
+ } else {
+ field->mode = kUpb_FieldMode_Scalar;
+ field->offset = kHasbitPresence;
+ if (type == kUpb_EncodedType_Group || type == kUpb_EncodedType_Message) {
+ field->mode |= pointer_rep << kUpb_FieldRep_Shift;
+ } else if ((unsigned long)type >= sizeof(kUpb_EncodedToFieldRep)) {
+ upb_MdDecoder_ErrorJmp(&d->base, "Invalid field type: %d", (int)type);
+ } else {
+ field->mode |= kUpb_EncodedToFieldRep[type] << kUpb_FieldRep_Shift;
+ }
+ }
+ if ((unsigned long)type >= sizeof(kUpb_EncodedToType)) {
+ upb_MdDecoder_ErrorJmp(&d->base, "Invalid field type: %d", (int)type);
+ }
+ upb_MiniTable_SetTypeAndSub(field, kUpb_EncodedToType[type], sub_counts,
+ msg_modifiers, type == kUpb_EncodedType_OpenEnum);
+}
+
+static void upb_MtDecoder_ModifyField(upb_MtDecoder* d,
+ uint32_t message_modifiers,
+ uint32_t field_modifiers,
+ upb_MiniTableField* field) {
+ if (field_modifiers & kUpb_EncodedFieldModifier_FlipPacked) {
+ if (!upb_MtDecoder_FieldIsPackable(field)) {
+ upb_MdDecoder_ErrorJmp(&d->base,
+ "Cannot flip packed on unpackable field %" PRIu32,
+ field->number);
+ }
+ field->mode ^= kUpb_LabelFlags_IsPacked;
+ }
+
+ bool singular = field_modifiers & kUpb_EncodedFieldModifier_IsProto3Singular;
+ bool required = field_modifiers & kUpb_EncodedFieldModifier_IsRequired;
+
+ // Validate.
+ if ((singular || required) && field->offset != kHasbitPresence) {
+ upb_MdDecoder_ErrorJmp(&d->base,
+ "Invalid modifier(s) for repeated field %" PRIu32,
+ field->number);
+ }
+ if (singular && required) {
+ upb_MdDecoder_ErrorJmp(
+ &d->base, "Field %" PRIu32 " cannot be both singular and required",
+ field->number);
+ }
+
+ if (singular) field->offset = kNoPresence;
+ if (required) {
+ field->offset = kRequiredPresence;
+ }
+}
+
+static void upb_MtDecoder_PushItem(upb_MtDecoder* d, upb_LayoutItem item) {
+ if (d->vec.size == d->vec.capacity) {
+ size_t new_cap = UPB_MAX(8, d->vec.size * 2);
+ d->vec.data = realloc(d->vec.data, new_cap * sizeof(*d->vec.data));
+ upb_MdDecoder_CheckOutOfMemory(&d->base, d->vec.data);
+ d->vec.capacity = new_cap;
+ }
+ d->vec.data[d->vec.size++] = item;
+}
+
+static void upb_MtDecoder_PushOneof(upb_MtDecoder* d, upb_LayoutItem item) {
+ if (item.field_index == kUpb_LayoutItem_IndexSentinel) {
+ upb_MdDecoder_ErrorJmp(&d->base, "Empty oneof");
+ }
+ item.field_index -= kOneofBase;
+
+ // Push oneof data.
+ item.type = kUpb_LayoutItemType_OneofField;
+ upb_MtDecoder_PushItem(d, item);
+
+ // Push oneof case.
+ item.rep = kUpb_FieldRep_4Byte; // Field Number.
+ item.type = kUpb_LayoutItemType_OneofCase;
+ upb_MtDecoder_PushItem(d, item);
+}
+
+size_t upb_MtDecoder_SizeOfRep(upb_FieldRep rep,
+ upb_MiniTablePlatform platform) {
+ static const uint8_t kRepToSize32[] = {
+ [kUpb_FieldRep_1Byte] = 1,
+ [kUpb_FieldRep_4Byte] = 4,
+ [kUpb_FieldRep_StringView] = 8,
+ [kUpb_FieldRep_8Byte] = 8,
+ };
+ static const uint8_t kRepToSize64[] = {
+ [kUpb_FieldRep_1Byte] = 1,
+ [kUpb_FieldRep_4Byte] = 4,
+ [kUpb_FieldRep_StringView] = 16,
+ [kUpb_FieldRep_8Byte] = 8,
+ };
+ UPB_ASSERT(sizeof(upb_StringView) ==
+ UPB_SIZE(kRepToSize32, kRepToSize64)[kUpb_FieldRep_StringView]);
+ return platform == kUpb_MiniTablePlatform_32Bit ? kRepToSize32[rep]
+ : kRepToSize64[rep];
+}
+
+size_t upb_MtDecoder_AlignOfRep(upb_FieldRep rep,
+ upb_MiniTablePlatform platform) {
+ static const uint8_t kRepToAlign32[] = {
+ [kUpb_FieldRep_1Byte] = 1,
+ [kUpb_FieldRep_4Byte] = 4,
+ [kUpb_FieldRep_StringView] = 4,
+ [kUpb_FieldRep_8Byte] = 8,
+ };
+ static const uint8_t kRepToAlign64[] = {
+ [kUpb_FieldRep_1Byte] = 1,
+ [kUpb_FieldRep_4Byte] = 4,
+ [kUpb_FieldRep_StringView] = 8,
+ [kUpb_FieldRep_8Byte] = 8,
+ };
+ UPB_ASSERT(UPB_ALIGN_OF(upb_StringView) ==
+ UPB_SIZE(kRepToAlign32, kRepToAlign64)[kUpb_FieldRep_StringView]);
+ return platform == kUpb_MiniTablePlatform_32Bit ? kRepToAlign32[rep]
+ : kRepToAlign64[rep];
+}
+
+static const char* upb_MtDecoder_DecodeOneofField(upb_MtDecoder* d,
+ const char* ptr,
+ char first_ch,
+ upb_LayoutItem* item) {
+ uint32_t field_num;
+ ptr = upb_MdDecoder_DecodeBase92Varint(
+ &d->base, ptr, first_ch, kUpb_EncodedValue_MinOneofField,
+ kUpb_EncodedValue_MaxOneofField, &field_num);
+ upb_MiniTableField* f =
+ (void*)upb_MiniTable_FindFieldByNumber(d->table, field_num);
+
+ if (!f) {
+ upb_MdDecoder_ErrorJmp(&d->base,
+ "Couldn't add field number %" PRIu32
+ " to oneof, no such field number.",
+ field_num);
+ }
+ if (f->offset != kHasbitPresence) {
+ upb_MdDecoder_ErrorJmp(
+ &d->base,
+ "Cannot add repeated, required, or singular field %" PRIu32
+ " to oneof.",
+ field_num);
+ }
+
+ // Oneof storage must be large enough to accommodate the largest member.
+ int rep = f->mode >> kUpb_FieldRep_Shift;
+ if (upb_MtDecoder_SizeOfRep(rep, d->platform) >
+ upb_MtDecoder_SizeOfRep(item->rep, d->platform)) {
+ item->rep = rep;
+ }
+ // Prepend this field to the linked list.
+ f->offset = item->field_index;
+ item->field_index = (f - d->fields) + kOneofBase;
+ return ptr;
+}
+
+static const char* upb_MtDecoder_DecodeOneofs(upb_MtDecoder* d,
+ const char* ptr) {
+ upb_LayoutItem item = {.rep = 0,
+ .field_index = kUpb_LayoutItem_IndexSentinel};
+ while (ptr < d->base.end) {
+ char ch = *ptr++;
+ if (ch == kUpb_EncodedValue_FieldSeparator) {
+ // Field separator, no action needed.
+ } else if (ch == kUpb_EncodedValue_OneofSeparator) {
+ // End of oneof.
+ upb_MtDecoder_PushOneof(d, item);
+ item.field_index = kUpb_LayoutItem_IndexSentinel; // Move to next oneof.
+ } else {
+ ptr = upb_MtDecoder_DecodeOneofField(d, ptr, ch, &item);
+ }
+ }
+
+ // Push final oneof.
+ upb_MtDecoder_PushOneof(d, item);
+ return ptr;
+}
+
+static const char* upb_MtDecoder_ParseModifier(upb_MtDecoder* d,
+ const char* ptr, char first_ch,
+ upb_MiniTableField* last_field,
+ uint64_t* msg_modifiers) {
+ uint32_t mod;
+ ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, first_ch,
+ kUpb_EncodedValue_MinModifier,
+ kUpb_EncodedValue_MaxModifier, &mod);
+ if (last_field) {
+ upb_MtDecoder_ModifyField(d, *msg_modifiers, mod, last_field);
+ } else {
+ if (!d->table) {
+ upb_MdDecoder_ErrorJmp(&d->base,
+ "Extensions cannot have message modifiers");
+ }
+ *msg_modifiers = mod;
+ }
+
+ return ptr;
+}
+
+static void upb_MtDecoder_AllocateSubs(upb_MtDecoder* d,
+ upb_SubCounts sub_counts) {
+ uint32_t total_count = sub_counts.submsg_count + sub_counts.subenum_count;
+ size_t subs_bytes = sizeof(*d->table->subs) * total_count;
+ upb_MiniTableSub* subs = upb_Arena_Malloc(d->arena, subs_bytes);
+ upb_MdDecoder_CheckOutOfMemory(&d->base, subs);
+ uint32_t i = 0;
+ for (; i < sub_counts.submsg_count; i++) {
+ subs[i].submsg = &_kUpb_MiniTable_Empty;
+ }
+ if (sub_counts.subenum_count) {
+ upb_MiniTableField* f = d->fields;
+ upb_MiniTableField* end_f = f + d->table->field_count;
+ for (; f < end_f; f++) {
+ if (f->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Enum) {
+ f->UPB_PRIVATE(submsg_index) += sub_counts.submsg_count;
+ }
+ }
+ for (; i < sub_counts.submsg_count + sub_counts.subenum_count; i++) {
+ subs[i].subenum = NULL;
+ }
+ }
+ d->table->subs = subs;
+}
+
+static const char* upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr,
+ size_t len, void* fields,
+ size_t field_size, uint16_t* field_count,
+ upb_SubCounts* sub_counts) {
+ uint64_t msg_modifiers = 0;
+ uint32_t last_field_number = 0;
+ upb_MiniTableField* last_field = NULL;
+ bool need_dense_below = d->table != NULL;
+
+ d->base.end = UPB_PTRADD(ptr, len);
+
+ while (ptr < d->base.end) {
+ char ch = *ptr++;
+ if (ch <= kUpb_EncodedValue_MaxField) {
+ if (!d->table && last_field) {
+ // For extensions, consume only a single field and then return.
+ return --ptr;
+ }
+ upb_MiniTableField* field = fields;
+ *field_count += 1;
+ fields = (char*)fields + field_size;
+ field->number = ++last_field_number;
+ last_field = field;
+ upb_MiniTable_SetField(d, ch, field, msg_modifiers, sub_counts);
+ } else if (kUpb_EncodedValue_MinModifier <= ch &&
+ ch <= kUpb_EncodedValue_MaxModifier) {
+ ptr = upb_MtDecoder_ParseModifier(d, ptr, ch, last_field, &msg_modifiers);
+ if (msg_modifiers & kUpb_MessageModifier_IsExtendable) {
+ d->table->ext |= kUpb_ExtMode_Extendable;
+ }
+ } else if (ch == kUpb_EncodedValue_End) {
+ if (!d->table) {
+ upb_MdDecoder_ErrorJmp(&d->base, "Extensions cannot have oneofs.");
+ }
+ ptr = upb_MtDecoder_DecodeOneofs(d, ptr);
+ } else if (kUpb_EncodedValue_MinSkip <= ch &&
+ ch <= kUpb_EncodedValue_MaxSkip) {
+ if (need_dense_below) {
+ d->table->dense_below = d->table->field_count;
+ need_dense_below = false;
+ }
+ uint32_t skip;
+ ptr = upb_MdDecoder_DecodeBase92Varint(&d->base, ptr, ch,
+ kUpb_EncodedValue_MinSkip,
+ kUpb_EncodedValue_MaxSkip, &skip);
+ last_field_number += skip;
+ last_field_number--; // Next field seen will increment.
+ } else {
+ upb_MdDecoder_ErrorJmp(&d->base, "Invalid char: %c", ch);
+ }
+ }
+
+ if (need_dense_below) {
+ d->table->dense_below = d->table->field_count;
+ }
+
+ return ptr;
+}
+
+static void upb_MtDecoder_ParseMessage(upb_MtDecoder* d, const char* data,
+ size_t len) {
+ // Buffer length is an upper bound on the number of fields. We will return
+ // what we don't use.
+ d->fields = upb_Arena_Malloc(d->arena, sizeof(*d->fields) * len);
+ upb_MdDecoder_CheckOutOfMemory(&d->base, d->fields);
+
+ upb_SubCounts sub_counts = {0, 0};
+ d->table->field_count = 0;
+ d->table->fields = d->fields;
+ upb_MtDecoder_Parse(d, data, len, d->fields, sizeof(*d->fields),
+ &d->table->field_count, &sub_counts);
+
+ upb_Arena_ShrinkLast(d->arena, d->fields, sizeof(*d->fields) * len,
+ sizeof(*d->fields) * d->table->field_count);
+ d->table->fields = d->fields;
+ upb_MtDecoder_AllocateSubs(d, sub_counts);
+}
+
+int upb_MtDecoder_CompareFields(const void* _a, const void* _b) {
+ const upb_LayoutItem* a = _a;
+ const upb_LayoutItem* b = _b;
+ // Currently we just sort by:
+ // 1. rep (smallest fields first)
+ // 2. type (oneof cases first)
+ // 2. field_index (smallest numbers first)
+ // The main goal of this is to reduce space lost to padding.
+ // Later we may have more subtle reasons to prefer a different ordering.
+ const int rep_bits = upb_Log2Ceiling(kUpb_FieldRep_Max);
+ const int type_bits = upb_Log2Ceiling(kUpb_LayoutItemType_Max);
+ const int idx_bits = (sizeof(a->field_index) * 8);
+ UPB_ASSERT(idx_bits + rep_bits + type_bits < 32);
+#define UPB_COMBINE(rep, ty, idx) (((rep << type_bits) | ty) << idx_bits) | idx
+ uint32_t a_packed = UPB_COMBINE(a->rep, a->type, a->field_index);
+ uint32_t b_packed = UPB_COMBINE(b->rep, b->type, b->field_index);
+ assert(a_packed != b_packed);
+#undef UPB_COMBINE
+ return a_packed < b_packed ? -1 : 1;
+}
+
+static bool upb_MtDecoder_SortLayoutItems(upb_MtDecoder* d) {
+ // Add items for all non-oneof fields (oneofs were already added).
+ int n = d->table->field_count;
+ for (int i = 0; i < n; i++) {
+ upb_MiniTableField* f = &d->fields[i];
+ if (f->offset >= kOneofBase) continue;
+ upb_LayoutItem item = {.field_index = i,
+ .rep = f->mode >> kUpb_FieldRep_Shift,
+ .type = kUpb_LayoutItemType_Field};
+ upb_MtDecoder_PushItem(d, item);
+ }
+
+ if (d->vec.size) {
+ qsort(d->vec.data, d->vec.size, sizeof(*d->vec.data),
+ upb_MtDecoder_CompareFields);
+ }
+
+ return true;
+}
+
+static size_t upb_MiniTable_DivideRoundUp(size_t n, size_t d) {
+ return (n + d - 1) / d;
+}
+
+static void upb_MtDecoder_AssignHasbits(upb_MtDecoder* d) {
+ upb_MiniTable* ret = d->table;
+ int n = ret->field_count;
+ int last_hasbit = 0; // 0 cannot be used.
+
+ // First assign required fields, which must have the lowest hasbits.
+ for (int i = 0; i < n; i++) {
+ upb_MiniTableField* field = (upb_MiniTableField*)&ret->fields[i];
+ if (field->offset == kRequiredPresence) {
+ field->presence = ++last_hasbit;
+ } else if (field->offset == kNoPresence) {
+ field->presence = 0;
+ }
+ }
+ ret->required_count = last_hasbit;
+
+ if (ret->required_count > 63) {
+ upb_MdDecoder_ErrorJmp(&d->base, "Too many required fields");
+ }
+
+ // Next assign non-required hasbit fields.
+ for (int i = 0; i < n; i++) {
+ upb_MiniTableField* field = (upb_MiniTableField*)&ret->fields[i];
+ if (field->offset == kHasbitPresence) {
+ field->presence = ++last_hasbit;
+ }
+ }
+
+ ret->size = last_hasbit ? upb_MiniTable_DivideRoundUp(last_hasbit + 1, 8) : 0;
+}
+
+size_t upb_MtDecoder_Place(upb_MtDecoder* d, upb_FieldRep rep) {
+ size_t size = upb_MtDecoder_SizeOfRep(rep, d->platform);
+ size_t align = upb_MtDecoder_AlignOfRep(rep, d->platform);
+ size_t ret = UPB_ALIGN_UP(d->table->size, align);
+ static const size_t max = UINT16_MAX;
+ size_t new_size = ret + size;
+ if (new_size > max) {
+ upb_MdDecoder_ErrorJmp(
+ &d->base, "Message size exceeded maximum size of %zu bytes", max);
+ }
+ d->table->size = new_size;
+ return ret;
+}
+
+static void upb_MtDecoder_AssignOffsets(upb_MtDecoder* d) {
+ upb_LayoutItem* end = UPB_PTRADD(d->vec.data, d->vec.size);
+
+ // Compute offsets.
+ for (upb_LayoutItem* item = d->vec.data; item < end; item++) {
+ item->offset = upb_MtDecoder_Place(d, item->rep);
+ }
+
+ // Assign oneof case offsets. We must do these first, since assigning
+ // actual offsets will overwrite the links of the linked list.
+ for (upb_LayoutItem* item = d->vec.data; item < end; item++) {
+ if (item->type != kUpb_LayoutItemType_OneofCase) continue;
+ upb_MiniTableField* f = &d->fields[item->field_index];
+ while (true) {
+ f->presence = ~item->offset;
+ if (f->offset == kUpb_LayoutItem_IndexSentinel) break;
+ UPB_ASSERT(f->offset - kOneofBase < d->table->field_count);
+ f = &d->fields[f->offset - kOneofBase];
+ }
+ }
+
+ // Assign offsets.
+ for (upb_LayoutItem* item = d->vec.data; item < end; item++) {
+ upb_MiniTableField* f = &d->fields[item->field_index];
+ switch (item->type) {
+ case kUpb_LayoutItemType_OneofField:
+ while (true) {
+ uint16_t next_offset = f->offset;
+ f->offset = item->offset;
+ if (next_offset == kUpb_LayoutItem_IndexSentinel) break;
+ f = &d->fields[next_offset - kOneofBase];
+ }
+ break;
+ case kUpb_LayoutItemType_Field:
+ f->offset = item->offset;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // The fasttable parser (supported on 64-bit only) depends on this being a
+ // multiple of 8 in order to satisfy UPB_MALLOC_ALIGN, which is also 8.
+ //
+ // On 32-bit we could potentially make this smaller, but there is no
+ // compelling reason to optimize this right now.
+ d->table->size = UPB_ALIGN_UP(d->table->size, 8);
+}
+
+static void upb_MtDecoder_ValidateEntryField(upb_MtDecoder* d,
+ const upb_MiniTableField* f,
+ uint32_t expected_num) {
+ const char* name = expected_num == 1 ? "key" : "val";
+ if (f->number != expected_num) {
+ upb_MdDecoder_ErrorJmp(&d->base,
+ "map %s did not have expected number (%d vs %d)",
+ name, expected_num, (int)f->number);
+ }
+
+ if (upb_IsRepeatedOrMap(f)) {
+ upb_MdDecoder_ErrorJmp(
+ &d->base, "map %s cannot be repeated or map, or be in oneof", name);
+ }
+
+ uint32_t not_ok_types;
+ if (expected_num == 1) {
+ not_ok_types = (1 << kUpb_FieldType_Float) | (1 << kUpb_FieldType_Double) |
+ (1 << kUpb_FieldType_Message) | (1 << kUpb_FieldType_Group) |
+ (1 << kUpb_FieldType_Bytes) | (1 << kUpb_FieldType_Enum);
+ } else {
+ not_ok_types = 1 << kUpb_FieldType_Group;
+ }
+
+ if ((1 << upb_MiniTableField_Type(f)) & not_ok_types) {
+ upb_MdDecoder_ErrorJmp(&d->base, "map %s cannot have type %d", name,
+ (int)f->UPB_PRIVATE(descriptortype));
+ }
+}
+
+static void upb_MtDecoder_ParseMap(upb_MtDecoder* d, const char* data,
+ size_t len) {
+ upb_MtDecoder_ParseMessage(d, data, len);
+ upb_MtDecoder_AssignHasbits(d);
+
+ if (UPB_UNLIKELY(d->table->field_count != 2)) {
+ upb_MdDecoder_ErrorJmp(&d->base, "%hu fields in map",
+ d->table->field_count);
+ UPB_UNREACHABLE();
+ }
+
+ upb_LayoutItem* end = UPB_PTRADD(d->vec.data, d->vec.size);
+ for (upb_LayoutItem* item = d->vec.data; item < end; item++) {
+ if (item->type == kUpb_LayoutItemType_OneofCase) {
+ upb_MdDecoder_ErrorJmp(&d->base, "Map entry cannot have oneof");
+ }
+ }
+
+ upb_MtDecoder_ValidateEntryField(d, &d->table->fields[0], 1);
+ upb_MtDecoder_ValidateEntryField(d, &d->table->fields[1], 2);
+
+ // Map entries have a pre-determined layout, regardless of types.
+ // NOTE: sync with mini_table/message_internal.h.
+ const size_t kv_size = d->platform == kUpb_MiniTablePlatform_32Bit ? 8 : 16;
+ const size_t hasbit_size = 8;
+ d->fields[0].offset = hasbit_size;
+ d->fields[1].offset = hasbit_size + kv_size;
+ d->table->size = UPB_ALIGN_UP(hasbit_size + kv_size + kv_size, 8);
+
+ // Map entries have a special bit set to signal it's a map entry, used in
+ // upb_MiniTable_SetSubMessage() below.
+ d->table->ext |= kUpb_ExtMode_IsMapEntry;
+}
+
+static void upb_MtDecoder_ParseMessageSet(upb_MtDecoder* d, const char* data,
+ size_t len) {
+ if (len > 0) {
+ upb_MdDecoder_ErrorJmp(&d->base, "Invalid message set encode length: %zu",
+ len);
+ }
+
+ upb_MiniTable* ret = d->table;
+ ret->size = 0;
+ ret->field_count = 0;
+ ret->ext = kUpb_ExtMode_IsMessageSet;
+ ret->dense_below = 0;
+ ret->table_mask = -1;
+ ret->required_count = 0;
+}
+
+static upb_MiniTable* upb_MtDecoder_DoBuildMiniTableWithBuf(
+ upb_MtDecoder* decoder, const char* data, size_t len, void** buf,
+ size_t* buf_size) {
+ upb_MdDecoder_CheckOutOfMemory(&decoder->base, decoder->table);
+
+ decoder->table->size = 0;
+ decoder->table->field_count = 0;
+ decoder->table->ext = kUpb_ExtMode_NonExtendable;
+ decoder->table->dense_below = 0;
+ decoder->table->table_mask = -1;
+ decoder->table->required_count = 0;
+
+ // Strip off and verify the version tag.
+ if (!len--) goto done;
+ const char vers = *data++;
+
+ switch (vers) {
+ case kUpb_EncodedVersion_MapV1:
+ upb_MtDecoder_ParseMap(decoder, data, len);
+ break;
+
+ case kUpb_EncodedVersion_MessageV1:
+ upb_MtDecoder_ParseMessage(decoder, data, len);
+ upb_MtDecoder_AssignHasbits(decoder);
+ upb_MtDecoder_SortLayoutItems(decoder);
+ upb_MtDecoder_AssignOffsets(decoder);
+ break;
+
+ case kUpb_EncodedVersion_MessageSetV1:
+ upb_MtDecoder_ParseMessageSet(decoder, data, len);
+ break;
+
+ default:
+ upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid message version: %c",
+ vers);
+ }
+
+done:
+ *buf = decoder->vec.data;
+ *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data);
+ return decoder->table;
+}
+
+static upb_MiniTable* upb_MtDecoder_BuildMiniTableWithBuf(
+ upb_MtDecoder* const decoder, const char* const data, const size_t len,
+ void** const buf, size_t* const buf_size) {
+ if (UPB_SETJMP(decoder->base.err) != 0) {
+ *buf = decoder->vec.data;
+ *buf_size = decoder->vec.capacity * sizeof(*decoder->vec.data);
+ return NULL;
+ }
+
+ return upb_MtDecoder_DoBuildMiniTableWithBuf(decoder, data, len, buf,
+ buf_size);
+}
+
+upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len,
+ upb_MiniTablePlatform platform,
+ upb_Arena* arena, void** buf,
+ size_t* buf_size,
+ upb_Status* status) {
+ upb_MtDecoder decoder = {
+ .base = {.status = status},
+ .platform = platform,
+ .vec =
+ {
+ .data = *buf,
+ .capacity = *buf_size / sizeof(*decoder.vec.data),
+ .size = 0,
+ },
+ .arena = arena,
+ .table = upb_Arena_Malloc(arena, sizeof(*decoder.table)),
+ };
+
+ return upb_MtDecoder_BuildMiniTableWithBuf(&decoder, data, len, buf,
+ buf_size);
+}
+
+static const char* upb_MtDecoder_DoBuildMiniTableExtension(
+ upb_MtDecoder* decoder, const char* data, size_t len,
+ upb_MiniTableExtension* ext, const upb_MiniTable* extendee,
+ upb_MiniTableSub sub) {
+ // If the string is non-empty then it must begin with a version tag.
+ if (len) {
+ if (*data != kUpb_EncodedVersion_ExtensionV1) {
+ upb_MdDecoder_ErrorJmp(&decoder->base, "Invalid ext version: %c", *data);
+ }
+ data++;
+ len--;
+ }
+
+ uint16_t count = 0;
+ upb_SubCounts sub_counts = {0, 0};
+ const char* ret = upb_MtDecoder_Parse(decoder, data, len, ext, sizeof(*ext),
+ &count, &sub_counts);
+ if (!ret || count != 1) return NULL;
+
+ upb_MiniTableField* f = &ext->field;
+
+ f->mode |= kUpb_LabelFlags_IsExtension;
+ f->offset = 0;
+ f->presence = 0;
+
+ if (extendee->ext & kUpb_ExtMode_IsMessageSet) {
+ // Extensions of MessageSet must be messages.
+ if (!upb_IsSubMessage(f)) return NULL;
+
+ // Extensions of MessageSet must be non-repeating.
+ if ((f->mode & kUpb_FieldMode_Mask) == kUpb_FieldMode_Array) return NULL;
+ }
+
+ ext->extendee = extendee;
+ ext->sub = sub;
+
+ return ret;
+}
+
+static const char* upb_MtDecoder_BuildMiniTableExtension(
+ upb_MtDecoder* const decoder, const char* const data, const size_t len,
+ upb_MiniTableExtension* const ext, const upb_MiniTable* const extendee,
+ const upb_MiniTableSub sub) {
+ if (UPB_SETJMP(decoder->base.err) != 0) return NULL;
+ return upb_MtDecoder_DoBuildMiniTableExtension(decoder, data, len, ext,
+ extendee, sub);
+}
+
+const char* _upb_MiniTableExtension_Init(const char* data, size_t len,
+ upb_MiniTableExtension* ext,
+ const upb_MiniTable* extendee,
+ upb_MiniTableSub sub,
+ upb_MiniTablePlatform platform,
+ upb_Status* status) {
+ upb_MtDecoder decoder = {
+ .base = {.status = status},
+ .arena = NULL,
+ .table = NULL,
+ .platform = platform,
+ };
+
+ return upb_MtDecoder_BuildMiniTableExtension(&decoder, data, len, ext,
+ extendee, sub);
+}
+
+upb_MiniTableExtension* _upb_MiniTableExtension_Build(
+ const char* data, size_t len, const upb_MiniTable* extendee,
+ upb_MiniTableSub sub, upb_MiniTablePlatform platform, upb_Arena* arena,
+ upb_Status* status) {
+ upb_MiniTableExtension* ext =
+ upb_Arena_Malloc(arena, sizeof(upb_MiniTableExtension));
+ if (UPB_UNLIKELY(!ext)) return NULL;
+
+ const char* ptr = _upb_MiniTableExtension_Init(data, len, ext, extendee, sub,
+ platform, status);
+ if (UPB_UNLIKELY(!ptr)) return NULL;
+
+ return ext;
+}
+
+upb_MiniTable* _upb_MiniTable_Build(const char* data, size_t len,
+ upb_MiniTablePlatform platform,
+ upb_Arena* arena, upb_Status* status) {
+ void* buf = NULL;
+ size_t size = 0;
+ upb_MiniTable* ret = upb_MiniTable_BuildWithBuf(data, len, platform, arena,
+ &buf, &size, status);
+ free(buf);
+ return ret;
+}
diff --git a/upb/upb/mini_descriptor/decode.h b/upb/upb/mini_descriptor/decode.h
new file mode 100644
index 0000000..81e3a71
--- /dev/null
+++ b/upb/upb/mini_descriptor/decode.h
@@ -0,0 +1,143 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MINI_TABLE_DECODE_H_
+#define UPB_MINI_TABLE_DECODE_H_
+
+#include "upb/base/status.h"
+#include "upb/mem/arena.h"
+#include "upb/mini_table/extension.h"
+#include "upb/mini_table/field.h"
+#include "upb/mini_table/message.h"
+#include "upb/mini_table/sub.h"
+
+// Export the newer headers, for legacy users. New users should include the
+// more specific headers directly.
+// IWYU pragma: begin_exports
+#include "upb/mini_descriptor/build_enum.h"
+#include "upb/mini_descriptor/link.h"
+// IWYU pragma: end_exports
+
+// Must be last.
+#include "upb/port/def.inc"
+
+typedef enum {
+ kUpb_MiniTablePlatform_32Bit,
+ kUpb_MiniTablePlatform_64Bit,
+ kUpb_MiniTablePlatform_Native =
+ UPB_SIZE(kUpb_MiniTablePlatform_32Bit, kUpb_MiniTablePlatform_64Bit),
+} upb_MiniTablePlatform;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Builds a mini table from the data encoded in the buffer [data, len]. If any
+// errors occur, returns NULL and sets a status message. In the success case,
+// the caller must call upb_MiniTable_SetSub*() for all message or proto2 enum
+// fields to link the table to the appropriate sub-tables.
+upb_MiniTable* _upb_MiniTable_Build(const char* data, size_t len,
+ upb_MiniTablePlatform platform,
+ upb_Arena* arena, upb_Status* status);
+
+UPB_API_INLINE upb_MiniTable* upb_MiniTable_Build(const char* data, size_t len,
+ upb_Arena* arena,
+ upb_Status* status) {
+ return _upb_MiniTable_Build(data, len, kUpb_MiniTablePlatform_Native, arena,
+ status);
+}
+
+// Initializes a MiniTableExtension buffer that has already been allocated.
+// This is needed by upb_FileDef and upb_MessageDef, which allocate all of the
+// extensions together in a single contiguous array.
+const char* _upb_MiniTableExtension_Init(const char* data, size_t len,
+ upb_MiniTableExtension* ext,
+ const upb_MiniTable* extendee,
+ upb_MiniTableSub sub,
+ upb_MiniTablePlatform platform,
+ upb_Status* status);
+
+UPB_API_INLINE const char* upb_MiniTableExtension_Init(
+ const char* data, size_t len, upb_MiniTableExtension* ext,
+ const upb_MiniTable* extendee, upb_MiniTableSub sub, upb_Status* status) {
+ return _upb_MiniTableExtension_Init(data, len, ext, extendee, sub,
+ kUpb_MiniTablePlatform_Native, status);
+}
+
+UPB_API upb_MiniTableExtension* _upb_MiniTableExtension_Build(
+ const char* data, size_t len, const upb_MiniTable* extendee,
+ upb_MiniTableSub sub, upb_MiniTablePlatform platform, upb_Arena* arena,
+ upb_Status* status);
+
+UPB_API_INLINE upb_MiniTableExtension* upb_MiniTableExtension_Build(
+ const char* data, size_t len, const upb_MiniTable* extendee,
+ upb_Arena* arena, upb_Status* status) {
+ upb_MiniTableSub sub;
+ sub.submsg = NULL;
+ return _upb_MiniTableExtension_Build(
+ data, len, extendee, sub, kUpb_MiniTablePlatform_Native, arena, status);
+}
+
+UPB_API_INLINE upb_MiniTableExtension* upb_MiniTableExtension_BuildMessage(
+ const char* data, size_t len, const upb_MiniTable* extendee,
+ upb_MiniTable* submsg, upb_Arena* arena, upb_Status* status) {
+ upb_MiniTableSub sub;
+ sub.submsg = submsg;
+ return _upb_MiniTableExtension_Build(
+ data, len, extendee, sub, kUpb_MiniTablePlatform_Native, arena, status);
+}
+
+UPB_API_INLINE upb_MiniTableExtension* upb_MiniTableExtension_BuildEnum(
+ const char* data, size_t len, const upb_MiniTable* extendee,
+ upb_MiniTableEnum* subenum, upb_Arena* arena, upb_Status* status) {
+ upb_MiniTableSub sub;
+ sub.subenum = subenum;
+ return _upb_MiniTableExtension_Build(
+ data, len, extendee, sub, kUpb_MiniTablePlatform_Native, arena, status);
+}
+
+// Like upb_MiniTable_Build(), but the user provides a buffer of layout data so
+// it can be reused from call to call, avoiding repeated realloc()/free().
+//
+// The caller owns `*buf` both before and after the call, and must free() it
+// when it is no longer in use. The function will realloc() `*buf` as
+// necessary, updating `*size` accordingly.
+upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len,
+ upb_MiniTablePlatform platform,
+ upb_Arena* arena, void** buf,
+ size_t* buf_size, upb_Status* status);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_MINI_TABLE_DECODE_H_ */
diff --git a/upb/upb/mini_descriptor/internal/base92.c b/upb/upb/mini_descriptor/internal/base92.c
new file mode 100644
index 0000000..6890e50
--- /dev/null
+++ b/upb/upb/mini_descriptor/internal/base92.c
@@ -0,0 +1,49 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/mini_descriptor/internal/base92.h"
+
+const char _kUpb_ToBase92[] = {
+ ' ', '!', '#', '$', '%', '&', '(', ')', '*', '+', ',', '-', '.', '/',
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=',
+ '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K',
+ 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
+ 'Z', '[', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h',
+ 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
+ 'w', 'x', 'y', 'z', '{', '|', '}', '~',
+};
+
+const int8_t _kUpb_FromBase92[] = {
+ 0, 1, -1, 2, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+ 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
+ 55, 56, 57, -1, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
+ 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
+};
diff --git a/upb/upb/mini_descriptor/internal/base92.h b/upb/upb/mini_descriptor/internal/base92.h
new file mode 100644
index 0000000..ff515e9
--- /dev/null
+++ b/upb/upb/mini_descriptor/internal/base92.h
@@ -0,0 +1,86 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_
+#define UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_
+
+#include <stdint.h>
+
+#include "upb/base/internal/log2.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+UPB_INLINE char _upb_ToBase92(int8_t ch) {
+ extern const char _kUpb_ToBase92[];
+ UPB_ASSERT(0 <= ch && ch < 92);
+ return _kUpb_ToBase92[ch];
+}
+
+UPB_INLINE char _upb_FromBase92(uint8_t ch) {
+ extern const int8_t _kUpb_FromBase92[];
+ if (' ' > ch || ch > '~') return -1;
+ return _kUpb_FromBase92[ch - ' '];
+}
+
+UPB_INLINE const char* _upb_Base92_DecodeVarint(const char* ptr,
+ const char* end, char first_ch,
+ uint8_t min, uint8_t max,
+ uint32_t* out_val) {
+ uint32_t val = 0;
+ uint32_t shift = 0;
+ const int bits_per_char =
+ upb_Log2Ceiling(_upb_FromBase92(max) - _upb_FromBase92(min));
+ char ch = first_ch;
+ while (1) {
+ uint32_t bits = _upb_FromBase92(ch) - _upb_FromBase92(min);
+ val |= bits << shift;
+ if (ptr == end || *ptr < min || max < *ptr) {
+ *out_val = val;
+ UPB_ASSUME(ptr != NULL);
+ return ptr;
+ }
+ ch = *ptr++;
+ shift += bits_per_char;
+ if (shift >= 32) return NULL;
+ }
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif // UPB_MINI_DESCRIPTOR_INTERNAL_BASE92_H_
diff --git a/upb/upb/mini_descriptor/internal/decoder.h b/upb/upb/mini_descriptor/internal/decoder.h
new file mode 100644
index 0000000..cde5d1d
--- /dev/null
+++ b/upb/upb/mini_descriptor/internal/decoder.h
@@ -0,0 +1,76 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_
+#define UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_
+
+#include "upb/base/status.h"
+#include "upb/mini_descriptor/internal/base92.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+// upb_MdDecoder: used internally for decoding MiniDescriptors for messages,
+// extensions, and enums.
+typedef struct {
+ const char* end;
+ upb_Status* status;
+ jmp_buf err;
+} upb_MdDecoder;
+
+UPB_PRINTF(2, 3)
+UPB_NORETURN UPB_INLINE void upb_MdDecoder_ErrorJmp(upb_MdDecoder* d,
+ const char* fmt, ...) {
+ if (d->status) {
+ va_list argp;
+ upb_Status_SetErrorMessage(d->status, "Error building mini table: ");
+ va_start(argp, fmt);
+ upb_Status_VAppendErrorFormat(d->status, fmt, argp);
+ va_end(argp);
+ }
+ UPB_LONGJMP(d->err, 1);
+}
+
+UPB_INLINE void upb_MdDecoder_CheckOutOfMemory(upb_MdDecoder* d,
+ const void* ptr) {
+ if (!ptr) upb_MdDecoder_ErrorJmp(d, "Out of memory");
+}
+
+UPB_INLINE const char* upb_MdDecoder_DecodeBase92Varint(
+ upb_MdDecoder* d, const char* ptr, char first_ch, uint8_t min, uint8_t max,
+ uint32_t* out_val) {
+ ptr = _upb_Base92_DecodeVarint(ptr, d->end, first_ch, min, max, out_val);
+ if (!ptr) upb_MdDecoder_ErrorJmp(d, "Overlong varint");
+ return ptr;
+}
+
+#include "upb/port/undef.inc"
+
+#endif // UPB_MINI_DESCRIPTOR_INTERNAL_DECODER_H_
diff --git a/upb/upb/mini_descriptor/internal/encode.c b/upb/upb/mini_descriptor/internal/encode.c
new file mode 100644
index 0000000..2570b16
--- /dev/null
+++ b/upb/upb/mini_descriptor/internal/encode.c
@@ -0,0 +1,329 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/mini_descriptor/internal/encode.h"
+
+#include "upb/base/internal/log2.h"
+#include "upb/mini_descriptor/internal/base92.h"
+#include "upb/mini_descriptor/internal/modifiers.h"
+#include "upb/mini_descriptor/internal/wire_constants.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+typedef struct {
+ uint64_t present_values_mask;
+ uint32_t last_written_value;
+} upb_MtDataEncoderInternal_EnumState;
+
+typedef struct {
+ uint64_t msg_modifiers;
+ uint32_t last_field_num;
+ enum {
+ kUpb_OneofState_NotStarted,
+ kUpb_OneofState_StartedOneof,
+ kUpb_OneofState_EmittedOneofField,
+ } oneof_state;
+} upb_MtDataEncoderInternal_MsgState;
+
+typedef struct {
+ char* buf_start; // Only for checking kUpb_MtDataEncoder_MinSize.
+ union {
+ upb_MtDataEncoderInternal_EnumState enum_state;
+ upb_MtDataEncoderInternal_MsgState msg_state;
+ } state;
+} upb_MtDataEncoderInternal;
+
+static upb_MtDataEncoderInternal* upb_MtDataEncoder_GetInternal(
+ upb_MtDataEncoder* e, char* buf_start) {
+ UPB_ASSERT(sizeof(upb_MtDataEncoderInternal) <= sizeof(e->internal));
+ upb_MtDataEncoderInternal* ret = (upb_MtDataEncoderInternal*)e->internal;
+ ret->buf_start = buf_start;
+ return ret;
+}
+
+static char* upb_MtDataEncoder_PutRaw(upb_MtDataEncoder* e, char* ptr,
+ char ch) {
+ upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal;
+ UPB_ASSERT(ptr - in->buf_start < kUpb_MtDataEncoder_MinSize);
+ if (ptr == e->end) return NULL;
+ *ptr++ = ch;
+ return ptr;
+}
+
+static char* upb_MtDataEncoder_Put(upb_MtDataEncoder* e, char* ptr, char ch) {
+ return upb_MtDataEncoder_PutRaw(e, ptr, _upb_ToBase92(ch));
+}
+
+static char* upb_MtDataEncoder_PutBase92Varint(upb_MtDataEncoder* e, char* ptr,
+ uint32_t val, int min, int max) {
+ int shift = upb_Log2Ceiling(_upb_FromBase92(max) - _upb_FromBase92(min) + 1);
+ UPB_ASSERT(shift <= 6);
+ uint32_t mask = (1 << shift) - 1;
+ do {
+ uint32_t bits = val & mask;
+ ptr = upb_MtDataEncoder_Put(e, ptr, bits + _upb_FromBase92(min));
+ if (!ptr) return NULL;
+ val >>= shift;
+ } while (val);
+ return ptr;
+}
+
+char* upb_MtDataEncoder_PutModifier(upb_MtDataEncoder* e, char* ptr,
+ uint64_t mod) {
+ if (mod) {
+ ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, mod,
+ kUpb_EncodedValue_MinModifier,
+ kUpb_EncodedValue_MaxModifier);
+ }
+ return ptr;
+}
+
+char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr,
+ upb_FieldType type, uint32_t field_num,
+ uint64_t field_mod) {
+ upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
+ in->state.msg_state.msg_modifiers = 0;
+ in->state.msg_state.last_field_num = 0;
+ in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted;
+
+ ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_ExtensionV1);
+ if (!ptr) return NULL;
+
+ return upb_MtDataEncoder_PutField(e, ptr, type, field_num, field_mod);
+}
+
+char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr,
+ upb_FieldType key_type,
+ upb_FieldType value_type, uint64_t key_mod,
+ uint64_t value_mod) {
+ upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
+ in->state.msg_state.msg_modifiers = 0;
+ in->state.msg_state.last_field_num = 0;
+ in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted;
+
+ ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MapV1);
+ if (!ptr) return NULL;
+
+ ptr = upb_MtDataEncoder_PutField(e, ptr, key_type, 1, key_mod);
+ if (!ptr) return NULL;
+
+ return upb_MtDataEncoder_PutField(e, ptr, value_type, 2, value_mod);
+}
+
+char* upb_MtDataEncoder_EncodeMessageSet(upb_MtDataEncoder* e, char* ptr) {
+ (void)upb_MtDataEncoder_GetInternal(e, ptr);
+ return upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageSetV1);
+}
+
+char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr,
+ uint64_t msg_mod) {
+ upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
+ in->state.msg_state.msg_modifiers = msg_mod;
+ in->state.msg_state.last_field_num = 0;
+ in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted;
+
+ ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageV1);
+ if (!ptr) return NULL;
+
+ return upb_MtDataEncoder_PutModifier(e, ptr, msg_mod);
+}
+
+static char* _upb_MtDataEncoder_MaybePutFieldSkip(upb_MtDataEncoder* e,
+ char* ptr,
+ uint32_t field_num) {
+ upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal;
+ if (field_num <= in->state.msg_state.last_field_num) return NULL;
+ if (in->state.msg_state.last_field_num + 1 != field_num) {
+ // Put skip.
+ UPB_ASSERT(field_num > in->state.msg_state.last_field_num);
+ uint32_t skip = field_num - in->state.msg_state.last_field_num;
+ ptr = upb_MtDataEncoder_PutBase92Varint(
+ e, ptr, skip, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip);
+ if (!ptr) return NULL;
+ }
+ in->state.msg_state.last_field_num = field_num;
+ return ptr;
+}
+
+static char* _upb_MtDataEncoder_PutFieldType(upb_MtDataEncoder* e, char* ptr,
+ upb_FieldType type,
+ uint64_t field_mod) {
+ static const char kUpb_TypeToEncoded[] = {
+ [kUpb_FieldType_Double] = kUpb_EncodedType_Double,
+ [kUpb_FieldType_Float] = kUpb_EncodedType_Float,
+ [kUpb_FieldType_Int64] = kUpb_EncodedType_Int64,
+ [kUpb_FieldType_UInt64] = kUpb_EncodedType_UInt64,
+ [kUpb_FieldType_Int32] = kUpb_EncodedType_Int32,
+ [kUpb_FieldType_Fixed64] = kUpb_EncodedType_Fixed64,
+ [kUpb_FieldType_Fixed32] = kUpb_EncodedType_Fixed32,
+ [kUpb_FieldType_Bool] = kUpb_EncodedType_Bool,
+ [kUpb_FieldType_String] = kUpb_EncodedType_String,
+ [kUpb_FieldType_Group] = kUpb_EncodedType_Group,
+ [kUpb_FieldType_Message] = kUpb_EncodedType_Message,
+ [kUpb_FieldType_Bytes] = kUpb_EncodedType_Bytes,
+ [kUpb_FieldType_UInt32] = kUpb_EncodedType_UInt32,
+ [kUpb_FieldType_Enum] = kUpb_EncodedType_OpenEnum,
+ [kUpb_FieldType_SFixed32] = kUpb_EncodedType_SFixed32,
+ [kUpb_FieldType_SFixed64] = kUpb_EncodedType_SFixed64,
+ [kUpb_FieldType_SInt32] = kUpb_EncodedType_SInt32,
+ [kUpb_FieldType_SInt64] = kUpb_EncodedType_SInt64,
+ };
+
+ int encoded_type = kUpb_TypeToEncoded[type];
+
+ if (field_mod & kUpb_FieldModifier_IsClosedEnum) {
+ UPB_ASSERT(type == kUpb_FieldType_Enum);
+ encoded_type = kUpb_EncodedType_ClosedEnum;
+ }
+
+ if (field_mod & kUpb_FieldModifier_IsRepeated) {
+ // Repeated fields shift the type number up (unlike other modifiers which
+ // are bit flags).
+ encoded_type += kUpb_EncodedType_RepeatedBase;
+ }
+
+ return upb_MtDataEncoder_Put(e, ptr, encoded_type);
+}
+
+static char* _upb_MtDataEncoder_MaybePutModifiers(upb_MtDataEncoder* e,
+ char* ptr, upb_FieldType type,
+ uint64_t field_mod) {
+ upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal;
+ uint32_t encoded_modifiers = 0;
+ if ((field_mod & kUpb_FieldModifier_IsRepeated) &&
+ upb_FieldType_IsPackable(type)) {
+ bool field_is_packed = field_mod & kUpb_FieldModifier_IsPacked;
+ bool default_is_packed = in->state.msg_state.msg_modifiers &
+ kUpb_MessageModifier_DefaultIsPacked;
+ if (field_is_packed != default_is_packed) {
+ encoded_modifiers |= kUpb_EncodedFieldModifier_FlipPacked;
+ }
+ }
+
+ if (field_mod & kUpb_FieldModifier_IsProto3Singular) {
+ encoded_modifiers |= kUpb_EncodedFieldModifier_IsProto3Singular;
+ }
+
+ if (field_mod & kUpb_FieldModifier_IsRequired) {
+ encoded_modifiers |= kUpb_EncodedFieldModifier_IsRequired;
+ }
+
+ return upb_MtDataEncoder_PutModifier(e, ptr, encoded_modifiers);
+}
+
+char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr,
+ upb_FieldType type, uint32_t field_num,
+ uint64_t field_mod) {
+ upb_MtDataEncoder_GetInternal(e, ptr);
+
+ ptr = _upb_MtDataEncoder_MaybePutFieldSkip(e, ptr, field_num);
+ if (!ptr) return NULL;
+
+ ptr = _upb_MtDataEncoder_PutFieldType(e, ptr, type, field_mod);
+ if (!ptr) return NULL;
+
+ return _upb_MtDataEncoder_MaybePutModifiers(e, ptr, type, field_mod);
+}
+
+char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr) {
+ upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
+ if (in->state.msg_state.oneof_state == kUpb_OneofState_NotStarted) {
+ ptr = upb_MtDataEncoder_Put(e, ptr, _upb_FromBase92(kUpb_EncodedValue_End));
+ } else {
+ ptr = upb_MtDataEncoder_Put(
+ e, ptr, _upb_FromBase92(kUpb_EncodedValue_OneofSeparator));
+ }
+ in->state.msg_state.oneof_state = kUpb_OneofState_StartedOneof;
+ return ptr;
+}
+
+char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr,
+ uint32_t field_num) {
+ upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
+ if (in->state.msg_state.oneof_state == kUpb_OneofState_EmittedOneofField) {
+ ptr = upb_MtDataEncoder_Put(
+ e, ptr, _upb_FromBase92(kUpb_EncodedValue_FieldSeparator));
+ if (!ptr) return NULL;
+ }
+ ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, field_num, _upb_ToBase92(0),
+ _upb_ToBase92(63));
+ in->state.msg_state.oneof_state = kUpb_OneofState_EmittedOneofField;
+ return ptr;
+}
+
+char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr) {
+ upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
+ in->state.enum_state.present_values_mask = 0;
+ in->state.enum_state.last_written_value = 0;
+
+ return upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_EnumV1);
+}
+
+static char* upb_MtDataEncoder_FlushDenseEnumMask(upb_MtDataEncoder* e,
+ char* ptr) {
+ upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal;
+ ptr = upb_MtDataEncoder_Put(e, ptr, in->state.enum_state.present_values_mask);
+ in->state.enum_state.present_values_mask = 0;
+ in->state.enum_state.last_written_value += 5;
+ return ptr;
+}
+
+char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr,
+ uint32_t val) {
+ // TODO(b/229641772): optimize this encoding.
+ upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
+ UPB_ASSERT(val >= in->state.enum_state.last_written_value);
+ uint32_t delta = val - in->state.enum_state.last_written_value;
+ if (delta >= 5 && in->state.enum_state.present_values_mask) {
+ ptr = upb_MtDataEncoder_FlushDenseEnumMask(e, ptr);
+ if (!ptr) {
+ return NULL;
+ }
+ delta -= 5;
+ }
+
+ if (delta >= 5) {
+ ptr = upb_MtDataEncoder_PutBase92Varint(
+ e, ptr, delta, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip);
+ in->state.enum_state.last_written_value += delta;
+ delta = 0;
+ }
+
+ UPB_ASSERT((in->state.enum_state.present_values_mask >> delta) == 0);
+ in->state.enum_state.present_values_mask |= 1ULL << delta;
+ return ptr;
+}
+
+char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr) {
+ upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
+ if (!in->state.enum_state.present_values_mask) return ptr;
+ return upb_MtDataEncoder_FlushDenseEnumMask(e, ptr);
+}
diff --git a/upb/upb/mini_descriptor/internal/encode.h b/upb/upb/mini_descriptor/internal/encode.h
new file mode 100644
index 0000000..104ae31
--- /dev/null
+++ b/upb/upb/mini_descriptor/internal/encode.h
@@ -0,0 +1,115 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_
+#define UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_
+
+#include <stdint.h>
+
+#include "upb/base/descriptor_constants.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+// If the input buffer has at least this many bytes available, the encoder call
+// is guaranteed to succeed (as long as field number order is maintained).
+#define kUpb_MtDataEncoder_MinSize 16
+
+typedef struct {
+ char* end; // Limit of the buffer passed as a parameter.
+ // Aliased to internal-only members in .cc.
+ char internal[32];
+} upb_MtDataEncoder;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Encodes field/oneof information for a given message. The sequence of calls
+// should look like:
+//
+// upb_MtDataEncoder e;
+// char buf[256];
+// char* ptr = buf;
+// e.end = ptr + sizeof(buf);
+// unit64_t msg_mod = ...; // bitwise & of kUpb_MessageModifiers or zero
+// ptr = upb_MtDataEncoder_StartMessage(&e, ptr, msg_mod);
+// // Fields *must* be in field number order.
+// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...);
+// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...);
+// ptr = upb_MtDataEncoder_PutField(&e, ptr, ...);
+//
+// // If oneofs are present. Oneofs must be encoded after regular fields.
+// ptr = upb_MiniTable_StartOneof(&e, ptr)
+// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...);
+// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...);
+//
+// ptr = upb_MiniTable_StartOneof(&e, ptr);
+// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...);
+// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...);
+//
+// Oneofs must be encoded after all regular fields.
+char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr,
+ uint64_t msg_mod);
+char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr,
+ upb_FieldType type, uint32_t field_num,
+ uint64_t field_mod);
+char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr);
+char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr,
+ uint32_t field_num);
+
+// Encodes the set of values for a given enum. The values must be given in
+// order (after casting to uint32_t), and repeats are not allowed.
+char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr);
+char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr,
+ uint32_t val);
+char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr);
+
+// Encodes an entire mini descriptor for an extension.
+char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr,
+ upb_FieldType type, uint32_t field_num,
+ uint64_t field_mod);
+
+// Encodes an entire mini descriptor for a map.
+char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr,
+ upb_FieldType key_type,
+ upb_FieldType value_type, uint64_t key_mod,
+ uint64_t value_mod);
+
+// Encodes an entire mini descriptor for a message set.
+char* upb_MtDataEncoder_EncodeMessageSet(upb_MtDataEncoder* e, char* ptr);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_MINI_DESCRIPTOR_INTERNAL_ENCODE_H_ */
diff --git a/upb/upb/mini_descriptor/internal/encode.hpp b/upb/upb/mini_descriptor/internal/encode.hpp
new file mode 100644
index 0000000..d778a72
--- /dev/null
+++ b/upb/upb/mini_descriptor/internal/encode.hpp
@@ -0,0 +1,139 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MINI_TABLE_ENCODE_INTERNAL_HPP_
+#define UPB_MINI_TABLE_ENCODE_INTERNAL_HPP_
+
+#include <string>
+
+#include "upb/base/internal/log2.h"
+#include "upb/mini_descriptor/internal/encode.h"
+
+namespace upb {
+
+class MtDataEncoder {
+ public:
+ MtDataEncoder() : appender_(&encoder_) {}
+
+ bool StartMessage(uint64_t msg_mod) {
+ return appender_([=](char* buf) {
+ return upb_MtDataEncoder_StartMessage(&encoder_, buf, msg_mod);
+ });
+ }
+
+ bool PutField(upb_FieldType type, uint32_t field_num, uint64_t field_mod) {
+ return appender_([=](char* buf) {
+ return upb_MtDataEncoder_PutField(&encoder_, buf, type, field_num,
+ field_mod);
+ });
+ }
+
+ bool StartOneof() {
+ return appender_([=](char* buf) {
+ return upb_MtDataEncoder_StartOneof(&encoder_, buf);
+ });
+ }
+
+ bool PutOneofField(uint32_t field_num) {
+ return appender_([=](char* buf) {
+ return upb_MtDataEncoder_PutOneofField(&encoder_, buf, field_num);
+ });
+ }
+
+ bool StartEnum() {
+ return appender_(
+ [=](char* buf) { return upb_MtDataEncoder_StartEnum(&encoder_, buf); });
+ }
+
+ bool PutEnumValue(uint32_t enum_value) {
+ return appender_([=](char* buf) {
+ return upb_MtDataEncoder_PutEnumValue(&encoder_, buf, enum_value);
+ });
+ }
+
+ bool EndEnum() {
+ return appender_(
+ [=](char* buf) { return upb_MtDataEncoder_EndEnum(&encoder_, buf); });
+ }
+
+ bool EncodeExtension(upb_FieldType type, uint32_t field_num,
+ uint64_t field_mod) {
+ return appender_([=](char* buf) {
+ return upb_MtDataEncoder_EncodeExtension(&encoder_, buf, type, field_num,
+ field_mod);
+ });
+ }
+
+ bool EncodeMap(upb_FieldType key_type, upb_FieldType val_type,
+ uint64_t key_mod, uint64_t val_mod) {
+ return appender_([=](char* buf) {
+ return upb_MtDataEncoder_EncodeMap(&encoder_, buf, key_type, val_type,
+ key_mod, val_mod);
+ });
+ }
+
+ bool EncodeMessageSet() {
+ return appender_([=](char* buf) {
+ return upb_MtDataEncoder_EncodeMessageSet(&encoder_, buf);
+ });
+ }
+
+ const std::string& data() const { return appender_.data(); }
+
+ private:
+ class StringAppender {
+ public:
+ StringAppender(upb_MtDataEncoder* e) { e->end = buf_ + sizeof(buf_); }
+
+ template <class T>
+ bool operator()(T&& func) {
+ char* end = func(buf_);
+ if (!end) return false;
+ // C++ does not guarantee that string has doubling growth behavior, but
+ // we need it to avoid O(n^2).
+ str_.reserve(upb_Log2CeilingSize(str_.size() + (end - buf_)));
+ str_.append(buf_, end - buf_);
+ return true;
+ }
+
+ const std::string& data() const { return str_; }
+
+ private:
+ char buf_[kUpb_MtDataEncoder_MinSize];
+ std::string str_;
+ };
+
+ upb_MtDataEncoder encoder_;
+ StringAppender appender_;
+};
+
+} // namespace upb
+
+#endif /* UPB_MINI_TABLE_ENCODE_INTERNAL_HPP_ */
diff --git a/upb/upb/mini_descriptor/internal/encode_test.cc b/upb/upb/mini_descriptor/internal/encode_test.cc
new file mode 100644
index 0000000..d40401d
--- /dev/null
+++ b/upb/upb/mini_descriptor/internal/encode_test.cc
@@ -0,0 +1,320 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/mini_descriptor/internal/encode.hpp"
+
+#include <string_view>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/container/flat_hash_set.h"
+#include "google/protobuf/descriptor.h"
+#include "upb/base/status.hpp"
+#include "upb/mem/arena.hpp"
+#include "upb/message/internal/accessors.h"
+#include "upb/mini_descriptor/decode.h"
+#include "upb/mini_descriptor/internal/base92.h"
+#include "upb/mini_descriptor/internal/modifiers.h"
+#include "upb/mini_table/enum.h"
+#include "upb/wire/decode.h"
+
+// begin:google_only
+// #include "testing/fuzzing/fuzztest.h"
+// end:google_only
+
+namespace protobuf = ::google::protobuf;
+
+class MiniTableTest : public testing::TestWithParam<upb_MiniTablePlatform> {};
+
+TEST_P(MiniTableTest, Empty) {
+ upb::Arena arena;
+ upb::Status status;
+ upb_MiniTable* table =
+ _upb_MiniTable_Build(nullptr, 0, GetParam(), arena.ptr(), status.ptr());
+ ASSERT_NE(nullptr, table);
+ EXPECT_EQ(0, table->field_count);
+ EXPECT_EQ(0, table->required_count);
+}
+
+TEST_P(MiniTableTest, AllScalarTypes) {
+ upb::Arena arena;
+ upb::MtDataEncoder e;
+ ASSERT_TRUE(e.StartMessage(0));
+ int count = 0;
+ for (int i = kUpb_FieldType_Double; i < kUpb_FieldType_SInt64; i++) {
+ ASSERT_TRUE(e.PutField(static_cast<upb_FieldType>(i), i, 0));
+ count++;
+ }
+ upb::Status status;
+ upb_MiniTable* table = _upb_MiniTable_Build(
+ e.data().data(), e.data().size(), GetParam(), arena.ptr(), status.ptr());
+ ASSERT_NE(nullptr, table);
+ EXPECT_EQ(count, table->field_count);
+ absl::flat_hash_set<size_t> offsets;
+ for (int i = 0; i < 16; i++) {
+ const upb_MiniTableField* f = &table->fields[i];
+ EXPECT_EQ(i + 1, f->number);
+ EXPECT_EQ(kUpb_FieldMode_Scalar, f->mode & kUpb_FieldMode_Mask);
+ EXPECT_TRUE(offsets.insert(f->offset).second);
+ EXPECT_TRUE(f->offset < table->size);
+ }
+ EXPECT_EQ(0, table->required_count);
+}
+
+TEST_P(MiniTableTest, AllRepeatedTypes) {
+ upb::Arena arena;
+ upb::MtDataEncoder e;
+ ASSERT_TRUE(e.StartMessage(0));
+ int count = 0;
+ for (int i = kUpb_FieldType_Double; i < kUpb_FieldType_SInt64; i++) {
+ ASSERT_TRUE(e.PutField(static_cast<upb_FieldType>(i), i,
+ kUpb_FieldModifier_IsRepeated));
+ count++;
+ }
+ upb::Status status;
+ upb_MiniTable* table = _upb_MiniTable_Build(
+ e.data().data(), e.data().size(), GetParam(), arena.ptr(), status.ptr());
+ ASSERT_NE(nullptr, table);
+ EXPECT_EQ(count, table->field_count);
+ absl::flat_hash_set<size_t> offsets;
+ for (int i = 0; i < 16; i++) {
+ const upb_MiniTableField* f = &table->fields[i];
+ EXPECT_EQ(i + 1, f->number);
+ EXPECT_EQ(kUpb_FieldMode_Array, f->mode & kUpb_FieldMode_Mask);
+ EXPECT_TRUE(offsets.insert(f->offset).second);
+ EXPECT_TRUE(f->offset < table->size);
+ }
+ EXPECT_EQ(0, table->required_count);
+}
+
+TEST_P(MiniTableTest, Skips) {
+ upb::Arena arena;
+ upb::MtDataEncoder e;
+ ASSERT_TRUE(e.StartMessage(0));
+ int count = 0;
+ std::vector<int> field_numbers;
+ for (int i = 0; i < 25; i++) {
+ int field_number = 1 << i;
+ field_numbers.push_back(field_number);
+ ASSERT_TRUE(e.PutField(kUpb_FieldType_Float, field_number, 0));
+ count++;
+ }
+ upb::Status status;
+ upb_MiniTable* table = _upb_MiniTable_Build(
+ e.data().data(), e.data().size(), GetParam(), arena.ptr(), status.ptr());
+ ASSERT_NE(nullptr, table);
+ EXPECT_EQ(count, table->field_count);
+ absl::flat_hash_set<size_t> offsets;
+ for (size_t i = 0; i < field_numbers.size(); i++) {
+ const upb_MiniTableField* f = &table->fields[i];
+ EXPECT_EQ(field_numbers[i], f->number);
+ EXPECT_EQ(kUpb_FieldType_Float, upb_MiniTableField_Type(f));
+ EXPECT_EQ(kUpb_FieldMode_Scalar, f->mode & kUpb_FieldMode_Mask);
+ EXPECT_TRUE(offsets.insert(f->offset).second);
+ EXPECT_TRUE(f->offset < table->size);
+ }
+ EXPECT_EQ(0, table->required_count);
+}
+
+TEST_P(MiniTableTest, AllScalarTypesOneof) {
+ upb::Arena arena;
+ upb::MtDataEncoder e;
+ ASSERT_TRUE(e.StartMessage(0));
+ int count = 0;
+ for (int i = kUpb_FieldType_Double; i < kUpb_FieldType_SInt64; i++) {
+ ASSERT_TRUE(e.PutField(static_cast<upb_FieldType>(i), i, 0));
+ count++;
+ }
+ ASSERT_TRUE(e.StartOneof());
+ for (int i = kUpb_FieldType_Double; i < kUpb_FieldType_SInt64; i++) {
+ ASSERT_TRUE(e.PutOneofField(i));
+ }
+ upb::Status status;
+ upb_MiniTable* table = _upb_MiniTable_Build(
+ e.data().data(), e.data().size(), GetParam(), arena.ptr(), status.ptr());
+ ASSERT_NE(nullptr, table) << status.error_message();
+ EXPECT_EQ(count, table->field_count);
+ absl::flat_hash_set<size_t> offsets;
+ for (int i = 0; i < 16; i++) {
+ const upb_MiniTableField* f = &table->fields[i];
+ EXPECT_EQ(i + 1, f->number);
+ EXPECT_EQ(kUpb_FieldMode_Scalar, f->mode & kUpb_FieldMode_Mask);
+ // For a oneof all fields have the same offset.
+ EXPECT_EQ(table->fields[0].offset, f->offset);
+ // All presence fields should point to the same oneof case offset.
+ size_t case_ofs = _upb_oneofcase_ofs(f);
+ EXPECT_EQ(table->fields[0].presence, f->presence);
+ EXPECT_TRUE(f->offset < table->size);
+ EXPECT_TRUE(case_ofs < table->size);
+ EXPECT_TRUE(case_ofs != f->offset);
+ }
+ EXPECT_EQ(0, table->required_count);
+}
+
+TEST_P(MiniTableTest, SizeOverflow) {
+ upb::Arena arena;
+ upb::MtDataEncoder e;
+ // upb can only handle messages up to UINT16_MAX.
+ size_t max_double_fields = UINT16_MAX / (sizeof(double) + 1);
+
+ // A bit under max_double_fields is ok.
+ ASSERT_TRUE(e.StartMessage(0));
+ for (size_t i = 1; i < max_double_fields; i++) {
+ ASSERT_TRUE(e.PutField(kUpb_FieldType_Double, i, 0));
+ }
+ upb::Status status;
+ upb_MiniTable* table = _upb_MiniTable_Build(
+ e.data().data(), e.data().size(), GetParam(), arena.ptr(), status.ptr());
+ ASSERT_NE(nullptr, table) << status.error_message();
+
+ // A bit over max_double_fields fails.
+ ASSERT_TRUE(e.StartMessage(0));
+ for (size_t i = 1; i < max_double_fields + 2; i++) {
+ ASSERT_TRUE(e.PutField(kUpb_FieldType_Double, i, 0));
+ }
+ upb_MiniTable* table2 = _upb_MiniTable_Build(
+ e.data().data(), e.data().size(), GetParam(), arena.ptr(), status.ptr());
+ ASSERT_EQ(nullptr, table2) << status.error_message();
+}
+
+INSTANTIATE_TEST_SUITE_P(Platforms, MiniTableTest,
+ testing::Values(kUpb_MiniTablePlatform_32Bit,
+ kUpb_MiniTablePlatform_64Bit));
+
+TEST(MiniTablePlatformIndependentTest, Base92Roundtrip) {
+ for (char i = 0; i < 92; i++) {
+ EXPECT_EQ(i, _upb_FromBase92(_upb_ToBase92(i)));
+ }
+}
+
+TEST(MiniTablePlatformIndependentTest, IsTypePackable) {
+ for (int i = 1; i <= protobuf::FieldDescriptor::MAX_TYPE; i++) {
+ EXPECT_EQ(upb_FieldType_IsPackable(static_cast<upb_FieldType>(i)),
+ protobuf::FieldDescriptor::IsTypePackable(
+ static_cast<protobuf::FieldDescriptor::Type>(i)));
+ }
+}
+
+TEST(MiniTableEnumTest, Enum) {
+ upb::Arena arena;
+ upb::MtDataEncoder e;
+
+ ASSERT_TRUE(e.StartEnum());
+ absl::flat_hash_set<int32_t> values;
+ for (int i = 0; i < 256; i++) {
+ values.insert(i * 2);
+ e.PutEnumValue(i * 2);
+ }
+ e.EndEnum();
+
+ upb::Status status;
+ upb_MiniTableEnum* table = upb_MiniTableEnum_Build(
+ e.data().data(), e.data().size(), arena.ptr(), status.ptr());
+ ASSERT_NE(nullptr, table) << status.error_message();
+
+ for (int i = 0; i < UINT16_MAX; i++) {
+ EXPECT_EQ(values.contains(i), upb_MiniTableEnum_CheckValue(table, i)) << i;
+ }
+}
+
+TEST_P(MiniTableTest, SubsInitializedToEmpty) {
+ upb::Arena arena;
+ upb::MtDataEncoder e;
+ // Create mini table with 2 message fields.
+ ASSERT_TRUE(e.StartMessage(0));
+ ASSERT_TRUE(e.PutField(kUpb_FieldType_Message, 15, 0));
+ ASSERT_TRUE(e.PutField(kUpb_FieldType_Message, 16, 0));
+ upb::Status status;
+ upb_MiniTable* table = _upb_MiniTable_Build(
+ e.data().data(), e.data().size(), GetParam(), arena.ptr(), status.ptr());
+ ASSERT_NE(nullptr, table);
+ EXPECT_EQ(table->field_count, 2);
+ EXPECT_EQ(table->subs[0].submsg, &_kUpb_MiniTable_Empty);
+ EXPECT_EQ(table->subs[1].submsg, &_kUpb_MiniTable_Empty);
+}
+
+TEST(MiniTableEnumTest, PositiveAndNegative) {
+ upb::Arena arena;
+ upb::MtDataEncoder e;
+
+ ASSERT_TRUE(e.StartEnum());
+ absl::flat_hash_set<int32_t> values;
+ for (int i = 0; i < 100; i++) {
+ values.insert(i);
+ e.PutEnumValue(i);
+ }
+ for (int i = 100; i > 0; i--) {
+ values.insert(-i);
+ e.PutEnumValue(-i);
+ }
+ e.EndEnum();
+
+ upb::Status status;
+ upb_MiniTableEnum* table = upb_MiniTableEnum_Build(
+ e.data().data(), e.data().size(), arena.ptr(), status.ptr());
+ ASSERT_NE(nullptr, table) << status.error_message();
+
+ for (int i = -UINT16_MAX; i < UINT16_MAX; i++) {
+ EXPECT_EQ(values.contains(i), upb_MiniTableEnum_CheckValue(table, i)) << i;
+ }
+}
+
+TEST_P(MiniTableTest, Extendible) {
+ upb::Arena arena;
+ upb::MtDataEncoder e;
+ ASSERT_TRUE(e.StartMessage(kUpb_MessageModifier_IsExtendable));
+ for (int i = kUpb_FieldType_Double; i < kUpb_FieldType_SInt64; i++) {
+ ASSERT_TRUE(e.PutField(static_cast<upb_FieldType>(i), i, 0));
+ }
+ upb::Status status;
+ upb_MiniTable* table = _upb_MiniTable_Build(
+ e.data().data(), e.data().size(), GetParam(), arena.ptr(), status.ptr());
+ ASSERT_NE(nullptr, table);
+ EXPECT_EQ(kUpb_ExtMode_Extendable, table->ext & kUpb_ExtMode_Extendable);
+}
+
+// begin:google_only
+//
+// static void BuildMiniTable(std::string_view s, bool is_32bit) {
+// upb::Arena arena;
+// upb::Status status;
+// _upb_MiniTable_Build(
+// s.data(), s.size(),
+// is_32bit ? kUpb_MiniTablePlatform_32Bit : kUpb_MiniTablePlatform_64Bit,
+// arena.ptr(), status.ptr());
+// }
+// FUZZ_TEST(FuzzTest, BuildMiniTable);
+//
+// TEST(FuzzTest, BuildMiniTableRegression) {
+// BuildMiniTable("g}{v~fq{\271", false);
+// }
+//
+// end:google_only
diff --git a/upb/upb/mini_descriptor/internal/modifiers.h b/upb/upb/mini_descriptor/internal/modifiers.h
new file mode 100644
index 0000000..bd4ee21
--- /dev/null
+++ b/upb/upb/mini_descriptor/internal/modifiers.h
@@ -0,0 +1,53 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_
+#define UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_
+
+// Must be last.
+#include "upb/port/def.inc"
+
+typedef enum {
+ kUpb_FieldModifier_IsRepeated = 1 << 0,
+ kUpb_FieldModifier_IsPacked = 1 << 1,
+ kUpb_FieldModifier_IsClosedEnum = 1 << 2,
+ kUpb_FieldModifier_IsProto3Singular = 1 << 3,
+ kUpb_FieldModifier_IsRequired = 1 << 4,
+} kUpb_FieldModifier;
+
+typedef enum {
+ kUpb_MessageModifier_ValidateUtf8 = 1 << 0,
+ kUpb_MessageModifier_DefaultIsPacked = 1 << 1,
+ kUpb_MessageModifier_IsExtendable = 1 << 2,
+} kUpb_MessageModifier;
+
+#include "upb/port/undef.inc"
+
+#endif // UPB_MINI_DESCRIPTOR_INTERNAL_MODIFIERS_H_
diff --git a/upb/upb/mini_descriptor/internal/wire_constants.h b/upb/upb/mini_descriptor/internal/wire_constants.h
new file mode 100644
index 0000000..f24306a
--- /dev/null
+++ b/upb/upb/mini_descriptor/internal/wire_constants.h
@@ -0,0 +1,94 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_
+#define UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_
+
+#include "upb/base/descriptor_constants.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+typedef enum {
+ kUpb_EncodedType_Double = 0,
+ kUpb_EncodedType_Float = 1,
+ kUpb_EncodedType_Fixed32 = 2,
+ kUpb_EncodedType_Fixed64 = 3,
+ kUpb_EncodedType_SFixed32 = 4,
+ kUpb_EncodedType_SFixed64 = 5,
+ kUpb_EncodedType_Int32 = 6,
+ kUpb_EncodedType_UInt32 = 7,
+ kUpb_EncodedType_SInt32 = 8,
+ kUpb_EncodedType_Int64 = 9,
+ kUpb_EncodedType_UInt64 = 10,
+ kUpb_EncodedType_SInt64 = 11,
+ kUpb_EncodedType_OpenEnum = 12,
+ kUpb_EncodedType_Bool = 13,
+ kUpb_EncodedType_Bytes = 14,
+ kUpb_EncodedType_String = 15,
+ kUpb_EncodedType_Group = 16,
+ kUpb_EncodedType_Message = 17,
+ kUpb_EncodedType_ClosedEnum = 18,
+
+ kUpb_EncodedType_RepeatedBase = 20,
+} upb_EncodedType;
+
+typedef enum {
+ kUpb_EncodedFieldModifier_FlipPacked = 1 << 0,
+ kUpb_EncodedFieldModifier_IsRequired = 1 << 1,
+ kUpb_EncodedFieldModifier_IsProto3Singular = 1 << 2,
+} upb_EncodedFieldModifier;
+
+enum {
+ kUpb_EncodedValue_MinField = ' ',
+ kUpb_EncodedValue_MaxField = 'I',
+ kUpb_EncodedValue_MinModifier = 'L',
+ kUpb_EncodedValue_MaxModifier = '[',
+ kUpb_EncodedValue_End = '^',
+ kUpb_EncodedValue_MinSkip = '_',
+ kUpb_EncodedValue_MaxSkip = '~',
+ kUpb_EncodedValue_OneofSeparator = '~',
+ kUpb_EncodedValue_FieldSeparator = '|',
+ kUpb_EncodedValue_MinOneofField = ' ',
+ kUpb_EncodedValue_MaxOneofField = 'b',
+ kUpb_EncodedValue_MaxEnumMask = 'A',
+};
+
+enum {
+ kUpb_EncodedVersion_EnumV1 = '!',
+ kUpb_EncodedVersion_ExtensionV1 = '#',
+ kUpb_EncodedVersion_MapV1 = '%',
+ kUpb_EncodedVersion_MessageV1 = '$',
+ kUpb_EncodedVersion_MessageSetV1 = '&',
+};
+
+#include "upb/port/undef.inc"
+
+#endif // UPB_MINI_DESCRIPTOR_INTERNAL_WIRE_CONSTANTS_H_
diff --git a/upb/upb/mini_descriptor/link.c b/upb/upb/mini_descriptor/link.c
new file mode 100644
index 0000000..82b0084
--- /dev/null
+++ b/upb/upb/mini_descriptor/link.c
@@ -0,0 +1,145 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/mini_descriptor/link.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+bool upb_MiniTable_SetSubMessage(upb_MiniTable* table,
+ upb_MiniTableField* field,
+ const upb_MiniTable* sub) {
+ UPB_ASSERT((uintptr_t)table->fields <= (uintptr_t)field &&
+ (uintptr_t)field <
+ (uintptr_t)(table->fields + table->field_count));
+ UPB_ASSERT(sub);
+
+ const bool sub_is_map = sub->ext & kUpb_ExtMode_IsMapEntry;
+
+ switch (field->UPB_PRIVATE(descriptortype)) {
+ case kUpb_FieldType_Message:
+ if (sub_is_map) {
+ const bool table_is_map = table->ext & kUpb_ExtMode_IsMapEntry;
+ if (UPB_UNLIKELY(table_is_map)) return false;
+
+ field->mode = (field->mode & ~kUpb_FieldMode_Mask) | kUpb_FieldMode_Map;
+ }
+ break;
+
+ case kUpb_FieldType_Group:
+ if (UPB_UNLIKELY(sub_is_map)) return false;
+ break;
+
+ default:
+ return false;
+ }
+
+ upb_MiniTableSub* table_sub =
+ (void*)&table->subs[field->UPB_PRIVATE(submsg_index)];
+ // TODO(haberman): Add this assert back once YouTube is updated to not call
+ // this function repeatedly.
+ // UPB_ASSERT(table_sub->submsg == &_kUpb_MiniTable_Empty);
+ table_sub->submsg = sub;
+ return true;
+}
+
+bool upb_MiniTable_SetSubEnum(upb_MiniTable* table, upb_MiniTableField* field,
+ const upb_MiniTableEnum* sub) {
+ UPB_ASSERT((uintptr_t)table->fields <= (uintptr_t)field &&
+ (uintptr_t)field <
+ (uintptr_t)(table->fields + table->field_count));
+ UPB_ASSERT(sub);
+
+ upb_MiniTableSub* table_sub =
+ (void*)&table->subs[field->UPB_PRIVATE(submsg_index)];
+ table_sub->subenum = sub;
+ return true;
+}
+
+uint32_t upb_MiniTable_GetSubList(const upb_MiniTable* mt,
+ const upb_MiniTableField** subs) {
+ uint32_t msg_count = 0;
+ uint32_t enum_count = 0;
+
+ for (int i = 0; i < mt->field_count; i++) {
+ const upb_MiniTableField* f = &mt->fields[i];
+ if (upb_MiniTableField_CType(f) == kUpb_CType_Message) {
+ *subs = f;
+ ++subs;
+ msg_count++;
+ }
+ }
+
+ for (int i = 0; i < mt->field_count; i++) {
+ const upb_MiniTableField* f = &mt->fields[i];
+ if (upb_MiniTableField_CType(f) == kUpb_CType_Enum) {
+ *subs = f;
+ ++subs;
+ enum_count++;
+ }
+ }
+
+ return (msg_count << 16) | enum_count;
+}
+
+// The list of sub_tables and sub_enums must exactly match the number and order
+// of sub-message fields and sub-enum fields given by upb_MiniTable_GetSubList()
+// above.
+bool upb_MiniTable_Link(upb_MiniTable* mt, const upb_MiniTable** sub_tables,
+ size_t sub_table_count,
+ const upb_MiniTableEnum** sub_enums,
+ size_t sub_enum_count) {
+ uint32_t msg_count = 0;
+ uint32_t enum_count = 0;
+
+ for (int i = 0; i < mt->field_count; i++) {
+ upb_MiniTableField* f = (upb_MiniTableField*)&mt->fields[i];
+ if (upb_MiniTableField_CType(f) == kUpb_CType_Message) {
+ const upb_MiniTable* sub = sub_tables[msg_count++];
+ if (msg_count > sub_table_count) return false;
+ if (sub != NULL) {
+ if (!upb_MiniTable_SetSubMessage(mt, f, sub)) return false;
+ }
+ }
+ }
+
+ for (int i = 0; i < mt->field_count; i++) {
+ upb_MiniTableField* f = (upb_MiniTableField*)&mt->fields[i];
+ if (upb_MiniTableField_IsClosedEnum(f)) {
+ const upb_MiniTableEnum* sub = sub_enums[enum_count++];
+ if (enum_count > sub_enum_count) return false;
+ if (sub != NULL) {
+ if (!upb_MiniTable_SetSubEnum(mt, f, sub)) return false;
+ }
+ }
+ }
+
+ return true;
+}
diff --git a/upb/upb/mini_descriptor/link.h b/upb/upb/mini_descriptor/link.h
new file mode 100644
index 0000000..65e728e
--- /dev/null
+++ b/upb/upb/mini_descriptor/link.h
@@ -0,0 +1,107 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Functions for linking MiniTables together once they are built from a
+// MiniDescriptor.
+//
+// These functions have names like upb_MiniTable_Link() because they operate on
+// MiniTables. We put them here, rather than in the mini_table/ directory,
+// because they are only needed when building MiniTables from MiniDescriptors.
+// The interfaces in mini_table/ assume that MiniTables are immutable.
+
+#ifndef UPB_MINI_DESCRIPTOR_LINK_H_
+#define UPB_MINI_DESCRIPTOR_LINK_H_
+
+#include "upb/base/status.h"
+#include "upb/mem/arena.h"
+#include "upb/mini_table/extension.h"
+#include "upb/mini_table/field.h"
+#include "upb/mini_table/message.h"
+#include "upb/mini_table/sub.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Links a sub-message field to a MiniTable for that sub-message. If a
+// sub-message field is not linked, it will be treated as an unknown field
+// during parsing, and setting the field will not be allowed. It is possible
+// to link the message field later, at which point it will no longer be treated
+// as unknown. However there is no synchronization for this operation, which
+// means parallel mutation requires external synchronization.
+// Returns success/failure.
+UPB_API bool upb_MiniTable_SetSubMessage(upb_MiniTable* table,
+ upb_MiniTableField* field,
+ const upb_MiniTable* sub);
+
+// Links an enum field to a MiniTable for that enum.
+// All enum fields must be linked prior to parsing.
+// Returns success/failure.
+UPB_API bool upb_MiniTable_SetSubEnum(upb_MiniTable* table,
+ upb_MiniTableField* field,
+ const upb_MiniTableEnum* sub);
+
+// Returns a list of fields that require linking at runtime, to connect the
+// MiniTable to its sub-messages and sub-enums. The list of fields will be
+// written to the `subs` array, which must have been allocated by the caller
+// and must be large enough to hold a list of all fields in the message.
+//
+// The order of the fields returned by this function is significant: it matches
+// the order expected by upb_MiniTable_Link() below.
+//
+// The return value packs the sub-message count and sub-enum count into a single
+// integer like so:
+// return (msg_count << 16) | enum_count;
+UPB_API uint32_t upb_MiniTable_GetSubList(const upb_MiniTable* mt,
+ const upb_MiniTableField** subs);
+
+// Links a message to its sub-messages and sub-enums. The caller must pass
+// arrays of sub-tables and sub-enums, in the same length and order as is
+// returned by upb_MiniTable_GetSubList() above. However, individual elements
+// of the sub_tables may be NULL if those sub-messages were tree shaken.
+//
+// Returns false if either array is too short, or if any of the tables fails
+// to link.
+UPB_API bool upb_MiniTable_Link(upb_MiniTable* mt,
+ const upb_MiniTable** sub_tables,
+ size_t sub_table_count,
+ const upb_MiniTableEnum** sub_enums,
+ size_t sub_enum_count);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif // UPB_MINI_DESCRIPTOR_LINK_H_
diff --git a/upb/upb/mini_table/BUILD b/upb/upb/mini_table/BUILD
new file mode 100644
index 0000000..36feedc
--- /dev/null
+++ b/upb/upb/mini_table/BUILD
@@ -0,0 +1,113 @@
+# Copyright (c) 2009-2023, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load(
+ "//bazel:build_defs.bzl",
+ "UPB_DEFAULT_COPTS",
+)
+
+# begin:google_only
+# cc_library(
+# name = "compat",
+# srcs = [
+# "compat.c",
+# ],
+# hdrs = [
+# "compat.h",
+# ],
+# compatible_with = ["//buildenv/target:non_prod"],
+# copts = UPB_DEFAULT_COPTS,
+# visibility = ["//:__pkg__"],
+# deps = [
+# ":mini_table",
+# "//:base",
+# "//:port",
+# ],
+# )
+# end:google_only
+
+cc_library(
+ name = "mini_table",
+ srcs = [
+ "extension_registry.c",
+ "message.c",
+ ],
+ hdrs = [
+ "enum.h",
+ "extension.h",
+ "extension_registry.h",
+ "field.h",
+ "file.h",
+ "message.h",
+ "sub.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":internal",
+ "//:base",
+ "//:hash",
+ "//:mem",
+ "//:message_types",
+ "//:port",
+ ],
+)
+
+cc_library(
+ name = "internal",
+ srcs = [
+ "internal/message.c",
+ ],
+ hdrs = [
+ "internal/enum.h",
+ "internal/extension.h",
+ "internal/field.h",
+ "internal/file.h",
+ "internal/message.h",
+ "internal/sub.h",
+ ],
+ visibility = ["//visibility:public"],
+ deps = [
+ "//:base",
+ "//:message_types",
+ "//:port",
+ ],
+)
+
+# begin:github_only
+filegroup(
+ name = "source_files",
+ srcs = glob(
+ [
+ "**/*.c",
+ "**/*.h",
+ ],
+ ),
+ visibility = [
+ "//cmake:__pkg__",
+ "//python/dist:__pkg__",
+ ]
+)
+# end:github_only
diff --git a/upb/upb/mini_table/compat.c b/upb/upb/mini_table/compat.c
new file mode 100644
index 0000000..26c69dd
--- /dev/null
+++ b/upb/upb/mini_table/compat.c
@@ -0,0 +1,81 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/mini_table/compat.h"
+
+#include "upb/base/descriptor_constants.h"
+#include "upb/mini_table/field.h"
+#include "upb/mini_table/message.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+static bool upb_deep_check(const upb_MiniTable* src, const upb_MiniTable* dst,
+ bool eq) {
+ if (src->field_count != dst->field_count) return false;
+
+ for (int i = 0; i < src->field_count; i++) {
+ const upb_MiniTableField* src_field = &src->fields[i];
+ const upb_MiniTableField* dst_field =
+ upb_MiniTable_FindFieldByNumber(dst, src_field->number);
+
+ if (upb_MiniTableField_CType(src_field) !=
+ upb_MiniTableField_CType(dst_field)) return false;
+ if (src_field->mode != dst_field->mode) return false;
+ if (src_field->offset != dst_field->offset) return false;
+ if (src_field->presence != dst_field->presence) return false;
+ if (src_field->UPB_PRIVATE(submsg_index) !=
+ dst_field->UPB_PRIVATE(submsg_index)) return false;
+
+ // Go no further if we are only checking for compatibility.
+ if (!eq) continue;
+
+ if (upb_MiniTableField_CType(src_field) == kUpb_CType_Message) {
+ const upb_MiniTable* sub_src =
+ upb_MiniTable_GetSubMessageTable(src, src_field);
+ const upb_MiniTable* sub_dst =
+ upb_MiniTable_GetSubMessageTable(dst, dst_field);
+ if (sub_src != NULL && !upb_MiniTable_Equals(sub_src, sub_dst)) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+bool upb_MiniTable_Compatible(const upb_MiniTable* src,
+ const upb_MiniTable* dst) {
+ return upb_deep_check(src, dst, false);
+}
+
+bool upb_MiniTable_Equals(const upb_MiniTable* src, const upb_MiniTable* dst) {
+ return upb_deep_check(src, dst, true);
+}
diff --git a/upb/upb/mini_table/compat.h b/upb/upb/mini_table/compat.h
new file mode 100644
index 0000000..d79f99a
--- /dev/null
+++ b/upb/upb/mini_table/compat.h
@@ -0,0 +1,61 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MINI_TABLE_COMPAT_H_
+#define UPB_MINI_TABLE_COMPAT_H_
+
+#include "upb/mini_table/message.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+// upb does not support mixing minitables from different sources but these
+// functions are still used by some existing users so for now we make them
+// available here. This may or may not change in the future so do not add
+// them to new code.
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Checks if memory layout of src is compatible with dst.
+bool upb_MiniTable_Compatible(const upb_MiniTable* src,
+ const upb_MiniTable* dst);
+
+// Checks equality of mini tables originating from different language runtimes.
+bool upb_MiniTable_Equals(const upb_MiniTable* src, const upb_MiniTable* dst);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_MINI_TABLE_COMPAT_H_ */
diff --git a/upb/upb/mini_table/enum.h b/upb/upb/mini_table/enum.h
new file mode 100644
index 0000000..3a439fa
--- /dev/null
+++ b/upb/upb/mini_table/enum.h
@@ -0,0 +1,53 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MINI_TABLE_ENUM_H_
+#define UPB_MINI_TABLE_ENUM_H_
+
+#include "upb/mini_table/internal/enum.h"
+
+// Must be last
+#include "upb/port/def.inc"
+
+typedef struct upb_MiniTableEnum upb_MiniTableEnum;
+
+// Validates enum value against range defined by enum mini table.
+UPB_INLINE bool upb_MiniTableEnum_CheckValue(const struct upb_MiniTableEnum* e,
+ uint32_t val) {
+ _kUpb_FastEnumCheck_Status status = _upb_MiniTable_CheckEnumValueFast(e, val);
+ if (UPB_UNLIKELY(status == _kUpb_FastEnumCheck_CannotCheckFast)) {
+ return _upb_MiniTable_CheckEnumValueSlow(e, val);
+ }
+ return status == _kUpb_FastEnumCheck_ValueIsInEnum ? true : false;
+}
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_MINI_TABLE_ENUM_H_ */
diff --git a/upb/upb/mini_table/extension.h b/upb/upb/mini_table/extension.h
new file mode 100644
index 0000000..9589aa4
--- /dev/null
+++ b/upb/upb/mini_table/extension.h
@@ -0,0 +1,43 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MINI_TABLE_EXTENSION_H_
+#define UPB_MINI_TABLE_EXTENSION_H_
+
+#include "upb/mini_table/internal/extension.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+typedef struct upb_MiniTableExtension upb_MiniTableExtension;
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_MINI_TABLE_EXTENSION_H_ */
diff --git a/upb/upb/mini_table/extension_registry.c b/upb/upb/mini_table/extension_registry.c
new file mode 100644
index 0000000..1483955
--- /dev/null
+++ b/upb/upb/mini_table/extension_registry.c
@@ -0,0 +1,99 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/mini_table/extension_registry.h"
+
+#include "upb/hash/str_table.h"
+#include "upb/mini_table/extension.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#define EXTREG_KEY_SIZE (sizeof(upb_MiniTable*) + sizeof(uint32_t))
+
+struct upb_ExtensionRegistry {
+ upb_Arena* arena;
+ upb_strtable exts; // Key is upb_MiniTable* concatenated with fieldnum.
+};
+
+static void extreg_key(char* buf, const upb_MiniTable* l, uint32_t fieldnum) {
+ memcpy(buf, &l, sizeof(l));
+ memcpy(buf + sizeof(l), &fieldnum, sizeof(fieldnum));
+}
+
+upb_ExtensionRegistry* upb_ExtensionRegistry_New(upb_Arena* arena) {
+ upb_ExtensionRegistry* r = upb_Arena_Malloc(arena, sizeof(*r));
+ if (!r) return NULL;
+ r->arena = arena;
+ if (!upb_strtable_init(&r->exts, 8, arena)) return NULL;
+ return r;
+}
+
+UPB_API bool upb_ExtensionRegistry_Add(upb_ExtensionRegistry* r,
+ const upb_MiniTableExtension* e) {
+ char buf[EXTREG_KEY_SIZE];
+ extreg_key(buf, e->extendee, e->field.number);
+ if (upb_strtable_lookup2(&r->exts, buf, EXTREG_KEY_SIZE, NULL)) return false;
+ return upb_strtable_insert(&r->exts, buf, EXTREG_KEY_SIZE,
+ upb_value_constptr(e), r->arena);
+}
+
+bool upb_ExtensionRegistry_AddArray(upb_ExtensionRegistry* r,
+ const upb_MiniTableExtension** e,
+ size_t count) {
+ const upb_MiniTableExtension** start = e;
+ const upb_MiniTableExtension** end = UPB_PTRADD(e, count);
+ for (; e < end; e++) {
+ if (!upb_ExtensionRegistry_Add(r, *e)) goto failure;
+ }
+ return true;
+
+failure:
+ // Back out the entries previously added.
+ for (end = e, e = start; e < end; e++) {
+ const upb_MiniTableExtension* ext = *e;
+ char buf[EXTREG_KEY_SIZE];
+ extreg_key(buf, ext->extendee, ext->field.number);
+ upb_strtable_remove2(&r->exts, buf, EXTREG_KEY_SIZE, NULL);
+ }
+ return false;
+}
+
+const upb_MiniTableExtension* upb_ExtensionRegistry_Lookup(
+ const upb_ExtensionRegistry* r, const upb_MiniTable* t, uint32_t num) {
+ char buf[EXTREG_KEY_SIZE];
+ upb_value v;
+ extreg_key(buf, t, num);
+ if (upb_strtable_lookup2(&r->exts, buf, EXTREG_KEY_SIZE, &v)) {
+ return upb_value_getconstptr(v);
+ } else {
+ return NULL;
+ }
+}
diff --git a/upb/upb/mini_table/extension_registry.h b/upb/upb/mini_table/extension_registry.h
new file mode 100644
index 0000000..973123a
--- /dev/null
+++ b/upb/upb/mini_table/extension_registry.h
@@ -0,0 +1,108 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MINI_TABLE_EXTENSION_REGISTRY_H_
+#define UPB_MINI_TABLE_EXTENSION_REGISTRY_H_
+
+#include "upb/mem/arena.h"
+#include "upb/mini_table/extension.h"
+#include "upb/mini_table/message.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Extension registry: a dynamic data structure that stores a map of:
+ * (upb_MiniTable, number) -> extension info
+ *
+ * upb_decode() uses upb_ExtensionRegistry to look up extensions while parsing
+ * binary format.
+ *
+ * upb_ExtensionRegistry is part of the mini-table (msglayout) family of
+ * objects. Like all mini-table objects, it is suitable for reflection-less
+ * builds that do not want to expose names into the binary.
+ *
+ * Unlike most mini-table types, upb_ExtensionRegistry requires dynamic memory
+ * allocation and dynamic initialization:
+ * * If reflection is being used, then upb_DefPool will construct an appropriate
+ * upb_ExtensionRegistry automatically.
+ * * For a mini-table only build, the user must manually construct the
+ * upb_ExtensionRegistry and populate it with all of the extensions the user
+ * cares about.
+ * * A third alternative is to manually unpack relevant extensions after the
+ * main parse is complete, similar to how Any works. This is perhaps the
+ * nicest solution from the perspective of reducing dependencies, avoiding
+ * dynamic memory allocation, and avoiding the need to parse uninteresting
+ * extensions. The downsides are:
+ * (1) parse errors are not caught during the main parse
+ * (2) the CPU hit of parsing comes during access, which could cause an
+ * undesirable stutter in application performance.
+ *
+ * Users cannot directly get or put into this map. Users can only add the
+ * extensions from a generated module and pass the extension registry to the
+ * binary decoder.
+ *
+ * A upb_DefPool provides a upb_ExtensionRegistry, so any users who use
+ * reflection do not need to populate a upb_ExtensionRegistry directly.
+ */
+
+typedef struct upb_ExtensionRegistry upb_ExtensionRegistry;
+
+// Creates a upb_ExtensionRegistry in the given arena.
+// The arena must outlive any use of the extreg.
+UPB_API upb_ExtensionRegistry* upb_ExtensionRegistry_New(upb_Arena* arena);
+
+UPB_API bool upb_ExtensionRegistry_Add(upb_ExtensionRegistry* r,
+ const upb_MiniTableExtension* e);
+
+// Adds the given extension info for the array |e| of size |count| into the
+// registry. If there are any errors, the entire array is backed out.
+// The extensions must outlive the registry.
+// Possible errors include OOM or an extension number that already exists.
+// TODO(salo): There is currently no way to know the exact reason for failure.
+bool upb_ExtensionRegistry_AddArray(upb_ExtensionRegistry* r,
+ const upb_MiniTableExtension** e,
+ size_t count);
+
+// Looks up the extension (if any) defined for message type |t| and field
+// number |num|. Returns the extension if found, otherwise NULL.
+UPB_API const upb_MiniTableExtension* upb_ExtensionRegistry_Lookup(
+ const upb_ExtensionRegistry* r, const upb_MiniTable* t, uint32_t num);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_MINI_TABLE_EXTENSION_REGISTRY_H_ */
diff --git a/upb/upb/mini_table/field.h b/upb/upb/mini_table/field.h
new file mode 100644
index 0000000..17ea33b
--- /dev/null
+++ b/upb/upb/mini_table/field.h
@@ -0,0 +1,121 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MINI_TABLE_FIELD_H_
+#define UPB_MINI_TABLE_FIELD_H_
+
+#include "upb/mini_table/internal/field.h"
+#include "upb/mini_table/internal/message.h"
+#include "upb/mini_table/internal/sub.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct upb_MiniTableField upb_MiniTableField;
+
+UPB_API_INLINE upb_FieldType
+upb_MiniTableField_Type(const upb_MiniTableField* field) {
+ if (field->mode & kUpb_LabelFlags_IsAlternate) {
+ if (field->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Int32) {
+ return kUpb_FieldType_Enum;
+ } else if (field->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Bytes) {
+ return kUpb_FieldType_String;
+ } else {
+ UPB_ASSERT(false);
+ }
+ }
+ return (upb_FieldType)field->UPB_PRIVATE(descriptortype);
+}
+
+UPB_API_INLINE upb_CType upb_MiniTableField_CType(const upb_MiniTableField* f) {
+ switch (upb_MiniTableField_Type(f)) {
+ case kUpb_FieldType_Double:
+ return kUpb_CType_Double;
+ case kUpb_FieldType_Float:
+ return kUpb_CType_Float;
+ case kUpb_FieldType_Int64:
+ case kUpb_FieldType_SInt64:
+ case kUpb_FieldType_SFixed64:
+ return kUpb_CType_Int64;
+ case kUpb_FieldType_Int32:
+ case kUpb_FieldType_SFixed32:
+ case kUpb_FieldType_SInt32:
+ return kUpb_CType_Int32;
+ case kUpb_FieldType_UInt64:
+ case kUpb_FieldType_Fixed64:
+ return kUpb_CType_UInt64;
+ case kUpb_FieldType_UInt32:
+ case kUpb_FieldType_Fixed32:
+ return kUpb_CType_UInt32;
+ case kUpb_FieldType_Enum:
+ return kUpb_CType_Enum;
+ case kUpb_FieldType_Bool:
+ return kUpb_CType_Bool;
+ case kUpb_FieldType_String:
+ return kUpb_CType_String;
+ case kUpb_FieldType_Bytes:
+ return kUpb_CType_Bytes;
+ case kUpb_FieldType_Group:
+ case kUpb_FieldType_Message:
+ return kUpb_CType_Message;
+ }
+ UPB_UNREACHABLE();
+}
+
+UPB_API_INLINE bool upb_MiniTableField_IsExtension(
+ const upb_MiniTableField* field) {
+ return field->mode & kUpb_LabelFlags_IsExtension;
+}
+
+UPB_API_INLINE bool upb_MiniTableField_IsClosedEnum(
+ const upb_MiniTableField* field) {
+ return field->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Enum;
+}
+
+UPB_API_INLINE bool upb_MiniTableField_HasPresence(
+ const upb_MiniTableField* field) {
+ if (upb_MiniTableField_IsExtension(field)) {
+ return !upb_IsRepeatedOrMap(field);
+ } else {
+ return field->presence != 0;
+ }
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_MINI_TABLE_FIELD_H_ */
diff --git a/upb/upb/mini_table/file.h b/upb/upb/mini_table/file.h
new file mode 100644
index 0000000..27f50b4
--- /dev/null
+++ b/upb/upb/mini_table/file.h
@@ -0,0 +1,38 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MINI_TABLE_FILE_H_
+#define UPB_MINI_TABLE_FILE_H_
+
+#include "upb/mini_table/internal/file.h"
+
+typedef struct upb_MiniTableFile upb_MiniTableFile;
+
+#endif /* UPB_MINI_TABLE_FILE_H_ */
diff --git a/upb/upb/mini_table/internal/enum.h b/upb/upb/mini_table/internal/enum.h
new file mode 100644
index 0000000..470326c
--- /dev/null
+++ b/upb/upb/mini_table/internal/enum.h
@@ -0,0 +1,81 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MINI_TABLE_INTERNAL_ENUM_H_
+#define UPB_MINI_TABLE_INTERNAL_ENUM_H_
+
+#include <stdint.h>
+
+// Must be last.
+#include "upb/port/def.inc"
+
+struct upb_MiniTableEnum {
+ uint32_t mask_limit; // Limit enum value that can be tested with mask.
+ uint32_t value_count; // Number of values after the bitfield.
+ uint32_t data[]; // Bitmask + enumerated values follow.
+};
+
+typedef enum {
+ _kUpb_FastEnumCheck_ValueIsInEnum = 0,
+ _kUpb_FastEnumCheck_ValueIsNotInEnum = 1,
+ _kUpb_FastEnumCheck_CannotCheckFast = 2,
+} _kUpb_FastEnumCheck_Status;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+UPB_INLINE _kUpb_FastEnumCheck_Status _upb_MiniTable_CheckEnumValueFast(
+ const struct upb_MiniTableEnum* e, uint32_t val) {
+ if (UPB_UNLIKELY(val >= 64)) return _kUpb_FastEnumCheck_CannotCheckFast;
+ uint64_t mask = e->data[0] | ((uint64_t)e->data[1] << 32);
+ return (mask & (1ULL << val)) ? _kUpb_FastEnumCheck_ValueIsInEnum
+ : _kUpb_FastEnumCheck_ValueIsNotInEnum;
+}
+
+UPB_INLINE bool _upb_MiniTable_CheckEnumValueSlow(
+ const struct upb_MiniTableEnum* e, uint32_t val) {
+ if (val < e->mask_limit) return e->data[val / 32] & (1ULL << (val % 32));
+ // OPT: binary search long lists?
+ const uint32_t* start = &e->data[e->mask_limit / 32];
+ const uint32_t* limit = &e->data[(e->mask_limit / 32) + e->value_count];
+ for (const uint32_t* p = start; p < limit; p++) {
+ if (*p == val) return true;
+ }
+ return false;
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_MINI_TABLE_INTERNAL_ENUM_H_ */
diff --git a/upb/upb/mini_table/internal/extension.h b/upb/upb/mini_table/internal/extension.h
new file mode 100644
index 0000000..3202caa
--- /dev/null
+++ b/upb/upb/mini_table/internal/extension.h
@@ -0,0 +1,50 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MINI_TABLE_INTERNAL_EXTENSION_H_
+#define UPB_MINI_TABLE_INTERNAL_EXTENSION_H_
+
+#include "upb/mini_table/internal/field.h"
+#include "upb/mini_table/internal/sub.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+struct upb_MiniTableExtension {
+ // Do not move this field. We need to be able to alias pointers.
+ struct upb_MiniTableField field;
+
+ const struct upb_MiniTable* extendee;
+ union upb_MiniTableSub sub; // NULL unless submessage or proto2 enum
+};
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_MINI_TABLE_INTERNAL_EXTENSION_H_ */
diff --git a/upb/upb/mini_table/internal/field.h b/upb/upb/mini_table/internal/field.h
new file mode 100644
index 0000000..7a0ff51
--- /dev/null
+++ b/upb/upb/mini_table/internal/field.h
@@ -0,0 +1,137 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MINI_TABLE_INTERNAL_FIELD_H_
+#define UPB_MINI_TABLE_INTERNAL_FIELD_H_
+
+#include <stdint.h>
+
+#include "upb/base/descriptor_constants.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+struct upb_MiniTableField {
+ uint32_t number;
+ uint16_t offset;
+ int16_t presence; // If >0, hasbit_index. If <0, ~oneof_index
+
+ // Indexes into `upb_MiniTable.subs`
+ // Will be set to `kUpb_NoSub` if `descriptortype` != MESSAGE/GROUP/ENUM
+ uint16_t UPB_PRIVATE(submsg_index);
+
+ uint8_t UPB_PRIVATE(descriptortype);
+
+ // upb_FieldMode | upb_LabelFlags | (upb_FieldRep << kUpb_FieldRep_Shift)
+ uint8_t mode;
+};
+
+#define kUpb_NoSub ((uint16_t)-1)
+
+typedef enum {
+ kUpb_FieldMode_Map = 0,
+ kUpb_FieldMode_Array = 1,
+ kUpb_FieldMode_Scalar = 2,
+} upb_FieldMode;
+
+// Mask to isolate the upb_FieldMode from field.mode.
+#define kUpb_FieldMode_Mask 3
+
+// Extra flags on the mode field.
+typedef enum {
+ kUpb_LabelFlags_IsPacked = 4,
+ kUpb_LabelFlags_IsExtension = 8,
+ // Indicates that this descriptor type is an "alternate type":
+ // - for Int32, this indicates that the actual type is Enum (but was
+ // rewritten to Int32 because it is an open enum that requires no check).
+ // - for Bytes, this indicates that the actual type is String (but does
+ // not require any UTF-8 check).
+ kUpb_LabelFlags_IsAlternate = 16,
+} upb_LabelFlags;
+
+// Note: we sort by this number when calculating layout order.
+typedef enum {
+ kUpb_FieldRep_1Byte = 0,
+ kUpb_FieldRep_4Byte = 1,
+ kUpb_FieldRep_StringView = 2,
+ kUpb_FieldRep_8Byte = 3,
+
+ kUpb_FieldRep_NativePointer =
+ UPB_SIZE(kUpb_FieldRep_4Byte, kUpb_FieldRep_8Byte),
+ kUpb_FieldRep_Max = kUpb_FieldRep_8Byte,
+} upb_FieldRep;
+
+#define kUpb_FieldRep_Shift 6
+
+UPB_INLINE upb_FieldRep
+_upb_MiniTableField_GetRep(const struct upb_MiniTableField* field) {
+ return (upb_FieldRep)(field->mode >> kUpb_FieldRep_Shift);
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+UPB_INLINE upb_FieldMode
+upb_FieldMode_Get(const struct upb_MiniTableField* field) {
+ return (upb_FieldMode)(field->mode & 3);
+}
+
+UPB_INLINE void _upb_MiniTableField_CheckIsArray(
+ const struct upb_MiniTableField* field) {
+ UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_NativePointer);
+ UPB_ASSUME(upb_FieldMode_Get(field) == kUpb_FieldMode_Array);
+ UPB_ASSUME(field->presence == 0);
+}
+
+UPB_INLINE void _upb_MiniTableField_CheckIsMap(
+ const struct upb_MiniTableField* field) {
+ UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_NativePointer);
+ UPB_ASSUME(upb_FieldMode_Get(field) == kUpb_FieldMode_Map);
+ UPB_ASSUME(field->presence == 0);
+}
+
+UPB_INLINE bool upb_IsRepeatedOrMap(const struct upb_MiniTableField* field) {
+ // This works because upb_FieldMode has no value 3.
+ return !(field->mode & kUpb_FieldMode_Scalar);
+}
+
+UPB_INLINE bool upb_IsSubMessage(const struct upb_MiniTableField* field) {
+ return field->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Message ||
+ field->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Group;
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_MINI_TABLE_INTERNAL_FIELD_H_ */
diff --git a/upb/upb/mini_table/internal/file.h b/upb/upb/mini_table/internal/file.h
new file mode 100644
index 0000000..9b8aa99
--- /dev/null
+++ b/upb/upb/mini_table/internal/file.h
@@ -0,0 +1,52 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MINI_TABLE_INTERNAL_FILE_H_
+#define UPB_MINI_TABLE_INTERNAL_FILE_H_
+
+#include "upb/mini_table/internal/enum.h"
+#include "upb/mini_table/internal/extension.h"
+#include "upb/mini_table/internal/message.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+struct upb_MiniTableFile {
+ const struct upb_MiniTable** msgs;
+ const struct upb_MiniTableEnum** enums;
+ const struct upb_MiniTableExtension** exts;
+ int msg_count;
+ int enum_count;
+ int ext_count;
+};
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_MINI_TABLE_INTERNAL_FILE_H_ */
diff --git a/upb/upb/mini_table/internal/message.c b/upb/upb/mini_table/internal/message.c
new file mode 100644
index 0000000..3704bae
--- /dev/null
+++ b/upb/upb/mini_table/internal/message.c
@@ -0,0 +1,42 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/mini_table/internal/message.h"
+
+const struct upb_MiniTable _kUpb_MiniTable_Empty = {
+ .subs = NULL,
+ .fields = NULL,
+ .size = 0,
+ .field_count = 0,
+ .ext = kUpb_ExtMode_NonExtendable,
+ .dense_below = 0,
+ .table_mask = -1,
+ .required_count = 0,
+};
diff --git a/upb/upb/mini_table/internal/message.h b/upb/upb/mini_table/internal/message.h
new file mode 100644
index 0000000..fc67203
--- /dev/null
+++ b/upb/upb/mini_table/internal/message.h
@@ -0,0 +1,111 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MINI_TABLE_INTERNAL_MESSAGE_H_
+#define UPB_MINI_TABLE_INTERNAL_MESSAGE_H_
+
+#include "upb/message/types.h"
+#include "upb/mini_table/internal/field.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+struct upb_Decoder;
+typedef const char* _upb_FieldParser(struct upb_Decoder* d, const char* ptr,
+ upb_Message* msg, intptr_t table,
+ uint64_t hasbits, uint64_t data);
+typedef struct {
+ uint64_t field_data;
+ _upb_FieldParser* field_parser;
+} _upb_FastTable_Entry;
+
+typedef enum {
+ kUpb_ExtMode_NonExtendable = 0, // Non-extendable message.
+ kUpb_ExtMode_Extendable = 1, // Normal extendable message.
+ kUpb_ExtMode_IsMessageSet = 2, // MessageSet message.
+ kUpb_ExtMode_IsMessageSet_ITEM =
+ 3, // MessageSet item (temporary only, see decode.c)
+
+ // During table building we steal a bit to indicate that the message is a map
+ // entry. *Only* used during table building!
+ kUpb_ExtMode_IsMapEntry = 4,
+} upb_ExtMode;
+
+union upb_MiniTableSub;
+
+// upb_MiniTable represents the memory layout of a given upb_MessageDef.
+// The members are public so generated code can initialize them,
+// but users MUST NOT directly read or write any of its members.
+struct upb_MiniTable {
+ const union upb_MiniTableSub* subs;
+ const struct upb_MiniTableField* fields;
+
+ // Must be aligned to sizeof(void*). Doesn't include internal members like
+ // unknown fields, extension dict, pointer to msglayout, etc.
+ uint16_t size;
+
+ uint16_t field_count;
+ uint8_t ext; // upb_ExtMode, declared as uint8_t so sizeof(ext) == 1
+ uint8_t dense_below;
+ uint8_t table_mask;
+ uint8_t required_count; // Required fields have the lowest hasbits.
+
+ // To statically initialize the tables of variable length, we need a flexible
+ // array member, and we need to compile in gnu99 mode (constant initialization
+ // of flexible array members is a GNU extension, not in C99 unfortunately.
+ _upb_FastTable_Entry fasttable[];
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// A MiniTable for an empty message, used for unlinked sub-messages.
+extern const struct upb_MiniTable _kUpb_MiniTable_Empty;
+
+// Computes a bitmask in which the |l->required_count| lowest bits are set,
+// except that we skip the lowest bit (because upb never uses hasbit 0).
+//
+// Sample output:
+// requiredmask(1) => 0b10 (0x2)
+// requiredmask(5) => 0b111110 (0x3e)
+UPB_INLINE uint64_t upb_MiniTable_requiredmask(const struct upb_MiniTable* l) {
+ int n = l->required_count;
+ assert(0 < n && n <= 63);
+ return ((1ULL << n) - 1) << 1;
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_MINI_TABLE_INTERNAL_MESSAGE_H_ */
diff --git a/upb/upb/mini_table/internal/sub.h b/upb/upb/mini_table/internal/sub.h
new file mode 100644
index 0000000..b73639d
--- /dev/null
+++ b/upb/upb/mini_table/internal/sub.h
@@ -0,0 +1,42 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MINI_TABLE_INTERNAL_SUB_H_
+#define UPB_MINI_TABLE_INTERNAL_SUB_H_
+
+#include "upb/mini_table/internal/enum.h"
+#include "upb/mini_table/internal/message.h"
+
+union upb_MiniTableSub {
+ const struct upb_MiniTable* submsg;
+ const struct upb_MiniTableEnum* subenum;
+};
+
+#endif /* UPB_MINI_TABLE_INTERNAL_SUB_H_ */
diff --git a/upb/upb/mini_table/message.c b/upb/upb/mini_table/message.c
new file mode 100644
index 0000000..f10934f
--- /dev/null
+++ b/upb/upb/mini_table/message.c
@@ -0,0 +1,100 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/mini_table/message.h"
+
+#include <inttypes.h>
+
+#include "upb/mem/arena.h"
+#include "upb/mini_table/internal/message.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+const upb_MiniTableField* upb_MiniTable_FindFieldByNumber(
+ const upb_MiniTable* t, uint32_t number) {
+ const size_t i = ((size_t)number) - 1; // 0 wraps to SIZE_MAX
+
+ // Ideal case: index into dense fields
+ if (i < t->dense_below) {
+ UPB_ASSERT(t->fields[i].number == number);
+ return &t->fields[i];
+ }
+
+ // Slow case: binary search
+ int lo = t->dense_below;
+ int hi = t->field_count - 1;
+ while (lo <= hi) {
+ int mid = (lo + hi) / 2;
+ uint32_t num = t->fields[mid].number;
+ if (num < number) {
+ lo = mid + 1;
+ continue;
+ }
+ if (num > number) {
+ hi = mid - 1;
+ continue;
+ }
+ return &t->fields[mid];
+ }
+ return NULL;
+}
+
+static bool upb_MiniTable_Is_Oneof(const upb_MiniTableField* f) {
+ return f->presence < 0;
+}
+
+const upb_MiniTableField* upb_MiniTable_GetOneof(const upb_MiniTable* m,
+ const upb_MiniTableField* f) {
+ if (UPB_UNLIKELY(!upb_MiniTable_Is_Oneof(f))) {
+ return NULL;
+ }
+ const upb_MiniTableField* ptr = &m->fields[0];
+ const upb_MiniTableField* end = &m->fields[m->field_count];
+ while (++ptr < end) {
+ if (ptr->presence == (*f).presence) {
+ return ptr;
+ }
+ }
+ return NULL;
+}
+
+bool upb_MiniTable_NextOneofField(const upb_MiniTable* m,
+ const upb_MiniTableField** f) {
+ const upb_MiniTableField* ptr = *f;
+ const upb_MiniTableField* end = &m->fields[m->field_count];
+ while (++ptr < end) {
+ if (ptr->presence == (*f)->presence) {
+ *f = ptr;
+ return true;
+ }
+ }
+ return false;
+}
diff --git a/upb/upb/mini_table/message.h b/upb/upb/mini_table/message.h
new file mode 100644
index 0000000..cc5409f
--- /dev/null
+++ b/upb/upb/mini_table/message.h
@@ -0,0 +1,108 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MINI_TABLE_MESSAGE_H_
+#define UPB_MINI_TABLE_MESSAGE_H_
+
+#include "upb/mini_table/enum.h"
+#include "upb/mini_table/field.h"
+#include "upb/mini_table/internal/message.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct upb_MiniTable upb_MiniTable;
+
+UPB_API const upb_MiniTableField* upb_MiniTable_FindFieldByNumber(
+ const upb_MiniTable* table, uint32_t number);
+
+UPB_API_INLINE const upb_MiniTableField* upb_MiniTable_GetFieldByIndex(
+ const upb_MiniTable* t, uint32_t index) {
+ return &t->fields[index];
+}
+
+// Returns the MiniTable for this message field. If the field is unlinked,
+// returns NULL.
+UPB_API_INLINE const upb_MiniTable* upb_MiniTable_GetSubMessageTable(
+ const upb_MiniTable* mini_table, const upb_MiniTableField* field) {
+ UPB_ASSERT(upb_MiniTableField_CType(field) == kUpb_CType_Message);
+ const upb_MiniTable* ret =
+ mini_table->subs[field->UPB_PRIVATE(submsg_index)].submsg;
+ UPB_ASSUME(ret);
+ return ret == &_kUpb_MiniTable_Empty ? NULL : ret;
+}
+
+// Returns the MiniTableEnum for this enum field. If the field is unlinked,
+// returns NULL.
+UPB_API_INLINE const upb_MiniTableEnum* upb_MiniTable_GetSubEnumTable(
+ const upb_MiniTable* mini_table, const upb_MiniTableField* field) {
+ UPB_ASSERT(upb_MiniTableField_CType(field) == kUpb_CType_Enum);
+ return mini_table->subs[field->UPB_PRIVATE(submsg_index)].subenum;
+}
+
+// Returns true if this MiniTable field is linked to a MiniTable for the
+// sub-message.
+UPB_API_INLINE bool upb_MiniTable_MessageFieldIsLinked(
+ const upb_MiniTable* mini_table, const upb_MiniTableField* field) {
+ return upb_MiniTable_GetSubMessageTable(mini_table, field) != NULL;
+}
+
+// If this field is in a oneof, returns the first field in the oneof.
+//
+// Otherwise returns NULL.
+//
+// Usage:
+// const upb_MiniTableField* field = upb_MiniTable_GetOneof(m, f);
+// do {
+// ..
+// } while (upb_MiniTable_NextOneofField(m, &field);
+//
+const upb_MiniTableField* upb_MiniTable_GetOneof(const upb_MiniTable* m,
+ const upb_MiniTableField* f);
+
+// Iterates to the next field in the oneof. If this is the last field in the
+// oneof, returns false. The ordering of fields in the oneof is not
+// guaranteed.
+// REQUIRES: |f| is the field initialized by upb_MiniTable_GetOneof and updated
+// by prior upb_MiniTable_NextOneofField calls.
+bool upb_MiniTable_NextOneofField(const upb_MiniTable* m,
+ const upb_MiniTableField** f);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_MINI_TABLE_MESSAGE_H_ */
diff --git a/upb/upb/mini_table/sub.h b/upb/upb/mini_table/sub.h
new file mode 100644
index 0000000..7f5d237
--- /dev/null
+++ b/upb/upb/mini_table/sub.h
@@ -0,0 +1,38 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_MINI_TABLE_SUB_H_
+#define UPB_MINI_TABLE_SUB_H_
+
+#include "upb/mini_table/internal/sub.h"
+
+typedef union upb_MiniTableSub upb_MiniTableSub;
+
+#endif /* UPB_MINI_TABLE_INTERNAL_SUB_H_ */
diff --git a/upb/upb/port/BUILD b/upb/upb/port/BUILD
new file mode 100644
index 0000000..a29b2b6
--- /dev/null
+++ b/upb/upb/port/BUILD
@@ -0,0 +1,62 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load("//bazel:build_defs.bzl", "UPB_DEFAULT_COPTS")
+
+cc_library(
+ name = "port",
+ hdrs = [
+ "atomic.h",
+ "vsnprintf_compat.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ textual_hdrs = [":inc"],
+ visibility = ["//visibility:public"],
+)
+
+filegroup(
+ name = "inc",
+ srcs = [
+ "def.inc",
+ "undef.inc",
+ ],
+ visibility = ["//visibility:public"],
+)
+
+# begin:github_only
+filegroup(
+ name = "source_files",
+ srcs = glob(
+ [
+ "**/*.h",
+ "**/*.inc",
+ ],
+ ),
+ visibility = [
+ "//cmake:__pkg__",
+ "//python/dist:__pkg__",
+ ]
+)
+# end:github_only
diff --git a/upb/upb/port/atomic.h b/upb/upb/port/atomic.h
new file mode 100644
index 0000000..8c792bc
--- /dev/null
+++ b/upb/upb/port/atomic.h
@@ -0,0 +1,104 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_PORT_ATOMIC_H_
+#define UPB_PORT_ATOMIC_H_
+
+#include "upb/port/def.inc"
+
+#ifdef UPB_USE_C11_ATOMICS
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+#define upb_Atomic_Init(addr, val) atomic_init(addr, val)
+#define upb_Atomic_Load(addr, order) atomic_load_explicit(addr, order)
+#define upb_Atomic_Store(addr, val, order) \
+ atomic_store_explicit(addr, val, order)
+#define upb_Atomic_Add(addr, val, order) \
+ atomic_fetch_add_explicit(addr, val, order)
+#define upb_Atomic_Sub(addr, val, order) \
+ atomic_fetch_sub_explicit(addr, val, order)
+#define upb_Atomic_Exchange(addr, val, order) \
+ atomic_exchange_explicit(addr, val, order)
+#define upb_Atomic_CompareExchangeStrong(addr, expected, desired, \
+ success_order, failure_order) \
+ atomic_compare_exchange_strong_explicit(addr, expected, desired, \
+ success_order, failure_order)
+#define upb_Atomic_CompareExchangeWeak(addr, expected, desired, success_order, \
+ failure_order) \
+ atomic_compare_exchange_weak_explicit(addr, expected, desired, \
+ success_order, failure_order)
+
+#else // !UPB_USE_C11_ATOMICS
+
+#include <string.h>
+
+#define upb_Atomic_Init(addr, val) (*addr = val)
+#define upb_Atomic_Load(addr, order) (*addr)
+#define upb_Atomic_Store(addr, val, order) (*(addr) = val)
+#define upb_Atomic_Add(addr, val, order) (*(addr) += val)
+#define upb_Atomic_Sub(addr, val, order) (*(addr) -= val)
+
+UPB_INLINE void* _upb_NonAtomic_Exchange(void* addr, void* value) {
+ void* old;
+ memcpy(&old, addr, sizeof(value));
+ memcpy(addr, &value, sizeof(value));
+ return old;
+}
+
+#define upb_Atomic_Exchange(addr, val, order) _upb_NonAtomic_Exchange(addr, val)
+
+// `addr` and `expected` are logically double pointers.
+UPB_INLINE bool _upb_NonAtomic_CompareExchangeStrongP(void* addr,
+ void* expected,
+ void* desired) {
+ if (memcmp(addr, expected, sizeof(desired)) == 0) {
+ memcpy(addr, &desired, sizeof(desired));
+ return true;
+ } else {
+ memcpy(expected, addr, sizeof(desired));
+ return false;
+ }
+}
+
+#define upb_Atomic_CompareExchangeStrong(addr, expected, desired, \
+ success_order, failure_order) \
+ _upb_NonAtomic_CompareExchangeStrongP((void*)addr, (void*)expected, \
+ (void*)desired)
+#define upb_Atomic_CompareExchangeWeak(addr, expected, desired, success_order, \
+ failure_order) \
+ upb_Atomic_CompareExchangeStrong(addr, expected, desired, 0, 0)
+
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif // UPB_PORT_ATOMIC_H_
diff --git a/upb/upb/port/def.inc b/upb/upb/port/def.inc
new file mode 100644
index 0000000..b9798bf
--- /dev/null
+++ b/upb/upb/port/def.inc
@@ -0,0 +1,351 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+/*
+ * This is where we define internal portability macros used across upb.
+ *
+ * All of these macros are undef'd in undef.inc to avoid leaking them to users.
+ *
+ * The correct usage is:
+ *
+ * #include "upb/foobar.h"
+ * #include "upb/baz.h"
+ *
+ * // MUST be last included header.
+ * #include "upb/port/def.inc"
+ *
+ * // Code for this file.
+ * // <...>
+ *
+ * // Can be omitted for .c files, required for .h.
+ * #include "upb/port/undef.inc"
+ *
+ * This file is private and must not be included by users!
+ */
+
+#if !((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
+ (defined(__cplusplus) && __cplusplus >= 201402L) || \
+ (defined(_MSC_VER) && _MSC_VER >= 1900))
+#error upb requires C99 or C++14 or MSVC >= 2015.
+#endif
+
+// Portable check for GCC minimum version:
+// https://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html
+#if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__)
+#define UPB_GNUC_MIN(x, y) \
+ (__GNUC__ > (x) || __GNUC__ == (x) && __GNUC_MINOR__ >= (y))
+#else
+#define UPB_GNUC_MIN(x, y) 0
+#endif
+
+#include <assert.h>
+#include <setjmp.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#ifndef UINTPTR_MAX
+Error, UINTPTR_MAX is undefined
+#endif
+
+#if UINTPTR_MAX == 0xffffffff
+#define UPB_SIZE(size32, size64) size32
+#else
+#define UPB_SIZE(size32, size64) size64
+#endif
+
+/* If we always read/write as a consistent type to each address, this shouldn't
+ * violate aliasing.
+ */
+#define UPB_PTR_AT(msg, ofs, type) ((type*)((char*)(msg) + (ofs)))
+
+#define UPB_MAPTYPE_STRING 0
+
+// UPB_EXPORT: always generate a public symbol.
+#if defined(__GNUC__) || defined(__clang__)
+#define UPB_EXPORT __attribute__((visibility("default"))) __attribute__((used))
+#else
+#define UPB_EXPORT
+#endif
+
+// UPB_INLINE: inline if possible, emit standalone code if required.
+#ifdef __cplusplus
+#define UPB_INLINE inline
+#elif defined (__GNUC__) || defined(__clang__)
+#define UPB_INLINE static __inline__
+#else
+#define UPB_INLINE static
+#endif
+
+#ifdef UPB_BUILD_API
+#define UPB_API UPB_EXPORT
+#define UPB_API_INLINE UPB_EXPORT
+#else
+#define UPB_API
+#define UPB_API_INLINE UPB_INLINE
+#endif
+
+#define UPB_MALLOC_ALIGN 8
+#define UPB_ALIGN_UP(size, align) (((size) + (align) - 1) / (align) * (align))
+#define UPB_ALIGN_DOWN(size, align) ((size) / (align) * (align))
+#define UPB_ALIGN_MALLOC(size) UPB_ALIGN_UP(size, UPB_MALLOC_ALIGN)
+#ifdef __clang__
+#define UPB_ALIGN_OF(type) _Alignof(type)
+#else
+#define UPB_ALIGN_OF(type) offsetof (struct { char c; type member; }, member)
+#endif
+
+// Hints to the compiler about likely/unlikely branches.
+#if defined (__GNUC__) || defined(__clang__)
+#define UPB_LIKELY(x) __builtin_expect((bool)(x), 1)
+#define UPB_UNLIKELY(x) __builtin_expect((bool)(x), 0)
+#else
+#define UPB_LIKELY(x) (x)
+#define UPB_UNLIKELY(x) (x)
+#endif
+
+// Macros for function attributes on compilers that support them.
+#ifdef __GNUC__
+#define UPB_FORCEINLINE __inline__ __attribute__((always_inline))
+#define UPB_NOINLINE __attribute__((noinline))
+#define UPB_NORETURN __attribute__((__noreturn__))
+#define UPB_PRINTF(str, first_vararg) __attribute__((format (printf, str, first_vararg)))
+#elif defined(_MSC_VER)
+#define UPB_NOINLINE
+#define UPB_FORCEINLINE
+#define UPB_NORETURN __declspec(noreturn)
+#define UPB_PRINTF(str, first_vararg)
+#else /* !defined(__GNUC__) */
+#define UPB_FORCEINLINE
+#define UPB_NOINLINE
+#define UPB_NORETURN
+#define UPB_PRINTF(str, first_vararg)
+#endif
+
+#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
+#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
+
+#define UPB_UNUSED(var) (void)var
+
+// UPB_ASSUME(): in release mode, we tell the compiler to assume this is true.
+#ifdef NDEBUG
+#ifdef __GNUC__
+#define UPB_ASSUME(expr) if (!(expr)) __builtin_unreachable()
+#elif defined _MSC_VER
+#define UPB_ASSUME(expr) if (!(expr)) __assume(0)
+#else
+#define UPB_ASSUME(expr) do {} while (false && (expr))
+#endif
+#else
+#define UPB_ASSUME(expr) assert(expr)
+#endif
+
+/* UPB_ASSERT(): in release mode, we use the expression without letting it be
+ * evaluated. This prevents "unused variable" warnings. */
+#ifdef NDEBUG
+#define UPB_ASSERT(expr) do {} while (false && (expr))
+#else
+#define UPB_ASSERT(expr) assert(expr)
+#endif
+
+#if defined(__GNUC__) || defined(__clang__)
+#define UPB_UNREACHABLE() do { assert(0); __builtin_unreachable(); } while(0)
+#elif defined(_MSC_VER)
+#define UPB_UNREACHABLE() \
+ do { \
+ assert(0); \
+ __assume(0); \
+ } while (0)
+#else
+#define UPB_UNREACHABLE() do { assert(0); } while(0)
+#endif
+
+/* UPB_SETJMP() / UPB_LONGJMP(): avoid setting/restoring signal mask. */
+#ifdef __APPLE__
+#define UPB_SETJMP(buf) _setjmp(buf)
+#define UPB_LONGJMP(buf, val) _longjmp(buf, val)
+#else
+#define UPB_SETJMP(buf) setjmp(buf)
+#define UPB_LONGJMP(buf, val) longjmp(buf, val)
+#endif
+
+#ifdef __GNUC__
+#define UPB_USE_C11_ATOMICS
+#define UPB_ATOMIC(T) _Atomic(T)
+#else
+#define UPB_ATOMIC(T) T
+#endif
+
+/* UPB_PTRADD(ptr, ofs): add pointer while avoiding "NULL + 0" UB */
+#define UPB_PTRADD(ptr, ofs) ((ofs) ? (ptr) + (ofs) : (ptr))
+
+#define UPB_PRIVATE(x) x##_dont_copy_me__upb_internal_use_only
+
+/* Configure whether fasttable is switched on or not. *************************/
+
+#ifdef __has_attribute
+#define UPB_HAS_ATTRIBUTE(x) __has_attribute(x)
+#else
+#define UPB_HAS_ATTRIBUTE(x) 0
+#endif
+
+#if UPB_HAS_ATTRIBUTE(musttail)
+#define UPB_MUSTTAIL __attribute__((musttail))
+#else
+#define UPB_MUSTTAIL
+#endif
+
+#undef UPB_HAS_ATTRIBUTE
+
+/* This check is not fully robust: it does not require that we have "musttail"
+ * support available. We need tail calls to avoid consuming arbitrary amounts
+ * of stack space.
+ *
+ * GCC/Clang can mostly be trusted to generate tail calls as long as
+ * optimization is enabled, but, debug builds will not generate tail calls
+ * unless "musttail" is available.
+ *
+ * We should probably either:
+ * 1. require that the compiler supports musttail.
+ * 2. add some fallback code for when musttail isn't available (ie. return
+ * instead of tail calling). This is safe and portable, but this comes at
+ * a CPU cost.
+ */
+#if (defined(__x86_64__) || defined(__aarch64__)) && defined(__GNUC__)
+#define UPB_FASTTABLE_SUPPORTED 1
+#else
+#define UPB_FASTTABLE_SUPPORTED 0
+#endif
+
+/* define UPB_ENABLE_FASTTABLE to force fast table support.
+ * This is useful when we want to ensure we are really getting fasttable,
+ * for example for testing or benchmarking. */
+#if defined(UPB_ENABLE_FASTTABLE)
+#if !UPB_FASTTABLE_SUPPORTED
+#error fasttable is x86-64/ARM64 only and requires GCC or Clang.
+#endif
+#define UPB_FASTTABLE 1
+/* Define UPB_TRY_ENABLE_FASTTABLE to use fasttable if possible.
+ * This is useful for releasing code that might be used on multiple platforms,
+ * for example the PHP or Ruby C extensions. */
+#elif defined(UPB_TRY_ENABLE_FASTTABLE)
+#define UPB_FASTTABLE UPB_FASTTABLE_SUPPORTED
+#else
+#define UPB_FASTTABLE 0
+#endif
+
+/* UPB_FASTTABLE_INIT() allows protos compiled for fasttable to gracefully
+ * degrade to non-fasttable if the runtime or platform do not support it. */
+#if !UPB_FASTTABLE
+#define UPB_FASTTABLE_INIT(...)
+#define UPB_FASTTABLE_MASK(mask) -1
+#else
+#define UPB_FASTTABLE_INIT(...) __VA_ARGS__
+#define UPB_FASTTABLE_MASK(mask) mask
+#endif
+
+#undef UPB_FASTTABLE_SUPPORTED
+
+/* ASAN poisoning (for arena).
+ * If using UPB from an interpreted language like Ruby, a build of the
+ * interpreter compiled with ASAN enabled must be used in order to get sane and
+ * expected behavior.
+ */
+
+/* Due to preprocessor limitations, the conditional logic for setting
+ * UPN_CLANG_ASAN below cannot be consolidated into a portable one-liner.
+ * See https://gcc.gnu.org/onlinedocs/cpp/_005f_005fhas_005fattribute.html.
+ */
+#if defined(__has_feature)
+#if __has_feature(address_sanitizer)
+#define UPB_CLANG_ASAN 1
+#else
+#define UPB_CLANG_ASAN 0
+#endif
+#else
+#define UPB_CLANG_ASAN 0
+#endif
+
+#if defined(__SANITIZE_ADDRESS__) || UPB_CLANG_ASAN
+#define UPB_ASAN 1
+#define UPB_ASAN_GUARD_SIZE 32
+#ifdef __cplusplus
+ extern "C" {
+#endif
+void __asan_poison_memory_region(void const volatile *addr, size_t size);
+void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+#define UPB_POISON_MEMORY_REGION(addr, size) \
+ __asan_poison_memory_region((addr), (size))
+#define UPB_UNPOISON_MEMORY_REGION(addr, size) \
+ __asan_unpoison_memory_region((addr), (size))
+#else
+#define UPB_ASAN 0
+#define UPB_ASAN_GUARD_SIZE 0
+#define UPB_POISON_MEMORY_REGION(addr, size) \
+ ((void)(addr), (void)(size))
+#define UPB_UNPOISON_MEMORY_REGION(addr, size) \
+ ((void)(addr), (void)(size))
+#endif
+
+/* Disable proto2 arena behavior (TEMPORARY) **********************************/
+
+#ifdef UPB_DISABLE_PROTO2_ENUM_CHECKING
+#define UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 1
+#else
+#define UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 0
+#endif
+
+#if defined(__cplusplus)
+#if defined(__clang__) || UPB_GNUC_MIN(6, 0)
+// https://gcc.gnu.org/gcc-6/changes.html
+#if __cplusplus >= 201402L
+#define UPB_DEPRECATED [[deprecated]]
+#else
+#define UPB_DEPRECATED __attribute__((deprecated))
+#endif
+#else
+#define UPB_DEPRECATED
+#endif
+#else
+#define UPB_DEPRECATED
+#endif
+
+// begin:google_only
+// #define UPB_IS_GOOGLE3
+// end:google_only
+
+#if defined(UPB_IS_GOOGLE3) && !defined(UPB_BOOTSTRAP_STAGE0)
+#define UPB_DESC(sym) proto2_##sym
+#else
+#define UPB_DESC(sym) google_protobuf_##sym
+#endif
diff --git a/upb/upb/port/undef.inc b/upb/upb/port/undef.inc
new file mode 100644
index 0000000..edf61c8
--- /dev/null
+++ b/upb/upb/port/undef.inc
@@ -0,0 +1,78 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// This should #undef all macros #defined in def.inc
+
+#undef UPB_SIZE
+#undef UPB_PTR_AT
+#undef UPB_MAPTYPE_STRING
+#undef UPB_EXPORT
+#undef UPB_INLINE
+#undef UPB_API
+#undef UPB_API_INLINE
+#undef UPB_ALIGN_UP
+#undef UPB_ALIGN_DOWN
+#undef UPB_ALIGN_MALLOC
+#undef UPB_ALIGN_OF
+#undef UPB_MALLOC_ALIGN
+#undef UPB_LIKELY
+#undef UPB_UNLIKELY
+#undef UPB_FORCEINLINE
+#undef UPB_NOINLINE
+#undef UPB_NORETURN
+#undef UPB_PRINTF
+#undef UPB_MAX
+#undef UPB_MIN
+#undef UPB_UNUSED
+#undef UPB_ASSUME
+#undef UPB_ASSERT
+#undef UPB_UNREACHABLE
+#undef UPB_SETJMP
+#undef UPB_LONGJMP
+#undef UPB_PTRADD
+#undef UPB_MUSTTAIL
+#undef UPB_FASTTABLE_SUPPORTED
+#undef UPB_FASTTABLE_MASK
+#undef UPB_FASTTABLE
+#undef UPB_FASTTABLE_INIT
+#undef UPB_POISON_MEMORY_REGION
+#undef UPB_UNPOISON_MEMORY_REGION
+#undef UPB_ASAN
+#undef UPB_ASAN_GUARD_SIZE
+#undef UPB_CLANG_ASAN
+#undef UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3
+#undef UPB_DEPRECATED
+#undef UPB_GNUC_MIN
+#undef UPB_DESCRIPTOR_UPB_H_FILENAME
+#undef UPB_DESC
+#undef UPB_IS_GOOGLE3
+#undef UPB_ATOMIC
+#undef UPB_USE_C11_ATOMICS
+#undef UPB_PRIVATE
diff --git a/upb/upb/port/vsnprintf_compat.h b/upb/upb/port/vsnprintf_compat.h
new file mode 100644
index 0000000..35c9697
--- /dev/null
+++ b/upb/upb/port/vsnprintf_compat.h
@@ -0,0 +1,53 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_PORT_VSNPRINTF_COMPAT_H_
+#define UPB_PORT_VSNPRINTF_COMPAT_H_
+
+// Must be last.
+#include "upb/port/def.inc"
+
+UPB_INLINE int _upb_vsnprintf(char* buf, size_t size, const char* fmt,
+ va_list ap) {
+#if defined(__MINGW64__) || defined(__MINGW32__) || defined(_MSC_VER)
+ // The msvc runtime has a non-conforming vsnprintf() that requires the
+ // following compatibility code to become conformant.
+ int n = -1;
+ if (size != 0) n = _vsnprintf_s(buf, size, _TRUNCATE, fmt, ap);
+ if (n == -1) n = _vscprintf(fmt, ap);
+ return n;
+#else
+ return vsnprintf(buf, size, fmt, ap);
+#endif
+}
+
+#include "upb/port/undef.inc"
+
+#endif // UPB_PORT_VSNPRINTF_COMPAT_H_
diff --git a/upb/upb/reflection/common.h b/upb/upb/reflection/common.h
new file mode 100644
index 0000000..b435e3c
--- /dev/null
+++ b/upb/upb/reflection/common.h
@@ -0,0 +1,70 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// IWYU pragma: private, include "upb/reflection/def.h"
+
+// Declarations common to all public def types.
+
+#ifndef UPB_REFLECTION_COMMON_H_
+#define UPB_REFLECTION_COMMON_H_
+
+// begin:google_only
+// #ifndef UPB_BOOTSTRAP_STAGE0
+// #include "net/proto2/proto/descriptor.upb.h"
+// #else
+// #include "google/protobuf/descriptor.upb.h"
+// #endif
+// end:google_only
+
+// begin:github_only
+#include "google/protobuf/descriptor.upb.h"
+// end:github_only
+
+typedef enum { kUpb_Syntax_Proto2 = 2, kUpb_Syntax_Proto3 = 3 } upb_Syntax;
+
+// Forward declarations for circular references.
+typedef struct upb_DefPool upb_DefPool;
+typedef struct upb_EnumDef upb_EnumDef;
+typedef struct upb_EnumReservedRange upb_EnumReservedRange;
+typedef struct upb_EnumValueDef upb_EnumValueDef;
+typedef struct upb_ExtensionRange upb_ExtensionRange;
+typedef struct upb_FieldDef upb_FieldDef;
+typedef struct upb_FileDef upb_FileDef;
+typedef struct upb_MessageDef upb_MessageDef;
+typedef struct upb_MessageReservedRange upb_MessageReservedRange;
+typedef struct upb_MethodDef upb_MethodDef;
+typedef struct upb_OneofDef upb_OneofDef;
+typedef struct upb_ServiceDef upb_ServiceDef;
+
+// EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE /////////////////////////
+
+typedef struct upb_DefBuilder upb_DefBuilder;
+
+#endif /* UPB_REFLECTION_COMMON_H_ */
diff --git a/upb/upb/reflection/def.h b/upb/upb/reflection/def.h
new file mode 100644
index 0000000..9fc718b
--- /dev/null
+++ b/upb/upb/reflection/def.h
@@ -0,0 +1,45 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_REFLECTION_DEF_H_
+#define UPB_REFLECTION_DEF_H_
+
+#include "upb/reflection/def_pool.h"
+#include "upb/reflection/enum_def.h"
+#include "upb/reflection/enum_value_def.h"
+#include "upb/reflection/extension_range.h"
+#include "upb/reflection/field_def.h"
+#include "upb/reflection/file_def.h"
+#include "upb/reflection/message_def.h"
+#include "upb/reflection/method_def.h"
+#include "upb/reflection/oneof_def.h"
+#include "upb/reflection/service_def.h"
+
+#endif /* UPB_REFLECTION_DEF_H_ */
diff --git a/upb/upb/reflection/def.hpp b/upb/upb/reflection/def.hpp
new file mode 100644
index 0000000..32264ae
--- /dev/null
+++ b/upb/upb/reflection/def.hpp
@@ -0,0 +1,628 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_REFLECTION_DEF_HPP_
+#define UPB_REFLECTION_DEF_HPP_
+
+#include <cstring>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "upb/base/status.hpp"
+#include "upb/mem/arena.hpp"
+#include "upb/reflection/def.h"
+#include "upb/reflection/internal/def_pool.h"
+#include "upb/reflection/internal/enum_def.h"
+#include "upb/reflection/message.h"
+
+// Must be last
+#include "upb/port/def.inc"
+
+namespace upb {
+
+typedef upb_MessageValue MessageValue;
+
+class EnumDefPtr;
+class FileDefPtr;
+class MessageDefPtr;
+class OneofDefPtr;
+
+// A upb::FieldDefPtr describes a single field in a message. It is most often
+// found as a part of a upb_MessageDef, but can also stand alone to represent
+// an extension.
+class FieldDefPtr {
+ public:
+ FieldDefPtr() : ptr_(nullptr) {}
+ explicit FieldDefPtr(const upb_FieldDef* ptr) : ptr_(ptr) {}
+
+ const upb_FieldDef* ptr() const { return ptr_; }
+
+ typedef upb_FieldType Type;
+ typedef upb_CType CType;
+ typedef upb_Label Label;
+
+ FileDefPtr file() const;
+ const char* full_name() const { return upb_FieldDef_FullName(ptr_); }
+
+ const upb_MiniTableField* mini_table() const {
+ return upb_FieldDef_MiniTable(ptr_);
+ }
+
+ const UPB_DESC(FieldOptions) * options() const {
+ return upb_FieldDef_Options(ptr_);
+ }
+
+ Type type() const { return upb_FieldDef_Type(ptr_); }
+ CType ctype() const { return upb_FieldDef_CType(ptr_); }
+ Label label() const { return upb_FieldDef_Label(ptr_); }
+ const char* name() const { return upb_FieldDef_Name(ptr_); }
+ const char* json_name() const { return upb_FieldDef_JsonName(ptr_); }
+ uint32_t number() const { return upb_FieldDef_Number(ptr_); }
+ bool is_extension() const { return upb_FieldDef_IsExtension(ptr_); }
+ bool is_required() const { return upb_FieldDef_IsRequired(ptr_); }
+ bool has_presence() const { return upb_FieldDef_HasPresence(ptr_); }
+
+ // For non-string, non-submessage fields, this indicates whether binary
+ // protobufs are encoded in packed or non-packed format.
+ //
+ // Note: this accessor reflects the fact that "packed" has different defaults
+ // depending on whether the proto is proto2 or proto3.
+ bool packed() const { return upb_FieldDef_IsPacked(ptr_); }
+
+ // An integer that can be used as an index into an array of fields for
+ // whatever message this field belongs to. Guaranteed to be less than
+ // f->containing_type()->field_count(). May only be accessed once the def has
+ // been finalized.
+ uint32_t index() const { return upb_FieldDef_Index(ptr_); }
+
+ // The MessageDef to which this field belongs (for extensions, the extended
+ // message).
+ MessageDefPtr containing_type() const;
+
+ // For extensions, the message the extension is declared inside, or NULL if
+ // none.
+ MessageDefPtr extension_scope() const;
+
+ // The OneofDef to which this field belongs, or NULL if this field is not part
+ // of a oneof.
+ OneofDefPtr containing_oneof() const;
+ OneofDefPtr real_containing_oneof() const;
+
+ // Convenient field type tests.
+ bool IsSubMessage() const { return upb_FieldDef_IsSubMessage(ptr_); }
+ bool IsString() const { return upb_FieldDef_IsString(ptr_); }
+ bool IsSequence() const { return upb_FieldDef_IsRepeated(ptr_); }
+ bool IsPrimitive() const { return upb_FieldDef_IsPrimitive(ptr_); }
+ bool IsMap() const { return upb_FieldDef_IsMap(ptr_); }
+
+ MessageValue default_value() const { return upb_FieldDef_Default(ptr_); }
+
+ // Returns the enum or submessage def for this field, if any. The field's
+ // type must match (ie. you may only call enum_subdef() for fields where
+ // type() == kUpb_CType_Enum).
+ EnumDefPtr enum_subdef() const;
+ MessageDefPtr message_type() const;
+
+ explicit operator bool() const { return ptr_ != nullptr; }
+
+ friend bool operator==(FieldDefPtr lhs, FieldDefPtr rhs) {
+ return lhs.ptr_ == rhs.ptr_;
+ }
+
+ friend bool operator!=(FieldDefPtr lhs, FieldDefPtr rhs) {
+ return !(lhs == rhs);
+ }
+
+ private:
+ const upb_FieldDef* ptr_;
+};
+
+// Class that represents a oneof.
+class OneofDefPtr {
+ public:
+ OneofDefPtr() : ptr_(nullptr) {}
+ explicit OneofDefPtr(const upb_OneofDef* ptr) : ptr_(ptr) {}
+
+ const upb_OneofDef* ptr() const { return ptr_; }
+ explicit operator bool() const { return ptr_ != nullptr; }
+
+ const UPB_DESC(OneofOptions) * options() const {
+ return upb_OneofDef_Options(ptr_);
+ }
+
+ // Returns the MessageDef that contains this OneofDef.
+ MessageDefPtr containing_type() const;
+
+ // Returns the name of this oneof.
+ const char* name() const { return upb_OneofDef_Name(ptr_); }
+ const char* full_name() const { return upb_OneofDef_FullName(ptr_); }
+
+ // Returns the number of fields in the oneof.
+ int field_count() const { return upb_OneofDef_FieldCount(ptr_); }
+ FieldDefPtr field(int i) const {
+ return FieldDefPtr(upb_OneofDef_Field(ptr_, i));
+ }
+
+ // Looks up by name.
+ FieldDefPtr FindFieldByName(const char* name, size_t len) const {
+ return FieldDefPtr(upb_OneofDef_LookupNameWithSize(ptr_, name, len));
+ }
+ FieldDefPtr FindFieldByName(const char* name) const {
+ return FieldDefPtr(upb_OneofDef_LookupName(ptr_, name));
+ }
+
+ template <class T>
+ FieldDefPtr FindFieldByName(const T& str) const {
+ return FindFieldByName(str.c_str(), str.size());
+ }
+
+ // Looks up by tag number.
+ FieldDefPtr FindFieldByNumber(uint32_t num) const {
+ return FieldDefPtr(upb_OneofDef_LookupNumber(ptr_, num));
+ }
+
+ private:
+ const upb_OneofDef* ptr_;
+};
+
+// Structure that describes a single .proto message type.
+class MessageDefPtr {
+ public:
+ MessageDefPtr() : ptr_(nullptr) {}
+ explicit MessageDefPtr(const upb_MessageDef* ptr) : ptr_(ptr) {}
+
+ const UPB_DESC(MessageOptions) * options() const {
+ return upb_MessageDef_Options(ptr_);
+ }
+
+ std::string MiniDescriptorEncode() const {
+ upb::Arena arena;
+ upb_StringView md;
+ upb_MessageDef_MiniDescriptorEncode(ptr_, arena.ptr(), &md);
+ return std::string(md.data, md.size);
+ }
+
+ const upb_MessageDef* ptr() const { return ptr_; }
+
+ FileDefPtr file() const;
+
+ const char* full_name() const { return upb_MessageDef_FullName(ptr_); }
+ const char* name() const { return upb_MessageDef_Name(ptr_); }
+
+ const upb_MiniTable* mini_table() const {
+ return upb_MessageDef_MiniTable(ptr_);
+ }
+
+ // The number of fields that belong to the MessageDef.
+ int field_count() const { return upb_MessageDef_FieldCount(ptr_); }
+ FieldDefPtr field(int i) const {
+ return FieldDefPtr(upb_MessageDef_Field(ptr_, i));
+ }
+
+ // The number of oneofs that belong to the MessageDef.
+ int oneof_count() const { return upb_MessageDef_OneofCount(ptr_); }
+ int real_oneof_count() const { return upb_MessageDef_RealOneofCount(ptr_); }
+ OneofDefPtr oneof(int i) const {
+ return OneofDefPtr(upb_MessageDef_Oneof(ptr_, i));
+ }
+
+ int enum_type_count() const { return upb_MessageDef_NestedEnumCount(ptr_); }
+ EnumDefPtr enum_type(int i) const;
+
+ int nested_message_count() const {
+ return upb_MessageDef_NestedMessageCount(ptr_);
+ }
+ MessageDefPtr nested_message(int i) const {
+ return MessageDefPtr(upb_MessageDef_NestedMessage(ptr_, i));
+ }
+
+ int nested_extension_count() const {
+ return upb_MessageDef_NestedExtensionCount(ptr_);
+ }
+ FieldDefPtr nested_extension(int i) const {
+ return FieldDefPtr(upb_MessageDef_NestedExtension(ptr_, i));
+ }
+
+ int extension_range_count() const {
+ return upb_MessageDef_ExtensionRangeCount(ptr_);
+ }
+
+ upb_Syntax syntax() const { return upb_MessageDef_Syntax(ptr_); }
+
+ // These return null pointers if the field is not found.
+ FieldDefPtr FindFieldByNumber(uint32_t number) const {
+ return FieldDefPtr(upb_MessageDef_FindFieldByNumber(ptr_, number));
+ }
+ FieldDefPtr FindFieldByName(const char* name, size_t len) const {
+ return FieldDefPtr(upb_MessageDef_FindFieldByNameWithSize(ptr_, name, len));
+ }
+ FieldDefPtr FindFieldByName(const char* name) const {
+ return FieldDefPtr(upb_MessageDef_FindFieldByName(ptr_, name));
+ }
+
+ template <class T>
+ FieldDefPtr FindFieldByName(const T& str) const {
+ return FindFieldByName(str.c_str(), str.size());
+ }
+
+ OneofDefPtr FindOneofByName(const char* name, size_t len) const {
+ return OneofDefPtr(upb_MessageDef_FindOneofByNameWithSize(ptr_, name, len));
+ }
+
+ OneofDefPtr FindOneofByName(const char* name) const {
+ return OneofDefPtr(upb_MessageDef_FindOneofByName(ptr_, name));
+ }
+
+ template <class T>
+ OneofDefPtr FindOneofByName(const T& str) const {
+ return FindOneofByName(str.c_str(), str.size());
+ }
+
+ // Is this message a map entry?
+ bool mapentry() const { return upb_MessageDef_IsMapEntry(ptr_); }
+
+ FieldDefPtr map_key() const {
+ if (!mapentry()) return FieldDefPtr();
+ return FieldDefPtr(upb_MessageDef_Field(ptr_, 0));
+ }
+
+ FieldDefPtr map_value() const {
+ if (!mapentry()) return FieldDefPtr();
+ return FieldDefPtr(upb_MessageDef_Field(ptr_, 1));
+ }
+
+ // Return the type of well known type message. kUpb_WellKnown_Unspecified for
+ // non-well-known message.
+ upb_WellKnown wellknowntype() const {
+ return upb_MessageDef_WellKnownType(ptr_);
+ }
+
+ explicit operator bool() const { return ptr_ != nullptr; }
+
+ friend bool operator==(MessageDefPtr lhs, MessageDefPtr rhs) {
+ return lhs.ptr_ == rhs.ptr_;
+ }
+
+ friend bool operator!=(MessageDefPtr lhs, MessageDefPtr rhs) {
+ return !(lhs == rhs);
+ }
+
+ private:
+ class FieldIter {
+ public:
+ explicit FieldIter(const upb_MessageDef* m, int i) : m_(m), i_(i) {}
+ void operator++() { i_++; }
+
+ FieldDefPtr operator*() {
+ return FieldDefPtr(upb_MessageDef_Field(m_, i_));
+ }
+
+ friend bool operator==(FieldIter lhs, FieldIter rhs) {
+ return lhs.i_ == rhs.i_;
+ }
+
+ friend bool operator!=(FieldIter lhs, FieldIter rhs) {
+ return !(lhs == rhs);
+ }
+
+ private:
+ const upb_MessageDef* m_;
+ int i_;
+ };
+
+ class FieldAccessor {
+ public:
+ explicit FieldAccessor(const upb_MessageDef* md) : md_(md) {}
+ FieldIter begin() { return FieldIter(md_, 0); }
+ FieldIter end() { return FieldIter(md_, upb_MessageDef_FieldCount(md_)); }
+
+ private:
+ const upb_MessageDef* md_;
+ };
+
+ class OneofIter {
+ public:
+ explicit OneofIter(const upb_MessageDef* m, int i) : m_(m), i_(i) {}
+ void operator++() { i_++; }
+
+ OneofDefPtr operator*() {
+ return OneofDefPtr(upb_MessageDef_Oneof(m_, i_));
+ }
+
+ friend bool operator==(OneofIter lhs, OneofIter rhs) {
+ return lhs.i_ == rhs.i_;
+ }
+
+ friend bool operator!=(OneofIter lhs, OneofIter rhs) {
+ return !(lhs == rhs);
+ }
+
+ private:
+ const upb_MessageDef* m_;
+ int i_;
+ };
+
+ class OneofAccessor {
+ public:
+ explicit OneofAccessor(const upb_MessageDef* md) : md_(md) {}
+ OneofIter begin() { return OneofIter(md_, 0); }
+ OneofIter end() { return OneofIter(md_, upb_MessageDef_OneofCount(md_)); }
+
+ private:
+ const upb_MessageDef* md_;
+ };
+
+ public:
+ FieldAccessor fields() const { return FieldAccessor(ptr()); }
+ OneofAccessor oneofs() const { return OneofAccessor(ptr()); }
+
+ private:
+ const upb_MessageDef* ptr_;
+};
+
+class EnumValDefPtr {
+ public:
+ EnumValDefPtr() : ptr_(nullptr) {}
+ explicit EnumValDefPtr(const upb_EnumValueDef* ptr) : ptr_(ptr) {}
+
+ const UPB_DESC(EnumValueOptions) * options() const {
+ return upb_EnumValueDef_Options(ptr_);
+ }
+
+ int32_t number() const { return upb_EnumValueDef_Number(ptr_); }
+ const char* full_name() const { return upb_EnumValueDef_FullName(ptr_); }
+ const char* name() const { return upb_EnumValueDef_Name(ptr_); }
+
+ private:
+ const upb_EnumValueDef* ptr_;
+};
+
+class EnumDefPtr {
+ public:
+ EnumDefPtr() : ptr_(nullptr) {}
+ explicit EnumDefPtr(const upb_EnumDef* ptr) : ptr_(ptr) {}
+
+ const UPB_DESC(EnumOptions) * options() const {
+ return upb_EnumDef_Options(ptr_);
+ }
+
+ const upb_MiniTableEnum* mini_table() const {
+ return _upb_EnumDef_MiniTable(ptr_);
+ }
+
+ std::string MiniDescriptorEncode() const {
+ upb::Arena arena;
+ upb_StringView md;
+ upb_EnumDef_MiniDescriptorEncode(ptr_, arena.ptr(), &md);
+ return std::string(md.data, md.size);
+ }
+
+ const upb_EnumDef* ptr() const { return ptr_; }
+ explicit operator bool() const { return ptr_ != nullptr; }
+
+ const char* full_name() const { return upb_EnumDef_FullName(ptr_); }
+ const char* name() const { return upb_EnumDef_Name(ptr_); }
+ bool is_closed() const { return upb_EnumDef_IsClosed(ptr_); }
+
+ // The value that is used as the default when no field default is specified.
+ // If not set explicitly, the first value that was added will be used.
+ // The default value must be a member of the enum.
+ // Requires that value_count() > 0.
+ int32_t default_value() const { return upb_EnumDef_Default(ptr_); }
+
+ // Returns the number of values currently defined in the enum. Note that
+ // multiple names can refer to the same number, so this may be greater than
+ // the total number of unique numbers.
+ int value_count() const { return upb_EnumDef_ValueCount(ptr_); }
+ EnumValDefPtr value(int i) const {
+ return EnumValDefPtr(upb_EnumDef_Value(ptr_, i));
+ }
+
+ // Lookups from name to integer, returning true if found.
+ EnumValDefPtr FindValueByName(const char* name) const {
+ return EnumValDefPtr(upb_EnumDef_FindValueByName(ptr_, name));
+ }
+
+ // Finds the name corresponding to the given number, or NULL if none was
+ // found. If more than one name corresponds to this number, returns the
+ // first one that was added.
+ EnumValDefPtr FindValueByNumber(int32_t num) const {
+ return EnumValDefPtr(upb_EnumDef_FindValueByNumber(ptr_, num));
+ }
+
+ private:
+ const upb_EnumDef* ptr_;
+};
+
+// Class that represents a .proto file with some things defined in it.
+//
+// Many users won't care about FileDefs, but they are necessary if you want to
+// read the values of file-level options.
+class FileDefPtr {
+ public:
+ explicit FileDefPtr(const upb_FileDef* ptr) : ptr_(ptr) {}
+
+ const UPB_DESC(FileOptions) * options() const {
+ return upb_FileDef_Options(ptr_);
+ }
+
+ const upb_FileDef* ptr() const { return ptr_; }
+
+ // Get/set name of the file (eg. "foo/bar.proto").
+ const char* name() const { return upb_FileDef_Name(ptr_); }
+
+ // Package name for definitions inside the file (eg. "foo.bar").
+ const char* package() const { return upb_FileDef_Package(ptr_); }
+
+ // Syntax for the file. Defaults to proto2.
+ upb_Syntax syntax() const { return upb_FileDef_Syntax(ptr_); }
+
+ // Get the list of dependencies from the file. These are returned in the
+ // order that they were added to the FileDefPtr.
+ int dependency_count() const { return upb_FileDef_DependencyCount(ptr_); }
+ FileDefPtr dependency(int index) const {
+ return FileDefPtr(upb_FileDef_Dependency(ptr_, index));
+ }
+
+ int public_dependency_count() const {
+ return upb_FileDef_PublicDependencyCount(ptr_);
+ }
+ FileDefPtr public_dependency(int index) const {
+ return FileDefPtr(upb_FileDef_PublicDependency(ptr_, index));
+ }
+
+ int toplevel_enum_count() const {
+ return upb_FileDef_TopLevelEnumCount(ptr_);
+ }
+ EnumDefPtr toplevel_enum(int index) const {
+ return EnumDefPtr(upb_FileDef_TopLevelEnum(ptr_, index));
+ }
+
+ int toplevel_message_count() const {
+ return upb_FileDef_TopLevelMessageCount(ptr_);
+ }
+ MessageDefPtr toplevel_message(int index) const {
+ return MessageDefPtr(upb_FileDef_TopLevelMessage(ptr_, index));
+ }
+
+ int toplevel_extension_count() const {
+ return upb_FileDef_TopLevelExtensionCount(ptr_);
+ }
+ FieldDefPtr toplevel_extension(int index) const {
+ return FieldDefPtr(upb_FileDef_TopLevelExtension(ptr_, index));
+ }
+
+ explicit operator bool() const { return ptr_ != nullptr; }
+
+ friend bool operator==(FileDefPtr lhs, FileDefPtr rhs) {
+ return lhs.ptr_ == rhs.ptr_;
+ }
+
+ friend bool operator!=(FileDefPtr lhs, FileDefPtr rhs) {
+ return !(lhs == rhs);
+ }
+
+ private:
+ const upb_FileDef* ptr_;
+};
+
+// Non-const methods in upb::DefPool are NOT thread-safe.
+class DefPool {
+ public:
+ DefPool() : ptr_(upb_DefPool_New(), upb_DefPool_Free) {}
+ explicit DefPool(upb_DefPool* s) : ptr_(s, upb_DefPool_Free) {}
+
+ const upb_DefPool* ptr() const { return ptr_.get(); }
+ upb_DefPool* ptr() { return ptr_.get(); }
+
+ // Finds an entry in the symbol table with this exact name. If not found,
+ // returns NULL.
+ MessageDefPtr FindMessageByName(const char* sym) const {
+ return MessageDefPtr(upb_DefPool_FindMessageByName(ptr_.get(), sym));
+ }
+
+ EnumDefPtr FindEnumByName(const char* sym) const {
+ return EnumDefPtr(upb_DefPool_FindEnumByName(ptr_.get(), sym));
+ }
+
+ FileDefPtr FindFileByName(const char* name) const {
+ return FileDefPtr(upb_DefPool_FindFileByName(ptr_.get(), name));
+ }
+
+ FieldDefPtr FindExtensionByName(const char* name) const {
+ return FieldDefPtr(upb_DefPool_FindExtensionByName(ptr_.get(), name));
+ }
+
+ void _SetPlatform(upb_MiniTablePlatform platform) {
+ _upb_DefPool_SetPlatform(ptr_.get(), platform);
+ }
+
+ // TODO: iteration?
+
+ // Adds the given serialized FileDescriptorProto to the pool.
+ FileDefPtr AddFile(const UPB_DESC(FileDescriptorProto) * file_proto,
+ Status* status) {
+ return FileDefPtr(
+ upb_DefPool_AddFile(ptr_.get(), file_proto, status->ptr()));
+ }
+
+ private:
+ std::unique_ptr<upb_DefPool, decltype(&upb_DefPool_Free)> ptr_;
+};
+
+// TODO(b/236632406): This typedef is deprecated. Delete it.
+using SymbolTable = DefPool;
+
+inline FileDefPtr FieldDefPtr::file() const {
+ return FileDefPtr(upb_FieldDef_File(ptr_));
+}
+
+inline FileDefPtr MessageDefPtr::file() const {
+ return FileDefPtr(upb_MessageDef_File(ptr_));
+}
+
+inline EnumDefPtr MessageDefPtr::enum_type(int i) const {
+ return EnumDefPtr(upb_MessageDef_NestedEnum(ptr_, i));
+}
+
+inline MessageDefPtr FieldDefPtr::message_type() const {
+ return MessageDefPtr(upb_FieldDef_MessageSubDef(ptr_));
+}
+
+inline MessageDefPtr FieldDefPtr::containing_type() const {
+ return MessageDefPtr(upb_FieldDef_ContainingType(ptr_));
+}
+
+inline MessageDefPtr FieldDefPtr::extension_scope() const {
+ return MessageDefPtr(upb_FieldDef_ExtensionScope(ptr_));
+}
+
+inline MessageDefPtr OneofDefPtr::containing_type() const {
+ return MessageDefPtr(upb_OneofDef_ContainingType(ptr_));
+}
+
+inline OneofDefPtr FieldDefPtr::containing_oneof() const {
+ return OneofDefPtr(upb_FieldDef_ContainingOneof(ptr_));
+}
+
+inline OneofDefPtr FieldDefPtr::real_containing_oneof() const {
+ return OneofDefPtr(upb_FieldDef_RealContainingOneof(ptr_));
+}
+
+inline EnumDefPtr FieldDefPtr::enum_subdef() const {
+ return EnumDefPtr(upb_FieldDef_EnumSubDef(ptr_));
+}
+
+} // namespace upb
+
+#include "upb/port/undef.inc"
+
+#endif // UPB_REFLECTION_DEF_HPP_
diff --git a/upb/upb/reflection/def_builder.c b/upb/upb/reflection/def_builder.c
new file mode 100644
index 0000000..93c4eb5
--- /dev/null
+++ b/upb/upb/reflection/def_builder.c
@@ -0,0 +1,361 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/reflection/internal/def_builder.h"
+
+#include <string.h>
+
+#include "upb/reflection/def_pool.h"
+#include "upb/reflection/def_type.h"
+#include "upb/reflection/field_def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+/* The upb core does not generally have a concept of default instances. However
+ * for descriptor options we make an exception since the max size is known and
+ * modest (<200 bytes). All types can share a default instance since it is
+ * initialized to zeroes.
+ *
+ * We have to allocate an extra pointer for upb's internal metadata. */
+static const char opt_default_buf[_UPB_MAXOPT_SIZE + sizeof(void*)] = {0};
+const char* kUpbDefOptDefault = &opt_default_buf[sizeof(void*)];
+
+const char* _upb_DefBuilder_FullToShort(const char* fullname) {
+ const char* p;
+
+ if (fullname == NULL) {
+ return NULL;
+ } else if ((p = strrchr(fullname, '.')) == NULL) {
+ /* No '.' in the name, return the full string. */
+ return fullname;
+ } else {
+ /* Return one past the last '.'. */
+ return p + 1;
+ }
+}
+
+void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx) { UPB_LONGJMP(ctx->err, 1); }
+
+void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, ...) {
+ va_list argp;
+ va_start(argp, fmt);
+ upb_Status_VSetErrorFormat(ctx->status, fmt, argp);
+ va_end(argp);
+ _upb_DefBuilder_FailJmp(ctx);
+}
+
+void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx) {
+ upb_Status_SetErrorMessage(ctx->status, "out of memory");
+ _upb_DefBuilder_FailJmp(ctx);
+}
+
+// Verify a relative identifier string. The loop is branchless for speed.
+static void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx,
+ upb_StringView name) {
+ bool good = name.size > 0;
+
+ for (size_t i = 0; i < name.size; i++) {
+ const char c = name.data[i];
+ const char d = c | 0x20; // force lowercase
+ const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_');
+ const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0);
+
+ good &= is_alpha | is_numer;
+ }
+
+ if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false);
+}
+
+const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx,
+ const char* prefix,
+ upb_StringView name) {
+ _upb_DefBuilder_CheckIdentNotFull(ctx, name);
+ if (prefix) {
+ // ret = prefix + '.' + name;
+ size_t n = strlen(prefix);
+ char* ret = _upb_DefBuilder_Alloc(ctx, n + name.size + 2);
+ strcpy(ret, prefix);
+ ret[n] = '.';
+ memcpy(&ret[n + 1], name.data, name.size);
+ ret[n + 1 + name.size] = '\0';
+ return ret;
+ } else {
+ char* ret = upb_strdup2(name.data, name.size, ctx->arena);
+ if (!ret) _upb_DefBuilder_OomErr(ctx);
+ return ret;
+ }
+}
+
+static bool remove_component(char* base, size_t* len) {
+ if (*len == 0) return false;
+
+ for (size_t i = *len - 1; i > 0; i--) {
+ if (base[i] == '.') {
+ *len = i;
+ return true;
+ }
+ }
+
+ *len = 0;
+ return true;
+}
+
+const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx,
+ const char* from_name_dbg,
+ const char* base, upb_StringView sym,
+ upb_deftype_t* type) {
+ if (sym.size == 0) goto notfound;
+ upb_value v;
+ if (sym.data[0] == '.') {
+ /* Symbols starting with '.' are absolute, so we do a single lookup.
+ * Slice to omit the leading '.' */
+ if (!_upb_DefPool_LookupSym(ctx->symtab, sym.data + 1, sym.size - 1, &v)) {
+ goto notfound;
+ }
+ } else {
+ /* Remove components from base until we find an entry or run out. */
+ size_t baselen = base ? strlen(base) : 0;
+ char* tmp = malloc(sym.size + baselen + 1);
+ while (1) {
+ char* p = tmp;
+ if (baselen) {
+ memcpy(p, base, baselen);
+ p[baselen] = '.';
+ p += baselen + 1;
+ }
+ memcpy(p, sym.data, sym.size);
+ p += sym.size;
+ if (_upb_DefPool_LookupSym(ctx->symtab, tmp, p - tmp, &v)) {
+ break;
+ }
+ if (!remove_component(tmp, &baselen)) {
+ free(tmp);
+ goto notfound;
+ }
+ }
+ free(tmp);
+ }
+
+ *type = _upb_DefType_Type(v);
+ return _upb_DefType_Unpack(v, *type);
+
+notfound:
+ _upb_DefBuilder_Errf(ctx, "couldn't resolve name '" UPB_STRINGVIEW_FORMAT "'",
+ UPB_STRINGVIEW_ARGS(sym));
+}
+
+const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx,
+ const char* from_name_dbg, const char* base,
+ upb_StringView sym, upb_deftype_t type) {
+ upb_deftype_t found_type;
+ const void* ret =
+ _upb_DefBuilder_ResolveAny(ctx, from_name_dbg, base, sym, &found_type);
+ if (ret && found_type != type) {
+ _upb_DefBuilder_Errf(ctx,
+ "type mismatch when resolving %s: couldn't find "
+ "name " UPB_STRINGVIEW_FORMAT " with type=%d",
+ from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type);
+ }
+ return ret;
+}
+
+// Per ASCII this will lower-case a letter. If the result is a letter, the
+// input was definitely a letter. If the output is not a letter, this may
+// have transformed the character unpredictably.
+static char upb_ascii_lower(char ch) { return ch | 0x20; }
+
+// isalpha() etc. from <ctype.h> are locale-dependent, which we don't want.
+static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) {
+ return low <= c && c <= high;
+}
+
+static bool upb_isletter(char c) {
+ char lower = upb_ascii_lower(c);
+ return upb_isbetween(lower, 'a', 'z') || c == '_';
+}
+
+static bool upb_isalphanum(char c) {
+ return upb_isletter(c) || upb_isbetween(c, '0', '9');
+}
+
+static bool TryGetChar(const char** src, const char* end, char* ch) {
+ if (*src == end) return false;
+ *ch = **src;
+ *src += 1;
+ return true;
+}
+
+static int TryGetHexDigit(const char** src, const char* end) {
+ char ch;
+ if (!TryGetChar(src, end, &ch)) return -1;
+ if ('0' <= ch && ch <= '9') {
+ return ch - '0';
+ }
+ ch = upb_ascii_lower(ch);
+ if ('a' <= ch && ch <= 'f') {
+ return ch - 'a' + 0xa;
+ }
+ *src -= 1; // Char wasn't actually a hex digit.
+ return -1;
+}
+
+static char upb_DefBuilder_ParseHexEscape(upb_DefBuilder* ctx,
+ const upb_FieldDef* f,
+ const char** src, const char* end) {
+ int hex_digit = TryGetHexDigit(src, end);
+ if (hex_digit < 0) {
+ _upb_DefBuilder_Errf(
+ ctx, "\\x must be followed by at least one hex digit (field='%s')",
+ upb_FieldDef_FullName(f));
+ return 0;
+ }
+ unsigned int ret = hex_digit;
+ while ((hex_digit = TryGetHexDigit(src, end)) >= 0) {
+ ret = (ret << 4) | hex_digit;
+ }
+ if (ret > 0xff) {
+ _upb_DefBuilder_Errf(ctx, "Value of hex escape in field %s exceeds 8 bits",
+ upb_FieldDef_FullName(f));
+ return 0;
+ }
+ return ret;
+}
+
+static char TryGetOctalDigit(const char** src, const char* end) {
+ char ch;
+ if (!TryGetChar(src, end, &ch)) return -1;
+ if ('0' <= ch && ch <= '7') {
+ return ch - '0';
+ }
+ *src -= 1; // Char wasn't actually an octal digit.
+ return -1;
+}
+
+static char upb_DefBuilder_ParseOctalEscape(upb_DefBuilder* ctx,
+ const upb_FieldDef* f,
+ const char** src, const char* end) {
+ char ch = 0;
+ for (int i = 0; i < 3; i++) {
+ char digit;
+ if ((digit = TryGetOctalDigit(src, end)) >= 0) {
+ ch = (ch << 3) | digit;
+ }
+ }
+ return ch;
+}
+
+char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f,
+ const char** src, const char* end) {
+ char ch;
+ if (!TryGetChar(src, end, &ch)) {
+ _upb_DefBuilder_Errf(ctx, "unterminated escape sequence in field %s",
+ upb_FieldDef_FullName(f));
+ return 0;
+ }
+ switch (ch) {
+ case 'a':
+ return '\a';
+ case 'b':
+ return '\b';
+ case 'f':
+ return '\f';
+ case 'n':
+ return '\n';
+ case 'r':
+ return '\r';
+ case 't':
+ return '\t';
+ case 'v':
+ return '\v';
+ case '\\':
+ return '\\';
+ case '\'':
+ return '\'';
+ case '\"':
+ return '\"';
+ case '?':
+ return '\?';
+ case 'x':
+ case 'X':
+ return upb_DefBuilder_ParseHexEscape(ctx, f, src, end);
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ *src -= 1;
+ return upb_DefBuilder_ParseOctalEscape(ctx, f, src, end);
+ }
+ _upb_DefBuilder_Errf(ctx, "Unknown escape sequence: \\%c", ch);
+}
+
+void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name,
+ bool full) {
+ const char* str = name.data;
+ const size_t len = name.size;
+ bool start = true;
+ for (size_t i = 0; i < len; i++) {
+ const char c = str[i];
+ if (c == '.') {
+ if (start || !full) {
+ _upb_DefBuilder_Errf(
+ ctx, "invalid name: unexpected '.' (" UPB_STRINGVIEW_FORMAT ")",
+ UPB_STRINGVIEW_ARGS(name));
+ }
+ start = true;
+ } else if (start) {
+ if (!upb_isletter(c)) {
+ _upb_DefBuilder_Errf(ctx,
+ "invalid name: path components must start with a "
+ "letter (" UPB_STRINGVIEW_FORMAT ")",
+ UPB_STRINGVIEW_ARGS(name));
+ }
+ start = false;
+ } else if (!upb_isalphanum(c)) {
+ _upb_DefBuilder_Errf(
+ ctx,
+ "invalid name: non-alphanumeric character (" UPB_STRINGVIEW_FORMAT
+ ")",
+ UPB_STRINGVIEW_ARGS(name));
+ }
+ }
+ if (start) {
+ _upb_DefBuilder_Errf(ctx,
+ "invalid name: empty part (" UPB_STRINGVIEW_FORMAT ")",
+ UPB_STRINGVIEW_ARGS(name));
+ }
+
+ // We should never reach this point.
+ UPB_ASSERT(false);
+}
diff --git a/upb/upb/reflection/def_builder_test.cc b/upb/upb/reflection/def_builder_test.cc
new file mode 100644
index 0000000..35d63a4
--- /dev/null
+++ b/upb/upb/reflection/def_builder_test.cc
@@ -0,0 +1,107 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/reflection/internal/def_builder.h"
+
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "upb/mem/arena.hpp"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+struct IdentTestData {
+ absl::string_view text;
+ bool ok;
+};
+
+class FullIdentTestBase : public testing::TestWithParam<IdentTestData> {};
+
+TEST_P(FullIdentTestBase, CheckFullIdent) {
+ upb_Status status;
+ upb_DefBuilder ctx;
+ upb::Arena arena;
+ ctx.status = &status;
+ ctx.arena = arena.ptr();
+ upb_Status_Clear(&status);
+
+ if (UPB_SETJMP(ctx.err)) {
+ EXPECT_FALSE(GetParam().ok);
+ } else {
+ _upb_DefBuilder_CheckIdentFull(
+ &ctx, upb_StringView_FromDataAndSize(GetParam().text.data(),
+ GetParam().text.size()));
+ EXPECT_TRUE(GetParam().ok);
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(FullIdentTest, FullIdentTestBase,
+ testing::ValuesIn(std::vector<IdentTestData>{
+ {"foo.bar", true},
+ {"foo.", true},
+ {"foo", true},
+
+ {"foo.7bar", false},
+ {".foo", false},
+ {"#", false},
+ {".", false},
+ {"", false}}));
+
+class PartIdentTestBase : public testing::TestWithParam<IdentTestData> {};
+
+TEST_P(PartIdentTestBase, TestNotFullIdent) {
+ upb_Status status;
+ upb_DefBuilder ctx;
+ upb::Arena arena;
+ ctx.status = &status;
+ ctx.arena = arena.ptr();
+ upb_Status_Clear(&status);
+
+ if (UPB_SETJMP(ctx.err)) {
+ EXPECT_FALSE(GetParam().ok);
+ } else {
+ _upb_DefBuilder_MakeFullName(
+ &ctx, "abc",
+ upb_StringView_FromDataAndSize(GetParam().text.data(),
+ GetParam().text.size()));
+ EXPECT_TRUE(GetParam().ok);
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(PartIdentTest, PartIdentTestBase,
+ testing::ValuesIn(std::vector<IdentTestData>{
+ {"foo", true},
+ {"foo1", true},
+
+ {"foo.bar", false},
+ {"1foo", false},
+ {"#", false},
+ {".", false},
+ {"", false}}));
diff --git a/upb/upb/reflection/def_pool.c b/upb/upb/reflection/def_pool.c
new file mode 100644
index 0000000..76733ff
--- /dev/null
+++ b/upb/upb/reflection/def_pool.c
@@ -0,0 +1,466 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/reflection/internal/def_pool.h"
+
+#include "upb/hash/int_table.h"
+#include "upb/hash/str_table.h"
+#include "upb/reflection/def_type.h"
+#include "upb/reflection/internal/def_builder.h"
+#include "upb/reflection/internal/enum_def.h"
+#include "upb/reflection/internal/enum_value_def.h"
+#include "upb/reflection/internal/field_def.h"
+#include "upb/reflection/internal/file_def.h"
+#include "upb/reflection/internal/message_def.h"
+#include "upb/reflection/internal/service_def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+struct upb_DefPool {
+ upb_Arena* arena;
+ upb_strtable syms; // full_name -> packed def ptr
+ upb_strtable files; // file_name -> (upb_FileDef*)
+ upb_inttable exts; // (upb_MiniTableExtension*) -> (upb_FieldDef*)
+ upb_ExtensionRegistry* extreg;
+ upb_MiniTablePlatform platform;
+ void* scratch_data;
+ size_t scratch_size;
+ size_t bytes_loaded;
+};
+
+void upb_DefPool_Free(upb_DefPool* s) {
+ upb_Arena_Free(s->arena);
+ upb_gfree(s->scratch_data);
+ upb_gfree(s);
+}
+
+upb_DefPool* upb_DefPool_New(void) {
+ upb_DefPool* s = upb_gmalloc(sizeof(*s));
+ if (!s) return NULL;
+
+ s->arena = upb_Arena_New();
+ s->bytes_loaded = 0;
+
+ s->scratch_size = 240;
+ s->scratch_data = upb_gmalloc(s->scratch_size);
+ if (!s->scratch_data) goto err;
+
+ if (!upb_strtable_init(&s->syms, 32, s->arena)) goto err;
+ if (!upb_strtable_init(&s->files, 4, s->arena)) goto err;
+ if (!upb_inttable_init(&s->exts, s->arena)) goto err;
+
+ s->extreg = upb_ExtensionRegistry_New(s->arena);
+ if (!s->extreg) goto err;
+
+ s->platform = kUpb_MiniTablePlatform_Native;
+
+ return s;
+
+err:
+ upb_DefPool_Free(s);
+ return NULL;
+}
+
+bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTableExtension* ext,
+ const upb_FieldDef* f) {
+ return upb_inttable_insert(&s->exts, (uintptr_t)ext, upb_value_constptr(f),
+ s->arena);
+}
+
+bool _upb_DefPool_InsertSym(upb_DefPool* s, upb_StringView sym, upb_value v,
+ upb_Status* status) {
+ // TODO: table should support an operation "tryinsert" to avoid the double
+ // lookup.
+ if (upb_strtable_lookup2(&s->syms, sym.data, sym.size, NULL)) {
+ upb_Status_SetErrorFormat(status, "duplicate symbol '%s'", sym.data);
+ return false;
+ }
+ if (!upb_strtable_insert(&s->syms, sym.data, sym.size, v, s->arena)) {
+ upb_Status_SetErrorMessage(status, "out of memory");
+ return false;
+ }
+ return true;
+}
+
+static const void* _upb_DefPool_Unpack(const upb_DefPool* s, const char* sym,
+ size_t size, upb_deftype_t type) {
+ upb_value v;
+ return upb_strtable_lookup2(&s->syms, sym, size, &v)
+ ? _upb_DefType_Unpack(v, type)
+ : NULL;
+}
+
+bool _upb_DefPool_LookupSym(const upb_DefPool* s, const char* sym, size_t size,
+ upb_value* v) {
+ return upb_strtable_lookup2(&s->syms, sym, size, v);
+}
+
+upb_ExtensionRegistry* _upb_DefPool_ExtReg(const upb_DefPool* s) {
+ return s->extreg;
+}
+
+void** _upb_DefPool_ScratchData(const upb_DefPool* s) {
+ return (void**)&s->scratch_data;
+}
+
+size_t* _upb_DefPool_ScratchSize(const upb_DefPool* s) {
+ return (size_t*)&s->scratch_size;
+}
+
+void _upb_DefPool_SetPlatform(upb_DefPool* s, upb_MiniTablePlatform platform) {
+ assert(upb_strtable_count(&s->files) == 0);
+ s->platform = platform;
+}
+
+const upb_MessageDef* upb_DefPool_FindMessageByName(const upb_DefPool* s,
+ const char* sym) {
+ return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_MSG);
+}
+
+const upb_MessageDef* upb_DefPool_FindMessageByNameWithSize(
+ const upb_DefPool* s, const char* sym, size_t len) {
+ return _upb_DefPool_Unpack(s, sym, len, UPB_DEFTYPE_MSG);
+}
+
+const upb_EnumDef* upb_DefPool_FindEnumByName(const upb_DefPool* s,
+ const char* sym) {
+ return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUM);
+}
+
+const upb_EnumValueDef* upb_DefPool_FindEnumByNameval(const upb_DefPool* s,
+ const char* sym) {
+ return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUMVAL);
+}
+
+const upb_FileDef* upb_DefPool_FindFileByName(const upb_DefPool* s,
+ const char* name) {
+ upb_value v;
+ return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v)
+ : NULL;
+}
+
+const upb_FileDef* upb_DefPool_FindFileByNameWithSize(const upb_DefPool* s,
+ const char* name,
+ size_t len) {
+ upb_value v;
+ return upb_strtable_lookup2(&s->files, name, len, &v)
+ ? upb_value_getconstptr(v)
+ : NULL;
+}
+
+const upb_FieldDef* upb_DefPool_FindExtensionByNameWithSize(
+ const upb_DefPool* s, const char* name, size_t size) {
+ upb_value v;
+ if (!upb_strtable_lookup2(&s->syms, name, size, &v)) return NULL;
+
+ switch (_upb_DefType_Type(v)) {
+ case UPB_DEFTYPE_FIELD:
+ return _upb_DefType_Unpack(v, UPB_DEFTYPE_FIELD);
+ case UPB_DEFTYPE_MSG: {
+ const upb_MessageDef* m = _upb_DefType_Unpack(v, UPB_DEFTYPE_MSG);
+ return _upb_MessageDef_InMessageSet(m)
+ ? upb_MessageDef_NestedExtension(m, 0)
+ : NULL;
+ }
+ default:
+ break;
+ }
+
+ return NULL;
+}
+
+const upb_FieldDef* upb_DefPool_FindExtensionByName(const upb_DefPool* s,
+ const char* sym) {
+ return upb_DefPool_FindExtensionByNameWithSize(s, sym, strlen(sym));
+}
+
+const upb_ServiceDef* upb_DefPool_FindServiceByName(const upb_DefPool* s,
+ const char* name) {
+ return _upb_DefPool_Unpack(s, name, strlen(name), UPB_DEFTYPE_SERVICE);
+}
+
+const upb_ServiceDef* upb_DefPool_FindServiceByNameWithSize(
+ const upb_DefPool* s, const char* name, size_t size) {
+ return _upb_DefPool_Unpack(s, name, size, UPB_DEFTYPE_SERVICE);
+}
+
+const upb_FileDef* upb_DefPool_FindFileContainingSymbol(const upb_DefPool* s,
+ const char* name) {
+ upb_value v;
+ // TODO(haberman): non-extension fields and oneofs.
+ if (upb_strtable_lookup(&s->syms, name, &v)) {
+ switch (_upb_DefType_Type(v)) {
+ case UPB_DEFTYPE_EXT: {
+ const upb_FieldDef* f = _upb_DefType_Unpack(v, UPB_DEFTYPE_EXT);
+ return upb_FieldDef_File(f);
+ }
+ case UPB_DEFTYPE_MSG: {
+ const upb_MessageDef* m = _upb_DefType_Unpack(v, UPB_DEFTYPE_MSG);
+ return upb_MessageDef_File(m);
+ }
+ case UPB_DEFTYPE_ENUM: {
+ const upb_EnumDef* e = _upb_DefType_Unpack(v, UPB_DEFTYPE_ENUM);
+ return upb_EnumDef_File(e);
+ }
+ case UPB_DEFTYPE_ENUMVAL: {
+ const upb_EnumValueDef* ev =
+ _upb_DefType_Unpack(v, UPB_DEFTYPE_ENUMVAL);
+ return upb_EnumDef_File(upb_EnumValueDef_Enum(ev));
+ }
+ case UPB_DEFTYPE_SERVICE: {
+ const upb_ServiceDef* service =
+ _upb_DefType_Unpack(v, UPB_DEFTYPE_SERVICE);
+ return upb_ServiceDef_File(service);
+ }
+ default:
+ UPB_UNREACHABLE();
+ }
+ }
+
+ const char* last_dot = strrchr(name, '.');
+ if (last_dot) {
+ const upb_MessageDef* parent =
+ upb_DefPool_FindMessageByNameWithSize(s, name, last_dot - name);
+ if (parent) {
+ const char* shortname = last_dot + 1;
+ if (upb_MessageDef_FindByNameWithSize(parent, shortname,
+ strlen(shortname), NULL, NULL)) {
+ return upb_MessageDef_File(parent);
+ }
+ }
+ }
+
+ return NULL;
+}
+
+static void remove_filedef(upb_DefPool* s, upb_FileDef* file) {
+ intptr_t iter = UPB_INTTABLE_BEGIN;
+ upb_StringView key;
+ upb_value val;
+ while (upb_strtable_next2(&s->syms, &key, &val, &iter)) {
+ const upb_FileDef* f;
+ switch (_upb_DefType_Type(val)) {
+ case UPB_DEFTYPE_EXT:
+ f = upb_FieldDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_EXT));
+ break;
+ case UPB_DEFTYPE_MSG:
+ f = upb_MessageDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_MSG));
+ break;
+ case UPB_DEFTYPE_ENUM:
+ f = upb_EnumDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_ENUM));
+ break;
+ case UPB_DEFTYPE_ENUMVAL:
+ f = upb_EnumDef_File(upb_EnumValueDef_Enum(
+ _upb_DefType_Unpack(val, UPB_DEFTYPE_ENUMVAL)));
+ break;
+ case UPB_DEFTYPE_SERVICE:
+ f = upb_ServiceDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_SERVICE));
+ break;
+ default:
+ UPB_UNREACHABLE();
+ }
+
+ if (f == file) upb_strtable_removeiter(&s->syms, &iter);
+ }
+}
+
+static const upb_FileDef* upb_DefBuilder_AddFileToPool(
+ upb_DefBuilder* const builder, upb_DefPool* const s,
+ const UPB_DESC(FileDescriptorProto) * const file_proto,
+ const upb_StringView name, upb_Status* const status) {
+ if (UPB_SETJMP(builder->err) != 0) {
+ UPB_ASSERT(!upb_Status_IsOk(status));
+ if (builder->file) {
+ remove_filedef(s, builder->file);
+ builder->file = NULL;
+ }
+ } else if (!builder->arena || !builder->tmp_arena) {
+ _upb_DefBuilder_OomErr(builder);
+ } else {
+ _upb_FileDef_Create(builder, file_proto);
+ upb_strtable_insert(&s->files, name.data, name.size,
+ upb_value_constptr(builder->file), builder->arena);
+ UPB_ASSERT(upb_Status_IsOk(status));
+ upb_Arena_Fuse(s->arena, builder->arena);
+ }
+
+ if (builder->arena) upb_Arena_Free(builder->arena);
+ if (builder->tmp_arena) upb_Arena_Free(builder->tmp_arena);
+ return builder->file;
+}
+
+static const upb_FileDef* _upb_DefPool_AddFile(
+ upb_DefPool* s, const UPB_DESC(FileDescriptorProto) * file_proto,
+ const upb_MiniTableFile* layout, upb_Status* status) {
+ const upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto);
+
+ // Determine whether we already know about this file.
+ {
+ upb_value v;
+ if (upb_strtable_lookup2(&s->files, name.data, name.size, &v)) {
+ upb_Status_SetErrorFormat(status,
+ "duplicate file name " UPB_STRINGVIEW_FORMAT,
+ UPB_STRINGVIEW_ARGS(name));
+ return NULL;
+ }
+ }
+
+ upb_DefBuilder ctx = {
+ .symtab = s,
+ .layout = layout,
+ .platform = s->platform,
+ .msg_count = 0,
+ .enum_count = 0,
+ .ext_count = 0,
+ .status = status,
+ .file = NULL,
+ .arena = upb_Arena_New(),
+ .tmp_arena = upb_Arena_New(),
+ };
+
+ return upb_DefBuilder_AddFileToPool(&ctx, s, file_proto, name, status);
+}
+
+const upb_FileDef* upb_DefPool_AddFile(upb_DefPool* s,
+ const UPB_DESC(FileDescriptorProto) *
+ file_proto,
+ upb_Status* status) {
+ return _upb_DefPool_AddFile(s, file_proto, NULL, status);
+}
+
+bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init,
+ bool rebuild_minitable) {
+ /* Since this function should never fail (it would indicate a bug in upb) we
+ * print errors to stderr instead of returning error status to the user. */
+ _upb_DefPool_Init** deps = init->deps;
+ UPB_DESC(FileDescriptorProto) * file;
+ upb_Arena* arena;
+ upb_Status status;
+
+ upb_Status_Clear(&status);
+
+ if (upb_DefPool_FindFileByName(s, init->filename)) {
+ return true;
+ }
+
+ arena = upb_Arena_New();
+
+ for (; *deps; deps++) {
+ if (!_upb_DefPool_LoadDefInitEx(s, *deps, rebuild_minitable)) goto err;
+ }
+
+ file = UPB_DESC(FileDescriptorProto_parse_ex)(
+ init->descriptor.data, init->descriptor.size, NULL,
+ kUpb_DecodeOption_AliasString, arena);
+ s->bytes_loaded += init->descriptor.size;
+
+ if (!file) {
+ upb_Status_SetErrorFormat(
+ &status,
+ "Failed to parse compiled-in descriptor for file '%s'. This should "
+ "never happen.",
+ init->filename);
+ goto err;
+ }
+
+ const upb_MiniTableFile* mt = rebuild_minitable ? NULL : init->layout;
+ if (!_upb_DefPool_AddFile(s, file, mt, &status)) {
+ goto err;
+ }
+
+ upb_Arena_Free(arena);
+ return true;
+
+err:
+ fprintf(stderr,
+ "Error loading compiled-in descriptor for file '%s' (this should "
+ "never happen): %s\n",
+ init->filename, upb_Status_ErrorMessage(&status));
+ upb_Arena_Free(arena);
+ return false;
+}
+
+size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s) {
+ return s->bytes_loaded;
+}
+
+upb_Arena* _upb_DefPool_Arena(const upb_DefPool* s) { return s->arena; }
+
+const upb_FieldDef* upb_DefPool_FindExtensionByMiniTable(
+ const upb_DefPool* s, const upb_MiniTableExtension* ext) {
+ upb_value v;
+ bool ok = upb_inttable_lookup(&s->exts, (uintptr_t)ext, &v);
+ UPB_ASSERT(ok);
+ return upb_value_getconstptr(v);
+}
+
+const upb_FieldDef* upb_DefPool_FindExtensionByNumber(const upb_DefPool* s,
+ const upb_MessageDef* m,
+ int32_t fieldnum) {
+ const upb_MiniTable* t = upb_MessageDef_MiniTable(m);
+ const upb_MiniTableExtension* ext =
+ upb_ExtensionRegistry_Lookup(s->extreg, t, fieldnum);
+ return ext ? upb_DefPool_FindExtensionByMiniTable(s, ext) : NULL;
+}
+
+const upb_ExtensionRegistry* upb_DefPool_ExtensionRegistry(
+ const upb_DefPool* s) {
+ return s->extreg;
+}
+
+const upb_FieldDef** upb_DefPool_GetAllExtensions(const upb_DefPool* s,
+ const upb_MessageDef* m,
+ size_t* count) {
+ size_t n = 0;
+ intptr_t iter = UPB_INTTABLE_BEGIN;
+ uintptr_t key;
+ upb_value val;
+ // This is O(all exts) instead of O(exts for m). If we need this to be
+ // efficient we may need to make extreg into a two-level table, or have a
+ // second per-message index.
+ while (upb_inttable_next(&s->exts, &key, &val, &iter)) {
+ const upb_FieldDef* f = upb_value_getconstptr(val);
+ if (upb_FieldDef_ContainingType(f) == m) n++;
+ }
+ const upb_FieldDef** exts = malloc(n * sizeof(*exts));
+ iter = UPB_INTTABLE_BEGIN;
+ size_t i = 0;
+ while (upb_inttable_next(&s->exts, &key, &val, &iter)) {
+ const upb_FieldDef* f = upb_value_getconstptr(val);
+ if (upb_FieldDef_ContainingType(f) == m) exts[i++] = f;
+ }
+ *count = n;
+ return exts;
+}
+
+bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init) {
+ return _upb_DefPool_LoadDefInitEx(s, init, false);
+}
diff --git a/upb/upb/reflection/def_pool.h b/upb/upb/reflection/def_pool.h
new file mode 100644
index 0000000..7ca3619
--- /dev/null
+++ b/upb/upb/reflection/def_pool.h
@@ -0,0 +1,110 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// IWYU pragma: private, include "upb/reflection/def.h"
+
+#ifndef UPB_REFLECTION_DEF_POOL_H_
+#define UPB_REFLECTION_DEF_POOL_H_
+
+#include "upb/base/status.h"
+#include "upb/base/string_view.h"
+#include "upb/reflection/common.h"
+#include "upb/reflection/def_type.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+UPB_API void upb_DefPool_Free(upb_DefPool* s);
+
+UPB_API upb_DefPool* upb_DefPool_New(void);
+
+UPB_API const upb_MessageDef* upb_DefPool_FindMessageByName(
+ const upb_DefPool* s, const char* sym);
+
+const upb_MessageDef* upb_DefPool_FindMessageByNameWithSize(
+ const upb_DefPool* s, const char* sym, size_t len);
+
+UPB_API const upb_EnumDef* upb_DefPool_FindEnumByName(const upb_DefPool* s,
+ const char* sym);
+
+const upb_EnumValueDef* upb_DefPool_FindEnumByNameval(const upb_DefPool* s,
+ const char* sym);
+
+const upb_FileDef* upb_DefPool_FindFileByName(const upb_DefPool* s,
+ const char* name);
+
+const upb_FileDef* upb_DefPool_FindFileByNameWithSize(const upb_DefPool* s,
+ const char* name,
+ size_t len);
+
+const upb_FieldDef* upb_DefPool_FindExtensionByMiniTable(
+ const upb_DefPool* s, const upb_MiniTableExtension* ext);
+
+const upb_FieldDef* upb_DefPool_FindExtensionByName(const upb_DefPool* s,
+ const char* sym);
+
+const upb_FieldDef* upb_DefPool_FindExtensionByNameWithSize(
+ const upb_DefPool* s, const char* name, size_t size);
+
+const upb_FieldDef* upb_DefPool_FindExtensionByNumber(const upb_DefPool* s,
+ const upb_MessageDef* m,
+ int32_t fieldnum);
+
+const upb_ServiceDef* upb_DefPool_FindServiceByName(const upb_DefPool* s,
+ const char* name);
+
+const upb_ServiceDef* upb_DefPool_FindServiceByNameWithSize(
+ const upb_DefPool* s, const char* name, size_t size);
+
+const upb_FileDef* upb_DefPool_FindFileContainingSymbol(const upb_DefPool* s,
+ const char* name);
+
+UPB_API const upb_FileDef* upb_DefPool_AddFile(
+ upb_DefPool* s, const UPB_DESC(FileDescriptorProto) * file_proto,
+ upb_Status* status);
+
+const upb_ExtensionRegistry* upb_DefPool_ExtensionRegistry(
+ const upb_DefPool* s);
+
+const upb_FieldDef** upb_DefPool_GetAllExtensions(const upb_DefPool* s,
+ const upb_MessageDef* m,
+ size_t* count);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_DEF_POOL_H_ */
diff --git a/upb/upb/reflection/def_type.c b/upb/upb/reflection/def_type.c
new file mode 100644
index 0000000..dee6b2c
--- /dev/null
+++ b/upb/upb/reflection/def_type.c
@@ -0,0 +1,53 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/reflection/def_type.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+upb_deftype_t _upb_DefType_Type(upb_value v) {
+ const uintptr_t num = (uintptr_t)upb_value_getconstptr(v);
+ return num & UPB_DEFTYPE_MASK;
+}
+
+upb_value _upb_DefType_Pack(const void* ptr, upb_deftype_t type) {
+ uintptr_t num = (uintptr_t)ptr;
+ UPB_ASSERT((num & UPB_DEFTYPE_MASK) == 0);
+ num |= type;
+ return upb_value_constptr((const void*)num);
+}
+
+const void* _upb_DefType_Unpack(upb_value v, upb_deftype_t type) {
+ uintptr_t num = (uintptr_t)upb_value_getconstptr(v);
+ return (num & UPB_DEFTYPE_MASK) == type
+ ? (const void*)(num & ~UPB_DEFTYPE_MASK)
+ : NULL;
+}
diff --git a/upb/upb/reflection/def_type.h b/upb/upb/reflection/def_type.h
new file mode 100644
index 0000000..6e1b431
--- /dev/null
+++ b/upb/upb/reflection/def_type.h
@@ -0,0 +1,84 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_REFLECTION_DEF_TYPE_H_
+#define UPB_REFLECTION_DEF_TYPE_H_
+
+#include "upb/hash/common.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+// Inside a symtab we store tagged pointers to specific def types.
+typedef enum {
+ UPB_DEFTYPE_MASK = 7,
+
+ // Only inside symtab table.
+ UPB_DEFTYPE_EXT = 0,
+ UPB_DEFTYPE_MSG = 1,
+ UPB_DEFTYPE_ENUM = 2,
+ UPB_DEFTYPE_ENUMVAL = 3,
+ UPB_DEFTYPE_SERVICE = 4,
+
+ // Only inside message table.
+ UPB_DEFTYPE_FIELD = 0,
+ UPB_DEFTYPE_ONEOF = 1,
+ UPB_DEFTYPE_FIELD_JSONNAME = 2,
+} upb_deftype_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Our 3-bit pointer tagging requires all pointers to be multiples of 8.
+// The arena will always yield 8-byte-aligned addresses, however we put
+// the defs into arrays. For each element in the array to be 8-byte-aligned,
+// the sizes of each def type must also be a multiple of 8.
+//
+// If any of these asserts fail, we need to add or remove padding on 32-bit
+// machines (64-bit machines will have 8-byte alignment already due to
+// pointers, which all of these structs have).
+UPB_INLINE void _upb_DefType_CheckPadding(size_t size) {
+ UPB_ASSERT((size & UPB_DEFTYPE_MASK) == 0);
+}
+
+upb_deftype_t _upb_DefType_Type(upb_value v);
+
+upb_value _upb_DefType_Pack(const void* ptr, upb_deftype_t type);
+
+const void* _upb_DefType_Unpack(upb_value v, upb_deftype_t type);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_DEF_TYPE_H_ */
diff --git a/upb/upb/reflection/desc_state.c b/upb/upb/reflection/desc_state.c
new file mode 100644
index 0000000..7868308
--- /dev/null
+++ b/upb/upb/reflection/desc_state.c
@@ -0,0 +1,56 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/reflection/internal/desc_state.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+bool _upb_DescState_Grow(upb_DescState* d, upb_Arena* a) {
+ const size_t oldbufsize = d->bufsize;
+ const int used = d->ptr - d->buf;
+
+ if (!d->buf) {
+ d->buf = upb_Arena_Malloc(a, d->bufsize);
+ if (!d->buf) return false;
+ d->ptr = d->buf;
+ d->e.end = d->buf + d->bufsize;
+ }
+
+ if (oldbufsize - used < kUpb_MtDataEncoder_MinSize) {
+ d->bufsize *= 2;
+ d->buf = upb_Arena_Realloc(a, d->buf, oldbufsize, d->bufsize);
+ if (!d->buf) return false;
+ d->ptr = d->buf + used;
+ d->e.end = d->buf + d->bufsize;
+ }
+
+ return true;
+}
diff --git a/upb/upb/reflection/enum_def.c b/upb/upb/reflection/enum_def.c
new file mode 100644
index 0000000..df384ba
--- /dev/null
+++ b/upb/upb/reflection/enum_def.c
@@ -0,0 +1,314 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/reflection/internal/enum_def.h"
+
+#include "upb/hash/int_table.h"
+#include "upb/hash/str_table.h"
+#include "upb/mini_descriptor/decode.h"
+#include "upb/reflection/def_type.h"
+#include "upb/reflection/internal/def_builder.h"
+#include "upb/reflection/internal/desc_state.h"
+#include "upb/reflection/internal/enum_reserved_range.h"
+#include "upb/reflection/internal/enum_value_def.h"
+#include "upb/reflection/internal/file_def.h"
+#include "upb/reflection/internal/message_def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+struct upb_EnumDef {
+ const UPB_DESC(EnumOptions) * opts;
+ const upb_MiniTableEnum* layout; // Only for proto2.
+ const upb_FileDef* file;
+ const upb_MessageDef* containing_type; // Could be merged with "file".
+ const char* full_name;
+ upb_strtable ntoi;
+ upb_inttable iton;
+ const upb_EnumValueDef* values;
+ const upb_EnumReservedRange* res_ranges;
+ const upb_StringView* res_names;
+ int value_count;
+ int res_range_count;
+ int res_name_count;
+ int32_t defaultval;
+ bool is_closed;
+ bool is_sorted; // Whether all of the values are defined in ascending order.
+};
+
+upb_EnumDef* _upb_EnumDef_At(const upb_EnumDef* e, int i) {
+ return (upb_EnumDef*)&e[i];
+}
+
+const upb_MiniTableEnum* _upb_EnumDef_MiniTable(const upb_EnumDef* e) {
+ return e->layout;
+}
+
+bool _upb_EnumDef_Insert(upb_EnumDef* e, upb_EnumValueDef* v, upb_Arena* a) {
+ const char* name = upb_EnumValueDef_Name(v);
+ const upb_value val = upb_value_constptr(v);
+ bool ok = upb_strtable_insert(&e->ntoi, name, strlen(name), val, a);
+ if (!ok) return false;
+
+ // Multiple enumerators can have the same number, first one wins.
+ const int number = upb_EnumValueDef_Number(v);
+ if (!upb_inttable_lookup(&e->iton, number, NULL)) {
+ return upb_inttable_insert(&e->iton, number, val, a);
+ }
+ return true;
+}
+
+const UPB_DESC(EnumOptions) * upb_EnumDef_Options(const upb_EnumDef* e) {
+ return e->opts;
+}
+
+bool upb_EnumDef_HasOptions(const upb_EnumDef* e) {
+ return e->opts != (void*)kUpbDefOptDefault;
+}
+
+const char* upb_EnumDef_FullName(const upb_EnumDef* e) { return e->full_name; }
+
+const char* upb_EnumDef_Name(const upb_EnumDef* e) {
+ return _upb_DefBuilder_FullToShort(e->full_name);
+}
+
+const upb_FileDef* upb_EnumDef_File(const upb_EnumDef* e) { return e->file; }
+
+const upb_MessageDef* upb_EnumDef_ContainingType(const upb_EnumDef* e) {
+ return e->containing_type;
+}
+
+int32_t upb_EnumDef_Default(const upb_EnumDef* e) {
+ UPB_ASSERT(upb_EnumDef_FindValueByNumber(e, e->defaultval));
+ return e->defaultval;
+}
+
+int upb_EnumDef_ReservedRangeCount(const upb_EnumDef* e) {
+ return e->res_range_count;
+}
+
+const upb_EnumReservedRange* upb_EnumDef_ReservedRange(const upb_EnumDef* e,
+ int i) {
+ UPB_ASSERT(0 <= i && i < e->res_range_count);
+ return _upb_EnumReservedRange_At(e->res_ranges, i);
+}
+
+int upb_EnumDef_ReservedNameCount(const upb_EnumDef* e) {
+ return e->res_name_count;
+}
+
+upb_StringView upb_EnumDef_ReservedName(const upb_EnumDef* e, int i) {
+ UPB_ASSERT(0 <= i && i < e->res_name_count);
+ return e->res_names[i];
+}
+
+int upb_EnumDef_ValueCount(const upb_EnumDef* e) { return e->value_count; }
+
+const upb_EnumValueDef* upb_EnumDef_FindValueByName(const upb_EnumDef* e,
+ const char* name) {
+ return upb_EnumDef_FindValueByNameWithSize(e, name, strlen(name));
+}
+
+const upb_EnumValueDef* upb_EnumDef_FindValueByNameWithSize(
+ const upb_EnumDef* e, const char* name, size_t size) {
+ upb_value v;
+ return upb_strtable_lookup2(&e->ntoi, name, size, &v)
+ ? upb_value_getconstptr(v)
+ : NULL;
+}
+
+const upb_EnumValueDef* upb_EnumDef_FindValueByNumber(const upb_EnumDef* e,
+ int32_t num) {
+ upb_value v;
+ return upb_inttable_lookup(&e->iton, num, &v) ? upb_value_getconstptr(v)
+ : NULL;
+}
+
+bool upb_EnumDef_CheckNumber(const upb_EnumDef* e, int32_t num) {
+ // We could use upb_EnumDef_FindValueByNumber(e, num) != NULL, but we expect
+ // this to be faster (especially for small numbers).
+ return upb_MiniTableEnum_CheckValue(e->layout, num);
+}
+
+const upb_EnumValueDef* upb_EnumDef_Value(const upb_EnumDef* e, int i) {
+ UPB_ASSERT(0 <= i && i < e->value_count);
+ return _upb_EnumValueDef_At(e->values, i);
+}
+
+bool upb_EnumDef_IsClosed(const upb_EnumDef* e) { return e->is_closed; }
+
+bool upb_EnumDef_MiniDescriptorEncode(const upb_EnumDef* e, upb_Arena* a,
+ upb_StringView* out) {
+ upb_DescState s;
+ _upb_DescState_Init(&s);
+
+ const upb_EnumValueDef** sorted = NULL;
+ if (!e->is_sorted) {
+ sorted = _upb_EnumValueDefs_Sorted(e->values, e->value_count, a);
+ if (!sorted) return false;
+ }
+
+ if (!_upb_DescState_Grow(&s, a)) return false;
+ s.ptr = upb_MtDataEncoder_StartEnum(&s.e, s.ptr);
+
+ // Duplicate values are allowed but we only encode each value once.
+ uint32_t previous = 0;
+
+ for (int i = 0; i < e->value_count; i++) {
+ const uint32_t current =
+ upb_EnumValueDef_Number(sorted ? sorted[i] : upb_EnumDef_Value(e, i));
+ if (i != 0 && previous == current) continue;
+
+ if (!_upb_DescState_Grow(&s, a)) return false;
+ s.ptr = upb_MtDataEncoder_PutEnumValue(&s.e, s.ptr, current);
+ previous = current;
+ }
+
+ if (!_upb_DescState_Grow(&s, a)) return false;
+ s.ptr = upb_MtDataEncoder_EndEnum(&s.e, s.ptr);
+
+ // There will always be room for this '\0' in the encoder buffer because
+ // kUpb_MtDataEncoder_MinSize is overkill for upb_MtDataEncoder_EndEnum().
+ UPB_ASSERT(s.ptr < s.buf + s.bufsize);
+ *s.ptr = '\0';
+
+ out->data = s.buf;
+ out->size = s.ptr - s.buf;
+ return true;
+}
+
+static upb_MiniTableEnum* create_enumlayout(upb_DefBuilder* ctx,
+ const upb_EnumDef* e) {
+ upb_StringView sv;
+ bool ok = upb_EnumDef_MiniDescriptorEncode(e, ctx->tmp_arena, &sv);
+ if (!ok) _upb_DefBuilder_Errf(ctx, "OOM while building enum MiniDescriptor");
+
+ upb_Status status;
+ upb_MiniTableEnum* layout =
+ upb_MiniTableEnum_Build(sv.data, sv.size, ctx->arena, &status);
+ if (!layout)
+ _upb_DefBuilder_Errf(ctx, "Error building enum MiniTable: %s", status.msg);
+ return layout;
+}
+
+static upb_StringView* _upb_EnumReservedNames_New(
+ upb_DefBuilder* ctx, int n, const upb_StringView* protos) {
+ upb_StringView* sv = _upb_DefBuilder_Alloc(ctx, sizeof(upb_StringView) * n);
+ for (int i = 0; i < n; i++) {
+ sv[i].data =
+ upb_strdup2(protos[i].data, protos[i].size, _upb_DefBuilder_Arena(ctx));
+ sv[i].size = protos[i].size;
+ }
+ return sv;
+}
+
+static void create_enumdef(upb_DefBuilder* ctx, const char* prefix,
+ const UPB_DESC(EnumDescriptorProto) * enum_proto,
+ upb_EnumDef* e) {
+ const UPB_DESC(EnumValueDescriptorProto)* const* values;
+ const UPB_DESC(EnumDescriptorProto_EnumReservedRange)* const* res_ranges;
+ const upb_StringView* res_names;
+ upb_StringView name;
+ size_t n_value, n_res_range, n_res_name;
+
+ // Must happen before _upb_DefBuilder_Add()
+ e->file = _upb_DefBuilder_File(ctx);
+
+ name = UPB_DESC(EnumDescriptorProto_name)(enum_proto);
+
+ e->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name);
+ _upb_DefBuilder_Add(ctx, e->full_name,
+ _upb_DefType_Pack(e, UPB_DEFTYPE_ENUM));
+
+ e->is_closed = (!UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3) &&
+ (upb_FileDef_Syntax(e->file) == kUpb_Syntax_Proto2);
+
+ values = UPB_DESC(EnumDescriptorProto_value)(enum_proto, &n_value);
+
+ bool ok = upb_strtable_init(&e->ntoi, n_value, ctx->arena);
+ if (!ok) _upb_DefBuilder_OomErr(ctx);
+
+ ok = upb_inttable_init(&e->iton, ctx->arena);
+ if (!ok) _upb_DefBuilder_OomErr(ctx);
+
+ e->defaultval = 0;
+ e->value_count = n_value;
+ e->values =
+ _upb_EnumValueDefs_New(ctx, prefix, n_value, values, e, &e->is_sorted);
+
+ if (n_value == 0) {
+ _upb_DefBuilder_Errf(ctx, "enums must contain at least one value (%s)",
+ e->full_name);
+ }
+
+ res_ranges =
+ UPB_DESC(EnumDescriptorProto_reserved_range)(enum_proto, &n_res_range);
+ e->res_range_count = n_res_range;
+ e->res_ranges = _upb_EnumReservedRanges_New(ctx, n_res_range, res_ranges, e);
+
+ res_names =
+ UPB_DESC(EnumDescriptorProto_reserved_name)(enum_proto, &n_res_name);
+ e->res_name_count = n_res_name;
+ e->res_names = _upb_EnumReservedNames_New(ctx, n_res_name, res_names);
+
+ UPB_DEF_SET_OPTIONS(e->opts, EnumDescriptorProto, EnumOptions, enum_proto);
+
+ upb_inttable_compact(&e->iton, ctx->arena);
+
+ if (e->is_closed) {
+ if (ctx->layout) {
+ UPB_ASSERT(ctx->enum_count < ctx->layout->enum_count);
+ e->layout = ctx->layout->enums[ctx->enum_count++];
+ } else {
+ e->layout = create_enumlayout(ctx, e);
+ }
+ } else {
+ e->layout = NULL;
+ }
+}
+
+upb_EnumDef* _upb_EnumDefs_New(
+ upb_DefBuilder* ctx, int n,
+ const UPB_DESC(EnumDescriptorProto) * const* protos,
+ const upb_MessageDef* containing_type) {
+ _upb_DefType_CheckPadding(sizeof(upb_EnumDef));
+
+ // If a containing type is defined then get the full name from that.
+ // Otherwise use the package name from the file def.
+ const char* name = containing_type ? upb_MessageDef_FullName(containing_type)
+ : _upb_FileDef_RawPackage(ctx->file);
+
+ upb_EnumDef* e = _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumDef) * n);
+ for (int i = 0; i < n; i++) {
+ create_enumdef(ctx, name, protos[i], &e[i]);
+ e[i].containing_type = containing_type;
+ }
+ return e;
+}
diff --git a/upb/upb/reflection/enum_def.h b/upb/upb/reflection/enum_def.h
new file mode 100644
index 0000000..7ff0737
--- /dev/null
+++ b/upb/upb/reflection/enum_def.h
@@ -0,0 +1,83 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// IWYU pragma: private, include "upb/reflection/def.h"
+
+#ifndef UPB_REFLECTION_ENUM_DEF_H_
+#define UPB_REFLECTION_ENUM_DEF_H_
+
+#include "upb/base/string_view.h"
+#include "upb/reflection/common.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+bool upb_EnumDef_CheckNumber(const upb_EnumDef* e, int32_t num);
+const upb_MessageDef* upb_EnumDef_ContainingType(const upb_EnumDef* e);
+int32_t upb_EnumDef_Default(const upb_EnumDef* e);
+UPB_API const upb_FileDef* upb_EnumDef_File(const upb_EnumDef* e);
+const upb_EnumValueDef* upb_EnumDef_FindValueByName(const upb_EnumDef* e,
+ const char* name);
+UPB_API const upb_EnumValueDef* upb_EnumDef_FindValueByNameWithSize(
+ const upb_EnumDef* e, const char* name, size_t size);
+UPB_API const upb_EnumValueDef* upb_EnumDef_FindValueByNumber(
+ const upb_EnumDef* e, int32_t num);
+UPB_API const char* upb_EnumDef_FullName(const upb_EnumDef* e);
+bool upb_EnumDef_HasOptions(const upb_EnumDef* e);
+bool upb_EnumDef_IsClosed(const upb_EnumDef* e);
+
+// Creates a mini descriptor string for an enum, returns true on success.
+bool upb_EnumDef_MiniDescriptorEncode(const upb_EnumDef* e, upb_Arena* a,
+ upb_StringView* out);
+
+const char* upb_EnumDef_Name(const upb_EnumDef* e);
+const UPB_DESC(EnumOptions) * upb_EnumDef_Options(const upb_EnumDef* e);
+
+upb_StringView upb_EnumDef_ReservedName(const upb_EnumDef* e, int i);
+int upb_EnumDef_ReservedNameCount(const upb_EnumDef* e);
+
+const upb_EnumReservedRange* upb_EnumDef_ReservedRange(const upb_EnumDef* e,
+ int i);
+int upb_EnumDef_ReservedRangeCount(const upb_EnumDef* e);
+
+UPB_API const upb_EnumValueDef* upb_EnumDef_Value(const upb_EnumDef* e, int i);
+UPB_API int upb_EnumDef_ValueCount(const upb_EnumDef* e);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_ENUM_DEF_H_ */
diff --git a/upb/upb/reflection/enum_reserved_range.c b/upb/upb/reflection/enum_reserved_range.c
new file mode 100644
index 0000000..906261c
--- /dev/null
+++ b/upb/upb/reflection/enum_reserved_range.c
@@ -0,0 +1,86 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/reflection/internal/enum_reserved_range.h"
+
+#include "upb/reflection/enum_def.h"
+#include "upb/reflection/field_def.h"
+#include "upb/reflection/internal/def_builder.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+struct upb_EnumReservedRange {
+ int32_t start;
+ int32_t end;
+};
+
+upb_EnumReservedRange* _upb_EnumReservedRange_At(const upb_EnumReservedRange* r,
+ int i) {
+ return (upb_EnumReservedRange*)&r[i];
+}
+
+int32_t upb_EnumReservedRange_Start(const upb_EnumReservedRange* r) {
+ return r->start;
+}
+int32_t upb_EnumReservedRange_End(const upb_EnumReservedRange* r) {
+ return r->end;
+}
+
+upb_EnumReservedRange* _upb_EnumReservedRanges_New(
+ upb_DefBuilder* ctx, int n,
+ const UPB_DESC(EnumDescriptorProto_EnumReservedRange) * const* protos,
+ const upb_EnumDef* e) {
+ upb_EnumReservedRange* r =
+ _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumReservedRange) * n);
+
+ for (int i = 0; i < n; i++) {
+ const int32_t start =
+ UPB_DESC(EnumDescriptorProto_EnumReservedRange_start)(protos[i]);
+ const int32_t end =
+ UPB_DESC(EnumDescriptorProto_EnumReservedRange_end)(protos[i]);
+
+ // A full validation would also check that each range is disjoint, and that
+ // none of the fields overlap with the extension ranges, but we are just
+ // sanity checking here.
+
+ // Note: Not a typo! Unlike extension ranges and message reserved ranges,
+ // the end value of an enum reserved range is *inclusive*!
+ if (end < start) {
+ _upb_DefBuilder_Errf(ctx, "Reserved range (%d, %d) is invalid, enum=%s\n",
+ (int)start, (int)end, upb_EnumDef_FullName(e));
+ }
+
+ r[i].start = start;
+ r[i].end = end;
+ }
+
+ return r;
+}
diff --git a/upb/upb/reflection/enum_reserved_range.h b/upb/upb/reflection/enum_reserved_range.h
new file mode 100644
index 0000000..e1bf5ab
--- /dev/null
+++ b/upb/upb/reflection/enum_reserved_range.h
@@ -0,0 +1,54 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// IWYU pragma: private, include "upb/reflection/def.h"
+
+#ifndef UPB_REFLECTION_ENUM_RESERVED_RANGE_H_
+#define UPB_REFLECTION_ENUM_RESERVED_RANGE_H_
+
+#include "upb/reflection/common.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int32_t upb_EnumReservedRange_Start(const upb_EnumReservedRange* r);
+int32_t upb_EnumReservedRange_End(const upb_EnumReservedRange* r);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_ENUM_RESERVED_RANGE_H_ */
diff --git a/upb/upb/reflection/enum_value_def.c b/upb/upb/reflection/enum_value_def.c
new file mode 100644
index 0000000..000697b
--- /dev/null
+++ b/upb/upb/reflection/enum_value_def.c
@@ -0,0 +1,148 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/reflection/internal/enum_value_def.h"
+
+#include "upb/reflection/def_type.h"
+#include "upb/reflection/internal/def_builder.h"
+#include "upb/reflection/internal/enum_def.h"
+#include "upb/reflection/internal/file_def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+struct upb_EnumValueDef {
+ const UPB_DESC(EnumValueOptions) * opts;
+ const upb_EnumDef* parent;
+ const char* full_name;
+ int32_t number;
+};
+
+upb_EnumValueDef* _upb_EnumValueDef_At(const upb_EnumValueDef* v, int i) {
+ return (upb_EnumValueDef*)&v[i];
+}
+
+static int _upb_EnumValueDef_Compare(const void* p1, const void* p2) {
+ const uint32_t v1 = (*(const upb_EnumValueDef**)p1)->number;
+ const uint32_t v2 = (*(const upb_EnumValueDef**)p2)->number;
+ return (v1 < v2) ? -1 : (v1 > v2);
+}
+
+const upb_EnumValueDef** _upb_EnumValueDefs_Sorted(const upb_EnumValueDef* v,
+ int n, upb_Arena* a) {
+ // TODO: Try to replace this arena alloc with a persistent scratch buffer.
+ upb_EnumValueDef** out =
+ (upb_EnumValueDef**)upb_Arena_Malloc(a, n * sizeof(void*));
+ if (!out) return NULL;
+
+ for (int i = 0; i < n; i++) {
+ out[i] = (upb_EnumValueDef*)&v[i];
+ }
+ qsort(out, n, sizeof(void*), _upb_EnumValueDef_Compare);
+
+ return (const upb_EnumValueDef**)out;
+}
+
+const UPB_DESC(EnumValueOptions) *
+ upb_EnumValueDef_Options(const upb_EnumValueDef* v) {
+ return v->opts;
+}
+
+bool upb_EnumValueDef_HasOptions(const upb_EnumValueDef* v) {
+ return v->opts != (void*)kUpbDefOptDefault;
+}
+
+const upb_EnumDef* upb_EnumValueDef_Enum(const upb_EnumValueDef* v) {
+ return v->parent;
+}
+
+const char* upb_EnumValueDef_FullName(const upb_EnumValueDef* v) {
+ return v->full_name;
+}
+
+const char* upb_EnumValueDef_Name(const upb_EnumValueDef* v) {
+ return _upb_DefBuilder_FullToShort(v->full_name);
+}
+
+int32_t upb_EnumValueDef_Number(const upb_EnumValueDef* v) { return v->number; }
+
+uint32_t upb_EnumValueDef_Index(const upb_EnumValueDef* v) {
+ // Compute index in our parent's array.
+ return v - upb_EnumDef_Value(v->parent, 0);
+}
+
+static void create_enumvaldef(upb_DefBuilder* ctx, const char* prefix,
+ const UPB_DESC(EnumValueDescriptorProto) *
+ val_proto,
+ upb_EnumDef* e, upb_EnumValueDef* v) {
+ upb_StringView name = UPB_DESC(EnumValueDescriptorProto_name)(val_proto);
+
+ v->parent = e; // Must happen prior to _upb_DefBuilder_Add()
+ v->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name);
+ v->number = UPB_DESC(EnumValueDescriptorProto_number)(val_proto);
+ _upb_DefBuilder_Add(ctx, v->full_name,
+ _upb_DefType_Pack(v, UPB_DEFTYPE_ENUMVAL));
+
+ UPB_DEF_SET_OPTIONS(v->opts, EnumValueDescriptorProto, EnumValueOptions,
+ val_proto);
+
+ bool ok = _upb_EnumDef_Insert(e, v, ctx->arena);
+ if (!ok) _upb_DefBuilder_OomErr(ctx);
+}
+
+// Allocate and initialize an array of |n| enum value defs owned by |e|.
+upb_EnumValueDef* _upb_EnumValueDefs_New(
+ upb_DefBuilder* ctx, const char* prefix, int n,
+ const UPB_DESC(EnumValueDescriptorProto) * const* protos, upb_EnumDef* e,
+ bool* is_sorted) {
+ _upb_DefType_CheckPadding(sizeof(upb_EnumValueDef));
+
+ upb_EnumValueDef* v =
+ _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumValueDef) * n);
+
+ *is_sorted = true;
+ uint32_t previous = 0;
+ for (int i = 0; i < n; i++) {
+ create_enumvaldef(ctx, prefix, protos[i], e, &v[i]);
+
+ const uint32_t current = v[i].number;
+ if (previous > current) *is_sorted = false;
+ previous = current;
+ }
+
+ if (upb_FileDef_Syntax(ctx->file) == kUpb_Syntax_Proto3 && n > 0 &&
+ v[0].number != 0) {
+ _upb_DefBuilder_Errf(ctx,
+ "for proto3, the first enum value must be zero (%s)",
+ upb_EnumDef_FullName(e));
+ }
+
+ return v;
+}
diff --git a/upb/upb/reflection/enum_value_def.h b/upb/upb/reflection/enum_value_def.h
new file mode 100644
index 0000000..6023103
--- /dev/null
+++ b/upb/upb/reflection/enum_value_def.h
@@ -0,0 +1,60 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// IWYU pragma: private, include "upb/reflection/def.h"
+
+#ifndef UPB_REFLECTION_ENUM_VALUE_DEF_H_
+#define UPB_REFLECTION_ENUM_VALUE_DEF_H_
+
+#include "upb/reflection/common.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+const upb_EnumDef* upb_EnumValueDef_Enum(const upb_EnumValueDef* v);
+const char* upb_EnumValueDef_FullName(const upb_EnumValueDef* v);
+bool upb_EnumValueDef_HasOptions(const upb_EnumValueDef* v);
+uint32_t upb_EnumValueDef_Index(const upb_EnumValueDef* v);
+UPB_API const char* upb_EnumValueDef_Name(const upb_EnumValueDef* v);
+UPB_API int32_t upb_EnumValueDef_Number(const upb_EnumValueDef* v);
+const UPB_DESC(EnumValueOptions) *
+ upb_EnumValueDef_Options(const upb_EnumValueDef* v);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_ENUM_VALUE_DEF_H_ */
diff --git a/upb/upb/reflection/extension_range.c b/upb/upb/reflection/extension_range.c
new file mode 100644
index 0000000..43e723a
--- /dev/null
+++ b/upb/upb/reflection/extension_range.c
@@ -0,0 +1,97 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/reflection/internal/extension_range.h"
+
+#include "upb/reflection/field_def.h"
+#include "upb/reflection/internal/def_builder.h"
+#include "upb/reflection/message_def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+struct upb_ExtensionRange {
+ const UPB_DESC(ExtensionRangeOptions) * opts;
+ int32_t start;
+ int32_t end;
+};
+
+upb_ExtensionRange* _upb_ExtensionRange_At(const upb_ExtensionRange* r, int i) {
+ return (upb_ExtensionRange*)&r[i];
+}
+
+const UPB_DESC(ExtensionRangeOptions) *
+ upb_ExtensionRange_Options(const upb_ExtensionRange* r) {
+ return r->opts;
+}
+
+bool upb_ExtensionRange_HasOptions(const upb_ExtensionRange* r) {
+ return r->opts != (void*)kUpbDefOptDefault;
+}
+
+int32_t upb_ExtensionRange_Start(const upb_ExtensionRange* r) {
+ return r->start;
+}
+
+int32_t upb_ExtensionRange_End(const upb_ExtensionRange* r) { return r->end; }
+
+upb_ExtensionRange* _upb_ExtensionRanges_New(
+ upb_DefBuilder* ctx, int n,
+ const UPB_DESC(DescriptorProto_ExtensionRange) * const* protos,
+ const upb_MessageDef* m) {
+ upb_ExtensionRange* r =
+ _upb_DefBuilder_Alloc(ctx, sizeof(upb_ExtensionRange) * n);
+
+ for (int i = 0; i < n; i++) {
+ const int32_t start =
+ UPB_DESC(DescriptorProto_ExtensionRange_start)(protos[i]);
+ const int32_t end = UPB_DESC(DescriptorProto_ExtensionRange_end)(protos[i]);
+ const int32_t max = UPB_DESC(MessageOptions_message_set_wire_format)(
+ upb_MessageDef_Options(m))
+ ? INT32_MAX
+ : kUpb_MaxFieldNumber + 1;
+
+ // A full validation would also check that each range is disjoint, and that
+ // none of the fields overlap with the extension ranges, but we are just
+ // sanity checking here.
+ if (start < 1 || end <= start || end > max) {
+ _upb_DefBuilder_Errf(ctx,
+ "Extension range (%d, %d) is invalid, message=%s\n",
+ (int)start, (int)end, upb_MessageDef_FullName(m));
+ }
+
+ r[i].start = start;
+ r[i].end = end;
+ UPB_DEF_SET_OPTIONS(r[i].opts, DescriptorProto_ExtensionRange,
+ ExtensionRangeOptions, protos[i]);
+ }
+
+ return r;
+}
diff --git a/upb/upb/reflection/extension_range.h b/upb/upb/reflection/extension_range.h
new file mode 100644
index 0000000..9d87721
--- /dev/null
+++ b/upb/upb/reflection/extension_range.h
@@ -0,0 +1,58 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// IWYU pragma: private, include "upb/reflection/def.h"
+
+#ifndef UPB_REFLECTION_EXTENSION_RANGE_H_
+#define UPB_REFLECTION_EXTENSION_RANGE_H_
+
+#include "upb/reflection/common.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int32_t upb_ExtensionRange_Start(const upb_ExtensionRange* r);
+int32_t upb_ExtensionRange_End(const upb_ExtensionRange* r);
+
+bool upb_ExtensionRange_HasOptions(const upb_ExtensionRange* r);
+const UPB_DESC(ExtensionRangeOptions) *
+ upb_ExtensionRange_Options(const upb_ExtensionRange* r);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_EXTENSION_RANGE_H_ */
diff --git a/upb/upb/reflection/field_def.c b/upb/upb/reflection/field_def.c
new file mode 100644
index 0000000..5789048
--- /dev/null
+++ b/upb/upb/reflection/field_def.c
@@ -0,0 +1,943 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/reflection/internal/field_def.h"
+
+#include <ctype.h>
+#include <errno.h>
+
+#include "upb/mini_descriptor/decode.h"
+#include "upb/mini_descriptor/internal/modifiers.h"
+#include "upb/reflection/def.h"
+#include "upb/reflection/def_pool.h"
+#include "upb/reflection/def_type.h"
+#include "upb/reflection/internal/def_builder.h"
+#include "upb/reflection/internal/desc_state.h"
+#include "upb/reflection/internal/enum_def.h"
+#include "upb/reflection/internal/enum_value_def.h"
+#include "upb/reflection/internal/file_def.h"
+#include "upb/reflection/internal/message_def.h"
+#include "upb/reflection/internal/oneof_def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#define UPB_FIELD_TYPE_UNSPECIFIED 0
+
+typedef struct {
+ size_t len;
+ char str[1]; // Null-terminated string data follows.
+} str_t;
+
+struct upb_FieldDef {
+ const UPB_DESC(FieldOptions) * opts;
+ const upb_FileDef* file;
+ const upb_MessageDef* msgdef;
+ const char* full_name;
+ const char* json_name;
+ union {
+ int64_t sint;
+ uint64_t uint;
+ double dbl;
+ float flt;
+ bool boolean;
+ str_t* str;
+ void* msg; // Always NULL.
+ } defaultval;
+ union {
+ const upb_OneofDef* oneof;
+ const upb_MessageDef* extension_scope;
+ } scope;
+ union {
+ const upb_MessageDef* msgdef;
+ const upb_EnumDef* enumdef;
+ const UPB_DESC(FieldDescriptorProto) * unresolved;
+ } sub;
+ uint32_t number_;
+ uint16_t index_;
+ uint16_t layout_index; // Index into msgdef->layout->fields or file->exts
+ bool has_default;
+ bool has_json_name;
+ bool has_presence;
+ bool is_extension;
+ bool is_packed;
+ bool is_proto3_optional;
+ upb_FieldType type_;
+ upb_Label label_;
+#if UINTPTR_MAX == 0xffffffff
+ uint32_t padding; // Increase size to a multiple of 8.
+#endif
+};
+
+upb_FieldDef* _upb_FieldDef_At(const upb_FieldDef* f, int i) {
+ return (upb_FieldDef*)&f[i];
+}
+
+const UPB_DESC(FieldOptions) * upb_FieldDef_Options(const upb_FieldDef* f) {
+ return f->opts;
+}
+
+bool upb_FieldDef_HasOptions(const upb_FieldDef* f) {
+ return f->opts != (void*)kUpbDefOptDefault;
+}
+
+const char* upb_FieldDef_FullName(const upb_FieldDef* f) {
+ return f->full_name;
+}
+
+upb_CType upb_FieldDef_CType(const upb_FieldDef* f) {
+ switch (f->type_) {
+ case kUpb_FieldType_Double:
+ return kUpb_CType_Double;
+ case kUpb_FieldType_Float:
+ return kUpb_CType_Float;
+ case kUpb_FieldType_Int64:
+ case kUpb_FieldType_SInt64:
+ case kUpb_FieldType_SFixed64:
+ return kUpb_CType_Int64;
+ case kUpb_FieldType_Int32:
+ case kUpb_FieldType_SFixed32:
+ case kUpb_FieldType_SInt32:
+ return kUpb_CType_Int32;
+ case kUpb_FieldType_UInt64:
+ case kUpb_FieldType_Fixed64:
+ return kUpb_CType_UInt64;
+ case kUpb_FieldType_UInt32:
+ case kUpb_FieldType_Fixed32:
+ return kUpb_CType_UInt32;
+ case kUpb_FieldType_Enum:
+ return kUpb_CType_Enum;
+ case kUpb_FieldType_Bool:
+ return kUpb_CType_Bool;
+ case kUpb_FieldType_String:
+ return kUpb_CType_String;
+ case kUpb_FieldType_Bytes:
+ return kUpb_CType_Bytes;
+ case kUpb_FieldType_Group:
+ case kUpb_FieldType_Message:
+ return kUpb_CType_Message;
+ }
+ UPB_UNREACHABLE();
+}
+
+upb_FieldType upb_FieldDef_Type(const upb_FieldDef* f) { return f->type_; }
+
+uint32_t upb_FieldDef_Index(const upb_FieldDef* f) { return f->index_; }
+
+upb_Label upb_FieldDef_Label(const upb_FieldDef* f) { return f->label_; }
+
+uint32_t upb_FieldDef_Number(const upb_FieldDef* f) { return f->number_; }
+
+bool upb_FieldDef_IsExtension(const upb_FieldDef* f) { return f->is_extension; }
+
+bool upb_FieldDef_IsPacked(const upb_FieldDef* f) { return f->is_packed; }
+
+const char* upb_FieldDef_Name(const upb_FieldDef* f) {
+ return _upb_DefBuilder_FullToShort(f->full_name);
+}
+
+const char* upb_FieldDef_JsonName(const upb_FieldDef* f) {
+ return f->json_name;
+}
+
+bool upb_FieldDef_HasJsonName(const upb_FieldDef* f) {
+ return f->has_json_name;
+}
+
+const upb_FileDef* upb_FieldDef_File(const upb_FieldDef* f) { return f->file; }
+
+const upb_MessageDef* upb_FieldDef_ContainingType(const upb_FieldDef* f) {
+ return f->msgdef;
+}
+
+const upb_MessageDef* upb_FieldDef_ExtensionScope(const upb_FieldDef* f) {
+ return f->is_extension ? f->scope.extension_scope : NULL;
+}
+
+const upb_OneofDef* upb_FieldDef_ContainingOneof(const upb_FieldDef* f) {
+ return f->is_extension ? NULL : f->scope.oneof;
+}
+
+const upb_OneofDef* upb_FieldDef_RealContainingOneof(const upb_FieldDef* f) {
+ const upb_OneofDef* oneof = upb_FieldDef_ContainingOneof(f);
+ if (!oneof || upb_OneofDef_IsSynthetic(oneof)) return NULL;
+ return oneof;
+}
+
+upb_MessageValue upb_FieldDef_Default(const upb_FieldDef* f) {
+ upb_MessageValue ret;
+
+ if (upb_FieldDef_IsRepeated(f) || upb_FieldDef_IsSubMessage(f)) {
+ return (upb_MessageValue){.msg_val = NULL};
+ }
+
+ switch (upb_FieldDef_CType(f)) {
+ case kUpb_CType_Bool:
+ return (upb_MessageValue){.bool_val = f->defaultval.boolean};
+ case kUpb_CType_Int64:
+ return (upb_MessageValue){.int64_val = f->defaultval.sint};
+ case kUpb_CType_UInt64:
+ return (upb_MessageValue){.uint64_val = f->defaultval.uint};
+ case kUpb_CType_Enum:
+ case kUpb_CType_Int32:
+ return (upb_MessageValue){.int32_val = (int32_t)f->defaultval.sint};
+ case kUpb_CType_UInt32:
+ return (upb_MessageValue){.uint32_val = (uint32_t)f->defaultval.uint};
+ case kUpb_CType_Float:
+ return (upb_MessageValue){.float_val = f->defaultval.flt};
+ case kUpb_CType_Double:
+ return (upb_MessageValue){.double_val = f->defaultval.dbl};
+ case kUpb_CType_String:
+ case kUpb_CType_Bytes: {
+ str_t* str = f->defaultval.str;
+ if (str) {
+ return (upb_MessageValue){
+ .str_val = (upb_StringView){.data = str->str, .size = str->len}};
+ } else {
+ return (upb_MessageValue){
+ .str_val = (upb_StringView){.data = NULL, .size = 0}};
+ }
+ }
+ default:
+ UPB_UNREACHABLE();
+ }
+
+ return ret;
+}
+
+const upb_MessageDef* upb_FieldDef_MessageSubDef(const upb_FieldDef* f) {
+ return upb_FieldDef_CType(f) == kUpb_CType_Message ? f->sub.msgdef : NULL;
+}
+
+const upb_EnumDef* upb_FieldDef_EnumSubDef(const upb_FieldDef* f) {
+ return upb_FieldDef_CType(f) == kUpb_CType_Enum ? f->sub.enumdef : NULL;
+}
+
+const upb_MiniTableField* upb_FieldDef_MiniTable(const upb_FieldDef* f) {
+ if (upb_FieldDef_IsExtension(f)) {
+ const upb_FileDef* file = upb_FieldDef_File(f);
+ return (upb_MiniTableField*)_upb_FileDef_ExtensionMiniTable(
+ file, f->layout_index);
+ } else {
+ const upb_MiniTable* layout = upb_MessageDef_MiniTable(f->msgdef);
+ return &layout->fields[f->layout_index];
+ }
+}
+
+const upb_MiniTableExtension* _upb_FieldDef_ExtensionMiniTable(
+ const upb_FieldDef* f) {
+ UPB_ASSERT(upb_FieldDef_IsExtension(f));
+ const upb_FileDef* file = upb_FieldDef_File(f);
+ return _upb_FileDef_ExtensionMiniTable(file, f->layout_index);
+}
+
+bool _upb_FieldDef_IsClosedEnum(const upb_FieldDef* f) {
+ if (f->type_ != kUpb_FieldType_Enum) return false;
+ return upb_EnumDef_IsClosed(f->sub.enumdef);
+}
+
+bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f) {
+ return f->is_proto3_optional;
+}
+
+int _upb_FieldDef_LayoutIndex(const upb_FieldDef* f) { return f->layout_index; }
+
+uint64_t _upb_FieldDef_Modifiers(const upb_FieldDef* f) {
+ uint64_t out = f->is_packed ? kUpb_FieldModifier_IsPacked : 0;
+
+ switch (f->label_) {
+ case kUpb_Label_Optional:
+ if (!upb_FieldDef_HasPresence(f)) {
+ out |= kUpb_FieldModifier_IsProto3Singular;
+ }
+ break;
+ case kUpb_Label_Repeated:
+ out |= kUpb_FieldModifier_IsRepeated;
+ break;
+ case kUpb_Label_Required:
+ out |= kUpb_FieldModifier_IsRequired;
+ break;
+ }
+
+ if (_upb_FieldDef_IsClosedEnum(f)) {
+ out |= kUpb_FieldModifier_IsClosedEnum;
+ }
+ return out;
+}
+
+bool upb_FieldDef_HasDefault(const upb_FieldDef* f) { return f->has_default; }
+bool upb_FieldDef_HasPresence(const upb_FieldDef* f) { return f->has_presence; }
+
+bool upb_FieldDef_HasSubDef(const upb_FieldDef* f) {
+ return upb_FieldDef_IsSubMessage(f) ||
+ upb_FieldDef_CType(f) == kUpb_CType_Enum;
+}
+
+bool upb_FieldDef_IsMap(const upb_FieldDef* f) {
+ return upb_FieldDef_IsRepeated(f) && upb_FieldDef_IsSubMessage(f) &&
+ upb_MessageDef_IsMapEntry(upb_FieldDef_MessageSubDef(f));
+}
+
+bool upb_FieldDef_IsOptional(const upb_FieldDef* f) {
+ return upb_FieldDef_Label(f) == kUpb_Label_Optional;
+}
+
+bool upb_FieldDef_IsPrimitive(const upb_FieldDef* f) {
+ return !upb_FieldDef_IsString(f) && !upb_FieldDef_IsSubMessage(f);
+}
+
+bool upb_FieldDef_IsRepeated(const upb_FieldDef* f) {
+ return upb_FieldDef_Label(f) == kUpb_Label_Repeated;
+}
+
+bool upb_FieldDef_IsRequired(const upb_FieldDef* f) {
+ return upb_FieldDef_Label(f) == kUpb_Label_Required;
+}
+
+bool upb_FieldDef_IsString(const upb_FieldDef* f) {
+ return upb_FieldDef_CType(f) == kUpb_CType_String ||
+ upb_FieldDef_CType(f) == kUpb_CType_Bytes;
+}
+
+bool upb_FieldDef_IsSubMessage(const upb_FieldDef* f) {
+ return upb_FieldDef_CType(f) == kUpb_CType_Message;
+}
+
+static bool between(int32_t x, int32_t low, int32_t high) {
+ return x >= low && x <= high;
+}
+
+bool upb_FieldDef_checklabel(int32_t label) { return between(label, 1, 3); }
+bool upb_FieldDef_checktype(int32_t type) { return between(type, 1, 11); }
+bool upb_FieldDef_checkintfmt(int32_t fmt) { return between(fmt, 1, 3); }
+
+bool upb_FieldDef_checkdescriptortype(int32_t type) {
+ return between(type, 1, 18);
+}
+
+static bool streql2(const char* a, size_t n, const char* b) {
+ return n == strlen(b) && memcmp(a, b, n) == 0;
+}
+
+// Implement the transformation as described in the spec:
+// 1. upper case all letters after an underscore.
+// 2. remove all underscores.
+static char* make_json_name(const char* name, size_t size, upb_Arena* a) {
+ char* out = upb_Arena_Malloc(a, size + 1); // +1 is to add a trailing '\0'
+ if (out == NULL) return NULL;
+
+ bool ucase_next = false;
+ char* des = out;
+ for (size_t i = 0; i < size; i++) {
+ if (name[i] == '_') {
+ ucase_next = true;
+ } else {
+ *des++ = ucase_next ? toupper(name[i]) : name[i];
+ ucase_next = false;
+ }
+ }
+ *des++ = '\0';
+ return out;
+}
+
+static str_t* newstr(upb_DefBuilder* ctx, const char* data, size_t len) {
+ str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len);
+ if (!ret) _upb_DefBuilder_OomErr(ctx);
+ ret->len = len;
+ if (len) memcpy(ret->str, data, len);
+ ret->str[len] = '\0';
+ return ret;
+}
+
+static str_t* unescape(upb_DefBuilder* ctx, const upb_FieldDef* f,
+ const char* data, size_t len) {
+ // Size here is an upper bound; escape sequences could ultimately shrink it.
+ str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len);
+ char* dst = &ret->str[0];
+ const char* src = data;
+ const char* end = data + len;
+
+ while (src < end) {
+ if (*src == '\\') {
+ src++;
+ *dst++ = _upb_DefBuilder_ParseEscape(ctx, f, &src, end);
+ } else {
+ *dst++ = *src++;
+ }
+ }
+
+ ret->len = dst - &ret->str[0];
+ return ret;
+}
+
+static void parse_default(upb_DefBuilder* ctx, const char* str, size_t len,
+ upb_FieldDef* f) {
+ char* end;
+ char nullz[64];
+ errno = 0;
+
+ switch (upb_FieldDef_CType(f)) {
+ case kUpb_CType_Int32:
+ case kUpb_CType_Int64:
+ case kUpb_CType_UInt32:
+ case kUpb_CType_UInt64:
+ case kUpb_CType_Double:
+ case kUpb_CType_Float:
+ // Standard C number parsing functions expect null-terminated strings.
+ if (len >= sizeof(nullz) - 1) {
+ _upb_DefBuilder_Errf(ctx, "Default too long: %.*s", (int)len, str);
+ }
+ memcpy(nullz, str, len);
+ nullz[len] = '\0';
+ str = nullz;
+ break;
+ default:
+ break;
+ }
+
+ switch (upb_FieldDef_CType(f)) {
+ case kUpb_CType_Int32: {
+ long val = strtol(str, &end, 0);
+ if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) {
+ goto invalid;
+ }
+ f->defaultval.sint = val;
+ break;
+ }
+ case kUpb_CType_Enum: {
+ const upb_EnumDef* e = f->sub.enumdef;
+ const upb_EnumValueDef* ev =
+ upb_EnumDef_FindValueByNameWithSize(e, str, len);
+ if (!ev) {
+ goto invalid;
+ }
+ f->defaultval.sint = upb_EnumValueDef_Number(ev);
+ break;
+ }
+ case kUpb_CType_Int64: {
+ long long val = strtoll(str, &end, 0);
+ if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) {
+ goto invalid;
+ }
+ f->defaultval.sint = val;
+ break;
+ }
+ case kUpb_CType_UInt32: {
+ unsigned long val = strtoul(str, &end, 0);
+ if (val > UINT32_MAX || errno == ERANGE || *end) {
+ goto invalid;
+ }
+ f->defaultval.uint = val;
+ break;
+ }
+ case kUpb_CType_UInt64: {
+ unsigned long long val = strtoull(str, &end, 0);
+ if (val > UINT64_MAX || errno == ERANGE || *end) {
+ goto invalid;
+ }
+ f->defaultval.uint = val;
+ break;
+ }
+ case kUpb_CType_Double: {
+ double val = strtod(str, &end);
+ if (errno == ERANGE || *end) {
+ goto invalid;
+ }
+ f->defaultval.dbl = val;
+ break;
+ }
+ case kUpb_CType_Float: {
+ float val = strtof(str, &end);
+ if (errno == ERANGE || *end) {
+ goto invalid;
+ }
+ f->defaultval.flt = val;
+ break;
+ }
+ case kUpb_CType_Bool: {
+ if (streql2(str, len, "false")) {
+ f->defaultval.boolean = false;
+ } else if (streql2(str, len, "true")) {
+ f->defaultval.boolean = true;
+ } else {
+ goto invalid;
+ }
+ break;
+ }
+ case kUpb_CType_String:
+ f->defaultval.str = newstr(ctx, str, len);
+ break;
+ case kUpb_CType_Bytes:
+ f->defaultval.str = unescape(ctx, f, str, len);
+ break;
+ case kUpb_CType_Message:
+ /* Should not have a default value. */
+ _upb_DefBuilder_Errf(ctx, "Message should not have a default (%s)",
+ upb_FieldDef_FullName(f));
+ }
+
+ return;
+
+invalid:
+ _upb_DefBuilder_Errf(ctx, "Invalid default '%.*s' for field %s of type %d",
+ (int)len, str, upb_FieldDef_FullName(f),
+ (int)upb_FieldDef_Type(f));
+}
+
+static void set_default_default(upb_DefBuilder* ctx, upb_FieldDef* f) {
+ switch (upb_FieldDef_CType(f)) {
+ case kUpb_CType_Int32:
+ case kUpb_CType_Int64:
+ f->defaultval.sint = 0;
+ break;
+ case kUpb_CType_UInt64:
+ case kUpb_CType_UInt32:
+ f->defaultval.uint = 0;
+ break;
+ case kUpb_CType_Double:
+ case kUpb_CType_Float:
+ f->defaultval.dbl = 0;
+ break;
+ case kUpb_CType_String:
+ case kUpb_CType_Bytes:
+ f->defaultval.str = newstr(ctx, NULL, 0);
+ break;
+ case kUpb_CType_Bool:
+ f->defaultval.boolean = false;
+ break;
+ case kUpb_CType_Enum: {
+ const upb_EnumValueDef* v = upb_EnumDef_Value(f->sub.enumdef, 0);
+ f->defaultval.sint = upb_EnumValueDef_Number(v);
+ break;
+ }
+ case kUpb_CType_Message:
+ break;
+ }
+}
+
+static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix,
+ const UPB_DESC(FieldDescriptorProto) *
+ field_proto,
+ upb_MessageDef* m, upb_FieldDef* f) {
+ // Must happen before _upb_DefBuilder_Add()
+ f->file = _upb_DefBuilder_File(ctx);
+
+ if (!UPB_DESC(FieldDescriptorProto_has_name)(field_proto)) {
+ _upb_DefBuilder_Errf(ctx, "field has no name");
+ }
+
+ const upb_StringView name = UPB_DESC(FieldDescriptorProto_name)(field_proto);
+
+ f->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name);
+ f->label_ = (int)UPB_DESC(FieldDescriptorProto_label)(field_proto);
+ f->number_ = UPB_DESC(FieldDescriptorProto_number)(field_proto);
+ f->is_proto3_optional =
+ UPB_DESC(FieldDescriptorProto_proto3_optional)(field_proto);
+ f->msgdef = m;
+ f->scope.oneof = NULL;
+
+ f->has_json_name = UPB_DESC(FieldDescriptorProto_has_json_name)(field_proto);
+ if (f->has_json_name) {
+ const upb_StringView sv =
+ UPB_DESC(FieldDescriptorProto_json_name)(field_proto);
+ f->json_name = upb_strdup2(sv.data, sv.size, ctx->arena);
+ } else {
+ f->json_name = make_json_name(name.data, name.size, ctx->arena);
+ }
+ if (!f->json_name) _upb_DefBuilder_OomErr(ctx);
+
+ const bool has_type = UPB_DESC(FieldDescriptorProto_has_type)(field_proto);
+ const bool has_type_name =
+ UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto);
+
+ f->type_ = (int)UPB_DESC(FieldDescriptorProto_type)(field_proto);
+
+ if (has_type) {
+ switch (f->type_) {
+ case kUpb_FieldType_Message:
+ case kUpb_FieldType_Group:
+ case kUpb_FieldType_Enum:
+ if (!has_type_name) {
+ _upb_DefBuilder_Errf(ctx, "field of type %d requires type name (%s)",
+ (int)f->type_, f->full_name);
+ }
+ break;
+ default:
+ if (has_type_name) {
+ _upb_DefBuilder_Errf(
+ ctx, "invalid type for field with type_name set (%s, %d)",
+ f->full_name, (int)f->type_);
+ }
+ }
+ }
+
+ if (!has_type && has_type_name) {
+ f->type_ =
+ UPB_FIELD_TYPE_UNSPECIFIED; // We'll assign this in resolve_subdef()
+ } else {
+ if (f->type_ < kUpb_FieldType_Double || f->type_ > kUpb_FieldType_SInt64) {
+ _upb_DefBuilder_Errf(ctx, "invalid type for field %s (%d)", f->full_name,
+ f->type_);
+ }
+ }
+
+ if (f->label_ < kUpb_Label_Optional || f->label_ > kUpb_Label_Repeated) {
+ _upb_DefBuilder_Errf(ctx, "invalid label for field %s (%d)", f->full_name,
+ f->label_);
+ }
+
+ /* We can't resolve the subdef or (in the case of extensions) the containing
+ * message yet, because it may not have been defined yet. We stash a pointer
+ * to the field_proto until later when we can properly resolve it. */
+ f->sub.unresolved = field_proto;
+
+ if (f->label_ == kUpb_Label_Required &&
+ upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3) {
+ _upb_DefBuilder_Errf(ctx, "proto3 fields cannot be required (%s)",
+ f->full_name);
+ }
+
+ if (UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) {
+ int oneof_index = UPB_DESC(FieldDescriptorProto_oneof_index)(field_proto);
+
+ if (upb_FieldDef_Label(f) != kUpb_Label_Optional) {
+ _upb_DefBuilder_Errf(ctx, "fields in oneof must have OPTIONAL label (%s)",
+ f->full_name);
+ }
+
+ if (!m) {
+ _upb_DefBuilder_Errf(ctx, "oneof field (%s) has no containing msg",
+ f->full_name);
+ }
+
+ if (oneof_index >= upb_MessageDef_OneofCount(m)) {
+ _upb_DefBuilder_Errf(ctx, "oneof_index out of range (%s)", f->full_name);
+ }
+
+ upb_OneofDef* oneof = (upb_OneofDef*)upb_MessageDef_Oneof(m, oneof_index);
+ f->scope.oneof = oneof;
+
+ _upb_OneofDef_Insert(ctx, oneof, f, name.data, name.size);
+ }
+
+ UPB_DEF_SET_OPTIONS(f->opts, FieldDescriptorProto, FieldOptions, field_proto);
+
+ if (UPB_DESC(FieldOptions_has_packed)(f->opts)) {
+ f->is_packed = UPB_DESC(FieldOptions_packed)(f->opts);
+ } else {
+ // Repeated fields default to packed for proto3 only.
+ f->is_packed = has_type && upb_FieldDef_IsPrimitive(f) &&
+ f->label_ == kUpb_Label_Repeated &&
+ upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3;
+ }
+
+ f->has_presence =
+ (!upb_FieldDef_IsRepeated(f)) &&
+ (f->type_ == kUpb_FieldType_Message || f->type_ == kUpb_FieldType_Group ||
+ upb_FieldDef_ContainingOneof(f) ||
+ (upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto2));
+}
+
+static void _upb_FieldDef_CreateExt(upb_DefBuilder* ctx, const char* prefix,
+ const UPB_DESC(FieldDescriptorProto) *
+ field_proto,
+ upb_MessageDef* m, upb_FieldDef* f) {
+ f->is_extension = true;
+ _upb_FieldDef_Create(ctx, prefix, field_proto, m, f);
+
+ if (UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) {
+ _upb_DefBuilder_Errf(ctx, "oneof_index provided for extension field (%s)",
+ f->full_name);
+ }
+
+ f->scope.extension_scope = m;
+ _upb_DefBuilder_Add(ctx, f->full_name, _upb_DefType_Pack(f, UPB_DEFTYPE_EXT));
+ f->layout_index = ctx->ext_count++;
+
+ if (ctx->layout) {
+ UPB_ASSERT(_upb_FieldDef_ExtensionMiniTable(f)->field.number == f->number_);
+ }
+}
+
+static void _upb_FieldDef_CreateNotExt(upb_DefBuilder* ctx, const char* prefix,
+ const UPB_DESC(FieldDescriptorProto) *
+ field_proto,
+ upb_MessageDef* m, upb_FieldDef* f) {
+ f->is_extension = false;
+ _upb_FieldDef_Create(ctx, prefix, field_proto, m, f);
+
+ if (!UPB_DESC(FieldDescriptorProto_has_oneof_index)(field_proto)) {
+ if (f->is_proto3_optional) {
+ _upb_DefBuilder_Errf(
+ ctx,
+ "non-extension field (%s) with proto3_optional was not in a oneof",
+ f->full_name);
+ }
+ }
+
+ _upb_MessageDef_InsertField(ctx, m, f);
+}
+
+upb_FieldDef* _upb_Extensions_New(
+ upb_DefBuilder* ctx, int n,
+ const UPB_DESC(FieldDescriptorProto) * const* protos, const char* prefix,
+ upb_MessageDef* m) {
+ _upb_DefType_CheckPadding(sizeof(upb_FieldDef));
+ upb_FieldDef* defs =
+ (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n);
+
+ for (int i = 0; i < n; i++) {
+ upb_FieldDef* f = &defs[i];
+
+ _upb_FieldDef_CreateExt(ctx, prefix, protos[i], m, f);
+ f->index_ = i;
+ }
+
+ return defs;
+}
+
+upb_FieldDef* _upb_FieldDefs_New(
+ upb_DefBuilder* ctx, int n,
+ const UPB_DESC(FieldDescriptorProto) * const* protos, const char* prefix,
+ upb_MessageDef* m, bool* is_sorted) {
+ _upb_DefType_CheckPadding(sizeof(upb_FieldDef));
+ upb_FieldDef* defs =
+ (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n);
+
+ uint32_t previous = 0;
+ for (int i = 0; i < n; i++) {
+ upb_FieldDef* f = &defs[i];
+
+ _upb_FieldDef_CreateNotExt(ctx, prefix, protos[i], m, f);
+ f->index_ = i;
+ if (!ctx->layout) {
+ // Speculate that the def fields are sorted. We will always sort the
+ // MiniTable fields, so if defs are sorted then indices will match.
+ //
+ // If this is incorrect, we will overwrite later.
+ f->layout_index = i;
+ }
+
+ const uint32_t current = f->number_;
+ if (previous > current) *is_sorted = false;
+ previous = current;
+ }
+
+ return defs;
+}
+
+static void resolve_subdef(upb_DefBuilder* ctx, const char* prefix,
+ upb_FieldDef* f) {
+ const UPB_DESC(FieldDescriptorProto)* field_proto = f->sub.unresolved;
+ upb_StringView name = UPB_DESC(FieldDescriptorProto_type_name)(field_proto);
+ bool has_name = UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto);
+ switch ((int)f->type_) {
+ case UPB_FIELD_TYPE_UNSPECIFIED: {
+ // Type was not specified and must be inferred.
+ UPB_ASSERT(has_name);
+ upb_deftype_t type;
+ const void* def =
+ _upb_DefBuilder_ResolveAny(ctx, f->full_name, prefix, name, &type);
+ switch (type) {
+ case UPB_DEFTYPE_ENUM:
+ f->sub.enumdef = def;
+ f->type_ = kUpb_FieldType_Enum;
+ if (!UPB_DESC(FieldOptions_has_packed)(f->opts)) {
+ f->is_packed = f->label_ == kUpb_Label_Repeated &&
+ upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3;
+ }
+ break;
+ case UPB_DEFTYPE_MSG:
+ f->sub.msgdef = def;
+ f->type_ = kUpb_FieldType_Message; // It appears there is no way of
+ // this being a group.
+ f->has_presence = !upb_FieldDef_IsRepeated(f);
+ break;
+ default:
+ _upb_DefBuilder_Errf(ctx, "Couldn't resolve type name for field %s",
+ f->full_name);
+ }
+ break;
+ }
+ case kUpb_FieldType_Message:
+ case kUpb_FieldType_Group:
+ UPB_ASSERT(has_name);
+ f->sub.msgdef = _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name,
+ UPB_DEFTYPE_MSG);
+ break;
+ case kUpb_FieldType_Enum:
+ UPB_ASSERT(has_name);
+ f->sub.enumdef = _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name,
+ UPB_DEFTYPE_ENUM);
+ break;
+ default:
+ // No resolution necessary.
+ break;
+ }
+}
+
+static int _upb_FieldDef_Compare(const void* p1, const void* p2) {
+ const uint32_t v1 = (*(upb_FieldDef**)p1)->number_;
+ const uint32_t v2 = (*(upb_FieldDef**)p2)->number_;
+ return (v1 < v2) ? -1 : (v1 > v2);
+}
+
+// _upb_FieldDefs_Sorted() is mostly a pure function of its inputs, but has one
+// critical side effect that we depend on: it sets layout_index appropriately
+// for non-sorted lists of fields.
+const upb_FieldDef** _upb_FieldDefs_Sorted(const upb_FieldDef* f, int n,
+ upb_Arena* a) {
+ // TODO(salo): Replace this arena alloc with a persistent scratch buffer.
+ upb_FieldDef** out = (upb_FieldDef**)upb_Arena_Malloc(a, n * sizeof(void*));
+ if (!out) return NULL;
+
+ for (int i = 0; i < n; i++) {
+ out[i] = (upb_FieldDef*)&f[i];
+ }
+ qsort(out, n, sizeof(void*), _upb_FieldDef_Compare);
+
+ for (int i = 0; i < n; i++) {
+ out[i]->layout_index = i;
+ }
+ return (const upb_FieldDef**)out;
+}
+
+bool upb_FieldDef_MiniDescriptorEncode(const upb_FieldDef* f, upb_Arena* a,
+ upb_StringView* out) {
+ UPB_ASSERT(f->is_extension);
+
+ upb_DescState s;
+ _upb_DescState_Init(&s);
+
+ const int number = upb_FieldDef_Number(f);
+ const uint64_t modifiers = _upb_FieldDef_Modifiers(f);
+
+ if (!_upb_DescState_Grow(&s, a)) return false;
+ s.ptr = upb_MtDataEncoder_EncodeExtension(&s.e, s.ptr, f->type_, number,
+ modifiers);
+ *s.ptr = '\0';
+
+ out->data = s.buf;
+ out->size = s.ptr - s.buf;
+ return true;
+}
+
+static void resolve_extension(upb_DefBuilder* ctx, const char* prefix,
+ upb_FieldDef* f,
+ const UPB_DESC(FieldDescriptorProto) *
+ field_proto) {
+ if (!UPB_DESC(FieldDescriptorProto_has_extendee)(field_proto)) {
+ _upb_DefBuilder_Errf(ctx, "extension for field '%s' had no extendee",
+ f->full_name);
+ }
+
+ upb_StringView name = UPB_DESC(FieldDescriptorProto_extendee)(field_proto);
+ const upb_MessageDef* m =
+ _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_MSG);
+ f->msgdef = m;
+
+ if (!_upb_MessageDef_IsValidExtensionNumber(m, f->number_)) {
+ _upb_DefBuilder_Errf(
+ ctx,
+ "field number %u in extension %s has no extension range in message %s",
+ (unsigned)f->number_, f->full_name, upb_MessageDef_FullName(m));
+ }
+}
+
+void _upb_FieldDef_BuildMiniTableExtension(upb_DefBuilder* ctx,
+ const upb_FieldDef* f) {
+ const upb_MiniTableExtension* ext = _upb_FieldDef_ExtensionMiniTable(f);
+
+ if (ctx->layout) {
+ UPB_ASSERT(upb_FieldDef_Number(f) == ext->field.number);
+ } else {
+ upb_StringView desc;
+ if (!upb_FieldDef_MiniDescriptorEncode(f, ctx->tmp_arena, &desc)) {
+ _upb_DefBuilder_OomErr(ctx);
+ }
+
+ upb_MiniTableExtension* mut_ext = (upb_MiniTableExtension*)ext;
+ upb_MiniTableSub sub = {NULL};
+ if (upb_FieldDef_IsSubMessage(f)) {
+ sub.submsg = upb_MessageDef_MiniTable(f->sub.msgdef);
+ } else if (_upb_FieldDef_IsClosedEnum(f)) {
+ sub.subenum = _upb_EnumDef_MiniTable(f->sub.enumdef);
+ }
+ bool ok2 = upb_MiniTableExtension_Init(desc.data, desc.size, mut_ext,
+ upb_MessageDef_MiniTable(f->msgdef),
+ sub, ctx->status);
+ if (!ok2) _upb_DefBuilder_Errf(ctx, "Could not build extension mini table");
+ }
+
+ bool ok = _upb_DefPool_InsertExt(ctx->symtab, ext, f);
+ if (!ok) _upb_DefBuilder_OomErr(ctx);
+}
+
+static void resolve_default(upb_DefBuilder* ctx, upb_FieldDef* f,
+ const UPB_DESC(FieldDescriptorProto) *
+ field_proto) {
+ // Have to delay resolving of the default value until now because of the enum
+ // case, since enum defaults are specified with a label.
+ if (UPB_DESC(FieldDescriptorProto_has_default_value)(field_proto)) {
+ upb_StringView defaultval =
+ UPB_DESC(FieldDescriptorProto_default_value)(field_proto);
+
+ if (upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3) {
+ _upb_DefBuilder_Errf(ctx,
+ "proto3 fields cannot have explicit defaults (%s)",
+ f->full_name);
+ }
+
+ if (upb_FieldDef_IsSubMessage(f)) {
+ _upb_DefBuilder_Errf(ctx,
+ "message fields cannot have explicit defaults (%s)",
+ f->full_name);
+ }
+
+ parse_default(ctx, defaultval.data, defaultval.size, f);
+ f->has_default = true;
+ } else {
+ set_default_default(ctx, f);
+ f->has_default = false;
+ }
+}
+
+void _upb_FieldDef_Resolve(upb_DefBuilder* ctx, const char* prefix,
+ upb_FieldDef* f) {
+ // We have to stash this away since resolve_subdef() may overwrite it.
+ const UPB_DESC(FieldDescriptorProto)* field_proto = f->sub.unresolved;
+
+ resolve_subdef(ctx, prefix, f);
+ resolve_default(ctx, f, field_proto);
+
+ if (f->is_extension) {
+ resolve_extension(ctx, prefix, f, field_proto);
+ }
+}
diff --git a/upb/upb/reflection/field_def.h b/upb/upb/reflection/field_def.h
new file mode 100644
index 0000000..fb8b317
--- /dev/null
+++ b/upb/upb/reflection/field_def.h
@@ -0,0 +1,96 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// IWYU pragma: private, include "upb/reflection/def.h"
+
+#ifndef UPB_REFLECTION_FIELD_DEF_H_
+#define UPB_REFLECTION_FIELD_DEF_H_
+
+#include "upb/base/string_view.h"
+#include "upb/reflection/common.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+// Maximum field number allowed for FieldDefs.
+// This is an inherent limit of the protobuf wire format.
+#define kUpb_MaxFieldNumber ((1 << 29) - 1)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+const upb_OneofDef* upb_FieldDef_ContainingOneof(const upb_FieldDef* f);
+UPB_API const upb_MessageDef* upb_FieldDef_ContainingType(
+ const upb_FieldDef* f);
+UPB_API upb_CType upb_FieldDef_CType(const upb_FieldDef* f);
+UPB_API upb_MessageValue upb_FieldDef_Default(const upb_FieldDef* f);
+UPB_API const upb_EnumDef* upb_FieldDef_EnumSubDef(const upb_FieldDef* f);
+const upb_MessageDef* upb_FieldDef_ExtensionScope(const upb_FieldDef* f);
+UPB_API const upb_FileDef* upb_FieldDef_File(const upb_FieldDef* f);
+const char* upb_FieldDef_FullName(const upb_FieldDef* f);
+bool upb_FieldDef_HasDefault(const upb_FieldDef* f);
+bool upb_FieldDef_HasJsonName(const upb_FieldDef* f);
+bool upb_FieldDef_HasOptions(const upb_FieldDef* f);
+UPB_API bool upb_FieldDef_HasPresence(const upb_FieldDef* f);
+bool upb_FieldDef_HasSubDef(const upb_FieldDef* f);
+uint32_t upb_FieldDef_Index(const upb_FieldDef* f);
+bool upb_FieldDef_IsExtension(const upb_FieldDef* f);
+UPB_API bool upb_FieldDef_IsMap(const upb_FieldDef* f);
+bool upb_FieldDef_IsOptional(const upb_FieldDef* f);
+bool upb_FieldDef_IsPacked(const upb_FieldDef* f);
+bool upb_FieldDef_IsPrimitive(const upb_FieldDef* f);
+UPB_API bool upb_FieldDef_IsRepeated(const upb_FieldDef* f);
+bool upb_FieldDef_IsRequired(const upb_FieldDef* f);
+bool upb_FieldDef_IsString(const upb_FieldDef* f);
+UPB_API bool upb_FieldDef_IsSubMessage(const upb_FieldDef* f);
+UPB_API const char* upb_FieldDef_JsonName(const upb_FieldDef* f);
+UPB_API upb_Label upb_FieldDef_Label(const upb_FieldDef* f);
+UPB_API const upb_MessageDef* upb_FieldDef_MessageSubDef(const upb_FieldDef* f);
+
+// Creates a mini descriptor string for a field, returns true on success.
+bool upb_FieldDef_MiniDescriptorEncode(const upb_FieldDef* f, upb_Arena* a,
+ upb_StringView* out);
+
+const upb_MiniTableField* upb_FieldDef_MiniTable(const upb_FieldDef* f);
+UPB_API const char* upb_FieldDef_Name(const upb_FieldDef* f);
+UPB_API uint32_t upb_FieldDef_Number(const upb_FieldDef* f);
+const UPB_DESC(FieldOptions) * upb_FieldDef_Options(const upb_FieldDef* f);
+UPB_API const upb_OneofDef* upb_FieldDef_RealContainingOneof(
+ const upb_FieldDef* f);
+UPB_API upb_FieldType upb_FieldDef_Type(const upb_FieldDef* f);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_FIELD_DEF_H_ */
diff --git a/upb/upb/reflection/file_def.c b/upb/upb/reflection/file_def.c
new file mode 100644
index 0000000..c2b09e1
--- /dev/null
+++ b/upb/upb/reflection/file_def.c
@@ -0,0 +1,374 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/reflection/internal/file_def.h"
+
+#include "upb/reflection/def_pool.h"
+#include "upb/reflection/internal/def_builder.h"
+#include "upb/reflection/internal/enum_def.h"
+#include "upb/reflection/internal/field_def.h"
+#include "upb/reflection/internal/message_def.h"
+#include "upb/reflection/internal/service_def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+struct upb_FileDef {
+ const UPB_DESC(FileOptions) * opts;
+ const char* name;
+ const char* package;
+ const char* edition;
+
+ const upb_FileDef** deps;
+ const int32_t* public_deps;
+ const int32_t* weak_deps;
+ const upb_MessageDef* top_lvl_msgs;
+ const upb_EnumDef* top_lvl_enums;
+ const upb_FieldDef* top_lvl_exts;
+ const upb_ServiceDef* services;
+ const upb_MiniTableExtension** ext_layouts;
+ const upb_DefPool* symtab;
+
+ int dep_count;
+ int public_dep_count;
+ int weak_dep_count;
+ int top_lvl_msg_count;
+ int top_lvl_enum_count;
+ int top_lvl_ext_count;
+ int service_count;
+ int ext_count; // All exts in the file.
+ upb_Syntax syntax;
+};
+
+const UPB_DESC(FileOptions) * upb_FileDef_Options(const upb_FileDef* f) {
+ return f->opts;
+}
+
+bool upb_FileDef_HasOptions(const upb_FileDef* f) {
+ return f->opts != (void*)kUpbDefOptDefault;
+}
+
+const char* upb_FileDef_Name(const upb_FileDef* f) { return f->name; }
+
+const char* upb_FileDef_Package(const upb_FileDef* f) {
+ return f->package ? f->package : "";
+}
+
+const char* upb_FileDef_Edition(const upb_FileDef* f) {
+ return f->edition ? f->edition : "";
+}
+
+const char* _upb_FileDef_RawPackage(const upb_FileDef* f) { return f->package; }
+
+upb_Syntax upb_FileDef_Syntax(const upb_FileDef* f) { return f->syntax; }
+
+int upb_FileDef_TopLevelMessageCount(const upb_FileDef* f) {
+ return f->top_lvl_msg_count;
+}
+
+int upb_FileDef_DependencyCount(const upb_FileDef* f) { return f->dep_count; }
+
+int upb_FileDef_PublicDependencyCount(const upb_FileDef* f) {
+ return f->public_dep_count;
+}
+
+int upb_FileDef_WeakDependencyCount(const upb_FileDef* f) {
+ return f->weak_dep_count;
+}
+
+const int32_t* _upb_FileDef_PublicDependencyIndexes(const upb_FileDef* f) {
+ return f->public_deps;
+}
+
+const int32_t* _upb_FileDef_WeakDependencyIndexes(const upb_FileDef* f) {
+ return f->weak_deps;
+}
+
+int upb_FileDef_TopLevelEnumCount(const upb_FileDef* f) {
+ return f->top_lvl_enum_count;
+}
+
+int upb_FileDef_TopLevelExtensionCount(const upb_FileDef* f) {
+ return f->top_lvl_ext_count;
+}
+
+int upb_FileDef_ServiceCount(const upb_FileDef* f) { return f->service_count; }
+
+const upb_FileDef* upb_FileDef_Dependency(const upb_FileDef* f, int i) {
+ UPB_ASSERT(0 <= i && i < f->dep_count);
+ return f->deps[i];
+}
+
+const upb_FileDef* upb_FileDef_PublicDependency(const upb_FileDef* f, int i) {
+ UPB_ASSERT(0 <= i && i < f->public_dep_count);
+ return f->deps[f->public_deps[i]];
+}
+
+const upb_FileDef* upb_FileDef_WeakDependency(const upb_FileDef* f, int i) {
+ UPB_ASSERT(0 <= i && i < f->public_dep_count);
+ return f->deps[f->weak_deps[i]];
+}
+
+const upb_MessageDef* upb_FileDef_TopLevelMessage(const upb_FileDef* f, int i) {
+ UPB_ASSERT(0 <= i && i < f->top_lvl_msg_count);
+ return _upb_MessageDef_At(f->top_lvl_msgs, i);
+}
+
+const upb_EnumDef* upb_FileDef_TopLevelEnum(const upb_FileDef* f, int i) {
+ UPB_ASSERT(0 <= i && i < f->top_lvl_enum_count);
+ return _upb_EnumDef_At(f->top_lvl_enums, i);
+}
+
+const upb_FieldDef* upb_FileDef_TopLevelExtension(const upb_FileDef* f, int i) {
+ UPB_ASSERT(0 <= i && i < f->top_lvl_ext_count);
+ return _upb_FieldDef_At(f->top_lvl_exts, i);
+}
+
+const upb_ServiceDef* upb_FileDef_Service(const upb_FileDef* f, int i) {
+ UPB_ASSERT(0 <= i && i < f->service_count);
+ return _upb_ServiceDef_At(f->services, i);
+}
+
+const upb_DefPool* upb_FileDef_Pool(const upb_FileDef* f) { return f->symtab; }
+
+const upb_MiniTableExtension* _upb_FileDef_ExtensionMiniTable(
+ const upb_FileDef* f, int i) {
+ return f->ext_layouts[i];
+}
+
+static char* strviewdup(upb_DefBuilder* ctx, upb_StringView view) {
+ char* ret = upb_strdup2(view.data, view.size, _upb_DefBuilder_Arena(ctx));
+ if (!ret) _upb_DefBuilder_OomErr(ctx);
+ return ret;
+}
+
+static bool streql_view(upb_StringView view, const char* b) {
+ return view.size == strlen(b) && memcmp(view.data, b, view.size) == 0;
+}
+
+static int count_exts_in_msg(const UPB_DESC(DescriptorProto) * msg_proto) {
+ size_t n;
+ UPB_DESC(DescriptorProto_extension)(msg_proto, &n);
+ int ext_count = n;
+
+ const UPB_DESC(DescriptorProto)* const* nested_msgs =
+ UPB_DESC(DescriptorProto_nested_type)(msg_proto, &n);
+ for (size_t i = 0; i < n; i++) {
+ ext_count += count_exts_in_msg(nested_msgs[i]);
+ }
+
+ return ext_count;
+}
+
+// Allocate and initialize one file def, and add it to the context object.
+void _upb_FileDef_Create(upb_DefBuilder* ctx,
+ const UPB_DESC(FileDescriptorProto) * file_proto) {
+ upb_FileDef* file = _upb_DefBuilder_Alloc(ctx, sizeof(upb_FileDef));
+ ctx->file = file;
+
+ const UPB_DESC(DescriptorProto)* const* msgs;
+ const UPB_DESC(EnumDescriptorProto)* const* enums;
+ const UPB_DESC(FieldDescriptorProto)* const* exts;
+ const UPB_DESC(ServiceDescriptorProto)* const* services;
+ const upb_StringView* strs;
+ const int32_t* public_deps;
+ const int32_t* weak_deps;
+ size_t n;
+
+ file->symtab = ctx->symtab;
+
+ // Count all extensions in the file, to build a flat array of layouts.
+ UPB_DESC(FileDescriptorProto_extension)(file_proto, &n);
+ int ext_count = n;
+ msgs = UPB_DESC(FileDescriptorProto_message_type)(file_proto, &n);
+ for (size_t i = 0; i < n; i++) {
+ ext_count += count_exts_in_msg(msgs[i]);
+ }
+ file->ext_count = ext_count;
+
+ if (ctx->layout) {
+ // We are using the ext layouts that were passed in.
+ file->ext_layouts = ctx->layout->exts;
+ if (ctx->layout->ext_count != file->ext_count) {
+ _upb_DefBuilder_Errf(ctx,
+ "Extension count did not match layout (%d vs %d)",
+ ctx->layout->ext_count, file->ext_count);
+ }
+ } else {
+ // We are building ext layouts from scratch.
+ file->ext_layouts = _upb_DefBuilder_Alloc(
+ ctx, sizeof(*file->ext_layouts) * file->ext_count);
+ upb_MiniTableExtension* ext =
+ _upb_DefBuilder_Alloc(ctx, sizeof(*ext) * file->ext_count);
+ for (int i = 0; i < file->ext_count; i++) {
+ file->ext_layouts[i] = &ext[i];
+ }
+ }
+
+ upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto);
+ file->name = strviewdup(ctx, name);
+ if (strlen(file->name) != name.size) {
+ _upb_DefBuilder_Errf(ctx, "File name contained embedded NULL");
+ }
+
+ upb_StringView package = UPB_DESC(FileDescriptorProto_package)(file_proto);
+
+ if (package.size) {
+ _upb_DefBuilder_CheckIdentFull(ctx, package);
+ file->package = strviewdup(ctx, package);
+ } else {
+ file->package = NULL;
+ }
+
+ upb_StringView edition = UPB_DESC(FileDescriptorProto_edition)(file_proto);
+
+ if (edition.size == 0) {
+ file->edition = NULL;
+ } else {
+ // TODO(b/267770604): How should we validate this?
+ file->edition = strviewdup(ctx, edition);
+ if (strlen(file->edition) != edition.size) {
+ _upb_DefBuilder_Errf(ctx, "Edition name contained embedded NULL");
+ }
+ }
+
+ if (UPB_DESC(FileDescriptorProto_has_syntax)(file_proto)) {
+ upb_StringView syntax = UPB_DESC(FileDescriptorProto_syntax)(file_proto);
+
+ if (streql_view(syntax, "proto2")) {
+ file->syntax = kUpb_Syntax_Proto2;
+ } else if (streql_view(syntax, "proto3")) {
+ file->syntax = kUpb_Syntax_Proto3;
+ } else {
+ _upb_DefBuilder_Errf(ctx, "Invalid syntax '" UPB_STRINGVIEW_FORMAT "'",
+ UPB_STRINGVIEW_ARGS(syntax));
+ }
+ } else {
+ file->syntax = kUpb_Syntax_Proto2;
+ }
+
+ // Read options.
+ UPB_DEF_SET_OPTIONS(file->opts, FileDescriptorProto, FileOptions, file_proto);
+
+ // Verify dependencies.
+ strs = UPB_DESC(FileDescriptorProto_dependency)(file_proto, &n);
+ file->dep_count = n;
+ file->deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->deps) * n);
+
+ for (size_t i = 0; i < n; i++) {
+ upb_StringView str = strs[i];
+ file->deps[i] =
+ upb_DefPool_FindFileByNameWithSize(ctx->symtab, str.data, str.size);
+ if (!file->deps[i]) {
+ _upb_DefBuilder_Errf(ctx,
+ "Depends on file '" UPB_STRINGVIEW_FORMAT
+ "', but it has not been loaded",
+ UPB_STRINGVIEW_ARGS(str));
+ }
+ }
+
+ public_deps = UPB_DESC(FileDescriptorProto_public_dependency)(file_proto, &n);
+ file->public_dep_count = n;
+ file->public_deps =
+ _upb_DefBuilder_Alloc(ctx, sizeof(*file->public_deps) * n);
+ int32_t* mutable_public_deps = (int32_t*)file->public_deps;
+ for (size_t i = 0; i < n; i++) {
+ if (public_deps[i] >= file->dep_count) {
+ _upb_DefBuilder_Errf(ctx, "public_dep %d is out of range",
+ (int)public_deps[i]);
+ }
+ mutable_public_deps[i] = public_deps[i];
+ }
+
+ weak_deps = UPB_DESC(FileDescriptorProto_weak_dependency)(file_proto, &n);
+ file->weak_dep_count = n;
+ file->weak_deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->weak_deps) * n);
+ int32_t* mutable_weak_deps = (int32_t*)file->weak_deps;
+ for (size_t i = 0; i < n; i++) {
+ if (weak_deps[i] >= file->dep_count) {
+ _upb_DefBuilder_Errf(ctx, "weak_dep %d is out of range",
+ (int)weak_deps[i]);
+ }
+ mutable_weak_deps[i] = weak_deps[i];
+ }
+
+ // Create enums.
+ enums = UPB_DESC(FileDescriptorProto_enum_type)(file_proto, &n);
+ file->top_lvl_enum_count = n;
+ file->top_lvl_enums = _upb_EnumDefs_New(ctx, n, enums, NULL);
+
+ // Create extensions.
+ exts = UPB_DESC(FileDescriptorProto_extension)(file_proto, &n);
+ file->top_lvl_ext_count = n;
+ file->top_lvl_exts = _upb_Extensions_New(ctx, n, exts, file->package, NULL);
+
+ // Create messages.
+ msgs = UPB_DESC(FileDescriptorProto_message_type)(file_proto, &n);
+ file->top_lvl_msg_count = n;
+ file->top_lvl_msgs = _upb_MessageDefs_New(ctx, n, msgs, NULL);
+
+ // Create services.
+ services = UPB_DESC(FileDescriptorProto_service)(file_proto, &n);
+ file->service_count = n;
+ file->services = _upb_ServiceDefs_New(ctx, n, services);
+
+ // Now that all names are in the table, build layouts and resolve refs.
+
+ for (int i = 0; i < file->top_lvl_msg_count; i++) {
+ upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i);
+ _upb_MessageDef_Resolve(ctx, m);
+ }
+
+ for (int i = 0; i < file->top_lvl_ext_count; i++) {
+ upb_FieldDef* f = (upb_FieldDef*)upb_FileDef_TopLevelExtension(file, i);
+ _upb_FieldDef_Resolve(ctx, file->package, f);
+ }
+
+ for (int i = 0; i < file->top_lvl_msg_count; i++) {
+ upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i);
+ _upb_MessageDef_CreateMiniTable(ctx, (upb_MessageDef*)m);
+ }
+
+ for (int i = 0; i < file->top_lvl_ext_count; i++) {
+ upb_FieldDef* f = (upb_FieldDef*)upb_FileDef_TopLevelExtension(file, i);
+ _upb_FieldDef_BuildMiniTableExtension(ctx, f);
+ }
+
+ for (int i = 0; i < file->top_lvl_msg_count; i++) {
+ upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i);
+ _upb_MessageDef_LinkMiniTable(ctx, m);
+ }
+
+ if (file->ext_count) {
+ bool ok = upb_ExtensionRegistry_AddArray(
+ _upb_DefPool_ExtReg(ctx->symtab), file->ext_layouts, file->ext_count);
+ if (!ok) _upb_DefBuilder_OomErr(ctx);
+ }
+}
diff --git a/upb/upb/reflection/file_def.h b/upb/upb/reflection/file_def.h
new file mode 100644
index 0000000..4769876
--- /dev/null
+++ b/upb/upb/reflection/file_def.h
@@ -0,0 +1,80 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// IWYU pragma: private, include "upb/reflection/def.h"
+
+#ifndef UPB_REFLECTION_FILE_DEF_H_
+#define UPB_REFLECTION_FILE_DEF_H_
+
+#include "upb/reflection/common.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+const upb_FileDef* upb_FileDef_Dependency(const upb_FileDef* f, int i);
+int upb_FileDef_DependencyCount(const upb_FileDef* f);
+bool upb_FileDef_HasOptions(const upb_FileDef* f);
+UPB_API const char* upb_FileDef_Name(const upb_FileDef* f);
+const UPB_DESC(FileOptions) * upb_FileDef_Options(const upb_FileDef* f);
+const char* upb_FileDef_Package(const upb_FileDef* f);
+const char* upb_FileDef_Edition(const upb_FileDef* f);
+UPB_API const upb_DefPool* upb_FileDef_Pool(const upb_FileDef* f);
+
+const upb_FileDef* upb_FileDef_PublicDependency(const upb_FileDef* f, int i);
+int upb_FileDef_PublicDependencyCount(const upb_FileDef* f);
+
+const upb_ServiceDef* upb_FileDef_Service(const upb_FileDef* f, int i);
+int upb_FileDef_ServiceCount(const upb_FileDef* f);
+
+UPB_API upb_Syntax upb_FileDef_Syntax(const upb_FileDef* f);
+
+const upb_EnumDef* upb_FileDef_TopLevelEnum(const upb_FileDef* f, int i);
+int upb_FileDef_TopLevelEnumCount(const upb_FileDef* f);
+
+const upb_FieldDef* upb_FileDef_TopLevelExtension(const upb_FileDef* f, int i);
+int upb_FileDef_TopLevelExtensionCount(const upb_FileDef* f);
+
+const upb_MessageDef* upb_FileDef_TopLevelMessage(const upb_FileDef* f, int i);
+int upb_FileDef_TopLevelMessageCount(const upb_FileDef* f);
+
+const upb_FileDef* upb_FileDef_WeakDependency(const upb_FileDef* f, int i);
+int upb_FileDef_WeakDependencyCount(const upb_FileDef* f);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_FILE_DEF_H_ */
diff --git a/upb/upb/reflection/internal/def_builder.h b/upb/upb/reflection/internal/def_builder.h
new file mode 100644
index 0000000..f32d98f
--- /dev/null
+++ b/upb/upb/reflection/internal/def_builder.h
@@ -0,0 +1,160 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_
+#define UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_
+
+#include "upb/reflection/common.h"
+#include "upb/reflection/def_type.h"
+#include "upb/reflection/internal/def_pool.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+// We want to copy the options verbatim into the destination options proto.
+// We use serialize+parse as our deep copy.
+#define UPB_DEF_SET_OPTIONS(target, desc_type, options_type, proto) \
+ if (UPB_DESC(desc_type##_has_options)(proto)) { \
+ size_t size; \
+ char* pb = UPB_DESC(options_type##_serialize)( \
+ UPB_DESC(desc_type##_options)(proto), ctx->tmp_arena, &size); \
+ if (!pb) _upb_DefBuilder_OomErr(ctx); \
+ target = \
+ UPB_DESC(options_type##_parse)(pb, size, _upb_DefBuilder_Arena(ctx)); \
+ if (!target) _upb_DefBuilder_OomErr(ctx); \
+ } else { \
+ target = (const UPB_DESC(options_type)*)kUpbDefOptDefault; \
+ }
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct upb_DefBuilder {
+ upb_DefPool* symtab;
+ upb_FileDef* file; // File we are building.
+ upb_Arena* arena; // Allocate defs here.
+ upb_Arena* tmp_arena; // For temporary allocations.
+ upb_Status* status; // Record errors here.
+ const upb_MiniTableFile* layout; // NULL if we should build layouts.
+ upb_MiniTablePlatform platform; // Platform we are targeting.
+ int enum_count; // Count of enums built so far.
+ int msg_count; // Count of messages built so far.
+ int ext_count; // Count of extensions built so far.
+ jmp_buf err; // longjmp() on error.
+};
+
+extern const char* kUpbDefOptDefault;
+
+// ctx->status has already been set elsewhere so just fail/longjmp()
+UPB_NORETURN void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx);
+
+UPB_NORETURN void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt,
+ ...) UPB_PRINTF(2, 3);
+UPB_NORETURN void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx);
+
+const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx,
+ const char* prefix,
+ upb_StringView name);
+
+// Given a symbol and the base symbol inside which it is defined,
+// find the symbol's definition.
+const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx,
+ const char* from_name_dbg,
+ const char* base, upb_StringView sym,
+ upb_deftype_t* type);
+
+const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx,
+ const char* from_name_dbg, const char* base,
+ upb_StringView sym, upb_deftype_t type);
+
+char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f,
+ const char** src, const char* end);
+
+const char* _upb_DefBuilder_FullToShort(const char* fullname);
+
+UPB_INLINE void* _upb_DefBuilder_Alloc(upb_DefBuilder* ctx, size_t bytes) {
+ if (bytes == 0) return NULL;
+ void* ret = upb_Arena_Malloc(ctx->arena, bytes);
+ if (!ret) _upb_DefBuilder_OomErr(ctx);
+ return ret;
+}
+
+// Adds a symbol |v| to the symtab, which must be a def pointer previously
+// packed with pack_def(). The def's pointer to upb_FileDef* must be set before
+// adding, so we know which entries to remove if building this file fails.
+UPB_INLINE void _upb_DefBuilder_Add(upb_DefBuilder* ctx, const char* name,
+ upb_value v) {
+ upb_StringView sym = {.data = name, .size = strlen(name)};
+ bool ok = _upb_DefPool_InsertSym(ctx->symtab, sym, v, ctx->status);
+ if (!ok) _upb_DefBuilder_FailJmp(ctx);
+}
+
+UPB_INLINE upb_Arena* _upb_DefBuilder_Arena(const upb_DefBuilder* ctx) {
+ return ctx->arena;
+}
+
+UPB_INLINE upb_FileDef* _upb_DefBuilder_File(const upb_DefBuilder* ctx) {
+ return ctx->file;
+}
+
+// This version of CheckIdent() is only called by other, faster versions after
+// they detect a parsing error.
+void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name,
+ bool full);
+
+// Verify a full identifier string. This is slightly more complicated than
+// verifying a relative identifier string because we must track '.' chars.
+UPB_INLINE void _upb_DefBuilder_CheckIdentFull(upb_DefBuilder* ctx,
+ upb_StringView name) {
+ bool good = name.size > 0;
+ bool start = true;
+
+ for (size_t i = 0; i < name.size; i++) {
+ const char c = name.data[i];
+ const char d = c | 0x20; // force lowercase
+ const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_');
+ const bool is_numer = ('0' <= c) & (c <= '9') & !start;
+ const bool is_dot = (c == '.') & !start;
+
+ good &= is_alpha | is_numer | is_dot;
+ start = is_dot;
+ }
+
+ if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, true);
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_DEF_BUILDER_INTERNAL_H_ */
diff --git a/upb/upb/reflection/internal/def_pool.h b/upb/upb/reflection/internal/def_pool.h
new file mode 100644
index 0000000..ce8f623
--- /dev/null
+++ b/upb/upb/reflection/internal/def_pool.h
@@ -0,0 +1,80 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_REFLECTION_DEF_POOL_INTERNAL_H_
+#define UPB_REFLECTION_DEF_POOL_INTERNAL_H_
+
+#include "upb/mini_descriptor/decode.h"
+#include "upb/reflection/def_pool.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+upb_Arena* _upb_DefPool_Arena(const upb_DefPool* s);
+size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s);
+upb_ExtensionRegistry* _upb_DefPool_ExtReg(const upb_DefPool* s);
+
+bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTableExtension* ext,
+ const upb_FieldDef* f);
+bool _upb_DefPool_InsertSym(upb_DefPool* s, upb_StringView sym, upb_value v,
+ upb_Status* status);
+bool _upb_DefPool_LookupSym(const upb_DefPool* s, const char* sym, size_t size,
+ upb_value* v);
+
+void** _upb_DefPool_ScratchData(const upb_DefPool* s);
+size_t* _upb_DefPool_ScratchSize(const upb_DefPool* s);
+void _upb_DefPool_SetPlatform(upb_DefPool* s, upb_MiniTablePlatform platform);
+
+// For generated code only: loads a generated descriptor.
+typedef struct _upb_DefPool_Init {
+ struct _upb_DefPool_Init** deps; // Dependencies of this file.
+ const upb_MiniTableFile* layout;
+ const char* filename;
+ upb_StringView descriptor; // Serialized descriptor.
+} _upb_DefPool_Init;
+
+bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init);
+
+// Should only be directly called by tests. This variant lets us suppress
+// the use of compiled-in tables, forcing a rebuild of the tables at runtime.
+bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init,
+ bool rebuild_minitable);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_DEF_POOL_INTERNAL_H_ */
diff --git a/upb/upb/reflection/internal/desc_state.h b/upb/upb/reflection/internal/desc_state.h
new file mode 100644
index 0000000..eca7127
--- /dev/null
+++ b/upb/upb/reflection/internal/desc_state.h
@@ -0,0 +1,67 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_REFLECTION_DESC_STATE_INTERNAL_H_
+#define UPB_REFLECTION_DESC_STATE_INTERNAL_H_
+
+#include "upb/mem/arena.h"
+#include "upb/mini_descriptor/internal/encode.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+// Manages the storage for mini descriptor strings as they are being encoded.
+// TODO(b/234740652): Move some of this state directly into the encoder, maybe.
+typedef struct {
+ upb_MtDataEncoder e;
+ size_t bufsize;
+ char* buf;
+ char* ptr;
+} upb_DescState;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+UPB_INLINE void _upb_DescState_Init(upb_DescState* d) {
+ d->bufsize = kUpb_MtDataEncoder_MinSize * 2;
+ d->buf = NULL;
+ d->ptr = NULL;
+}
+
+bool _upb_DescState_Grow(upb_DescState* d, upb_Arena* a);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_DESC_STATE_INTERNAL_H_ */
diff --git a/upb/upb/reflection/internal/enum_def.h b/upb/upb/reflection/internal/enum_def.h
new file mode 100644
index 0000000..3e69280
--- /dev/null
+++ b/upb/upb/reflection/internal/enum_def.h
@@ -0,0 +1,59 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_REFLECTION_ENUM_DEF_INTERNAL_H_
+#define UPB_REFLECTION_ENUM_DEF_INTERNAL_H_
+
+#include "upb/reflection/enum_def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+upb_EnumDef* _upb_EnumDef_At(const upb_EnumDef* e, int i);
+bool _upb_EnumDef_Insert(upb_EnumDef* e, upb_EnumValueDef* v, upb_Arena* a);
+const upb_MiniTableEnum* _upb_EnumDef_MiniTable(const upb_EnumDef* e);
+
+// Allocate and initialize an array of |n| enum defs.
+upb_EnumDef* _upb_EnumDefs_New(
+ upb_DefBuilder* ctx, int n,
+ const UPB_DESC(EnumDescriptorProto) * const* protos,
+ const upb_MessageDef* containing_type);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_ENUM_DEF_INTERNAL_H_ */
diff --git a/upb/upb/reflection/internal/enum_reserved_range.h b/upb/upb/reflection/internal/enum_reserved_range.h
new file mode 100644
index 0000000..8c78399
--- /dev/null
+++ b/upb/upb/reflection/internal/enum_reserved_range.h
@@ -0,0 +1,58 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_REFLECTION_ENUM_RESERVED_RANGE_INTERNAL_H_
+#define UPB_REFLECTION_ENUM_RESERVED_RANGE_INTERNAL_H_
+
+#include "upb/reflection/enum_reserved_range.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+upb_EnumReservedRange* _upb_EnumReservedRange_At(const upb_EnumReservedRange* r,
+ int i);
+
+// Allocate and initialize an array of |n| reserved ranges owned by |e|.
+upb_EnumReservedRange* _upb_EnumReservedRanges_New(
+ upb_DefBuilder* ctx, int n,
+ const UPB_DESC(EnumDescriptorProto_EnumReservedRange) * const* protos,
+ const upb_EnumDef* e);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_ENUM_RESERVED_RANGE_INTERNAL_H_ */
diff --git a/upb/upb/reflection/internal/enum_value_def.h b/upb/upb/reflection/internal/enum_value_def.h
new file mode 100644
index 0000000..164a092
--- /dev/null
+++ b/upb/upb/reflection/internal/enum_value_def.h
@@ -0,0 +1,60 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_REFLECTION_ENUM_VALUE_DEF_INTERNAL_H_
+#define UPB_REFLECTION_ENUM_VALUE_DEF_INTERNAL_H_
+
+#include "upb/reflection/enum_value_def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+upb_EnumValueDef* _upb_EnumValueDef_At(const upb_EnumValueDef* v, int i);
+
+// Allocate and initialize an array of |n| enum value defs owned by |e|.
+upb_EnumValueDef* _upb_EnumValueDefs_New(
+ upb_DefBuilder* ctx, const char* prefix, int n,
+ const UPB_DESC(EnumValueDescriptorProto) * const* protos, upb_EnumDef* e,
+ bool* is_sorted);
+
+const upb_EnumValueDef** _upb_EnumValueDefs_Sorted(const upb_EnumValueDef* v,
+ int n, upb_Arena* a);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_ENUM_VALUE_DEF_INTERNAL_H_ */
diff --git a/upb/upb/reflection/internal/extension_range.h b/upb/upb/reflection/internal/extension_range.h
new file mode 100644
index 0000000..972ffe5
--- /dev/null
+++ b/upb/upb/reflection/internal/extension_range.h
@@ -0,0 +1,57 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_REFLECTION_EXTENSION_RANGE_INTERNAL_H_
+#define UPB_REFLECTION_EXTENSION_RANGE_INTERNAL_H_
+
+#include "upb/reflection/extension_range.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+upb_ExtensionRange* _upb_ExtensionRange_At(const upb_ExtensionRange* r, int i);
+
+// Allocate and initialize an array of |n| extension ranges owned by |m|.
+upb_ExtensionRange* _upb_ExtensionRanges_New(
+ upb_DefBuilder* ctx, int n,
+ const UPB_DESC(DescriptorProto_ExtensionRange) * const* protos,
+ const upb_MessageDef* m);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_EXTENSION_RANGE_INTERNAL_H_ */
diff --git a/upb/upb/reflection/internal/field_def.h b/upb/upb/reflection/internal/field_def.h
new file mode 100644
index 0000000..38f074d
--- /dev/null
+++ b/upb/upb/reflection/internal/field_def.h
@@ -0,0 +1,79 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_REFLECTION_FIELD_DEF_INTERNAL_H_
+#define UPB_REFLECTION_FIELD_DEF_INTERNAL_H_
+
+#include "upb/reflection/field_def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+upb_FieldDef* _upb_FieldDef_At(const upb_FieldDef* f, int i);
+
+const upb_MiniTableExtension* _upb_FieldDef_ExtensionMiniTable(
+ const upb_FieldDef* f);
+bool _upb_FieldDef_IsClosedEnum(const upb_FieldDef* f);
+bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f);
+int _upb_FieldDef_LayoutIndex(const upb_FieldDef* f);
+uint64_t _upb_FieldDef_Modifiers(const upb_FieldDef* f);
+void _upb_FieldDef_Resolve(upb_DefBuilder* ctx, const char* prefix,
+ upb_FieldDef* f);
+void _upb_FieldDef_BuildMiniTableExtension(upb_DefBuilder* ctx,
+ const upb_FieldDef* f);
+
+// Allocate and initialize an array of |n| extensions (field defs).
+upb_FieldDef* _upb_Extensions_New(
+ upb_DefBuilder* ctx, int n,
+ const UPB_DESC(FieldDescriptorProto) * const* protos, const char* prefix,
+ upb_MessageDef* m);
+
+// Allocate and initialize an array of |n| field defs.
+upb_FieldDef* _upb_FieldDefs_New(
+ upb_DefBuilder* ctx, int n,
+ const UPB_DESC(FieldDescriptorProto) * const* protos, const char* prefix,
+ upb_MessageDef* m, bool* is_sorted);
+
+// Allocate and return a list of pointers to the |n| field defs in |ff|,
+// sorted by field number.
+const upb_FieldDef** _upb_FieldDefs_Sorted(const upb_FieldDef* f, int n,
+ upb_Arena* a);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_FIELD_DEF_INTERNAL_H_ */
diff --git a/upb/upb/reflection/internal/file_def.h b/upb/upb/reflection/internal/file_def.h
new file mode 100644
index 0000000..05a2f90
--- /dev/null
+++ b/upb/upb/reflection/internal/file_def.h
@@ -0,0 +1,60 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_REFLECTION_FILE_DEF_INTERNAL_H_
+#define UPB_REFLECTION_FILE_DEF_INTERNAL_H_
+
+#include "upb/reflection/file_def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+const upb_MiniTableExtension* _upb_FileDef_ExtensionMiniTable(
+ const upb_FileDef* f, int i);
+const int32_t* _upb_FileDef_PublicDependencyIndexes(const upb_FileDef* f);
+const int32_t* _upb_FileDef_WeakDependencyIndexes(const upb_FileDef* f);
+
+// upb_FileDef_Package() returns "" if f->package is NULL, this does not.
+const char* _upb_FileDef_RawPackage(const upb_FileDef* f);
+
+void _upb_FileDef_Create(upb_DefBuilder* ctx,
+ const UPB_DESC(FileDescriptorProto) * file_proto);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_FILE_DEF_INTERNAL_H_ */
diff --git a/upb/upb/reflection/internal/message_def.h b/upb/upb/reflection/internal/message_def.h
new file mode 100644
index 0000000..711ff27
--- /dev/null
+++ b/upb/upb/reflection/internal/message_def.h
@@ -0,0 +1,66 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_REFLECTION_MESSAGE_DEF_INTERNAL_H_
+#define UPB_REFLECTION_MESSAGE_DEF_INTERNAL_H_
+
+#include "upb/reflection/message_def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+upb_MessageDef* _upb_MessageDef_At(const upb_MessageDef* m, int i);
+bool _upb_MessageDef_InMessageSet(const upb_MessageDef* m);
+bool _upb_MessageDef_Insert(upb_MessageDef* m, const char* name, size_t size,
+ upb_value v, upb_Arena* a);
+void _upb_MessageDef_InsertField(upb_DefBuilder* ctx, upb_MessageDef* m,
+ const upb_FieldDef* f);
+bool _upb_MessageDef_IsValidExtensionNumber(const upb_MessageDef* m, int n);
+void _upb_MessageDef_CreateMiniTable(upb_DefBuilder* ctx, upb_MessageDef* m);
+void _upb_MessageDef_LinkMiniTable(upb_DefBuilder* ctx,
+ const upb_MessageDef* m);
+void _upb_MessageDef_Resolve(upb_DefBuilder* ctx, upb_MessageDef* m);
+
+// Allocate and initialize an array of |n| message defs.
+upb_MessageDef* _upb_MessageDefs_New(
+ upb_DefBuilder* ctx, int n, const UPB_DESC(DescriptorProto) * const* protos,
+ const upb_MessageDef* containing_type);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_MESSAGE_DEF_INTERNAL_H_ */
diff --git a/upb/upb/reflection/internal/message_reserved_range.h b/upb/upb/reflection/internal/message_reserved_range.h
new file mode 100644
index 0000000..fa0ed4a
--- /dev/null
+++ b/upb/upb/reflection/internal/message_reserved_range.h
@@ -0,0 +1,58 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_REFLECTION_MESSAGE_RESERVED_RANGE_INTERNAL_H_
+#define UPB_REFLECTION_MESSAGE_RESERVED_RANGE_INTERNAL_H_
+
+#include "upb/reflection/message_reserved_range.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+upb_MessageReservedRange* _upb_MessageReservedRange_At(
+ const upb_MessageReservedRange* r, int i);
+
+// Allocate and initialize an array of |n| reserved ranges owned by |m|.
+upb_MessageReservedRange* _upb_MessageReservedRanges_New(
+ upb_DefBuilder* ctx, int n,
+ const UPB_DESC(DescriptorProto_ReservedRange) * const* protos,
+ const upb_MessageDef* m);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_MESSAGE_RESERVED_RANGE_INTERNAL_H_ */
diff --git a/upb/upb/reflection/internal/method_def.h b/upb/upb/reflection/internal/method_def.h
new file mode 100644
index 0000000..b29b5fc
--- /dev/null
+++ b/upb/upb/reflection/internal/method_def.h
@@ -0,0 +1,56 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_REFLECTION_METHOD_DEF_INTERNAL_H_
+#define UPB_REFLECTION_METHOD_DEF_INTERNAL_H_
+
+#include "upb/reflection/method_def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+upb_MethodDef* _upb_MethodDef_At(const upb_MethodDef* m, int i);
+
+// Allocate and initialize an array of |n| method defs owned by |s|.
+upb_MethodDef* _upb_MethodDefs_New(
+ upb_DefBuilder* ctx, int n,
+ const UPB_DESC(MethodDescriptorProto) * const* protos, upb_ServiceDef* s);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_METHOD_DEF_INTERNAL_H_ */
diff --git a/upb/upb/reflection/internal/oneof_def.h b/upb/upb/reflection/internal/oneof_def.h
new file mode 100644
index 0000000..e4678ae
--- /dev/null
+++ b/upb/upb/reflection/internal/oneof_def.h
@@ -0,0 +1,60 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_REFLECTION_ONEOF_DEF_INTERNAL_H_
+#define UPB_REFLECTION_ONEOF_DEF_INTERNAL_H_
+
+#include "upb/reflection/oneof_def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+upb_OneofDef* _upb_OneofDef_At(const upb_OneofDef* o, int i);
+void _upb_OneofDef_Insert(upb_DefBuilder* ctx, upb_OneofDef* o,
+ const upb_FieldDef* f, const char* name, size_t size);
+
+// Allocate and initialize an array of |n| oneof defs owned by |m|.
+upb_OneofDef* _upb_OneofDefs_New(
+ upb_DefBuilder* ctx, int n,
+ const UPB_DESC(OneofDescriptorProto) * const* protos, upb_MessageDef* m);
+
+size_t _upb_OneofDefs_Finalize(upb_DefBuilder* ctx, upb_MessageDef* m);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_ONEOF_DEF_INTERNAL_H_ */
diff --git a/upb/upb/reflection/internal/service_def.h b/upb/upb/reflection/internal/service_def.h
new file mode 100644
index 0000000..8cab160
--- /dev/null
+++ b/upb/upb/reflection/internal/service_def.h
@@ -0,0 +1,56 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_
+#define UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_
+
+#include "upb/reflection/service_def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+upb_ServiceDef* _upb_ServiceDef_At(const upb_ServiceDef* s, int i);
+
+// Allocate and initialize an array of |n| service defs.
+upb_ServiceDef* _upb_ServiceDefs_New(
+ upb_DefBuilder* ctx, int n,
+ const UPB_DESC(ServiceDescriptorProto) * const* protos);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_SERVICE_DEF_INTERNAL_H_ */
diff --git a/upb/upb/reflection/message.c b/upb/upb/reflection/message.c
new file mode 100644
index 0000000..19944d8
--- /dev/null
+++ b/upb/upb/reflection/message.c
@@ -0,0 +1,236 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/reflection/message.h"
+
+#include <string.h>
+
+#include "upb/collections/map.h"
+#include "upb/hash/common.h"
+#include "upb/message/accessors.h"
+#include "upb/message/message.h"
+#include "upb/mini_table/field.h"
+#include "upb/reflection/def.h"
+#include "upb/reflection/def_pool.h"
+#include "upb/reflection/def_type.h"
+#include "upb/reflection/internal/field_def.h"
+#include "upb/reflection/message_def.h"
+#include "upb/reflection/oneof_def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+bool upb_Message_HasFieldByDef(const upb_Message* msg, const upb_FieldDef* f) {
+ UPB_ASSERT(upb_FieldDef_HasPresence(f));
+ return upb_Message_HasField(msg, upb_FieldDef_MiniTable(f));
+}
+
+const upb_FieldDef* upb_Message_WhichOneof(const upb_Message* msg,
+ const upb_OneofDef* o) {
+ const upb_FieldDef* f = upb_OneofDef_Field(o, 0);
+ if (upb_OneofDef_IsSynthetic(o)) {
+ UPB_ASSERT(upb_OneofDef_FieldCount(o) == 1);
+ return upb_Message_HasFieldByDef(msg, f) ? f : NULL;
+ } else {
+ const upb_MiniTableField* field = upb_FieldDef_MiniTable(f);
+ uint32_t oneof_case = upb_Message_WhichOneofFieldNumber(msg, field);
+ f = oneof_case ? upb_OneofDef_LookupNumber(o, oneof_case) : NULL;
+ UPB_ASSERT((f != NULL) == (oneof_case != 0));
+ return f;
+ }
+}
+
+upb_MessageValue upb_Message_GetFieldByDef(const upb_Message* msg,
+ const upb_FieldDef* f) {
+ upb_MessageValue default_val = upb_FieldDef_Default(f);
+ upb_MessageValue ret;
+ _upb_Message_GetField(msg, upb_FieldDef_MiniTable(f), &default_val, &ret);
+ return ret;
+}
+
+upb_MutableMessageValue upb_Message_Mutable(upb_Message* msg,
+ const upb_FieldDef* f,
+ upb_Arena* a) {
+ UPB_ASSERT(upb_FieldDef_IsSubMessage(f) || upb_FieldDef_IsRepeated(f));
+ if (upb_FieldDef_HasPresence(f) && !upb_Message_HasFieldByDef(msg, f)) {
+ // We need to skip the upb_Message_GetFieldByDef() call in this case.
+ goto make;
+ }
+
+ upb_MessageValue val = upb_Message_GetFieldByDef(msg, f);
+ if (val.array_val) {
+ return (upb_MutableMessageValue){.array = (upb_Array*)val.array_val};
+ }
+
+ upb_MutableMessageValue ret;
+make:
+ if (!a) return (upb_MutableMessageValue){.array = NULL};
+ if (upb_FieldDef_IsMap(f)) {
+ const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f);
+ const upb_FieldDef* key =
+ upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_KeyFieldNumber);
+ const upb_FieldDef* value =
+ upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_ValueFieldNumber);
+ ret.map =
+ upb_Map_New(a, upb_FieldDef_CType(key), upb_FieldDef_CType(value));
+ } else if (upb_FieldDef_IsRepeated(f)) {
+ ret.array = upb_Array_New(a, upb_FieldDef_CType(f));
+ } else {
+ UPB_ASSERT(upb_FieldDef_IsSubMessage(f));
+ const upb_MessageDef* m = upb_FieldDef_MessageSubDef(f);
+ ret.msg = upb_Message_New(upb_MessageDef_MiniTable(m), a);
+ }
+
+ val.array_val = ret.array;
+ upb_Message_SetFieldByDef(msg, f, val, a);
+
+ return ret;
+}
+
+bool upb_Message_SetFieldByDef(upb_Message* msg, const upb_FieldDef* f,
+ upb_MessageValue val, upb_Arena* a) {
+ return _upb_Message_SetField(msg, upb_FieldDef_MiniTable(f), &val, a);
+}
+
+void upb_Message_ClearFieldByDef(upb_Message* msg, const upb_FieldDef* f) {
+ upb_Message_ClearField(msg, upb_FieldDef_MiniTable(f));
+}
+
+void upb_Message_ClearByDef(upb_Message* msg, const upb_MessageDef* m) {
+ upb_Message_Clear(msg, upb_MessageDef_MiniTable(m));
+}
+
+bool upb_Message_Next(const upb_Message* msg, const upb_MessageDef* m,
+ const upb_DefPool* ext_pool, const upb_FieldDef** out_f,
+ upb_MessageValue* out_val, size_t* iter) {
+ size_t i = *iter;
+ size_t n = upb_MessageDef_FieldCount(m);
+ UPB_UNUSED(ext_pool);
+
+ // Iterate over normal fields, returning the first one that is set.
+ while (++i < n) {
+ const upb_FieldDef* f = upb_MessageDef_Field(m, i);
+ const upb_MiniTableField* field = upb_FieldDef_MiniTable(f);
+ upb_MessageValue val = upb_Message_GetFieldByDef(msg, f);
+
+ // Skip field if unset or empty.
+ if (upb_MiniTableField_HasPresence(field)) {
+ if (!upb_Message_HasFieldByDef(msg, f)) continue;
+ } else {
+ switch (upb_FieldMode_Get(field)) {
+ case kUpb_FieldMode_Map:
+ if (!val.map_val || upb_Map_Size(val.map_val) == 0) continue;
+ break;
+ case kUpb_FieldMode_Array:
+ if (!val.array_val || upb_Array_Size(val.array_val) == 0) continue;
+ break;
+ case kUpb_FieldMode_Scalar:
+ if (!_upb_MiniTable_ValueIsNonZero(&val, field)) continue;
+ break;
+ }
+ }
+
+ *out_val = val;
+ *out_f = f;
+ *iter = i;
+ return true;
+ }
+
+ if (ext_pool) {
+ // Return any extensions that are set.
+ size_t count;
+ const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &count);
+ if (i - n < count) {
+ ext += count - 1 - (i - n);
+ memcpy(out_val, &ext->data, sizeof(*out_val));
+ *out_f = upb_DefPool_FindExtensionByMiniTable(ext_pool, ext->ext);
+ *iter = i;
+ return true;
+ }
+ }
+
+ *iter = i;
+ return false;
+}
+
+bool _upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m,
+ int depth) {
+ size_t iter = kUpb_Message_Begin;
+ const upb_FieldDef* f;
+ upb_MessageValue val;
+ bool ret = true;
+
+ if (--depth == 0) return false;
+
+ _upb_Message_DiscardUnknown_shallow(msg);
+
+ while (upb_Message_Next(msg, m, NULL /*ext_pool*/, &f, &val, &iter)) {
+ const upb_MessageDef* subm = upb_FieldDef_MessageSubDef(f);
+ if (!subm) continue;
+ if (upb_FieldDef_IsMap(f)) {
+ const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(subm, 2);
+ const upb_MessageDef* val_m = upb_FieldDef_MessageSubDef(val_f);
+ upb_Map* map = (upb_Map*)val.map_val;
+ size_t iter = kUpb_Map_Begin;
+
+ if (!val_m) continue;
+
+ upb_MessageValue map_key, map_val;
+ while (upb_Map_Next(map, &map_key, &map_val, &iter)) {
+ if (!_upb_Message_DiscardUnknown((upb_Message*)map_val.msg_val, val_m,
+ depth)) {
+ ret = false;
+ }
+ }
+ } else if (upb_FieldDef_IsRepeated(f)) {
+ const upb_Array* arr = val.array_val;
+ size_t i, n = upb_Array_Size(arr);
+ for (i = 0; i < n; i++) {
+ upb_MessageValue elem = upb_Array_Get(arr, i);
+ if (!_upb_Message_DiscardUnknown((upb_Message*)elem.msg_val, subm,
+ depth)) {
+ ret = false;
+ }
+ }
+ } else {
+ if (!_upb_Message_DiscardUnknown((upb_Message*)val.msg_val, subm,
+ depth)) {
+ ret = false;
+ }
+ }
+ }
+
+ return ret;
+}
+
+bool upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m,
+ int maxdepth) {
+ return _upb_Message_DiscardUnknown(msg, m, maxdepth);
+}
diff --git a/upb/upb/reflection/message.h b/upb/upb/reflection/message.h
new file mode 100644
index 0000000..42abc34
--- /dev/null
+++ b/upb/upb/reflection/message.h
@@ -0,0 +1,107 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_REFLECTION_MESSAGE_H_
+#define UPB_REFLECTION_MESSAGE_H_
+
+#include "upb/collections/map.h"
+#include "upb/reflection/common.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Returns a mutable pointer to a map, array, or submessage value. If the given
+// arena is non-NULL this will construct a new object if it was not previously
+// present. May not be called for primitive fields.
+UPB_API upb_MutableMessageValue upb_Message_Mutable(upb_Message* msg,
+ const upb_FieldDef* f,
+ upb_Arena* a);
+
+// Returns the field that is set in the oneof, or NULL if none are set.
+UPB_API const upb_FieldDef* upb_Message_WhichOneof(const upb_Message* msg,
+ const upb_OneofDef* o);
+
+// Clear all data and unknown fields.
+void upb_Message_ClearByDef(upb_Message* msg, const upb_MessageDef* m);
+
+// Clears any field presence and sets the value back to its default.
+UPB_API void upb_Message_ClearFieldByDef(upb_Message* msg,
+ const upb_FieldDef* f);
+
+// May only be called for fields where upb_FieldDef_HasPresence(f) == true.
+UPB_API bool upb_Message_HasFieldByDef(const upb_Message* msg,
+ const upb_FieldDef* f);
+
+// Returns the value in the message associated with this field def.
+UPB_API upb_MessageValue upb_Message_GetFieldByDef(const upb_Message* msg,
+ const upb_FieldDef* f);
+
+// Sets the given field to the given value. For a msg/array/map/string, the
+// caller must ensure that the target data outlives |msg| (by living either in
+// the same arena or a different arena that outlives it).
+//
+// Returns false if allocation fails.
+UPB_API bool upb_Message_SetFieldByDef(upb_Message* msg, const upb_FieldDef* f,
+ upb_MessageValue val, upb_Arena* a);
+
+// Iterate over present fields.
+//
+// size_t iter = kUpb_Message_Begin;
+// const upb_FieldDef *f;
+// upb_MessageValue val;
+// while (upb_Message_Next(msg, m, ext_pool, &f, &val, &iter)) {
+// process_field(f, val);
+// }
+//
+// If ext_pool is NULL, no extensions will be returned. If the given symtab
+// returns extensions that don't match what is in this message, those extensions
+// will be skipped.
+
+#define kUpb_Message_Begin -1
+
+bool upb_Message_Next(const upb_Message* msg, const upb_MessageDef* m,
+ const upb_DefPool* ext_pool, const upb_FieldDef** f,
+ upb_MessageValue* val, size_t* iter);
+
+// Clears all unknown field data from this message and all submessages.
+UPB_API bool upb_Message_DiscardUnknown(upb_Message* msg,
+ const upb_MessageDef* m, int maxdepth);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_MESSAGE_H_ */
diff --git a/upb/upb/reflection/message.hpp b/upb/upb/reflection/message.hpp
new file mode 100644
index 0000000..e5ab2a8
--- /dev/null
+++ b/upb/upb/reflection/message.hpp
@@ -0,0 +1,42 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_REFLECTION_MESSAGE_HPP_
+#define UPB_REFLECTION_MESSAGE_HPP_
+
+#include "upb/reflection/message.h"
+
+namespace upb {
+
+typedef upb_MessageValue MessageValue;
+
+} // namespace upb
+
+#endif // UPB_REFLECTION_MESSAGE_HPP_
diff --git a/upb/upb/reflection/message_def.c b/upb/upb/reflection/message_def.c
new file mode 100644
index 0000000..211e597
--- /dev/null
+++ b/upb/upb/reflection/message_def.c
@@ -0,0 +1,724 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/reflection/internal/message_def.h"
+
+#include "upb/hash/int_table.h"
+#include "upb/hash/str_table.h"
+#include "upb/mini_descriptor/decode.h"
+#include "upb/mini_descriptor/internal/modifiers.h"
+#include "upb/reflection/def.h"
+#include "upb/reflection/def_type.h"
+#include "upb/reflection/internal/def_builder.h"
+#include "upb/reflection/internal/desc_state.h"
+#include "upb/reflection/internal/enum_def.h"
+#include "upb/reflection/internal/extension_range.h"
+#include "upb/reflection/internal/field_def.h"
+#include "upb/reflection/internal/file_def.h"
+#include "upb/reflection/internal/message_reserved_range.h"
+#include "upb/reflection/internal/oneof_def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+struct upb_MessageDef {
+ const UPB_DESC(MessageOptions) * opts;
+ const upb_MiniTable* layout;
+ const upb_FileDef* file;
+ const upb_MessageDef* containing_type;
+ const char* full_name;
+
+ // Tables for looking up fields by number and name.
+ upb_inttable itof;
+ upb_strtable ntof;
+
+ /* All nested defs.
+ * MEM: We could save some space here by putting nested defs in a contiguous
+ * region and calculating counts from offsets or vice-versa. */
+ const upb_FieldDef* fields;
+ const upb_OneofDef* oneofs;
+ const upb_ExtensionRange* ext_ranges;
+ const upb_StringView* res_names;
+ const upb_MessageDef* nested_msgs;
+ const upb_MessageReservedRange* res_ranges;
+ const upb_EnumDef* nested_enums;
+ const upb_FieldDef* nested_exts;
+
+ // TODO(salo): These counters don't need anywhere near 32 bits.
+ int field_count;
+ int real_oneof_count;
+ int oneof_count;
+ int ext_range_count;
+ int res_range_count;
+ int res_name_count;
+ int nested_msg_count;
+ int nested_enum_count;
+ int nested_ext_count;
+ bool in_message_set;
+ bool is_sorted;
+ upb_WellKnown well_known_type;
+#if UINTPTR_MAX == 0xffffffff
+ uint32_t padding; // Increase size to a multiple of 8.
+#endif
+};
+
+static void assign_msg_wellknowntype(upb_MessageDef* m) {
+ const char* name = m->full_name;
+ if (name == NULL) {
+ m->well_known_type = kUpb_WellKnown_Unspecified;
+ return;
+ }
+ if (!strcmp(name, "google.protobuf.Any")) {
+ m->well_known_type = kUpb_WellKnown_Any;
+ } else if (!strcmp(name, "google.protobuf.FieldMask")) {
+ m->well_known_type = kUpb_WellKnown_FieldMask;
+ } else if (!strcmp(name, "google.protobuf.Duration")) {
+ m->well_known_type = kUpb_WellKnown_Duration;
+ } else if (!strcmp(name, "google.protobuf.Timestamp")) {
+ m->well_known_type = kUpb_WellKnown_Timestamp;
+ } else if (!strcmp(name, "google.protobuf.DoubleValue")) {
+ m->well_known_type = kUpb_WellKnown_DoubleValue;
+ } else if (!strcmp(name, "google.protobuf.FloatValue")) {
+ m->well_known_type = kUpb_WellKnown_FloatValue;
+ } else if (!strcmp(name, "google.protobuf.Int64Value")) {
+ m->well_known_type = kUpb_WellKnown_Int64Value;
+ } else if (!strcmp(name, "google.protobuf.UInt64Value")) {
+ m->well_known_type = kUpb_WellKnown_UInt64Value;
+ } else if (!strcmp(name, "google.protobuf.Int32Value")) {
+ m->well_known_type = kUpb_WellKnown_Int32Value;
+ } else if (!strcmp(name, "google.protobuf.UInt32Value")) {
+ m->well_known_type = kUpb_WellKnown_UInt32Value;
+ } else if (!strcmp(name, "google.protobuf.BoolValue")) {
+ m->well_known_type = kUpb_WellKnown_BoolValue;
+ } else if (!strcmp(name, "google.protobuf.StringValue")) {
+ m->well_known_type = kUpb_WellKnown_StringValue;
+ } else if (!strcmp(name, "google.protobuf.BytesValue")) {
+ m->well_known_type = kUpb_WellKnown_BytesValue;
+ } else if (!strcmp(name, "google.protobuf.Value")) {
+ m->well_known_type = kUpb_WellKnown_Value;
+ } else if (!strcmp(name, "google.protobuf.ListValue")) {
+ m->well_known_type = kUpb_WellKnown_ListValue;
+ } else if (!strcmp(name, "google.protobuf.Struct")) {
+ m->well_known_type = kUpb_WellKnown_Struct;
+ } else {
+ m->well_known_type = kUpb_WellKnown_Unspecified;
+ }
+}
+
+upb_MessageDef* _upb_MessageDef_At(const upb_MessageDef* m, int i) {
+ return (upb_MessageDef*)&m[i];
+}
+
+bool _upb_MessageDef_IsValidExtensionNumber(const upb_MessageDef* m, int n) {
+ for (int i = 0; i < m->ext_range_count; i++) {
+ const upb_ExtensionRange* r = upb_MessageDef_ExtensionRange(m, i);
+ if (upb_ExtensionRange_Start(r) <= n && n < upb_ExtensionRange_End(r)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+const UPB_DESC(MessageOptions) *
+ upb_MessageDef_Options(const upb_MessageDef* m) {
+ return m->opts;
+}
+
+bool upb_MessageDef_HasOptions(const upb_MessageDef* m) {
+ return m->opts != (void*)kUpbDefOptDefault;
+}
+
+const char* upb_MessageDef_FullName(const upb_MessageDef* m) {
+ return m->full_name;
+}
+
+const upb_FileDef* upb_MessageDef_File(const upb_MessageDef* m) {
+ return m->file;
+}
+
+const upb_MessageDef* upb_MessageDef_ContainingType(const upb_MessageDef* m) {
+ return m->containing_type;
+}
+
+const char* upb_MessageDef_Name(const upb_MessageDef* m) {
+ return _upb_DefBuilder_FullToShort(m->full_name);
+}
+
+upb_Syntax upb_MessageDef_Syntax(const upb_MessageDef* m) {
+ return upb_FileDef_Syntax(m->file);
+}
+
+const upb_FieldDef* upb_MessageDef_FindFieldByNumber(const upb_MessageDef* m,
+ uint32_t i) {
+ upb_value val;
+ return upb_inttable_lookup(&m->itof, i, &val) ? upb_value_getconstptr(val)
+ : NULL;
+}
+
+const upb_FieldDef* upb_MessageDef_FindFieldByNameWithSize(
+ const upb_MessageDef* m, const char* name, size_t size) {
+ upb_value val;
+
+ if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) {
+ return NULL;
+ }
+
+ return _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD);
+}
+
+const upb_OneofDef* upb_MessageDef_FindOneofByNameWithSize(
+ const upb_MessageDef* m, const char* name, size_t size) {
+ upb_value val;
+
+ if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) {
+ return NULL;
+ }
+
+ return _upb_DefType_Unpack(val, UPB_DEFTYPE_ONEOF);
+}
+
+bool _upb_MessageDef_Insert(upb_MessageDef* m, const char* name, size_t len,
+ upb_value v, upb_Arena* a) {
+ return upb_strtable_insert(&m->ntof, name, len, v, a);
+}
+
+bool upb_MessageDef_FindByNameWithSize(const upb_MessageDef* m,
+ const char* name, size_t len,
+ const upb_FieldDef** out_f,
+ const upb_OneofDef** out_o) {
+ upb_value val;
+
+ if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
+ return false;
+ }
+
+ const upb_FieldDef* f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD);
+ const upb_OneofDef* o = _upb_DefType_Unpack(val, UPB_DEFTYPE_ONEOF);
+ if (out_f) *out_f = f;
+ if (out_o) *out_o = o;
+ return f || o; /* False if this was a JSON name. */
+}
+
+const upb_FieldDef* upb_MessageDef_FindByJsonNameWithSize(
+ const upb_MessageDef* m, const char* name, size_t size) {
+ upb_value val;
+ const upb_FieldDef* f;
+
+ if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) {
+ return NULL;
+ }
+
+ f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD);
+ if (!f) f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD_JSONNAME);
+
+ return f;
+}
+
+int upb_MessageDef_ExtensionRangeCount(const upb_MessageDef* m) {
+ return m->ext_range_count;
+}
+
+int upb_MessageDef_ReservedRangeCount(const upb_MessageDef* m) {
+ return m->res_range_count;
+}
+
+int upb_MessageDef_ReservedNameCount(const upb_MessageDef* m) {
+ return m->res_name_count;
+}
+
+int upb_MessageDef_FieldCount(const upb_MessageDef* m) {
+ return m->field_count;
+}
+
+int upb_MessageDef_OneofCount(const upb_MessageDef* m) {
+ return m->oneof_count;
+}
+
+int upb_MessageDef_RealOneofCount(const upb_MessageDef* m) {
+ return m->real_oneof_count;
+}
+
+int upb_MessageDef_NestedMessageCount(const upb_MessageDef* m) {
+ return m->nested_msg_count;
+}
+
+int upb_MessageDef_NestedEnumCount(const upb_MessageDef* m) {
+ return m->nested_enum_count;
+}
+
+int upb_MessageDef_NestedExtensionCount(const upb_MessageDef* m) {
+ return m->nested_ext_count;
+}
+
+const upb_MiniTable* upb_MessageDef_MiniTable(const upb_MessageDef* m) {
+ return m->layout;
+}
+
+const upb_ExtensionRange* upb_MessageDef_ExtensionRange(const upb_MessageDef* m,
+ int i) {
+ UPB_ASSERT(0 <= i && i < m->ext_range_count);
+ return _upb_ExtensionRange_At(m->ext_ranges, i);
+}
+
+const upb_MessageReservedRange* upb_MessageDef_ReservedRange(
+ const upb_MessageDef* m, int i) {
+ UPB_ASSERT(0 <= i && i < m->res_range_count);
+ return _upb_MessageReservedRange_At(m->res_ranges, i);
+}
+
+upb_StringView upb_MessageDef_ReservedName(const upb_MessageDef* m, int i) {
+ UPB_ASSERT(0 <= i && i < m->res_name_count);
+ return m->res_names[i];
+}
+
+const upb_FieldDef* upb_MessageDef_Field(const upb_MessageDef* m, int i) {
+ UPB_ASSERT(0 <= i && i < m->field_count);
+ return _upb_FieldDef_At(m->fields, i);
+}
+
+const upb_OneofDef* upb_MessageDef_Oneof(const upb_MessageDef* m, int i) {
+ UPB_ASSERT(0 <= i && i < m->oneof_count);
+ return _upb_OneofDef_At(m->oneofs, i);
+}
+
+const upb_MessageDef* upb_MessageDef_NestedMessage(const upb_MessageDef* m,
+ int i) {
+ UPB_ASSERT(0 <= i && i < m->nested_msg_count);
+ return &m->nested_msgs[i];
+}
+
+const upb_EnumDef* upb_MessageDef_NestedEnum(const upb_MessageDef* m, int i) {
+ UPB_ASSERT(0 <= i && i < m->nested_enum_count);
+ return _upb_EnumDef_At(m->nested_enums, i);
+}
+
+const upb_FieldDef* upb_MessageDef_NestedExtension(const upb_MessageDef* m,
+ int i) {
+ UPB_ASSERT(0 <= i && i < m->nested_ext_count);
+ return _upb_FieldDef_At(m->nested_exts, i);
+}
+
+upb_WellKnown upb_MessageDef_WellKnownType(const upb_MessageDef* m) {
+ return m->well_known_type;
+}
+
+bool _upb_MessageDef_InMessageSet(const upb_MessageDef* m) {
+ return m->in_message_set;
+}
+
+const upb_FieldDef* upb_MessageDef_FindFieldByName(const upb_MessageDef* m,
+ const char* name) {
+ return upb_MessageDef_FindFieldByNameWithSize(m, name, strlen(name));
+}
+
+const upb_OneofDef* upb_MessageDef_FindOneofByName(const upb_MessageDef* m,
+ const char* name) {
+ return upb_MessageDef_FindOneofByNameWithSize(m, name, strlen(name));
+}
+
+bool upb_MessageDef_IsMapEntry(const upb_MessageDef* m) {
+ return UPB_DESC(MessageOptions_map_entry)(m->opts);
+}
+
+bool upb_MessageDef_IsMessageSet(const upb_MessageDef* m) {
+ return UPB_DESC(MessageOptions_message_set_wire_format)(m->opts);
+}
+
+static upb_MiniTable* _upb_MessageDef_MakeMiniTable(upb_DefBuilder* ctx,
+ const upb_MessageDef* m) {
+ upb_StringView desc;
+ // Note: this will assign layout_index for fields, so upb_FieldDef_MiniTable()
+ // is safe to call only after this call.
+ bool ok = upb_MessageDef_MiniDescriptorEncode(m, ctx->tmp_arena, &desc);
+ if (!ok) _upb_DefBuilder_OomErr(ctx);
+
+ void** scratch_data = _upb_DefPool_ScratchData(ctx->symtab);
+ size_t* scratch_size = _upb_DefPool_ScratchSize(ctx->symtab);
+ upb_MiniTable* ret = upb_MiniTable_BuildWithBuf(
+ desc.data, desc.size, ctx->platform, ctx->arena, scratch_data,
+ scratch_size, ctx->status);
+ if (!ret) _upb_DefBuilder_FailJmp(ctx);
+
+ return ret;
+}
+
+void _upb_MessageDef_Resolve(upb_DefBuilder* ctx, upb_MessageDef* m) {
+ for (int i = 0; i < m->field_count; i++) {
+ upb_FieldDef* f = (upb_FieldDef*)upb_MessageDef_Field(m, i);
+ _upb_FieldDef_Resolve(ctx, m->full_name, f);
+ }
+
+ m->in_message_set = false;
+ for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) {
+ upb_FieldDef* ext = (upb_FieldDef*)upb_MessageDef_NestedExtension(m, i);
+ _upb_FieldDef_Resolve(ctx, m->full_name, ext);
+ if (upb_FieldDef_Type(ext) == kUpb_FieldType_Message &&
+ upb_FieldDef_Label(ext) == kUpb_Label_Optional &&
+ upb_FieldDef_MessageSubDef(ext) == m &&
+ UPB_DESC(MessageOptions_message_set_wire_format)(
+ upb_MessageDef_Options(upb_FieldDef_ContainingType(ext)))) {
+ m->in_message_set = true;
+ }
+ }
+
+ for (int i = 0; i < upb_MessageDef_NestedMessageCount(m); i++) {
+ upb_MessageDef* n = (upb_MessageDef*)upb_MessageDef_NestedMessage(m, i);
+ _upb_MessageDef_Resolve(ctx, n);
+ }
+}
+
+void _upb_MessageDef_InsertField(upb_DefBuilder* ctx, upb_MessageDef* m,
+ const upb_FieldDef* f) {
+ const int32_t field_number = upb_FieldDef_Number(f);
+
+ if (field_number <= 0 || field_number > kUpb_MaxFieldNumber) {
+ _upb_DefBuilder_Errf(ctx, "invalid field number (%u)", field_number);
+ }
+
+ const char* json_name = upb_FieldDef_JsonName(f);
+ const char* shortname = upb_FieldDef_Name(f);
+ const size_t shortnamelen = strlen(shortname);
+
+ upb_value v = upb_value_constptr(f);
+
+ upb_value existing_v;
+ if (upb_strtable_lookup(&m->ntof, shortname, &existing_v)) {
+ _upb_DefBuilder_Errf(ctx, "duplicate field name (%s)", shortname);
+ }
+
+ const upb_value field_v = _upb_DefType_Pack(f, UPB_DEFTYPE_FIELD);
+ bool ok =
+ _upb_MessageDef_Insert(m, shortname, shortnamelen, field_v, ctx->arena);
+ if (!ok) _upb_DefBuilder_OomErr(ctx);
+
+ if (strcmp(shortname, json_name) != 0) {
+ if (upb_strtable_lookup(&m->ntof, json_name, &v)) {
+ _upb_DefBuilder_Errf(ctx, "duplicate json_name (%s)", json_name);
+ }
+
+ const size_t json_size = strlen(json_name);
+ const upb_value json_v = _upb_DefType_Pack(f, UPB_DEFTYPE_FIELD_JSONNAME);
+ ok = _upb_MessageDef_Insert(m, json_name, json_size, json_v, ctx->arena);
+ if (!ok) _upb_DefBuilder_OomErr(ctx);
+ }
+
+ if (upb_inttable_lookup(&m->itof, field_number, NULL)) {
+ _upb_DefBuilder_Errf(ctx, "duplicate field number (%u)", field_number);
+ }
+
+ ok = upb_inttable_insert(&m->itof, field_number, v, ctx->arena);
+ if (!ok) _upb_DefBuilder_OomErr(ctx);
+}
+
+void _upb_MessageDef_CreateMiniTable(upb_DefBuilder* ctx, upb_MessageDef* m) {
+ if (ctx->layout == NULL) {
+ m->layout = _upb_MessageDef_MakeMiniTable(ctx, m);
+ } else {
+ UPB_ASSERT(ctx->msg_count < ctx->layout->msg_count);
+ m->layout = ctx->layout->msgs[ctx->msg_count++];
+ UPB_ASSERT(m->field_count == m->layout->field_count);
+
+ // We don't need the result of this call, but it will assign layout_index
+ // for all the fields in O(n lg n) time.
+ _upb_FieldDefs_Sorted(m->fields, m->field_count, ctx->tmp_arena);
+ }
+
+ for (int i = 0; i < m->nested_msg_count; i++) {
+ upb_MessageDef* nested =
+ (upb_MessageDef*)upb_MessageDef_NestedMessage(m, i);
+ _upb_MessageDef_CreateMiniTable(ctx, nested);
+ }
+}
+
+void _upb_MessageDef_LinkMiniTable(upb_DefBuilder* ctx,
+ const upb_MessageDef* m) {
+ for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) {
+ const upb_FieldDef* ext = upb_MessageDef_NestedExtension(m, i);
+ _upb_FieldDef_BuildMiniTableExtension(ctx, ext);
+ }
+
+ for (int i = 0; i < m->nested_msg_count; i++) {
+ _upb_MessageDef_LinkMiniTable(ctx, upb_MessageDef_NestedMessage(m, i));
+ }
+
+ if (ctx->layout) return;
+
+ for (int i = 0; i < m->field_count; i++) {
+ const upb_FieldDef* f = upb_MessageDef_Field(m, i);
+ const upb_MessageDef* sub_m = upb_FieldDef_MessageSubDef(f);
+ const upb_EnumDef* sub_e = upb_FieldDef_EnumSubDef(f);
+ const int layout_index = _upb_FieldDef_LayoutIndex(f);
+ upb_MiniTable* mt = (upb_MiniTable*)upb_MessageDef_MiniTable(m);
+
+ UPB_ASSERT(layout_index < m->field_count);
+ upb_MiniTableField* mt_f =
+ (upb_MiniTableField*)&m->layout->fields[layout_index];
+ if (sub_m) {
+ if (!mt->subs) {
+ _upb_DefBuilder_Errf(ctx, "unexpected submsg for (%s)", m->full_name);
+ }
+ UPB_ASSERT(mt_f);
+ UPB_ASSERT(sub_m->layout);
+ if (UPB_UNLIKELY(!upb_MiniTable_SetSubMessage(mt, mt_f, sub_m->layout))) {
+ _upb_DefBuilder_Errf(ctx, "invalid submsg for (%s)", m->full_name);
+ }
+ } else if (_upb_FieldDef_IsClosedEnum(f)) {
+ const upb_MiniTableEnum* mt_e = _upb_EnumDef_MiniTable(sub_e);
+ if (UPB_UNLIKELY(!upb_MiniTable_SetSubEnum(mt, mt_f, mt_e))) {
+ _upb_DefBuilder_Errf(ctx, "invalid subenum for (%s)", m->full_name);
+ }
+ }
+ }
+
+#ifndef NDEBUG
+ for (int i = 0; i < m->field_count; i++) {
+ const upb_FieldDef* f = upb_MessageDef_Field(m, i);
+ const int layout_index = _upb_FieldDef_LayoutIndex(f);
+ UPB_ASSERT(layout_index < m->layout->field_count);
+ const upb_MiniTableField* mt_f = &m->layout->fields[layout_index];
+ UPB_ASSERT(upb_FieldDef_Type(f) == upb_MiniTableField_Type(mt_f));
+ UPB_ASSERT(upb_FieldDef_CType(f) == upb_MiniTableField_CType(mt_f));
+ UPB_ASSERT(upb_FieldDef_HasPresence(f) ==
+ upb_MiniTableField_HasPresence(mt_f));
+ }
+#endif
+}
+
+static uint64_t _upb_MessageDef_Modifiers(const upb_MessageDef* m) {
+ uint64_t out = 0;
+ if (upb_FileDef_Syntax(m->file) == kUpb_Syntax_Proto3) {
+ out |= kUpb_MessageModifier_ValidateUtf8;
+ out |= kUpb_MessageModifier_DefaultIsPacked;
+ }
+ if (m->ext_range_count) {
+ out |= kUpb_MessageModifier_IsExtendable;
+ }
+ return out;
+}
+
+static bool _upb_MessageDef_EncodeMap(upb_DescState* s, const upb_MessageDef* m,
+ upb_Arena* a) {
+ if (m->field_count != 2) return false;
+
+ const upb_FieldDef* key_field = upb_MessageDef_Field(m, 0);
+ const upb_FieldDef* val_field = upb_MessageDef_Field(m, 1);
+ if (key_field == NULL || val_field == NULL) return false;
+
+ UPB_ASSERT(_upb_FieldDef_LayoutIndex(key_field) == 0);
+ UPB_ASSERT(_upb_FieldDef_LayoutIndex(val_field) == 1);
+
+ s->ptr = upb_MtDataEncoder_EncodeMap(
+ &s->e, s->ptr, upb_FieldDef_Type(key_field), upb_FieldDef_Type(val_field),
+ _upb_FieldDef_Modifiers(key_field), _upb_FieldDef_Modifiers(val_field));
+ return true;
+}
+
+static bool _upb_MessageDef_EncodeMessage(upb_DescState* s,
+ const upb_MessageDef* m,
+ upb_Arena* a) {
+ const upb_FieldDef** sorted = NULL;
+ if (!m->is_sorted) {
+ sorted = _upb_FieldDefs_Sorted(m->fields, m->field_count, a);
+ if (!sorted) return false;
+ }
+
+ s->ptr = upb_MtDataEncoder_StartMessage(&s->e, s->ptr,
+ _upb_MessageDef_Modifiers(m));
+
+ for (int i = 0; i < m->field_count; i++) {
+ const upb_FieldDef* f = sorted ? sorted[i] : upb_MessageDef_Field(m, i);
+ const upb_FieldType type = upb_FieldDef_Type(f);
+ const int number = upb_FieldDef_Number(f);
+ const uint64_t modifiers = _upb_FieldDef_Modifiers(f);
+
+ if (!_upb_DescState_Grow(s, a)) return false;
+ s->ptr = upb_MtDataEncoder_PutField(&s->e, s->ptr, type, number, modifiers);
+ }
+
+ for (int i = 0; i < m->real_oneof_count; i++) {
+ if (!_upb_DescState_Grow(s, a)) return false;
+ s->ptr = upb_MtDataEncoder_StartOneof(&s->e, s->ptr);
+
+ const upb_OneofDef* o = upb_MessageDef_Oneof(m, i);
+ const int field_count = upb_OneofDef_FieldCount(o);
+ for (int j = 0; j < field_count; j++) {
+ const int number = upb_FieldDef_Number(upb_OneofDef_Field(o, j));
+
+ if (!_upb_DescState_Grow(s, a)) return false;
+ s->ptr = upb_MtDataEncoder_PutOneofField(&s->e, s->ptr, number);
+ }
+ }
+
+ return true;
+}
+
+static bool _upb_MessageDef_EncodeMessageSet(upb_DescState* s,
+ const upb_MessageDef* m,
+ upb_Arena* a) {
+ s->ptr = upb_MtDataEncoder_EncodeMessageSet(&s->e, s->ptr);
+
+ return true;
+}
+
+bool upb_MessageDef_MiniDescriptorEncode(const upb_MessageDef* m, upb_Arena* a,
+ upb_StringView* out) {
+ upb_DescState s;
+ _upb_DescState_Init(&s);
+
+ if (!_upb_DescState_Grow(&s, a)) return false;
+
+ if (upb_MessageDef_IsMapEntry(m)) {
+ if (!_upb_MessageDef_EncodeMap(&s, m, a)) return false;
+ } else if (UPB_DESC(MessageOptions_message_set_wire_format)(m->opts)) {
+ if (!_upb_MessageDef_EncodeMessageSet(&s, m, a)) return false;
+ } else {
+ if (!_upb_MessageDef_EncodeMessage(&s, m, a)) return false;
+ }
+
+ if (!_upb_DescState_Grow(&s, a)) return false;
+ *s.ptr = '\0';
+
+ out->data = s.buf;
+ out->size = s.ptr - s.buf;
+ return true;
+}
+
+static upb_StringView* _upb_ReservedNames_New(upb_DefBuilder* ctx, int n,
+ const upb_StringView* protos) {
+ upb_StringView* sv = _upb_DefBuilder_Alloc(ctx, sizeof(upb_StringView) * n);
+ for (int i = 0; i < n; i++) {
+ sv[i].data =
+ upb_strdup2(protos[i].data, protos[i].size, _upb_DefBuilder_Arena(ctx));
+ sv[i].size = protos[i].size;
+ }
+ return sv;
+}
+
+static void create_msgdef(upb_DefBuilder* ctx, const char* prefix,
+ const UPB_DESC(DescriptorProto) * msg_proto,
+ const upb_MessageDef* containing_type,
+ upb_MessageDef* m) {
+ const UPB_DESC(OneofDescriptorProto)* const* oneofs;
+ const UPB_DESC(FieldDescriptorProto)* const* fields;
+ const UPB_DESC(DescriptorProto_ExtensionRange)* const* ext_ranges;
+ const UPB_DESC(DescriptorProto_ReservedRange)* const* res_ranges;
+ const upb_StringView* res_names;
+ size_t n_oneof, n_field, n_enum, n_ext, n_msg;
+ size_t n_ext_range, n_res_range, n_res_name;
+ upb_StringView name;
+
+ // Must happen before _upb_DefBuilder_Add()
+ m->file = _upb_DefBuilder_File(ctx);
+
+ m->containing_type = containing_type;
+ m->is_sorted = true;
+
+ name = UPB_DESC(DescriptorProto_name)(msg_proto);
+
+ m->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name);
+ _upb_DefBuilder_Add(ctx, m->full_name, _upb_DefType_Pack(m, UPB_DEFTYPE_MSG));
+
+ oneofs = UPB_DESC(DescriptorProto_oneof_decl)(msg_proto, &n_oneof);
+ fields = UPB_DESC(DescriptorProto_field)(msg_proto, &n_field);
+ ext_ranges =
+ UPB_DESC(DescriptorProto_extension_range)(msg_proto, &n_ext_range);
+ res_ranges =
+ UPB_DESC(DescriptorProto_reserved_range)(msg_proto, &n_res_range);
+ res_names = UPB_DESC(DescriptorProto_reserved_name)(msg_proto, &n_res_name);
+
+ bool ok = upb_inttable_init(&m->itof, ctx->arena);
+ if (!ok) _upb_DefBuilder_OomErr(ctx);
+
+ ok = upb_strtable_init(&m->ntof, n_oneof + n_field, ctx->arena);
+ if (!ok) _upb_DefBuilder_OomErr(ctx);
+
+ UPB_DEF_SET_OPTIONS(m->opts, DescriptorProto, MessageOptions, msg_proto);
+
+ m->oneof_count = n_oneof;
+ m->oneofs = _upb_OneofDefs_New(ctx, n_oneof, oneofs, m);
+
+ m->field_count = n_field;
+ m->fields =
+ _upb_FieldDefs_New(ctx, n_field, fields, m->full_name, m, &m->is_sorted);
+
+ // Message Sets may not contain fields.
+ if (UPB_UNLIKELY(UPB_DESC(MessageOptions_message_set_wire_format)(m->opts))) {
+ if (UPB_UNLIKELY(n_field > 0)) {
+ _upb_DefBuilder_Errf(ctx, "invalid message set (%s)", m->full_name);
+ }
+ }
+
+ m->ext_range_count = n_ext_range;
+ m->ext_ranges = _upb_ExtensionRanges_New(ctx, n_ext_range, ext_ranges, m);
+
+ m->res_range_count = n_res_range;
+ m->res_ranges =
+ _upb_MessageReservedRanges_New(ctx, n_res_range, res_ranges, m);
+
+ m->res_name_count = n_res_name;
+ m->res_names = _upb_ReservedNames_New(ctx, n_res_name, res_names);
+
+ const size_t synthetic_count = _upb_OneofDefs_Finalize(ctx, m);
+ m->real_oneof_count = m->oneof_count - synthetic_count;
+
+ assign_msg_wellknowntype(m);
+ upb_inttable_compact(&m->itof, ctx->arena);
+
+ const UPB_DESC(EnumDescriptorProto)* const* enums =
+ UPB_DESC(DescriptorProto_enum_type)(msg_proto, &n_enum);
+ m->nested_enum_count = n_enum;
+ m->nested_enums = _upb_EnumDefs_New(ctx, n_enum, enums, m);
+
+ const UPB_DESC(FieldDescriptorProto)* const* exts =
+ UPB_DESC(DescriptorProto_extension)(msg_proto, &n_ext);
+ m->nested_ext_count = n_ext;
+ m->nested_exts = _upb_Extensions_New(ctx, n_ext, exts, m->full_name, m);
+
+ const UPB_DESC(DescriptorProto)* const* msgs =
+ UPB_DESC(DescriptorProto_nested_type)(msg_proto, &n_msg);
+ m->nested_msg_count = n_msg;
+ m->nested_msgs = _upb_MessageDefs_New(ctx, n_msg, msgs, m);
+}
+
+// Allocate and initialize an array of |n| message defs.
+upb_MessageDef* _upb_MessageDefs_New(
+ upb_DefBuilder* ctx, int n, const UPB_DESC(DescriptorProto) * const* protos,
+ const upb_MessageDef* containing_type) {
+ _upb_DefType_CheckPadding(sizeof(upb_MessageDef));
+
+ const char* name = containing_type ? containing_type->full_name
+ : _upb_FileDef_RawPackage(ctx->file);
+
+ upb_MessageDef* m = _upb_DefBuilder_Alloc(ctx, sizeof(upb_MessageDef) * n);
+ for (int i = 0; i < n; i++) {
+ create_msgdef(ctx, name, protos[i], containing_type, &m[i]);
+ }
+ return m;
+}
diff --git a/upb/upb/reflection/message_def.h b/upb/upb/reflection/message_def.h
new file mode 100644
index 0000000..deb6d87
--- /dev/null
+++ b/upb/upb/reflection/message_def.h
@@ -0,0 +1,179 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// IWYU pragma: private, include "upb/reflection/def.h"
+
+#ifndef UPB_REFLECTION_MESSAGE_DEF_H_
+#define UPB_REFLECTION_MESSAGE_DEF_H_
+
+#include "upb/base/string_view.h"
+#include "upb/reflection/common.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+// Well-known field tag numbers for map-entry messages.
+#define kUpb_MapEntry_KeyFieldNumber 1
+#define kUpb_MapEntry_ValueFieldNumber 2
+
+// Well-known field tag numbers for Any messages.
+#define kUpb_Any_TypeFieldNumber 1
+#define kUpb_Any_ValueFieldNumber 2
+
+// Well-known field tag numbers for duration messages.
+#define kUpb_Duration_SecondsFieldNumber 1
+#define kUpb_Duration_NanosFieldNumber 2
+
+// Well-known field tag numbers for timestamp messages.
+#define kUpb_Timestamp_SecondsFieldNumber 1
+#define kUpb_Timestamp_NanosFieldNumber 2
+
+// All the different kind of well known type messages. For simplicity of check,
+// number wrappers and string wrappers are grouped together. Make sure the
+// order and number of these groups are not changed.
+typedef enum {
+ kUpb_WellKnown_Unspecified,
+ kUpb_WellKnown_Any,
+ kUpb_WellKnown_FieldMask,
+ kUpb_WellKnown_Duration,
+ kUpb_WellKnown_Timestamp,
+
+ // number wrappers
+ kUpb_WellKnown_DoubleValue,
+ kUpb_WellKnown_FloatValue,
+ kUpb_WellKnown_Int64Value,
+ kUpb_WellKnown_UInt64Value,
+ kUpb_WellKnown_Int32Value,
+ kUpb_WellKnown_UInt32Value,
+
+ // string wrappers
+ kUpb_WellKnown_StringValue,
+ kUpb_WellKnown_BytesValue,
+ kUpb_WellKnown_BoolValue,
+ kUpb_WellKnown_Value,
+ kUpb_WellKnown_ListValue,
+ kUpb_WellKnown_Struct,
+} upb_WellKnown;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+const upb_MessageDef* upb_MessageDef_ContainingType(const upb_MessageDef* m);
+
+const upb_ExtensionRange* upb_MessageDef_ExtensionRange(const upb_MessageDef* m,
+ int i);
+int upb_MessageDef_ExtensionRangeCount(const upb_MessageDef* m);
+
+UPB_API const upb_FieldDef* upb_MessageDef_Field(const upb_MessageDef* m,
+ int i);
+UPB_API int upb_MessageDef_FieldCount(const upb_MessageDef* m);
+
+UPB_API const upb_FileDef* upb_MessageDef_File(const upb_MessageDef* m);
+
+// Returns a field by either JSON name or regular proto name.
+const upb_FieldDef* upb_MessageDef_FindByJsonNameWithSize(
+ const upb_MessageDef* m, const char* name, size_t size);
+UPB_INLINE const upb_FieldDef* upb_MessageDef_FindByJsonName(
+ const upb_MessageDef* m, const char* name) {
+ return upb_MessageDef_FindByJsonNameWithSize(m, name, strlen(name));
+}
+
+// Lookup of either field or oneof by name. Returns whether either was found.
+// If the return is true, then the found def will be set, and the non-found
+// one set to NULL.
+UPB_API bool upb_MessageDef_FindByNameWithSize(const upb_MessageDef* m,
+ const char* name, size_t size,
+ const upb_FieldDef** f,
+ const upb_OneofDef** o);
+UPB_INLINE bool upb_MessageDef_FindByName(const upb_MessageDef* m,
+ const char* name,
+ const upb_FieldDef** f,
+ const upb_OneofDef** o) {
+ return upb_MessageDef_FindByNameWithSize(m, name, strlen(name), f, o);
+}
+
+const upb_FieldDef* upb_MessageDef_FindFieldByName(const upb_MessageDef* m,
+ const char* name);
+UPB_API const upb_FieldDef* upb_MessageDef_FindFieldByNameWithSize(
+ const upb_MessageDef* m, const char* name, size_t size);
+UPB_API const upb_FieldDef* upb_MessageDef_FindFieldByNumber(
+ const upb_MessageDef* m, uint32_t i);
+const upb_OneofDef* upb_MessageDef_FindOneofByName(const upb_MessageDef* m,
+ const char* name);
+UPB_API const upb_OneofDef* upb_MessageDef_FindOneofByNameWithSize(
+ const upb_MessageDef* m, const char* name, size_t size);
+UPB_API const char* upb_MessageDef_FullName(const upb_MessageDef* m);
+bool upb_MessageDef_HasOptions(const upb_MessageDef* m);
+bool upb_MessageDef_IsMapEntry(const upb_MessageDef* m);
+bool upb_MessageDef_IsMessageSet(const upb_MessageDef* m);
+
+// Creates a mini descriptor string for a message, returns true on success.
+bool upb_MessageDef_MiniDescriptorEncode(const upb_MessageDef* m, upb_Arena* a,
+ upb_StringView* out);
+
+UPB_API const upb_MiniTable* upb_MessageDef_MiniTable(const upb_MessageDef* m);
+const char* upb_MessageDef_Name(const upb_MessageDef* m);
+
+const upb_EnumDef* upb_MessageDef_NestedEnum(const upb_MessageDef* m, int i);
+const upb_FieldDef* upb_MessageDef_NestedExtension(const upb_MessageDef* m,
+ int i);
+const upb_MessageDef* upb_MessageDef_NestedMessage(const upb_MessageDef* m,
+ int i);
+
+int upb_MessageDef_NestedEnumCount(const upb_MessageDef* m);
+int upb_MessageDef_NestedExtensionCount(const upb_MessageDef* m);
+int upb_MessageDef_NestedMessageCount(const upb_MessageDef* m);
+
+UPB_API const upb_OneofDef* upb_MessageDef_Oneof(const upb_MessageDef* m,
+ int i);
+UPB_API int upb_MessageDef_OneofCount(const upb_MessageDef* m);
+int upb_MessageDef_RealOneofCount(const upb_MessageDef* m);
+
+const UPB_DESC(MessageOptions) *
+ upb_MessageDef_Options(const upb_MessageDef* m);
+
+upb_StringView upb_MessageDef_ReservedName(const upb_MessageDef* m, int i);
+int upb_MessageDef_ReservedNameCount(const upb_MessageDef* m);
+
+const upb_MessageReservedRange* upb_MessageDef_ReservedRange(
+ const upb_MessageDef* m, int i);
+int upb_MessageDef_ReservedRangeCount(const upb_MessageDef* m);
+
+UPB_API upb_Syntax upb_MessageDef_Syntax(const upb_MessageDef* m);
+UPB_API upb_WellKnown upb_MessageDef_WellKnownType(const upb_MessageDef* m);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_MESSAGE_DEF_H_ */
diff --git a/upb/upb/reflection/message_reserved_range.c b/upb/upb/reflection/message_reserved_range.c
new file mode 100644
index 0000000..c39d8b9
--- /dev/null
+++ b/upb/upb/reflection/message_reserved_range.c
@@ -0,0 +1,84 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/reflection/enum_def.h"
+#include "upb/reflection/field_def.h"
+#include "upb/reflection/internal/def_builder.h"
+#include "upb/reflection/internal/extension_range.h"
+#include "upb/reflection/message_def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+struct upb_MessageReservedRange {
+ int32_t start;
+ int32_t end;
+};
+
+upb_MessageReservedRange* _upb_MessageReservedRange_At(
+ const upb_MessageReservedRange* r, int i) {
+ return (upb_MessageReservedRange*)&r[i];
+}
+
+int32_t upb_MessageReservedRange_Start(const upb_MessageReservedRange* r) {
+ return r->start;
+}
+int32_t upb_MessageReservedRange_End(const upb_MessageReservedRange* r) {
+ return r->end;
+}
+
+upb_MessageReservedRange* _upb_MessageReservedRanges_New(
+ upb_DefBuilder* ctx, int n,
+ const UPB_DESC(DescriptorProto_ReservedRange) * const* protos,
+ const upb_MessageDef* m) {
+ upb_MessageReservedRange* r =
+ _upb_DefBuilder_Alloc(ctx, sizeof(upb_MessageReservedRange) * n);
+
+ for (int i = 0; i < n; i++) {
+ const int32_t start =
+ UPB_DESC(DescriptorProto_ReservedRange_start)(protos[i]);
+ const int32_t end = UPB_DESC(DescriptorProto_ReservedRange_end)(protos[i]);
+ const int32_t max = kUpb_MaxFieldNumber + 1;
+
+ // A full validation would also check that each range is disjoint, and that
+ // none of the fields overlap with the extension ranges, but we are just
+ // sanity checking here.
+ if (start < 1 || end <= start || end > max) {
+ _upb_DefBuilder_Errf(ctx,
+ "Reserved range (%d, %d) is invalid, message=%s\n",
+ (int)start, (int)end, upb_MessageDef_FullName(m));
+ }
+
+ r[i].start = start;
+ r[i].end = end;
+ }
+
+ return r;
+}
diff --git a/upb/upb/reflection/message_reserved_range.h b/upb/upb/reflection/message_reserved_range.h
new file mode 100644
index 0000000..6fa82d2
--- /dev/null
+++ b/upb/upb/reflection/message_reserved_range.h
@@ -0,0 +1,54 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// IWYU pragma: private, include "upb/reflection/def.h"
+
+#ifndef UPB_REFLECTION_MESSAGE_RESERVED_RANGE_H_
+#define UPB_REFLECTION_MESSAGE_RESERVED_RANGE_H_
+
+#include "upb/reflection/common.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int32_t upb_MessageReservedRange_Start(const upb_MessageReservedRange* r);
+int32_t upb_MessageReservedRange_End(const upb_MessageReservedRange* r);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_MESSAGE_RESERVED_RANGE_H_ */
diff --git a/upb/upb/reflection/method_def.c b/upb/upb/reflection/method_def.c
new file mode 100644
index 0000000..9429c4b
--- /dev/null
+++ b/upb/upb/reflection/method_def.c
@@ -0,0 +1,128 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/reflection/internal/method_def.h"
+
+#include "upb/reflection/def_type.h"
+#include "upb/reflection/internal/def_builder.h"
+#include "upb/reflection/service_def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+struct upb_MethodDef {
+ const UPB_DESC(MethodOptions) * opts;
+ upb_ServiceDef* service;
+ const char* full_name;
+ const upb_MessageDef* input_type;
+ const upb_MessageDef* output_type;
+ int index;
+ bool client_streaming;
+ bool server_streaming;
+};
+
+upb_MethodDef* _upb_MethodDef_At(const upb_MethodDef* m, int i) {
+ return (upb_MethodDef*)&m[i];
+}
+
+const upb_ServiceDef* upb_MethodDef_Service(const upb_MethodDef* m) {
+ return m->service;
+}
+
+const UPB_DESC(MethodOptions) * upb_MethodDef_Options(const upb_MethodDef* m) {
+ return m->opts;
+}
+
+bool upb_MethodDef_HasOptions(const upb_MethodDef* m) {
+ return m->opts != (void*)kUpbDefOptDefault;
+}
+
+const char* upb_MethodDef_FullName(const upb_MethodDef* m) {
+ return m->full_name;
+}
+
+const char* upb_MethodDef_Name(const upb_MethodDef* m) {
+ return _upb_DefBuilder_FullToShort(m->full_name);
+}
+
+int upb_MethodDef_Index(const upb_MethodDef* m) { return m->index; }
+
+const upb_MessageDef* upb_MethodDef_InputType(const upb_MethodDef* m) {
+ return m->input_type;
+}
+
+const upb_MessageDef* upb_MethodDef_OutputType(const upb_MethodDef* m) {
+ return m->output_type;
+}
+
+bool upb_MethodDef_ClientStreaming(const upb_MethodDef* m) {
+ return m->client_streaming;
+}
+
+bool upb_MethodDef_ServerStreaming(const upb_MethodDef* m) {
+ return m->server_streaming;
+}
+
+static void create_method(upb_DefBuilder* ctx,
+ const UPB_DESC(MethodDescriptorProto) * method_proto,
+ upb_ServiceDef* s, upb_MethodDef* m) {
+ upb_StringView name = UPB_DESC(MethodDescriptorProto_name)(method_proto);
+
+ m->service = s;
+ m->full_name =
+ _upb_DefBuilder_MakeFullName(ctx, upb_ServiceDef_FullName(s), name);
+ m->client_streaming =
+ UPB_DESC(MethodDescriptorProto_client_streaming)(method_proto);
+ m->server_streaming =
+ UPB_DESC(MethodDescriptorProto_server_streaming)(method_proto);
+ m->input_type = _upb_DefBuilder_Resolve(
+ ctx, m->full_name, m->full_name,
+ UPB_DESC(MethodDescriptorProto_input_type)(method_proto),
+ UPB_DEFTYPE_MSG);
+ m->output_type = _upb_DefBuilder_Resolve(
+ ctx, m->full_name, m->full_name,
+ UPB_DESC(MethodDescriptorProto_output_type)(method_proto),
+ UPB_DEFTYPE_MSG);
+
+ UPB_DEF_SET_OPTIONS(m->opts, MethodDescriptorProto, MethodOptions,
+ method_proto);
+}
+
+// Allocate and initialize an array of |n| method defs belonging to |s|.
+upb_MethodDef* _upb_MethodDefs_New(
+ upb_DefBuilder* ctx, int n,
+ const UPB_DESC(MethodDescriptorProto) * const* protos, upb_ServiceDef* s) {
+ upb_MethodDef* m = _upb_DefBuilder_Alloc(ctx, sizeof(upb_MethodDef) * n);
+ for (int i = 0; i < n; i++) {
+ create_method(ctx, protos[i], s, &m[i]);
+ m[i].index = i;
+ }
+ return m;
+}
diff --git a/upb/upb/reflection/method_def.h b/upb/upb/reflection/method_def.h
new file mode 100644
index 0000000..34c5cff
--- /dev/null
+++ b/upb/upb/reflection/method_def.h
@@ -0,0 +1,62 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// IWYU pragma: private, include "upb/reflection/def.h"
+
+#ifndef UPB_REFLECTION_METHOD_DEF_H_
+#define UPB_REFLECTION_METHOD_DEF_H_
+
+#include "upb/reflection/common.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+bool upb_MethodDef_ClientStreaming(const upb_MethodDef* m);
+const char* upb_MethodDef_FullName(const upb_MethodDef* m);
+bool upb_MethodDef_HasOptions(const upb_MethodDef* m);
+int upb_MethodDef_Index(const upb_MethodDef* m);
+const upb_MessageDef* upb_MethodDef_InputType(const upb_MethodDef* m);
+const char* upb_MethodDef_Name(const upb_MethodDef* m);
+const UPB_DESC(MethodOptions) * upb_MethodDef_Options(const upb_MethodDef* m);
+const upb_MessageDef* upb_MethodDef_OutputType(const upb_MethodDef* m);
+bool upb_MethodDef_ServerStreaming(const upb_MethodDef* m);
+const upb_ServiceDef* upb_MethodDef_Service(const upb_MethodDef* m);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_METHOD_DEF_H_ */
diff --git a/upb/upb/reflection/oneof_def.c b/upb/upb/reflection/oneof_def.c
new file mode 100644
index 0000000..0468fa9
--- /dev/null
+++ b/upb/upb/reflection/oneof_def.c
@@ -0,0 +1,231 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/reflection/internal/oneof_def.h"
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "upb/hash/int_table.h"
+#include "upb/hash/str_table.h"
+#include "upb/reflection/def_type.h"
+#include "upb/reflection/internal/def_builder.h"
+#include "upb/reflection/internal/field_def.h"
+#include "upb/reflection/internal/message_def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+struct upb_OneofDef {
+ const UPB_DESC(OneofOptions) * opts;
+ const upb_MessageDef* parent;
+ const char* full_name;
+ int field_count;
+ bool synthetic;
+ const upb_FieldDef** fields;
+ upb_strtable ntof; // lookup a field by name
+ upb_inttable itof; // lookup a field by number (index)
+#if UINTPTR_MAX == 0xffffffff
+ uint32_t padding; // Increase size to a multiple of 8.
+#endif
+};
+
+upb_OneofDef* _upb_OneofDef_At(const upb_OneofDef* o, int i) {
+ return (upb_OneofDef*)&o[i];
+}
+
+const UPB_DESC(OneofOptions) * upb_OneofDef_Options(const upb_OneofDef* o) {
+ return o->opts;
+}
+
+bool upb_OneofDef_HasOptions(const upb_OneofDef* o) {
+ return o->opts != (void*)kUpbDefOptDefault;
+}
+
+const char* upb_OneofDef_FullName(const upb_OneofDef* o) {
+ return o->full_name;
+}
+
+const char* upb_OneofDef_Name(const upb_OneofDef* o) {
+ return _upb_DefBuilder_FullToShort(o->full_name);
+}
+
+const upb_MessageDef* upb_OneofDef_ContainingType(const upb_OneofDef* o) {
+ return o->parent;
+}
+
+int upb_OneofDef_FieldCount(const upb_OneofDef* o) { return o->field_count; }
+
+const upb_FieldDef* upb_OneofDef_Field(const upb_OneofDef* o, int i) {
+ UPB_ASSERT(i < o->field_count);
+ return o->fields[i];
+}
+
+int upb_OneofDef_numfields(const upb_OneofDef* o) { return o->field_count; }
+
+uint32_t upb_OneofDef_Index(const upb_OneofDef* o) {
+ // Compute index in our parent's array.
+ return o - upb_MessageDef_Oneof(o->parent, 0);
+}
+
+bool upb_OneofDef_IsSynthetic(const upb_OneofDef* o) { return o->synthetic; }
+
+const upb_FieldDef* upb_OneofDef_LookupNameWithSize(const upb_OneofDef* o,
+ const char* name,
+ size_t size) {
+ upb_value val;
+ return upb_strtable_lookup2(&o->ntof, name, size, &val)
+ ? upb_value_getptr(val)
+ : NULL;
+}
+
+const upb_FieldDef* upb_OneofDef_LookupName(const upb_OneofDef* o,
+ const char* name) {
+ return upb_OneofDef_LookupNameWithSize(o, name, strlen(name));
+}
+
+const upb_FieldDef* upb_OneofDef_LookupNumber(const upb_OneofDef* o,
+ uint32_t num) {
+ upb_value val;
+ return upb_inttable_lookup(&o->itof, num, &val) ? upb_value_getptr(val)
+ : NULL;
+}
+
+void _upb_OneofDef_Insert(upb_DefBuilder* ctx, upb_OneofDef* o,
+ const upb_FieldDef* f, const char* name,
+ size_t size) {
+ o->field_count++;
+ if (_upb_FieldDef_IsProto3Optional(f)) o->synthetic = true;
+
+ const int number = upb_FieldDef_Number(f);
+ const upb_value v = upb_value_constptr(f);
+
+ // TODO(salo): This lookup is unfortunate because we also perform it when
+ // inserting into the message's table. Unfortunately that step occurs after
+ // this one and moving things around could be tricky so let's leave it for
+ // a future refactoring.
+ const bool number_exists = upb_inttable_lookup(&o->itof, number, NULL);
+ if (UPB_UNLIKELY(number_exists)) {
+ _upb_DefBuilder_Errf(ctx, "oneof fields have the same number (%d)", number);
+ }
+
+ // TODO(salo): More redundant work happening here.
+ const bool name_exists = upb_strtable_lookup2(&o->ntof, name, size, NULL);
+ if (UPB_UNLIKELY(name_exists)) {
+ _upb_DefBuilder_Errf(ctx, "oneof fields have the same name (%.*s)",
+ (int)size, name);
+ }
+
+ const bool ok = upb_inttable_insert(&o->itof, number, v, ctx->arena) &&
+ upb_strtable_insert(&o->ntof, name, size, v, ctx->arena);
+ if (UPB_UNLIKELY(!ok)) {
+ _upb_DefBuilder_OomErr(ctx);
+ }
+}
+
+// Returns the synthetic count.
+size_t _upb_OneofDefs_Finalize(upb_DefBuilder* ctx, upb_MessageDef* m) {
+ int synthetic_count = 0;
+
+ for (int i = 0; i < upb_MessageDef_OneofCount(m); i++) {
+ upb_OneofDef* o = (upb_OneofDef*)upb_MessageDef_Oneof(m, i);
+
+ if (o->synthetic && o->field_count != 1) {
+ _upb_DefBuilder_Errf(ctx,
+ "Synthetic oneofs must have one field, not %d: %s",
+ o->field_count, upb_OneofDef_Name(o));
+ }
+
+ if (o->synthetic) {
+ synthetic_count++;
+ } else if (synthetic_count != 0) {
+ _upb_DefBuilder_Errf(
+ ctx, "Synthetic oneofs must be after all other oneofs: %s",
+ upb_OneofDef_Name(o));
+ }
+
+ o->fields =
+ _upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef*) * o->field_count);
+ o->field_count = 0;
+ }
+
+ for (int i = 0; i < upb_MessageDef_FieldCount(m); i++) {
+ const upb_FieldDef* f = upb_MessageDef_Field(m, i);
+ upb_OneofDef* o = (upb_OneofDef*)upb_FieldDef_ContainingOneof(f);
+ if (o) {
+ o->fields[o->field_count++] = f;
+ }
+ }
+
+ return synthetic_count;
+}
+
+static void create_oneofdef(upb_DefBuilder* ctx, upb_MessageDef* m,
+ const UPB_DESC(OneofDescriptorProto) * oneof_proto,
+ const upb_OneofDef* _o) {
+ upb_OneofDef* o = (upb_OneofDef*)_o;
+ upb_StringView name = UPB_DESC(OneofDescriptorProto_name)(oneof_proto);
+
+ o->parent = m;
+ o->full_name =
+ _upb_DefBuilder_MakeFullName(ctx, upb_MessageDef_FullName(m), name);
+ o->field_count = 0;
+ o->synthetic = false;
+
+ UPB_DEF_SET_OPTIONS(o->opts, OneofDescriptorProto, OneofOptions, oneof_proto);
+
+ if (upb_MessageDef_FindByNameWithSize(m, name.data, name.size, NULL, NULL)) {
+ _upb_DefBuilder_Errf(ctx, "duplicate oneof name (%s)", o->full_name);
+ }
+
+ upb_value v = _upb_DefType_Pack(o, UPB_DEFTYPE_ONEOF);
+ bool ok = _upb_MessageDef_Insert(m, name.data, name.size, v, ctx->arena);
+ if (!ok) _upb_DefBuilder_OomErr(ctx);
+
+ ok = upb_inttable_init(&o->itof, ctx->arena);
+ if (!ok) _upb_DefBuilder_OomErr(ctx);
+
+ ok = upb_strtable_init(&o->ntof, 4, ctx->arena);
+ if (!ok) _upb_DefBuilder_OomErr(ctx);
+}
+
+// Allocate and initialize an array of |n| oneof defs.
+upb_OneofDef* _upb_OneofDefs_New(
+ upb_DefBuilder* ctx, int n,
+ const UPB_DESC(OneofDescriptorProto) * const* protos, upb_MessageDef* m) {
+ _upb_DefType_CheckPadding(sizeof(upb_OneofDef));
+
+ upb_OneofDef* o = _upb_DefBuilder_Alloc(ctx, sizeof(upb_OneofDef) * n);
+ for (int i = 0; i < n; i++) {
+ create_oneofdef(ctx, m, protos[i], &o[i]);
+ }
+ return o;
+}
diff --git a/upb/upb/reflection/oneof_def.h b/upb/upb/reflection/oneof_def.h
new file mode 100644
index 0000000..9b570d3
--- /dev/null
+++ b/upb/upb/reflection/oneof_def.h
@@ -0,0 +1,70 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// IWYU pragma: private, include "upb/reflection/def.h"
+
+#ifndef UPB_REFLECTION_ONEOF_DEF_H_
+#define UPB_REFLECTION_ONEOF_DEF_H_
+
+#include "upb/reflection/common.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+UPB_API const upb_MessageDef* upb_OneofDef_ContainingType(
+ const upb_OneofDef* o);
+UPB_API const upb_FieldDef* upb_OneofDef_Field(const upb_OneofDef* o, int i);
+UPB_API int upb_OneofDef_FieldCount(const upb_OneofDef* o);
+const char* upb_OneofDef_FullName(const upb_OneofDef* o);
+bool upb_OneofDef_HasOptions(const upb_OneofDef* o);
+uint32_t upb_OneofDef_Index(const upb_OneofDef* o);
+bool upb_OneofDef_IsSynthetic(const upb_OneofDef* o);
+const upb_FieldDef* upb_OneofDef_LookupName(const upb_OneofDef* o,
+ const char* name);
+const upb_FieldDef* upb_OneofDef_LookupNameWithSize(const upb_OneofDef* o,
+ const char* name,
+ size_t size);
+const upb_FieldDef* upb_OneofDef_LookupNumber(const upb_OneofDef* o,
+ uint32_t num);
+UPB_API const char* upb_OneofDef_Name(const upb_OneofDef* o);
+int upb_OneofDef_numfields(const upb_OneofDef* o);
+const UPB_DESC(OneofOptions) * upb_OneofDef_Options(const upb_OneofDef* o);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_ONEOF_DEF_H_ */
diff --git a/upb/upb/reflection/service_def.c b/upb/upb/reflection/service_def.c
new file mode 100644
index 0000000..658cf02
--- /dev/null
+++ b/upb/upb/reflection/service_def.c
@@ -0,0 +1,132 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/reflection/internal/service_def.h"
+
+#include "upb/reflection/def_type.h"
+#include "upb/reflection/internal/def_builder.h"
+#include "upb/reflection/internal/file_def.h"
+#include "upb/reflection/internal/method_def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+struct upb_ServiceDef {
+ const UPB_DESC(ServiceOptions) * opts;
+ const upb_FileDef* file;
+ const char* full_name;
+ upb_MethodDef* methods;
+ int method_count;
+ int index;
+};
+
+upb_ServiceDef* _upb_ServiceDef_At(const upb_ServiceDef* s, int index) {
+ return (upb_ServiceDef*)&s[index];
+}
+
+const UPB_DESC(ServiceOptions) *
+ upb_ServiceDef_Options(const upb_ServiceDef* s) {
+ return s->opts;
+}
+
+bool upb_ServiceDef_HasOptions(const upb_ServiceDef* s) {
+ return s->opts != (void*)kUpbDefOptDefault;
+}
+
+const char* upb_ServiceDef_FullName(const upb_ServiceDef* s) {
+ return s->full_name;
+}
+
+const char* upb_ServiceDef_Name(const upb_ServiceDef* s) {
+ return _upb_DefBuilder_FullToShort(s->full_name);
+}
+
+int upb_ServiceDef_Index(const upb_ServiceDef* s) { return s->index; }
+
+const upb_FileDef* upb_ServiceDef_File(const upb_ServiceDef* s) {
+ return s->file;
+}
+
+int upb_ServiceDef_MethodCount(const upb_ServiceDef* s) {
+ return s->method_count;
+}
+
+const upb_MethodDef* upb_ServiceDef_Method(const upb_ServiceDef* s, int i) {
+ return (i < 0 || i >= s->method_count) ? NULL
+ : _upb_MethodDef_At(s->methods, i);
+}
+
+const upb_MethodDef* upb_ServiceDef_FindMethodByName(const upb_ServiceDef* s,
+ const char* name) {
+ for (int i = 0; i < s->method_count; i++) {
+ const upb_MethodDef* m = _upb_MethodDef_At(s->methods, i);
+ if (strcmp(name, upb_MethodDef_Name(m)) == 0) {
+ return m;
+ }
+ }
+ return NULL;
+}
+
+static void create_service(upb_DefBuilder* ctx,
+ const UPB_DESC(ServiceDescriptorProto) * svc_proto,
+ upb_ServiceDef* s) {
+ upb_StringView name;
+ size_t n;
+
+ // Must happen before _upb_DefBuilder_Add()
+ s->file = _upb_DefBuilder_File(ctx);
+
+ name = UPB_DESC(ServiceDescriptorProto_name)(svc_proto);
+ const char* package = _upb_FileDef_RawPackage(s->file);
+ s->full_name = _upb_DefBuilder_MakeFullName(ctx, package, name);
+ _upb_DefBuilder_Add(ctx, s->full_name,
+ _upb_DefType_Pack(s, UPB_DEFTYPE_SERVICE));
+
+ const UPB_DESC(MethodDescriptorProto)* const* methods =
+ UPB_DESC(ServiceDescriptorProto_method)(svc_proto, &n);
+ s->method_count = n;
+ s->methods = _upb_MethodDefs_New(ctx, n, methods, s);
+
+ UPB_DEF_SET_OPTIONS(s->opts, ServiceDescriptorProto, ServiceOptions,
+ svc_proto);
+}
+
+upb_ServiceDef* _upb_ServiceDefs_New(
+ upb_DefBuilder* ctx, int n,
+ const UPB_DESC(ServiceDescriptorProto) * const* protos) {
+ _upb_DefType_CheckPadding(sizeof(upb_ServiceDef));
+
+ upb_ServiceDef* s = _upb_DefBuilder_Alloc(ctx, sizeof(upb_ServiceDef) * n);
+ for (int i = 0; i < n; i++) {
+ create_service(ctx, protos[i], &s[i]);
+ s[i].index = i;
+ }
+ return s;
+}
diff --git a/upb/upb/reflection/service_def.h b/upb/upb/reflection/service_def.h
new file mode 100644
index 0000000..6674994
--- /dev/null
+++ b/upb/upb/reflection/service_def.h
@@ -0,0 +1,63 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// IWYU pragma: private, include "upb/reflection/def.h"
+
+#ifndef UPB_REFLECTION_SERVICE_DEF_H_
+#define UPB_REFLECTION_SERVICE_DEF_H_
+
+#include "upb/reflection/common.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+const upb_FileDef* upb_ServiceDef_File(const upb_ServiceDef* s);
+const upb_MethodDef* upb_ServiceDef_FindMethodByName(const upb_ServiceDef* s,
+ const char* name);
+const char* upb_ServiceDef_FullName(const upb_ServiceDef* s);
+bool upb_ServiceDef_HasOptions(const upb_ServiceDef* s);
+int upb_ServiceDef_Index(const upb_ServiceDef* s);
+const upb_MethodDef* upb_ServiceDef_Method(const upb_ServiceDef* s, int i);
+int upb_ServiceDef_MethodCount(const upb_ServiceDef* s);
+const char* upb_ServiceDef_Name(const upb_ServiceDef* s);
+const UPB_DESC(ServiceOptions) *
+ upb_ServiceDef_Options(const upb_ServiceDef* s);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_REFLECTION_SERVICE_DEF_H_ */
diff --git a/upb/upb/reflection/stage0/google/protobuf/descriptor.upb.c b/upb/upb/reflection/stage0/google/protobuf/descriptor.upb.c
new file mode 100644
index 0000000..cc200d8
--- /dev/null
+++ b/upb/upb/reflection/stage0/google/protobuf/descriptor.upb.c
@@ -0,0 +1,413 @@
+#include <stddef.h>
+#include "upb/generated_code_support.h"
+#include "google/protobuf/descriptor.upb.h"
+
+static upb_Arena* upb_BootstrapArena() {
+ static upb_Arena* arena = NULL;
+ if (!arena) arena = upb_Arena_New();
+ return arena;
+}
+
+const upb_MiniTable* google_protobuf_FileDescriptorSet_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$G";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 1), google_protobuf_FileDescriptorProto_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_FileDescriptorProto_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$11EGGGG33<<11";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 4), google_protobuf_DescriptorProto_msg_init());
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 5), google_protobuf_EnumDescriptorProto_msg_init());
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 6), google_protobuf_ServiceDescriptorProto_msg_init());
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 7), google_protobuf_FieldDescriptorProto_msg_init());
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 8), google_protobuf_FileOptions_msg_init());
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 9), google_protobuf_SourceCodeInfo_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_DescriptorProto_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$1GGGGG3GGE";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 2), google_protobuf_FieldDescriptorProto_msg_init());
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 6), google_protobuf_FieldDescriptorProto_msg_init());
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 3), google_protobuf_DescriptorProto_msg_init());
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 4), google_protobuf_EnumDescriptorProto_msg_init());
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 5), google_protobuf_DescriptorProto_ExtensionRange_msg_init());
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 8), google_protobuf_OneofDescriptorProto_msg_init());
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 7), google_protobuf_MessageOptions_msg_init());
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 9), google_protobuf_DescriptorProto_ReservedRange_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_DescriptorProto_ExtensionRange_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$((3";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 3), google_protobuf_ExtensionRangeOptions_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_DescriptorProto_ReservedRange_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$((";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_ExtensionRangeOptions_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$Pf~G";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 999), google_protobuf_UninterpretedOption_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_FieldDescriptorProto_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$11(44113(1f/";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubEnum(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 4), google_protobuf_FieldDescriptorProto_Label_enum_init());
+ upb_MiniTable_SetSubEnum(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 5), google_protobuf_FieldDescriptorProto_Type_enum_init());
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 8), google_protobuf_FieldOptions_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_OneofDescriptorProto_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$13";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 2), google_protobuf_OneofOptions_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_EnumDescriptorProto_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$1G3GE";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 2), google_protobuf_EnumValueDescriptorProto_msg_init());
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 3), google_protobuf_EnumOptions_msg_init());
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 4), google_protobuf_EnumDescriptorProto_EnumReservedRange_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_EnumDescriptorProto_EnumReservedRange_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$((";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_EnumValueDescriptorProto_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$1(3";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 3), google_protobuf_EnumValueOptions_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_ServiceDescriptorProto_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$1G3";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 2), google_protobuf_MethodDescriptorProto_msg_init());
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 3), google_protobuf_ServiceOptions_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_MethodDescriptorProto_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$1113//";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 4), google_protobuf_MethodOptions_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_FileOptions_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$P1f14/1d///a/b/c/c/d11a111/a11y|G";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubEnum(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 9), google_protobuf_FileOptions_OptimizeMode_enum_init());
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 999), google_protobuf_UninterpretedOption_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_MessageOptions_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$P///c/c/{}G";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 999), google_protobuf_UninterpretedOption_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_FieldOptions_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$P4//a/4c/d//v}G";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubEnum(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 1), google_protobuf_FieldOptions_CType_enum_init());
+ upb_MiniTable_SetSubEnum(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 6), google_protobuf_FieldOptions_JSType_enum_init());
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 999), google_protobuf_UninterpretedOption_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_OneofOptions_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$Pf~G";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 999), google_protobuf_UninterpretedOption_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_EnumOptions_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$Pa//b/`~G";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 999), google_protobuf_UninterpretedOption_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_EnumValueOptions_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$P/e~G";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 999), google_protobuf_UninterpretedOption_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_ServiceOptions_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$P``/e}G";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 999), google_protobuf_UninterpretedOption_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_MethodOptions_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$P``/4d}G";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubEnum(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 34), google_protobuf_MethodOptions_IdempotencyLevel_enum_init());
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 999), google_protobuf_UninterpretedOption_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_UninterpretedOption_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$aG1,+ 01";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 2), google_protobuf_UninterpretedOption_NamePart_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_UninterpretedOption_NamePart_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$1N/N";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_SourceCodeInfo_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$G";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 1), google_protobuf_SourceCodeInfo_Location_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_SourceCodeInfo_Location_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$<M<M11aE";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_GeneratedCodeInfo_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$G";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 1), google_protobuf_GeneratedCodeInfo_Annotation_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_GeneratedCodeInfo_Annotation_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$<M1((4";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubEnum(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 5), google_protobuf_GeneratedCodeInfo_Annotation_Semantic_enum_init());
+ return mini_table;
+}
+
+const upb_MiniTableEnum* google_protobuf_FieldDescriptorProto_Label_enum_init() {
+ static const upb_MiniTableEnum* mini_table = NULL;
+ static const char* mini_descriptor = "!0";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTableEnum_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ return mini_table;
+}
+
+const upb_MiniTableEnum* google_protobuf_FieldDescriptorProto_Type_enum_init() {
+ static const upb_MiniTableEnum* mini_table = NULL;
+ static const char* mini_descriptor = "!@AA1";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTableEnum_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ return mini_table;
+}
+
+const upb_MiniTableEnum* google_protobuf_FieldOptions_CType_enum_init() {
+ static const upb_MiniTableEnum* mini_table = NULL;
+ static const char* mini_descriptor = "!)";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTableEnum_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ return mini_table;
+}
+
+const upb_MiniTableEnum* google_protobuf_FieldOptions_JSType_enum_init() {
+ static const upb_MiniTableEnum* mini_table = NULL;
+ static const char* mini_descriptor = "!)";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTableEnum_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ return mini_table;
+}
+
+const upb_MiniTableEnum* google_protobuf_FieldOptions_OptionRetention_enum_init() {
+ static const upb_MiniTableEnum* mini_table = NULL;
+ static const char* mini_descriptor = "!)";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTableEnum_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ return mini_table;
+}
+
+const upb_MiniTableEnum* google_protobuf_FieldOptions_OptionTargetType_enum_init() {
+ static const upb_MiniTableEnum* mini_table = NULL;
+ static const char* mini_descriptor = "!AA";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTableEnum_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ return mini_table;
+}
+
+const upb_MiniTableEnum* google_protobuf_FileOptions_OptimizeMode_enum_init() {
+ static const upb_MiniTableEnum* mini_table = NULL;
+ static const char* mini_descriptor = "!0";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTableEnum_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ return mini_table;
+}
+
+const upb_MiniTableEnum* google_protobuf_GeneratedCodeInfo_Annotation_Semantic_enum_init() {
+ static const upb_MiniTableEnum* mini_table = NULL;
+ static const char* mini_descriptor = "!)";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTableEnum_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ return mini_table;
+}
+
+const upb_MiniTableEnum* google_protobuf_MethodOptions_IdempotencyLevel_enum_init() {
+ static const upb_MiniTableEnum* mini_table = NULL;
+ static const char* mini_descriptor = "!)";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTableEnum_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ return mini_table;
+}
diff --git a/upb/upb/reflection/stage0/google/protobuf/descriptor.upb.h b/upb/upb/reflection/stage0/google/protobuf/descriptor.upb.h
new file mode 100644
index 0000000..a5e0490
--- /dev/null
+++ b/upb/upb/reflection/stage0/google/protobuf/descriptor.upb.h
@@ -0,0 +1,4697 @@
+/* This file was generated by upbc (the upb compiler) from the input
+ * file:
+ *
+ * google/protobuf/descriptor.proto
+ *
+ * Do not edit -- your changes will be discarded when the file is
+ * regenerated. */
+
+#ifndef GOOGLE_PROTOBUF_DESCRIPTOR_PROTO_UPB_H_
+#define GOOGLE_PROTOBUF_DESCRIPTOR_PROTO_UPB_H_
+
+#include "upb/generated_code_support.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct google_protobuf_FileDescriptorSet google_protobuf_FileDescriptorSet;
+typedef struct google_protobuf_FileDescriptorProto google_protobuf_FileDescriptorProto;
+typedef struct google_protobuf_DescriptorProto google_protobuf_DescriptorProto;
+typedef struct google_protobuf_DescriptorProto_ExtensionRange google_protobuf_DescriptorProto_ExtensionRange;
+typedef struct google_protobuf_DescriptorProto_ReservedRange google_protobuf_DescriptorProto_ReservedRange;
+typedef struct google_protobuf_ExtensionRangeOptions google_protobuf_ExtensionRangeOptions;
+typedef struct google_protobuf_FieldDescriptorProto google_protobuf_FieldDescriptorProto;
+typedef struct google_protobuf_OneofDescriptorProto google_protobuf_OneofDescriptorProto;
+typedef struct google_protobuf_EnumDescriptorProto google_protobuf_EnumDescriptorProto;
+typedef struct google_protobuf_EnumDescriptorProto_EnumReservedRange google_protobuf_EnumDescriptorProto_EnumReservedRange;
+typedef struct google_protobuf_EnumValueDescriptorProto google_protobuf_EnumValueDescriptorProto;
+typedef struct google_protobuf_ServiceDescriptorProto google_protobuf_ServiceDescriptorProto;
+typedef struct google_protobuf_MethodDescriptorProto google_protobuf_MethodDescriptorProto;
+typedef struct google_protobuf_FileOptions google_protobuf_FileOptions;
+typedef struct google_protobuf_MessageOptions google_protobuf_MessageOptions;
+typedef struct google_protobuf_FieldOptions google_protobuf_FieldOptions;
+typedef struct google_protobuf_OneofOptions google_protobuf_OneofOptions;
+typedef struct google_protobuf_EnumOptions google_protobuf_EnumOptions;
+typedef struct google_protobuf_EnumValueOptions google_protobuf_EnumValueOptions;
+typedef struct google_protobuf_ServiceOptions google_protobuf_ServiceOptions;
+typedef struct google_protobuf_MethodOptions google_protobuf_MethodOptions;
+typedef struct google_protobuf_UninterpretedOption google_protobuf_UninterpretedOption;
+typedef struct google_protobuf_UninterpretedOption_NamePart google_protobuf_UninterpretedOption_NamePart;
+typedef struct google_protobuf_SourceCodeInfo google_protobuf_SourceCodeInfo;
+typedef struct google_protobuf_SourceCodeInfo_Location google_protobuf_SourceCodeInfo_Location;
+typedef struct google_protobuf_GeneratedCodeInfo google_protobuf_GeneratedCodeInfo;
+typedef struct google_protobuf_GeneratedCodeInfo_Annotation google_protobuf_GeneratedCodeInfo_Annotation;
+extern const upb_MiniTable* google_protobuf_FileDescriptorSet_msg_init();
+extern const upb_MiniTable* google_protobuf_FileDescriptorProto_msg_init();
+extern const upb_MiniTable* google_protobuf_DescriptorProto_msg_init();
+extern const upb_MiniTable* google_protobuf_DescriptorProto_ExtensionRange_msg_init();
+extern const upb_MiniTable* google_protobuf_DescriptorProto_ReservedRange_msg_init();
+extern const upb_MiniTable* google_protobuf_ExtensionRangeOptions_msg_init();
+extern const upb_MiniTable* google_protobuf_FieldDescriptorProto_msg_init();
+extern const upb_MiniTable* google_protobuf_OneofDescriptorProto_msg_init();
+extern const upb_MiniTable* google_protobuf_EnumDescriptorProto_msg_init();
+extern const upb_MiniTable* google_protobuf_EnumDescriptorProto_EnumReservedRange_msg_init();
+extern const upb_MiniTable* google_protobuf_EnumValueDescriptorProto_msg_init();
+extern const upb_MiniTable* google_protobuf_ServiceDescriptorProto_msg_init();
+extern const upb_MiniTable* google_protobuf_MethodDescriptorProto_msg_init();
+extern const upb_MiniTable* google_protobuf_FileOptions_msg_init();
+extern const upb_MiniTable* google_protobuf_MessageOptions_msg_init();
+extern const upb_MiniTable* google_protobuf_FieldOptions_msg_init();
+extern const upb_MiniTable* google_protobuf_OneofOptions_msg_init();
+extern const upb_MiniTable* google_protobuf_EnumOptions_msg_init();
+extern const upb_MiniTable* google_protobuf_EnumValueOptions_msg_init();
+extern const upb_MiniTable* google_protobuf_ServiceOptions_msg_init();
+extern const upb_MiniTable* google_protobuf_MethodOptions_msg_init();
+extern const upb_MiniTable* google_protobuf_UninterpretedOption_msg_init();
+extern const upb_MiniTable* google_protobuf_UninterpretedOption_NamePart_msg_init();
+extern const upb_MiniTable* google_protobuf_SourceCodeInfo_msg_init();
+extern const upb_MiniTable* google_protobuf_SourceCodeInfo_Location_msg_init();
+extern const upb_MiniTable* google_protobuf_GeneratedCodeInfo_msg_init();
+extern const upb_MiniTable* google_protobuf_GeneratedCodeInfo_Annotation_msg_init();
+
+typedef enum {
+ google_protobuf_FieldDescriptorProto_TYPE_DOUBLE = 1,
+ google_protobuf_FieldDescriptorProto_TYPE_FLOAT = 2,
+ google_protobuf_FieldDescriptorProto_TYPE_INT64 = 3,
+ google_protobuf_FieldDescriptorProto_TYPE_UINT64 = 4,
+ google_protobuf_FieldDescriptorProto_TYPE_INT32 = 5,
+ google_protobuf_FieldDescriptorProto_TYPE_FIXED64 = 6,
+ google_protobuf_FieldDescriptorProto_TYPE_FIXED32 = 7,
+ google_protobuf_FieldDescriptorProto_TYPE_BOOL = 8,
+ google_protobuf_FieldDescriptorProto_TYPE_STRING = 9,
+ google_protobuf_FieldDescriptorProto_TYPE_GROUP = 10,
+ google_protobuf_FieldDescriptorProto_TYPE_MESSAGE = 11,
+ google_protobuf_FieldDescriptorProto_TYPE_BYTES = 12,
+ google_protobuf_FieldDescriptorProto_TYPE_UINT32 = 13,
+ google_protobuf_FieldDescriptorProto_TYPE_ENUM = 14,
+ google_protobuf_FieldDescriptorProto_TYPE_SFIXED32 = 15,
+ google_protobuf_FieldDescriptorProto_TYPE_SFIXED64 = 16,
+ google_protobuf_FieldDescriptorProto_TYPE_SINT32 = 17,
+ google_protobuf_FieldDescriptorProto_TYPE_SINT64 = 18
+} google_protobuf_FieldDescriptorProto_Type;
+
+typedef enum {
+ google_protobuf_FieldDescriptorProto_LABEL_OPTIONAL = 1,
+ google_protobuf_FieldDescriptorProto_LABEL_REQUIRED = 2,
+ google_protobuf_FieldDescriptorProto_LABEL_REPEATED = 3
+} google_protobuf_FieldDescriptorProto_Label;
+
+typedef enum {
+ google_protobuf_FileOptions_SPEED = 1,
+ google_protobuf_FileOptions_CODE_SIZE = 2,
+ google_protobuf_FileOptions_LITE_RUNTIME = 3
+} google_protobuf_FileOptions_OptimizeMode;
+
+typedef enum {
+ google_protobuf_FieldOptions_STRING = 0,
+ google_protobuf_FieldOptions_CORD = 1,
+ google_protobuf_FieldOptions_STRING_PIECE = 2
+} google_protobuf_FieldOptions_CType;
+
+typedef enum {
+ google_protobuf_FieldOptions_JS_NORMAL = 0,
+ google_protobuf_FieldOptions_JS_STRING = 1,
+ google_protobuf_FieldOptions_JS_NUMBER = 2
+} google_protobuf_FieldOptions_JSType;
+
+typedef enum {
+ google_protobuf_FieldOptions_RETENTION_UNKNOWN = 0,
+ google_protobuf_FieldOptions_RETENTION_RUNTIME = 1,
+ google_protobuf_FieldOptions_RETENTION_SOURCE = 2
+} google_protobuf_FieldOptions_OptionRetention;
+
+typedef enum {
+ google_protobuf_FieldOptions_TARGET_TYPE_UNKNOWN = 0,
+ google_protobuf_FieldOptions_TARGET_TYPE_FILE = 1,
+ google_protobuf_FieldOptions_TARGET_TYPE_EXTENSION_RANGE = 2,
+ google_protobuf_FieldOptions_TARGET_TYPE_MESSAGE = 3,
+ google_protobuf_FieldOptions_TARGET_TYPE_FIELD = 4,
+ google_protobuf_FieldOptions_TARGET_TYPE_ONEOF = 5,
+ google_protobuf_FieldOptions_TARGET_TYPE_ENUM = 6,
+ google_protobuf_FieldOptions_TARGET_TYPE_ENUM_ENTRY = 7,
+ google_protobuf_FieldOptions_TARGET_TYPE_SERVICE = 8,
+ google_protobuf_FieldOptions_TARGET_TYPE_METHOD = 9
+} google_protobuf_FieldOptions_OptionTargetType;
+
+typedef enum {
+ google_protobuf_MethodOptions_IDEMPOTENCY_UNKNOWN = 0,
+ google_protobuf_MethodOptions_NO_SIDE_EFFECTS = 1,
+ google_protobuf_MethodOptions_IDEMPOTENT = 2
+} google_protobuf_MethodOptions_IdempotencyLevel;
+
+typedef enum {
+ google_protobuf_GeneratedCodeInfo_Annotation_NONE = 0,
+ google_protobuf_GeneratedCodeInfo_Annotation_SET = 1,
+ google_protobuf_GeneratedCodeInfo_Annotation_ALIAS = 2
+} google_protobuf_GeneratedCodeInfo_Annotation_Semantic;
+
+
+extern const upb_MiniTableEnum* google_protobuf_FieldDescriptorProto_Type_enum_init();
+extern const upb_MiniTableEnum* google_protobuf_FieldDescriptorProto_Label_enum_init();
+extern const upb_MiniTableEnum* google_protobuf_FileOptions_OptimizeMode_enum_init();
+extern const upb_MiniTableEnum* google_protobuf_FieldOptions_CType_enum_init();
+extern const upb_MiniTableEnum* google_protobuf_FieldOptions_JSType_enum_init();
+extern const upb_MiniTableEnum* google_protobuf_FieldOptions_OptionRetention_enum_init();
+extern const upb_MiniTableEnum* google_protobuf_FieldOptions_OptionTargetType_enum_init();
+extern const upb_MiniTableEnum* google_protobuf_MethodOptions_IdempotencyLevel_enum_init();
+extern const upb_MiniTableEnum* google_protobuf_GeneratedCodeInfo_Annotation_Semantic_enum_init();
+
+/* google.protobuf.FileDescriptorSet */
+
+UPB_INLINE google_protobuf_FileDescriptorSet* google_protobuf_FileDescriptorSet_new(upb_Arena* arena) {
+ return (google_protobuf_FileDescriptorSet*)_upb_Message_New(google_protobuf_FileDescriptorSet_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_FileDescriptorSet* google_protobuf_FileDescriptorSet_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_FileDescriptorSet* ret = google_protobuf_FileDescriptorSet_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_FileDescriptorSet_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_FileDescriptorSet* google_protobuf_FileDescriptorSet_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_FileDescriptorSet* ret = google_protobuf_FileDescriptorSet_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_FileDescriptorSet_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_FileDescriptorSet_serialize(const google_protobuf_FileDescriptorSet* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_FileDescriptorSet_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_FileDescriptorSet_serialize_ex(const google_protobuf_FileDescriptorSet* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_FileDescriptorSet_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_FileDescriptorSet_clear_file(google_protobuf_FileDescriptorSet* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorSet_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_FileDescriptorProto* const* google_protobuf_FileDescriptorSet_file(const google_protobuf_FileDescriptorSet* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorSet_msg_init(), 1);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_FileDescriptorProto* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_FileDescriptorSet_has_file(const google_protobuf_FileDescriptorSet* msg) {
+ size_t size;
+ google_protobuf_FileDescriptorSet_file(msg, &size);
+ return size != 0;
+}
+
+UPB_INLINE google_protobuf_FileDescriptorProto** google_protobuf_FileDescriptorSet_mutable_file(google_protobuf_FileDescriptorSet* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorSet_msg_init(), 1);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_FileDescriptorProto**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_FileDescriptorProto** google_protobuf_FileDescriptorSet_resize_file(google_protobuf_FileDescriptorSet* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorSet_msg_init(), 1);
+ return (google_protobuf_FileDescriptorProto**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_FileDescriptorProto* google_protobuf_FileDescriptorSet_add_file(google_protobuf_FileDescriptorSet* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorSet_msg_init(), 1);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_FileDescriptorProto* sub = (struct google_protobuf_FileDescriptorProto*)_upb_Message_New(google_protobuf_FileDescriptorProto_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+
+/* google.protobuf.FileDescriptorProto */
+
+UPB_INLINE google_protobuf_FileDescriptorProto* google_protobuf_FileDescriptorProto_new(upb_Arena* arena) {
+ return (google_protobuf_FileDescriptorProto*)_upb_Message_New(google_protobuf_FileDescriptorProto_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_FileDescriptorProto* google_protobuf_FileDescriptorProto_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_FileDescriptorProto* ret = google_protobuf_FileDescriptorProto_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_FileDescriptorProto_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_FileDescriptorProto* google_protobuf_FileDescriptorProto_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_FileDescriptorProto* ret = google_protobuf_FileDescriptorProto_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_FileDescriptorProto_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_FileDescriptorProto_serialize(const google_protobuf_FileDescriptorProto* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_FileDescriptorProto_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_FileDescriptorProto_serialize_ex(const google_protobuf_FileDescriptorProto* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_FileDescriptorProto_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_FileDescriptorProto_clear_name(google_protobuf_FileDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_FileDescriptorProto_name(const google_protobuf_FileDescriptorProto* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 1);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileDescriptorProto_has_name(const google_protobuf_FileDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 1);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileDescriptorProto_clear_package(google_protobuf_FileDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 2);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_FileDescriptorProto_package(const google_protobuf_FileDescriptorProto* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 2);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileDescriptorProto_has_package(const google_protobuf_FileDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 2);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileDescriptorProto_clear_dependency(google_protobuf_FileDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 3);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView const* google_protobuf_FileDescriptorProto_dependency(const google_protobuf_FileDescriptorProto* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 3);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (upb_StringView const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_FileDescriptorProto_has_dependency(const google_protobuf_FileDescriptorProto* msg) {
+ size_t size;
+ google_protobuf_FileDescriptorProto_dependency(msg, &size);
+ return size != 0;
+}
+UPB_INLINE void google_protobuf_FileDescriptorProto_clear_message_type(google_protobuf_FileDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 4);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_DescriptorProto* const* google_protobuf_FileDescriptorProto_message_type(const google_protobuf_FileDescriptorProto* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 4);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_DescriptorProto* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_FileDescriptorProto_has_message_type(const google_protobuf_FileDescriptorProto* msg) {
+ size_t size;
+ google_protobuf_FileDescriptorProto_message_type(msg, &size);
+ return size != 0;
+}
+UPB_INLINE void google_protobuf_FileDescriptorProto_clear_enum_type(google_protobuf_FileDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 5);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_EnumDescriptorProto* const* google_protobuf_FileDescriptorProto_enum_type(const google_protobuf_FileDescriptorProto* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 5);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_EnumDescriptorProto* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_FileDescriptorProto_has_enum_type(const google_protobuf_FileDescriptorProto* msg) {
+ size_t size;
+ google_protobuf_FileDescriptorProto_enum_type(msg, &size);
+ return size != 0;
+}
+UPB_INLINE void google_protobuf_FileDescriptorProto_clear_service(google_protobuf_FileDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 6);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_ServiceDescriptorProto* const* google_protobuf_FileDescriptorProto_service(const google_protobuf_FileDescriptorProto* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 6);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_ServiceDescriptorProto* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_FileDescriptorProto_has_service(const google_protobuf_FileDescriptorProto* msg) {
+ size_t size;
+ google_protobuf_FileDescriptorProto_service(msg, &size);
+ return size != 0;
+}
+UPB_INLINE void google_protobuf_FileDescriptorProto_clear_extension(google_protobuf_FileDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 7);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_FieldDescriptorProto* const* google_protobuf_FileDescriptorProto_extension(const google_protobuf_FileDescriptorProto* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 7);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_FieldDescriptorProto* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_FileDescriptorProto_has_extension(const google_protobuf_FileDescriptorProto* msg) {
+ size_t size;
+ google_protobuf_FileDescriptorProto_extension(msg, &size);
+ return size != 0;
+}
+UPB_INLINE void google_protobuf_FileDescriptorProto_clear_options(google_protobuf_FileDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 8);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_FileOptions* google_protobuf_FileDescriptorProto_options(const google_protobuf_FileDescriptorProto* msg) {
+ const google_protobuf_FileOptions* default_val = NULL;
+ const google_protobuf_FileOptions* ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 8);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileDescriptorProto_has_options(const google_protobuf_FileDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 8);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileDescriptorProto_clear_source_code_info(google_protobuf_FileDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 9);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_SourceCodeInfo* google_protobuf_FileDescriptorProto_source_code_info(const google_protobuf_FileDescriptorProto* msg) {
+ const google_protobuf_SourceCodeInfo* default_val = NULL;
+ const google_protobuf_SourceCodeInfo* ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 9);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileDescriptorProto_has_source_code_info(const google_protobuf_FileDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 9);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileDescriptorProto_clear_public_dependency(google_protobuf_FileDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 10);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t const* google_protobuf_FileDescriptorProto_public_dependency(const google_protobuf_FileDescriptorProto* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 10);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (int32_t const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_FileDescriptorProto_has_public_dependency(const google_protobuf_FileDescriptorProto* msg) {
+ size_t size;
+ google_protobuf_FileDescriptorProto_public_dependency(msg, &size);
+ return size != 0;
+}
+UPB_INLINE void google_protobuf_FileDescriptorProto_clear_weak_dependency(google_protobuf_FileDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 11);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t const* google_protobuf_FileDescriptorProto_weak_dependency(const google_protobuf_FileDescriptorProto* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 11);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (int32_t const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_FileDescriptorProto_has_weak_dependency(const google_protobuf_FileDescriptorProto* msg) {
+ size_t size;
+ google_protobuf_FileDescriptorProto_weak_dependency(msg, &size);
+ return size != 0;
+}
+UPB_INLINE void google_protobuf_FileDescriptorProto_clear_syntax(google_protobuf_FileDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 12);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_FileDescriptorProto_syntax(const google_protobuf_FileDescriptorProto* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 12);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileDescriptorProto_has_syntax(const google_protobuf_FileDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 12);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileDescriptorProto_clear_edition(google_protobuf_FileDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 13);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_FileDescriptorProto_edition(const google_protobuf_FileDescriptorProto* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 13);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileDescriptorProto_has_edition(const google_protobuf_FileDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 13);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+
+UPB_INLINE void google_protobuf_FileDescriptorProto_set_name(google_protobuf_FileDescriptorProto *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 1);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FileDescriptorProto_set_package(google_protobuf_FileDescriptorProto *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 2);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE upb_StringView* google_protobuf_FileDescriptorProto_mutable_dependency(google_protobuf_FileDescriptorProto* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 3);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (upb_StringView*)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE upb_StringView* google_protobuf_FileDescriptorProto_resize_dependency(google_protobuf_FileDescriptorProto* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 3);
+ return (upb_StringView*)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE bool google_protobuf_FileDescriptorProto_add_dependency(google_protobuf_FileDescriptorProto* msg, upb_StringView val, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 3);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return false;
+ }
+ _upb_Array_Set(arr, arr->size - 1, &val, sizeof(val));
+ return true;
+}
+UPB_INLINE google_protobuf_DescriptorProto** google_protobuf_FileDescriptorProto_mutable_message_type(google_protobuf_FileDescriptorProto* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 4);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_DescriptorProto**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_DescriptorProto** google_protobuf_FileDescriptorProto_resize_message_type(google_protobuf_FileDescriptorProto* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 4);
+ return (google_protobuf_DescriptorProto**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_DescriptorProto* google_protobuf_FileDescriptorProto_add_message_type(google_protobuf_FileDescriptorProto* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 4);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_DescriptorProto* sub = (struct google_protobuf_DescriptorProto*)_upb_Message_New(google_protobuf_DescriptorProto_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+UPB_INLINE google_protobuf_EnumDescriptorProto** google_protobuf_FileDescriptorProto_mutable_enum_type(google_protobuf_FileDescriptorProto* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 5);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_EnumDescriptorProto**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_EnumDescriptorProto** google_protobuf_FileDescriptorProto_resize_enum_type(google_protobuf_FileDescriptorProto* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 5);
+ return (google_protobuf_EnumDescriptorProto**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_EnumDescriptorProto* google_protobuf_FileDescriptorProto_add_enum_type(google_protobuf_FileDescriptorProto* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 5);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_EnumDescriptorProto* sub = (struct google_protobuf_EnumDescriptorProto*)_upb_Message_New(google_protobuf_EnumDescriptorProto_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+UPB_INLINE google_protobuf_ServiceDescriptorProto** google_protobuf_FileDescriptorProto_mutable_service(google_protobuf_FileDescriptorProto* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 6);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_ServiceDescriptorProto**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_ServiceDescriptorProto** google_protobuf_FileDescriptorProto_resize_service(google_protobuf_FileDescriptorProto* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 6);
+ return (google_protobuf_ServiceDescriptorProto**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_ServiceDescriptorProto* google_protobuf_FileDescriptorProto_add_service(google_protobuf_FileDescriptorProto* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 6);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_ServiceDescriptorProto* sub = (struct google_protobuf_ServiceDescriptorProto*)_upb_Message_New(google_protobuf_ServiceDescriptorProto_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+UPB_INLINE google_protobuf_FieldDescriptorProto** google_protobuf_FileDescriptorProto_mutable_extension(google_protobuf_FileDescriptorProto* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 7);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_FieldDescriptorProto**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_FieldDescriptorProto** google_protobuf_FileDescriptorProto_resize_extension(google_protobuf_FileDescriptorProto* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 7);
+ return (google_protobuf_FieldDescriptorProto**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_FieldDescriptorProto* google_protobuf_FileDescriptorProto_add_extension(google_protobuf_FileDescriptorProto* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 7);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_FieldDescriptorProto* sub = (struct google_protobuf_FieldDescriptorProto*)_upb_Message_New(google_protobuf_FieldDescriptorProto_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+UPB_INLINE void google_protobuf_FileDescriptorProto_set_options(google_protobuf_FileDescriptorProto *msg, google_protobuf_FileOptions* value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 8);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE struct google_protobuf_FileOptions* google_protobuf_FileDescriptorProto_mutable_options(google_protobuf_FileDescriptorProto* msg, upb_Arena* arena) {
+ struct google_protobuf_FileOptions* sub = (struct google_protobuf_FileOptions*)google_protobuf_FileDescriptorProto_options(msg);
+ if (sub == NULL) {
+ sub = (struct google_protobuf_FileOptions*)_upb_Message_New(google_protobuf_FileOptions_msg_init(), arena);
+ if (sub) google_protobuf_FileDescriptorProto_set_options(msg, sub);
+ }
+ return sub;
+}
+UPB_INLINE void google_protobuf_FileDescriptorProto_set_source_code_info(google_protobuf_FileDescriptorProto *msg, google_protobuf_SourceCodeInfo* value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 9);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE struct google_protobuf_SourceCodeInfo* google_protobuf_FileDescriptorProto_mutable_source_code_info(google_protobuf_FileDescriptorProto* msg, upb_Arena* arena) {
+ struct google_protobuf_SourceCodeInfo* sub = (struct google_protobuf_SourceCodeInfo*)google_protobuf_FileDescriptorProto_source_code_info(msg);
+ if (sub == NULL) {
+ sub = (struct google_protobuf_SourceCodeInfo*)_upb_Message_New(google_protobuf_SourceCodeInfo_msg_init(), arena);
+ if (sub) google_protobuf_FileDescriptorProto_set_source_code_info(msg, sub);
+ }
+ return sub;
+}
+UPB_INLINE int32_t* google_protobuf_FileDescriptorProto_mutable_public_dependency(google_protobuf_FileDescriptorProto* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 10);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (int32_t*)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE int32_t* google_protobuf_FileDescriptorProto_resize_public_dependency(google_protobuf_FileDescriptorProto* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 10);
+ return (int32_t*)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE bool google_protobuf_FileDescriptorProto_add_public_dependency(google_protobuf_FileDescriptorProto* msg, int32_t val, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 10);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return false;
+ }
+ _upb_Array_Set(arr, arr->size - 1, &val, sizeof(val));
+ return true;
+}
+UPB_INLINE int32_t* google_protobuf_FileDescriptorProto_mutable_weak_dependency(google_protobuf_FileDescriptorProto* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 11);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (int32_t*)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE int32_t* google_protobuf_FileDescriptorProto_resize_weak_dependency(google_protobuf_FileDescriptorProto* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 11);
+ return (int32_t*)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE bool google_protobuf_FileDescriptorProto_add_weak_dependency(google_protobuf_FileDescriptorProto* msg, int32_t val, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 11);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return false;
+ }
+ _upb_Array_Set(arr, arr->size - 1, &val, sizeof(val));
+ return true;
+}
+UPB_INLINE void google_protobuf_FileDescriptorProto_set_syntax(google_protobuf_FileDescriptorProto *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 12);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FileDescriptorProto_set_edition(google_protobuf_FileDescriptorProto *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileDescriptorProto_msg_init(), 13);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+
+/* google.protobuf.DescriptorProto */
+
+UPB_INLINE google_protobuf_DescriptorProto* google_protobuf_DescriptorProto_new(upb_Arena* arena) {
+ return (google_protobuf_DescriptorProto*)_upb_Message_New(google_protobuf_DescriptorProto_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_DescriptorProto* google_protobuf_DescriptorProto_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_DescriptorProto* ret = google_protobuf_DescriptorProto_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_DescriptorProto_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_DescriptorProto* google_protobuf_DescriptorProto_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_DescriptorProto* ret = google_protobuf_DescriptorProto_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_DescriptorProto_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_DescriptorProto_serialize(const google_protobuf_DescriptorProto* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_DescriptorProto_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_DescriptorProto_serialize_ex(const google_protobuf_DescriptorProto* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_DescriptorProto_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_DescriptorProto_clear_name(google_protobuf_DescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_DescriptorProto_name(const google_protobuf_DescriptorProto* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 1);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_DescriptorProto_has_name(const google_protobuf_DescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 1);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_DescriptorProto_clear_field(google_protobuf_DescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 2);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_FieldDescriptorProto* const* google_protobuf_DescriptorProto_field(const google_protobuf_DescriptorProto* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 2);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_FieldDescriptorProto* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_DescriptorProto_has_field(const google_protobuf_DescriptorProto* msg) {
+ size_t size;
+ google_protobuf_DescriptorProto_field(msg, &size);
+ return size != 0;
+}
+UPB_INLINE void google_protobuf_DescriptorProto_clear_nested_type(google_protobuf_DescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 3);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_DescriptorProto* const* google_protobuf_DescriptorProto_nested_type(const google_protobuf_DescriptorProto* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 3);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_DescriptorProto* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_DescriptorProto_has_nested_type(const google_protobuf_DescriptorProto* msg) {
+ size_t size;
+ google_protobuf_DescriptorProto_nested_type(msg, &size);
+ return size != 0;
+}
+UPB_INLINE void google_protobuf_DescriptorProto_clear_enum_type(google_protobuf_DescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 4);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_EnumDescriptorProto* const* google_protobuf_DescriptorProto_enum_type(const google_protobuf_DescriptorProto* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 4);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_EnumDescriptorProto* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_DescriptorProto_has_enum_type(const google_protobuf_DescriptorProto* msg) {
+ size_t size;
+ google_protobuf_DescriptorProto_enum_type(msg, &size);
+ return size != 0;
+}
+UPB_INLINE void google_protobuf_DescriptorProto_clear_extension_range(google_protobuf_DescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 5);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_DescriptorProto_ExtensionRange* const* google_protobuf_DescriptorProto_extension_range(const google_protobuf_DescriptorProto* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 5);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_DescriptorProto_ExtensionRange* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_DescriptorProto_has_extension_range(const google_protobuf_DescriptorProto* msg) {
+ size_t size;
+ google_protobuf_DescriptorProto_extension_range(msg, &size);
+ return size != 0;
+}
+UPB_INLINE void google_protobuf_DescriptorProto_clear_extension(google_protobuf_DescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 6);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_FieldDescriptorProto* const* google_protobuf_DescriptorProto_extension(const google_protobuf_DescriptorProto* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 6);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_FieldDescriptorProto* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_DescriptorProto_has_extension(const google_protobuf_DescriptorProto* msg) {
+ size_t size;
+ google_protobuf_DescriptorProto_extension(msg, &size);
+ return size != 0;
+}
+UPB_INLINE void google_protobuf_DescriptorProto_clear_options(google_protobuf_DescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 7);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_MessageOptions* google_protobuf_DescriptorProto_options(const google_protobuf_DescriptorProto* msg) {
+ const google_protobuf_MessageOptions* default_val = NULL;
+ const google_protobuf_MessageOptions* ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 7);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_DescriptorProto_has_options(const google_protobuf_DescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 7);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_DescriptorProto_clear_oneof_decl(google_protobuf_DescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 8);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_OneofDescriptorProto* const* google_protobuf_DescriptorProto_oneof_decl(const google_protobuf_DescriptorProto* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 8);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_OneofDescriptorProto* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_DescriptorProto_has_oneof_decl(const google_protobuf_DescriptorProto* msg) {
+ size_t size;
+ google_protobuf_DescriptorProto_oneof_decl(msg, &size);
+ return size != 0;
+}
+UPB_INLINE void google_protobuf_DescriptorProto_clear_reserved_range(google_protobuf_DescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 9);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_DescriptorProto_ReservedRange* const* google_protobuf_DescriptorProto_reserved_range(const google_protobuf_DescriptorProto* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 9);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_DescriptorProto_ReservedRange* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_DescriptorProto_has_reserved_range(const google_protobuf_DescriptorProto* msg) {
+ size_t size;
+ google_protobuf_DescriptorProto_reserved_range(msg, &size);
+ return size != 0;
+}
+UPB_INLINE void google_protobuf_DescriptorProto_clear_reserved_name(google_protobuf_DescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 10);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView const* google_protobuf_DescriptorProto_reserved_name(const google_protobuf_DescriptorProto* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 10);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (upb_StringView const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_DescriptorProto_has_reserved_name(const google_protobuf_DescriptorProto* msg) {
+ size_t size;
+ google_protobuf_DescriptorProto_reserved_name(msg, &size);
+ return size != 0;
+}
+
+UPB_INLINE void google_protobuf_DescriptorProto_set_name(google_protobuf_DescriptorProto *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 1);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE google_protobuf_FieldDescriptorProto** google_protobuf_DescriptorProto_mutable_field(google_protobuf_DescriptorProto* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 2);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_FieldDescriptorProto**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_FieldDescriptorProto** google_protobuf_DescriptorProto_resize_field(google_protobuf_DescriptorProto* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 2);
+ return (google_protobuf_FieldDescriptorProto**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_FieldDescriptorProto* google_protobuf_DescriptorProto_add_field(google_protobuf_DescriptorProto* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 2);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_FieldDescriptorProto* sub = (struct google_protobuf_FieldDescriptorProto*)_upb_Message_New(google_protobuf_FieldDescriptorProto_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+UPB_INLINE google_protobuf_DescriptorProto** google_protobuf_DescriptorProto_mutable_nested_type(google_protobuf_DescriptorProto* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 3);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_DescriptorProto**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_DescriptorProto** google_protobuf_DescriptorProto_resize_nested_type(google_protobuf_DescriptorProto* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 3);
+ return (google_protobuf_DescriptorProto**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_DescriptorProto* google_protobuf_DescriptorProto_add_nested_type(google_protobuf_DescriptorProto* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 3);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_DescriptorProto* sub = (struct google_protobuf_DescriptorProto*)_upb_Message_New(google_protobuf_DescriptorProto_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+UPB_INLINE google_protobuf_EnumDescriptorProto** google_protobuf_DescriptorProto_mutable_enum_type(google_protobuf_DescriptorProto* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 4);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_EnumDescriptorProto**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_EnumDescriptorProto** google_protobuf_DescriptorProto_resize_enum_type(google_protobuf_DescriptorProto* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 4);
+ return (google_protobuf_EnumDescriptorProto**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_EnumDescriptorProto* google_protobuf_DescriptorProto_add_enum_type(google_protobuf_DescriptorProto* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 4);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_EnumDescriptorProto* sub = (struct google_protobuf_EnumDescriptorProto*)_upb_Message_New(google_protobuf_EnumDescriptorProto_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+UPB_INLINE google_protobuf_DescriptorProto_ExtensionRange** google_protobuf_DescriptorProto_mutable_extension_range(google_protobuf_DescriptorProto* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 5);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_DescriptorProto_ExtensionRange**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_DescriptorProto_ExtensionRange** google_protobuf_DescriptorProto_resize_extension_range(google_protobuf_DescriptorProto* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 5);
+ return (google_protobuf_DescriptorProto_ExtensionRange**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_DescriptorProto_ExtensionRange* google_protobuf_DescriptorProto_add_extension_range(google_protobuf_DescriptorProto* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 5);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_DescriptorProto_ExtensionRange* sub = (struct google_protobuf_DescriptorProto_ExtensionRange*)_upb_Message_New(google_protobuf_DescriptorProto_ExtensionRange_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+UPB_INLINE google_protobuf_FieldDescriptorProto** google_protobuf_DescriptorProto_mutable_extension(google_protobuf_DescriptorProto* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 6);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_FieldDescriptorProto**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_FieldDescriptorProto** google_protobuf_DescriptorProto_resize_extension(google_protobuf_DescriptorProto* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 6);
+ return (google_protobuf_FieldDescriptorProto**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_FieldDescriptorProto* google_protobuf_DescriptorProto_add_extension(google_protobuf_DescriptorProto* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 6);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_FieldDescriptorProto* sub = (struct google_protobuf_FieldDescriptorProto*)_upb_Message_New(google_protobuf_FieldDescriptorProto_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+UPB_INLINE void google_protobuf_DescriptorProto_set_options(google_protobuf_DescriptorProto *msg, google_protobuf_MessageOptions* value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 7);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE struct google_protobuf_MessageOptions* google_protobuf_DescriptorProto_mutable_options(google_protobuf_DescriptorProto* msg, upb_Arena* arena) {
+ struct google_protobuf_MessageOptions* sub = (struct google_protobuf_MessageOptions*)google_protobuf_DescriptorProto_options(msg);
+ if (sub == NULL) {
+ sub = (struct google_protobuf_MessageOptions*)_upb_Message_New(google_protobuf_MessageOptions_msg_init(), arena);
+ if (sub) google_protobuf_DescriptorProto_set_options(msg, sub);
+ }
+ return sub;
+}
+UPB_INLINE google_protobuf_OneofDescriptorProto** google_protobuf_DescriptorProto_mutable_oneof_decl(google_protobuf_DescriptorProto* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 8);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_OneofDescriptorProto**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_OneofDescriptorProto** google_protobuf_DescriptorProto_resize_oneof_decl(google_protobuf_DescriptorProto* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 8);
+ return (google_protobuf_OneofDescriptorProto**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_OneofDescriptorProto* google_protobuf_DescriptorProto_add_oneof_decl(google_protobuf_DescriptorProto* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 8);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_OneofDescriptorProto* sub = (struct google_protobuf_OneofDescriptorProto*)_upb_Message_New(google_protobuf_OneofDescriptorProto_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+UPB_INLINE google_protobuf_DescriptorProto_ReservedRange** google_protobuf_DescriptorProto_mutable_reserved_range(google_protobuf_DescriptorProto* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 9);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_DescriptorProto_ReservedRange**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_DescriptorProto_ReservedRange** google_protobuf_DescriptorProto_resize_reserved_range(google_protobuf_DescriptorProto* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 9);
+ return (google_protobuf_DescriptorProto_ReservedRange**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_DescriptorProto_ReservedRange* google_protobuf_DescriptorProto_add_reserved_range(google_protobuf_DescriptorProto* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 9);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_DescriptorProto_ReservedRange* sub = (struct google_protobuf_DescriptorProto_ReservedRange*)_upb_Message_New(google_protobuf_DescriptorProto_ReservedRange_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+UPB_INLINE upb_StringView* google_protobuf_DescriptorProto_mutable_reserved_name(google_protobuf_DescriptorProto* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 10);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (upb_StringView*)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE upb_StringView* google_protobuf_DescriptorProto_resize_reserved_name(google_protobuf_DescriptorProto* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 10);
+ return (upb_StringView*)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE bool google_protobuf_DescriptorProto_add_reserved_name(google_protobuf_DescriptorProto* msg, upb_StringView val, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_msg_init(), 10);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return false;
+ }
+ _upb_Array_Set(arr, arr->size - 1, &val, sizeof(val));
+ return true;
+}
+
+/* google.protobuf.DescriptorProto.ExtensionRange */
+
+UPB_INLINE google_protobuf_DescriptorProto_ExtensionRange* google_protobuf_DescriptorProto_ExtensionRange_new(upb_Arena* arena) {
+ return (google_protobuf_DescriptorProto_ExtensionRange*)_upb_Message_New(google_protobuf_DescriptorProto_ExtensionRange_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_DescriptorProto_ExtensionRange* google_protobuf_DescriptorProto_ExtensionRange_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_DescriptorProto_ExtensionRange* ret = google_protobuf_DescriptorProto_ExtensionRange_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_DescriptorProto_ExtensionRange_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_DescriptorProto_ExtensionRange* google_protobuf_DescriptorProto_ExtensionRange_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_DescriptorProto_ExtensionRange* ret = google_protobuf_DescriptorProto_ExtensionRange_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_DescriptorProto_ExtensionRange_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_DescriptorProto_ExtensionRange_serialize(const google_protobuf_DescriptorProto_ExtensionRange* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_DescriptorProto_ExtensionRange_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_DescriptorProto_ExtensionRange_serialize_ex(const google_protobuf_DescriptorProto_ExtensionRange* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_DescriptorProto_ExtensionRange_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_DescriptorProto_ExtensionRange_clear_start(google_protobuf_DescriptorProto_ExtensionRange* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_ExtensionRange_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t google_protobuf_DescriptorProto_ExtensionRange_start(const google_protobuf_DescriptorProto_ExtensionRange* msg) {
+ int32_t default_val = (int32_t)0;
+ int32_t ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_ExtensionRange_msg_init(), 1);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_DescriptorProto_ExtensionRange_has_start(const google_protobuf_DescriptorProto_ExtensionRange* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_ExtensionRange_msg_init(), 1);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_DescriptorProto_ExtensionRange_clear_end(google_protobuf_DescriptorProto_ExtensionRange* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_ExtensionRange_msg_init(), 2);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t google_protobuf_DescriptorProto_ExtensionRange_end(const google_protobuf_DescriptorProto_ExtensionRange* msg) {
+ int32_t default_val = (int32_t)0;
+ int32_t ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_ExtensionRange_msg_init(), 2);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_DescriptorProto_ExtensionRange_has_end(const google_protobuf_DescriptorProto_ExtensionRange* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_ExtensionRange_msg_init(), 2);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_DescriptorProto_ExtensionRange_clear_options(google_protobuf_DescriptorProto_ExtensionRange* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_ExtensionRange_msg_init(), 3);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_ExtensionRangeOptions* google_protobuf_DescriptorProto_ExtensionRange_options(const google_protobuf_DescriptorProto_ExtensionRange* msg) {
+ const google_protobuf_ExtensionRangeOptions* default_val = NULL;
+ const google_protobuf_ExtensionRangeOptions* ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_ExtensionRange_msg_init(), 3);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_DescriptorProto_ExtensionRange_has_options(const google_protobuf_DescriptorProto_ExtensionRange* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_ExtensionRange_msg_init(), 3);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+
+UPB_INLINE void google_protobuf_DescriptorProto_ExtensionRange_set_start(google_protobuf_DescriptorProto_ExtensionRange *msg, int32_t value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_ExtensionRange_msg_init(), 1);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_DescriptorProto_ExtensionRange_set_end(google_protobuf_DescriptorProto_ExtensionRange *msg, int32_t value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_ExtensionRange_msg_init(), 2);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_DescriptorProto_ExtensionRange_set_options(google_protobuf_DescriptorProto_ExtensionRange *msg, google_protobuf_ExtensionRangeOptions* value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_ExtensionRange_msg_init(), 3);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE struct google_protobuf_ExtensionRangeOptions* google_protobuf_DescriptorProto_ExtensionRange_mutable_options(google_protobuf_DescriptorProto_ExtensionRange* msg, upb_Arena* arena) {
+ struct google_protobuf_ExtensionRangeOptions* sub = (struct google_protobuf_ExtensionRangeOptions*)google_protobuf_DescriptorProto_ExtensionRange_options(msg);
+ if (sub == NULL) {
+ sub = (struct google_protobuf_ExtensionRangeOptions*)_upb_Message_New(google_protobuf_ExtensionRangeOptions_msg_init(), arena);
+ if (sub) google_protobuf_DescriptorProto_ExtensionRange_set_options(msg, sub);
+ }
+ return sub;
+}
+
+/* google.protobuf.DescriptorProto.ReservedRange */
+
+UPB_INLINE google_protobuf_DescriptorProto_ReservedRange* google_protobuf_DescriptorProto_ReservedRange_new(upb_Arena* arena) {
+ return (google_protobuf_DescriptorProto_ReservedRange*)_upb_Message_New(google_protobuf_DescriptorProto_ReservedRange_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_DescriptorProto_ReservedRange* google_protobuf_DescriptorProto_ReservedRange_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_DescriptorProto_ReservedRange* ret = google_protobuf_DescriptorProto_ReservedRange_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_DescriptorProto_ReservedRange_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_DescriptorProto_ReservedRange* google_protobuf_DescriptorProto_ReservedRange_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_DescriptorProto_ReservedRange* ret = google_protobuf_DescriptorProto_ReservedRange_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_DescriptorProto_ReservedRange_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_DescriptorProto_ReservedRange_serialize(const google_protobuf_DescriptorProto_ReservedRange* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_DescriptorProto_ReservedRange_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_DescriptorProto_ReservedRange_serialize_ex(const google_protobuf_DescriptorProto_ReservedRange* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_DescriptorProto_ReservedRange_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_DescriptorProto_ReservedRange_clear_start(google_protobuf_DescriptorProto_ReservedRange* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_ReservedRange_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t google_protobuf_DescriptorProto_ReservedRange_start(const google_protobuf_DescriptorProto_ReservedRange* msg) {
+ int32_t default_val = (int32_t)0;
+ int32_t ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_ReservedRange_msg_init(), 1);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_DescriptorProto_ReservedRange_has_start(const google_protobuf_DescriptorProto_ReservedRange* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_ReservedRange_msg_init(), 1);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_DescriptorProto_ReservedRange_clear_end(google_protobuf_DescriptorProto_ReservedRange* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_ReservedRange_msg_init(), 2);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t google_protobuf_DescriptorProto_ReservedRange_end(const google_protobuf_DescriptorProto_ReservedRange* msg) {
+ int32_t default_val = (int32_t)0;
+ int32_t ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_ReservedRange_msg_init(), 2);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_DescriptorProto_ReservedRange_has_end(const google_protobuf_DescriptorProto_ReservedRange* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_ReservedRange_msg_init(), 2);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+
+UPB_INLINE void google_protobuf_DescriptorProto_ReservedRange_set_start(google_protobuf_DescriptorProto_ReservedRange *msg, int32_t value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_ReservedRange_msg_init(), 1);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_DescriptorProto_ReservedRange_set_end(google_protobuf_DescriptorProto_ReservedRange *msg, int32_t value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_DescriptorProto_ReservedRange_msg_init(), 2);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+
+/* google.protobuf.ExtensionRangeOptions */
+
+UPB_INLINE google_protobuf_ExtensionRangeOptions* google_protobuf_ExtensionRangeOptions_new(upb_Arena* arena) {
+ return (google_protobuf_ExtensionRangeOptions*)_upb_Message_New(google_protobuf_ExtensionRangeOptions_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_ExtensionRangeOptions* google_protobuf_ExtensionRangeOptions_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_ExtensionRangeOptions* ret = google_protobuf_ExtensionRangeOptions_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_ExtensionRangeOptions_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_ExtensionRangeOptions* google_protobuf_ExtensionRangeOptions_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_ExtensionRangeOptions* ret = google_protobuf_ExtensionRangeOptions_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_ExtensionRangeOptions_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_ExtensionRangeOptions_serialize(const google_protobuf_ExtensionRangeOptions* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_ExtensionRangeOptions_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_ExtensionRangeOptions_serialize_ex(const google_protobuf_ExtensionRangeOptions* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_ExtensionRangeOptions_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_ExtensionRangeOptions_clear_uninterpreted_option(google_protobuf_ExtensionRangeOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ExtensionRangeOptions_msg_init(), 999);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_UninterpretedOption* const* google_protobuf_ExtensionRangeOptions_uninterpreted_option(const google_protobuf_ExtensionRangeOptions* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ExtensionRangeOptions_msg_init(), 999);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_UninterpretedOption* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_ExtensionRangeOptions_has_uninterpreted_option(const google_protobuf_ExtensionRangeOptions* msg) {
+ size_t size;
+ google_protobuf_ExtensionRangeOptions_uninterpreted_option(msg, &size);
+ return size != 0;
+}
+
+UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_ExtensionRangeOptions_mutable_uninterpreted_option(google_protobuf_ExtensionRangeOptions* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ExtensionRangeOptions_msg_init(), 999);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_UninterpretedOption**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_ExtensionRangeOptions_resize_uninterpreted_option(google_protobuf_ExtensionRangeOptions* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ExtensionRangeOptions_msg_init(), 999);
+ return (google_protobuf_UninterpretedOption**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_UninterpretedOption* google_protobuf_ExtensionRangeOptions_add_uninterpreted_option(google_protobuf_ExtensionRangeOptions* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ExtensionRangeOptions_msg_init(), 999);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_UninterpretedOption* sub = (struct google_protobuf_UninterpretedOption*)_upb_Message_New(google_protobuf_UninterpretedOption_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+
+/* google.protobuf.FieldDescriptorProto */
+
+UPB_INLINE google_protobuf_FieldDescriptorProto* google_protobuf_FieldDescriptorProto_new(upb_Arena* arena) {
+ return (google_protobuf_FieldDescriptorProto*)_upb_Message_New(google_protobuf_FieldDescriptorProto_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_FieldDescriptorProto* google_protobuf_FieldDescriptorProto_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_FieldDescriptorProto* ret = google_protobuf_FieldDescriptorProto_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_FieldDescriptorProto_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_FieldDescriptorProto* google_protobuf_FieldDescriptorProto_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_FieldDescriptorProto* ret = google_protobuf_FieldDescriptorProto_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_FieldDescriptorProto_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_FieldDescriptorProto_serialize(const google_protobuf_FieldDescriptorProto* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_FieldDescriptorProto_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_FieldDescriptorProto_serialize_ex(const google_protobuf_FieldDescriptorProto* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_FieldDescriptorProto_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_FieldDescriptorProto_clear_name(google_protobuf_FieldDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_FieldDescriptorProto_name(const google_protobuf_FieldDescriptorProto* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 1);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_name(const google_protobuf_FieldDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 1);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FieldDescriptorProto_clear_extendee(google_protobuf_FieldDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 2);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_FieldDescriptorProto_extendee(const google_protobuf_FieldDescriptorProto* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 2);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_extendee(const google_protobuf_FieldDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 2);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FieldDescriptorProto_clear_number(google_protobuf_FieldDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 3);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t google_protobuf_FieldDescriptorProto_number(const google_protobuf_FieldDescriptorProto* msg) {
+ int32_t default_val = (int32_t)0;
+ int32_t ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 3);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_number(const google_protobuf_FieldDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 3);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FieldDescriptorProto_clear_label(google_protobuf_FieldDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 4);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t google_protobuf_FieldDescriptorProto_label(const google_protobuf_FieldDescriptorProto* msg) {
+ int32_t default_val = 1;
+ int32_t ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 4);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_label(const google_protobuf_FieldDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 4);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FieldDescriptorProto_clear_type(google_protobuf_FieldDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 5);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t google_protobuf_FieldDescriptorProto_type(const google_protobuf_FieldDescriptorProto* msg) {
+ int32_t default_val = 1;
+ int32_t ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 5);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_type(const google_protobuf_FieldDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 5);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FieldDescriptorProto_clear_type_name(google_protobuf_FieldDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 6);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_FieldDescriptorProto_type_name(const google_protobuf_FieldDescriptorProto* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 6);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_type_name(const google_protobuf_FieldDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 6);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FieldDescriptorProto_clear_default_value(google_protobuf_FieldDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 7);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_FieldDescriptorProto_default_value(const google_protobuf_FieldDescriptorProto* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 7);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_default_value(const google_protobuf_FieldDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 7);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FieldDescriptorProto_clear_options(google_protobuf_FieldDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 8);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_FieldOptions* google_protobuf_FieldDescriptorProto_options(const google_protobuf_FieldDescriptorProto* msg) {
+ const google_protobuf_FieldOptions* default_val = NULL;
+ const google_protobuf_FieldOptions* ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 8);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_options(const google_protobuf_FieldDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 8);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FieldDescriptorProto_clear_oneof_index(google_protobuf_FieldDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 9);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t google_protobuf_FieldDescriptorProto_oneof_index(const google_protobuf_FieldDescriptorProto* msg) {
+ int32_t default_val = (int32_t)0;
+ int32_t ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 9);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_oneof_index(const google_protobuf_FieldDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 9);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FieldDescriptorProto_clear_json_name(google_protobuf_FieldDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 10);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_FieldDescriptorProto_json_name(const google_protobuf_FieldDescriptorProto* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 10);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_json_name(const google_protobuf_FieldDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 10);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FieldDescriptorProto_clear_proto3_optional(google_protobuf_FieldDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 17);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_FieldDescriptorProto_proto3_optional(const google_protobuf_FieldDescriptorProto* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 17);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_proto3_optional(const google_protobuf_FieldDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 17);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+
+UPB_INLINE void google_protobuf_FieldDescriptorProto_set_name(google_protobuf_FieldDescriptorProto *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 1);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FieldDescriptorProto_set_extendee(google_protobuf_FieldDescriptorProto *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 2);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FieldDescriptorProto_set_number(google_protobuf_FieldDescriptorProto *msg, int32_t value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 3);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FieldDescriptorProto_set_label(google_protobuf_FieldDescriptorProto *msg, int32_t value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 4);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FieldDescriptorProto_set_type(google_protobuf_FieldDescriptorProto *msg, int32_t value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 5);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FieldDescriptorProto_set_type_name(google_protobuf_FieldDescriptorProto *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 6);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FieldDescriptorProto_set_default_value(google_protobuf_FieldDescriptorProto *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 7);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FieldDescriptorProto_set_options(google_protobuf_FieldDescriptorProto *msg, google_protobuf_FieldOptions* value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 8);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE struct google_protobuf_FieldOptions* google_protobuf_FieldDescriptorProto_mutable_options(google_protobuf_FieldDescriptorProto* msg, upb_Arena* arena) {
+ struct google_protobuf_FieldOptions* sub = (struct google_protobuf_FieldOptions*)google_protobuf_FieldDescriptorProto_options(msg);
+ if (sub == NULL) {
+ sub = (struct google_protobuf_FieldOptions*)_upb_Message_New(google_protobuf_FieldOptions_msg_init(), arena);
+ if (sub) google_protobuf_FieldDescriptorProto_set_options(msg, sub);
+ }
+ return sub;
+}
+UPB_INLINE void google_protobuf_FieldDescriptorProto_set_oneof_index(google_protobuf_FieldDescriptorProto *msg, int32_t value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 9);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FieldDescriptorProto_set_json_name(google_protobuf_FieldDescriptorProto *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 10);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FieldDescriptorProto_set_proto3_optional(google_protobuf_FieldDescriptorProto *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldDescriptorProto_msg_init(), 17);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+
+/* google.protobuf.OneofDescriptorProto */
+
+UPB_INLINE google_protobuf_OneofDescriptorProto* google_protobuf_OneofDescriptorProto_new(upb_Arena* arena) {
+ return (google_protobuf_OneofDescriptorProto*)_upb_Message_New(google_protobuf_OneofDescriptorProto_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_OneofDescriptorProto* google_protobuf_OneofDescriptorProto_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_OneofDescriptorProto* ret = google_protobuf_OneofDescriptorProto_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_OneofDescriptorProto_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_OneofDescriptorProto* google_protobuf_OneofDescriptorProto_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_OneofDescriptorProto* ret = google_protobuf_OneofDescriptorProto_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_OneofDescriptorProto_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_OneofDescriptorProto_serialize(const google_protobuf_OneofDescriptorProto* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_OneofDescriptorProto_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_OneofDescriptorProto_serialize_ex(const google_protobuf_OneofDescriptorProto* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_OneofDescriptorProto_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_OneofDescriptorProto_clear_name(google_protobuf_OneofDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_OneofDescriptorProto_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_OneofDescriptorProto_name(const google_protobuf_OneofDescriptorProto* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_OneofDescriptorProto_msg_init(), 1);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_OneofDescriptorProto_has_name(const google_protobuf_OneofDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_OneofDescriptorProto_msg_init(), 1);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_OneofDescriptorProto_clear_options(google_protobuf_OneofDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_OneofDescriptorProto_msg_init(), 2);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_OneofOptions* google_protobuf_OneofDescriptorProto_options(const google_protobuf_OneofDescriptorProto* msg) {
+ const google_protobuf_OneofOptions* default_val = NULL;
+ const google_protobuf_OneofOptions* ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_OneofDescriptorProto_msg_init(), 2);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_OneofDescriptorProto_has_options(const google_protobuf_OneofDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_OneofDescriptorProto_msg_init(), 2);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+
+UPB_INLINE void google_protobuf_OneofDescriptorProto_set_name(google_protobuf_OneofDescriptorProto *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_OneofDescriptorProto_msg_init(), 1);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_OneofDescriptorProto_set_options(google_protobuf_OneofDescriptorProto *msg, google_protobuf_OneofOptions* value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_OneofDescriptorProto_msg_init(), 2);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE struct google_protobuf_OneofOptions* google_protobuf_OneofDescriptorProto_mutable_options(google_protobuf_OneofDescriptorProto* msg, upb_Arena* arena) {
+ struct google_protobuf_OneofOptions* sub = (struct google_protobuf_OneofOptions*)google_protobuf_OneofDescriptorProto_options(msg);
+ if (sub == NULL) {
+ sub = (struct google_protobuf_OneofOptions*)_upb_Message_New(google_protobuf_OneofOptions_msg_init(), arena);
+ if (sub) google_protobuf_OneofDescriptorProto_set_options(msg, sub);
+ }
+ return sub;
+}
+
+/* google.protobuf.EnumDescriptorProto */
+
+UPB_INLINE google_protobuf_EnumDescriptorProto* google_protobuf_EnumDescriptorProto_new(upb_Arena* arena) {
+ return (google_protobuf_EnumDescriptorProto*)_upb_Message_New(google_protobuf_EnumDescriptorProto_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_EnumDescriptorProto* google_protobuf_EnumDescriptorProto_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_EnumDescriptorProto* ret = google_protobuf_EnumDescriptorProto_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_EnumDescriptorProto_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_EnumDescriptorProto* google_protobuf_EnumDescriptorProto_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_EnumDescriptorProto* ret = google_protobuf_EnumDescriptorProto_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_EnumDescriptorProto_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_EnumDescriptorProto_serialize(const google_protobuf_EnumDescriptorProto* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_EnumDescriptorProto_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_EnumDescriptorProto_serialize_ex(const google_protobuf_EnumDescriptorProto* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_EnumDescriptorProto_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_EnumDescriptorProto_clear_name(google_protobuf_EnumDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_EnumDescriptorProto_name(const google_protobuf_EnumDescriptorProto* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_msg_init(), 1);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_EnumDescriptorProto_has_name(const google_protobuf_EnumDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_msg_init(), 1);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_EnumDescriptorProto_clear_value(google_protobuf_EnumDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_msg_init(), 2);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_EnumValueDescriptorProto* const* google_protobuf_EnumDescriptorProto_value(const google_protobuf_EnumDescriptorProto* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_msg_init(), 2);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_EnumValueDescriptorProto* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_EnumDescriptorProto_has_value(const google_protobuf_EnumDescriptorProto* msg) {
+ size_t size;
+ google_protobuf_EnumDescriptorProto_value(msg, &size);
+ return size != 0;
+}
+UPB_INLINE void google_protobuf_EnumDescriptorProto_clear_options(google_protobuf_EnumDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_msg_init(), 3);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_EnumOptions* google_protobuf_EnumDescriptorProto_options(const google_protobuf_EnumDescriptorProto* msg) {
+ const google_protobuf_EnumOptions* default_val = NULL;
+ const google_protobuf_EnumOptions* ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_msg_init(), 3);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_EnumDescriptorProto_has_options(const google_protobuf_EnumDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_msg_init(), 3);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_EnumDescriptorProto_clear_reserved_range(google_protobuf_EnumDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_msg_init(), 4);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_EnumDescriptorProto_EnumReservedRange* const* google_protobuf_EnumDescriptorProto_reserved_range(const google_protobuf_EnumDescriptorProto* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_msg_init(), 4);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_EnumDescriptorProto_EnumReservedRange* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_EnumDescriptorProto_has_reserved_range(const google_protobuf_EnumDescriptorProto* msg) {
+ size_t size;
+ google_protobuf_EnumDescriptorProto_reserved_range(msg, &size);
+ return size != 0;
+}
+UPB_INLINE void google_protobuf_EnumDescriptorProto_clear_reserved_name(google_protobuf_EnumDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_msg_init(), 5);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView const* google_protobuf_EnumDescriptorProto_reserved_name(const google_protobuf_EnumDescriptorProto* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_msg_init(), 5);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (upb_StringView const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_EnumDescriptorProto_has_reserved_name(const google_protobuf_EnumDescriptorProto* msg) {
+ size_t size;
+ google_protobuf_EnumDescriptorProto_reserved_name(msg, &size);
+ return size != 0;
+}
+
+UPB_INLINE void google_protobuf_EnumDescriptorProto_set_name(google_protobuf_EnumDescriptorProto *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_msg_init(), 1);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE google_protobuf_EnumValueDescriptorProto** google_protobuf_EnumDescriptorProto_mutable_value(google_protobuf_EnumDescriptorProto* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_msg_init(), 2);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_EnumValueDescriptorProto**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_EnumValueDescriptorProto** google_protobuf_EnumDescriptorProto_resize_value(google_protobuf_EnumDescriptorProto* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_msg_init(), 2);
+ return (google_protobuf_EnumValueDescriptorProto**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_EnumValueDescriptorProto* google_protobuf_EnumDescriptorProto_add_value(google_protobuf_EnumDescriptorProto* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_msg_init(), 2);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_EnumValueDescriptorProto* sub = (struct google_protobuf_EnumValueDescriptorProto*)_upb_Message_New(google_protobuf_EnumValueDescriptorProto_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+UPB_INLINE void google_protobuf_EnumDescriptorProto_set_options(google_protobuf_EnumDescriptorProto *msg, google_protobuf_EnumOptions* value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_msg_init(), 3);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE struct google_protobuf_EnumOptions* google_protobuf_EnumDescriptorProto_mutable_options(google_protobuf_EnumDescriptorProto* msg, upb_Arena* arena) {
+ struct google_protobuf_EnumOptions* sub = (struct google_protobuf_EnumOptions*)google_protobuf_EnumDescriptorProto_options(msg);
+ if (sub == NULL) {
+ sub = (struct google_protobuf_EnumOptions*)_upb_Message_New(google_protobuf_EnumOptions_msg_init(), arena);
+ if (sub) google_protobuf_EnumDescriptorProto_set_options(msg, sub);
+ }
+ return sub;
+}
+UPB_INLINE google_protobuf_EnumDescriptorProto_EnumReservedRange** google_protobuf_EnumDescriptorProto_mutable_reserved_range(google_protobuf_EnumDescriptorProto* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_msg_init(), 4);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_EnumDescriptorProto_EnumReservedRange**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_EnumDescriptorProto_EnumReservedRange** google_protobuf_EnumDescriptorProto_resize_reserved_range(google_protobuf_EnumDescriptorProto* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_msg_init(), 4);
+ return (google_protobuf_EnumDescriptorProto_EnumReservedRange**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_EnumDescriptorProto_EnumReservedRange* google_protobuf_EnumDescriptorProto_add_reserved_range(google_protobuf_EnumDescriptorProto* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_msg_init(), 4);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_EnumDescriptorProto_EnumReservedRange* sub = (struct google_protobuf_EnumDescriptorProto_EnumReservedRange*)_upb_Message_New(google_protobuf_EnumDescriptorProto_EnumReservedRange_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+UPB_INLINE upb_StringView* google_protobuf_EnumDescriptorProto_mutable_reserved_name(google_protobuf_EnumDescriptorProto* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_msg_init(), 5);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (upb_StringView*)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE upb_StringView* google_protobuf_EnumDescriptorProto_resize_reserved_name(google_protobuf_EnumDescriptorProto* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_msg_init(), 5);
+ return (upb_StringView*)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE bool google_protobuf_EnumDescriptorProto_add_reserved_name(google_protobuf_EnumDescriptorProto* msg, upb_StringView val, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_msg_init(), 5);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return false;
+ }
+ _upb_Array_Set(arr, arr->size - 1, &val, sizeof(val));
+ return true;
+}
+
+/* google.protobuf.EnumDescriptorProto.EnumReservedRange */
+
+UPB_INLINE google_protobuf_EnumDescriptorProto_EnumReservedRange* google_protobuf_EnumDescriptorProto_EnumReservedRange_new(upb_Arena* arena) {
+ return (google_protobuf_EnumDescriptorProto_EnumReservedRange*)_upb_Message_New(google_protobuf_EnumDescriptorProto_EnumReservedRange_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_EnumDescriptorProto_EnumReservedRange* google_protobuf_EnumDescriptorProto_EnumReservedRange_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_EnumDescriptorProto_EnumReservedRange* ret = google_protobuf_EnumDescriptorProto_EnumReservedRange_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_EnumDescriptorProto_EnumReservedRange_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_EnumDescriptorProto_EnumReservedRange* google_protobuf_EnumDescriptorProto_EnumReservedRange_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_EnumDescriptorProto_EnumReservedRange* ret = google_protobuf_EnumDescriptorProto_EnumReservedRange_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_EnumDescriptorProto_EnumReservedRange_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_EnumDescriptorProto_EnumReservedRange_serialize(const google_protobuf_EnumDescriptorProto_EnumReservedRange* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_EnumDescriptorProto_EnumReservedRange_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_EnumDescriptorProto_EnumReservedRange_serialize_ex(const google_protobuf_EnumDescriptorProto_EnumReservedRange* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_EnumDescriptorProto_EnumReservedRange_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_EnumDescriptorProto_EnumReservedRange_clear_start(google_protobuf_EnumDescriptorProto_EnumReservedRange* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_EnumReservedRange_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t google_protobuf_EnumDescriptorProto_EnumReservedRange_start(const google_protobuf_EnumDescriptorProto_EnumReservedRange* msg) {
+ int32_t default_val = (int32_t)0;
+ int32_t ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_EnumReservedRange_msg_init(), 1);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_EnumDescriptorProto_EnumReservedRange_has_start(const google_protobuf_EnumDescriptorProto_EnumReservedRange* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_EnumReservedRange_msg_init(), 1);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_EnumDescriptorProto_EnumReservedRange_clear_end(google_protobuf_EnumDescriptorProto_EnumReservedRange* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_EnumReservedRange_msg_init(), 2);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t google_protobuf_EnumDescriptorProto_EnumReservedRange_end(const google_protobuf_EnumDescriptorProto_EnumReservedRange* msg) {
+ int32_t default_val = (int32_t)0;
+ int32_t ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_EnumReservedRange_msg_init(), 2);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_EnumDescriptorProto_EnumReservedRange_has_end(const google_protobuf_EnumDescriptorProto_EnumReservedRange* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_EnumReservedRange_msg_init(), 2);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+
+UPB_INLINE void google_protobuf_EnumDescriptorProto_EnumReservedRange_set_start(google_protobuf_EnumDescriptorProto_EnumReservedRange *msg, int32_t value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_EnumReservedRange_msg_init(), 1);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_EnumDescriptorProto_EnumReservedRange_set_end(google_protobuf_EnumDescriptorProto_EnumReservedRange *msg, int32_t value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumDescriptorProto_EnumReservedRange_msg_init(), 2);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+
+/* google.protobuf.EnumValueDescriptorProto */
+
+UPB_INLINE google_protobuf_EnumValueDescriptorProto* google_protobuf_EnumValueDescriptorProto_new(upb_Arena* arena) {
+ return (google_protobuf_EnumValueDescriptorProto*)_upb_Message_New(google_protobuf_EnumValueDescriptorProto_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_EnumValueDescriptorProto* google_protobuf_EnumValueDescriptorProto_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_EnumValueDescriptorProto* ret = google_protobuf_EnumValueDescriptorProto_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_EnumValueDescriptorProto_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_EnumValueDescriptorProto* google_protobuf_EnumValueDescriptorProto_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_EnumValueDescriptorProto* ret = google_protobuf_EnumValueDescriptorProto_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_EnumValueDescriptorProto_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_EnumValueDescriptorProto_serialize(const google_protobuf_EnumValueDescriptorProto* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_EnumValueDescriptorProto_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_EnumValueDescriptorProto_serialize_ex(const google_protobuf_EnumValueDescriptorProto* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_EnumValueDescriptorProto_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_EnumValueDescriptorProto_clear_name(google_protobuf_EnumValueDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumValueDescriptorProto_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_EnumValueDescriptorProto_name(const google_protobuf_EnumValueDescriptorProto* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumValueDescriptorProto_msg_init(), 1);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_EnumValueDescriptorProto_has_name(const google_protobuf_EnumValueDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumValueDescriptorProto_msg_init(), 1);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_EnumValueDescriptorProto_clear_number(google_protobuf_EnumValueDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumValueDescriptorProto_msg_init(), 2);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t google_protobuf_EnumValueDescriptorProto_number(const google_protobuf_EnumValueDescriptorProto* msg) {
+ int32_t default_val = (int32_t)0;
+ int32_t ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumValueDescriptorProto_msg_init(), 2);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_EnumValueDescriptorProto_has_number(const google_protobuf_EnumValueDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumValueDescriptorProto_msg_init(), 2);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_EnumValueDescriptorProto_clear_options(google_protobuf_EnumValueDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumValueDescriptorProto_msg_init(), 3);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_EnumValueOptions* google_protobuf_EnumValueDescriptorProto_options(const google_protobuf_EnumValueDescriptorProto* msg) {
+ const google_protobuf_EnumValueOptions* default_val = NULL;
+ const google_protobuf_EnumValueOptions* ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumValueDescriptorProto_msg_init(), 3);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_EnumValueDescriptorProto_has_options(const google_protobuf_EnumValueDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumValueDescriptorProto_msg_init(), 3);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+
+UPB_INLINE void google_protobuf_EnumValueDescriptorProto_set_name(google_protobuf_EnumValueDescriptorProto *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumValueDescriptorProto_msg_init(), 1);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_EnumValueDescriptorProto_set_number(google_protobuf_EnumValueDescriptorProto *msg, int32_t value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumValueDescriptorProto_msg_init(), 2);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_EnumValueDescriptorProto_set_options(google_protobuf_EnumValueDescriptorProto *msg, google_protobuf_EnumValueOptions* value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumValueDescriptorProto_msg_init(), 3);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE struct google_protobuf_EnumValueOptions* google_protobuf_EnumValueDescriptorProto_mutable_options(google_protobuf_EnumValueDescriptorProto* msg, upb_Arena* arena) {
+ struct google_protobuf_EnumValueOptions* sub = (struct google_protobuf_EnumValueOptions*)google_protobuf_EnumValueDescriptorProto_options(msg);
+ if (sub == NULL) {
+ sub = (struct google_protobuf_EnumValueOptions*)_upb_Message_New(google_protobuf_EnumValueOptions_msg_init(), arena);
+ if (sub) google_protobuf_EnumValueDescriptorProto_set_options(msg, sub);
+ }
+ return sub;
+}
+
+/* google.protobuf.ServiceDescriptorProto */
+
+UPB_INLINE google_protobuf_ServiceDescriptorProto* google_protobuf_ServiceDescriptorProto_new(upb_Arena* arena) {
+ return (google_protobuf_ServiceDescriptorProto*)_upb_Message_New(google_protobuf_ServiceDescriptorProto_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_ServiceDescriptorProto* google_protobuf_ServiceDescriptorProto_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_ServiceDescriptorProto* ret = google_protobuf_ServiceDescriptorProto_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_ServiceDescriptorProto_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_ServiceDescriptorProto* google_protobuf_ServiceDescriptorProto_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_ServiceDescriptorProto* ret = google_protobuf_ServiceDescriptorProto_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_ServiceDescriptorProto_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_ServiceDescriptorProto_serialize(const google_protobuf_ServiceDescriptorProto* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_ServiceDescriptorProto_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_ServiceDescriptorProto_serialize_ex(const google_protobuf_ServiceDescriptorProto* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_ServiceDescriptorProto_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_ServiceDescriptorProto_clear_name(google_protobuf_ServiceDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ServiceDescriptorProto_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_ServiceDescriptorProto_name(const google_protobuf_ServiceDescriptorProto* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ServiceDescriptorProto_msg_init(), 1);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_ServiceDescriptorProto_has_name(const google_protobuf_ServiceDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ServiceDescriptorProto_msg_init(), 1);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_ServiceDescriptorProto_clear_method(google_protobuf_ServiceDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ServiceDescriptorProto_msg_init(), 2);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_MethodDescriptorProto* const* google_protobuf_ServiceDescriptorProto_method(const google_protobuf_ServiceDescriptorProto* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ServiceDescriptorProto_msg_init(), 2);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_MethodDescriptorProto* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_ServiceDescriptorProto_has_method(const google_protobuf_ServiceDescriptorProto* msg) {
+ size_t size;
+ google_protobuf_ServiceDescriptorProto_method(msg, &size);
+ return size != 0;
+}
+UPB_INLINE void google_protobuf_ServiceDescriptorProto_clear_options(google_protobuf_ServiceDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ServiceDescriptorProto_msg_init(), 3);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_ServiceOptions* google_protobuf_ServiceDescriptorProto_options(const google_protobuf_ServiceDescriptorProto* msg) {
+ const google_protobuf_ServiceOptions* default_val = NULL;
+ const google_protobuf_ServiceOptions* ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ServiceDescriptorProto_msg_init(), 3);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_ServiceDescriptorProto_has_options(const google_protobuf_ServiceDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ServiceDescriptorProto_msg_init(), 3);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+
+UPB_INLINE void google_protobuf_ServiceDescriptorProto_set_name(google_protobuf_ServiceDescriptorProto *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ServiceDescriptorProto_msg_init(), 1);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE google_protobuf_MethodDescriptorProto** google_protobuf_ServiceDescriptorProto_mutable_method(google_protobuf_ServiceDescriptorProto* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ServiceDescriptorProto_msg_init(), 2);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_MethodDescriptorProto**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_MethodDescriptorProto** google_protobuf_ServiceDescriptorProto_resize_method(google_protobuf_ServiceDescriptorProto* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ServiceDescriptorProto_msg_init(), 2);
+ return (google_protobuf_MethodDescriptorProto**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_MethodDescriptorProto* google_protobuf_ServiceDescriptorProto_add_method(google_protobuf_ServiceDescriptorProto* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ServiceDescriptorProto_msg_init(), 2);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_MethodDescriptorProto* sub = (struct google_protobuf_MethodDescriptorProto*)_upb_Message_New(google_protobuf_MethodDescriptorProto_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+UPB_INLINE void google_protobuf_ServiceDescriptorProto_set_options(google_protobuf_ServiceDescriptorProto *msg, google_protobuf_ServiceOptions* value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ServiceDescriptorProto_msg_init(), 3);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE struct google_protobuf_ServiceOptions* google_protobuf_ServiceDescriptorProto_mutable_options(google_protobuf_ServiceDescriptorProto* msg, upb_Arena* arena) {
+ struct google_protobuf_ServiceOptions* sub = (struct google_protobuf_ServiceOptions*)google_protobuf_ServiceDescriptorProto_options(msg);
+ if (sub == NULL) {
+ sub = (struct google_protobuf_ServiceOptions*)_upb_Message_New(google_protobuf_ServiceOptions_msg_init(), arena);
+ if (sub) google_protobuf_ServiceDescriptorProto_set_options(msg, sub);
+ }
+ return sub;
+}
+
+/* google.protobuf.MethodDescriptorProto */
+
+UPB_INLINE google_protobuf_MethodDescriptorProto* google_protobuf_MethodDescriptorProto_new(upb_Arena* arena) {
+ return (google_protobuf_MethodDescriptorProto*)_upb_Message_New(google_protobuf_MethodDescriptorProto_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_MethodDescriptorProto* google_protobuf_MethodDescriptorProto_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_MethodDescriptorProto* ret = google_protobuf_MethodDescriptorProto_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_MethodDescriptorProto_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_MethodDescriptorProto* google_protobuf_MethodDescriptorProto_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_MethodDescriptorProto* ret = google_protobuf_MethodDescriptorProto_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_MethodDescriptorProto_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_MethodDescriptorProto_serialize(const google_protobuf_MethodDescriptorProto* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_MethodDescriptorProto_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_MethodDescriptorProto_serialize_ex(const google_protobuf_MethodDescriptorProto* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_MethodDescriptorProto_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_MethodDescriptorProto_clear_name(google_protobuf_MethodDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodDescriptorProto_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_MethodDescriptorProto_name(const google_protobuf_MethodDescriptorProto* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodDescriptorProto_msg_init(), 1);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_MethodDescriptorProto_has_name(const google_protobuf_MethodDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodDescriptorProto_msg_init(), 1);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_MethodDescriptorProto_clear_input_type(google_protobuf_MethodDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodDescriptorProto_msg_init(), 2);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_MethodDescriptorProto_input_type(const google_protobuf_MethodDescriptorProto* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodDescriptorProto_msg_init(), 2);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_MethodDescriptorProto_has_input_type(const google_protobuf_MethodDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodDescriptorProto_msg_init(), 2);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_MethodDescriptorProto_clear_output_type(google_protobuf_MethodDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodDescriptorProto_msg_init(), 3);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_MethodDescriptorProto_output_type(const google_protobuf_MethodDescriptorProto* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodDescriptorProto_msg_init(), 3);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_MethodDescriptorProto_has_output_type(const google_protobuf_MethodDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodDescriptorProto_msg_init(), 3);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_MethodDescriptorProto_clear_options(google_protobuf_MethodDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodDescriptorProto_msg_init(), 4);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_MethodOptions* google_protobuf_MethodDescriptorProto_options(const google_protobuf_MethodDescriptorProto* msg) {
+ const google_protobuf_MethodOptions* default_val = NULL;
+ const google_protobuf_MethodOptions* ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodDescriptorProto_msg_init(), 4);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_MethodDescriptorProto_has_options(const google_protobuf_MethodDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodDescriptorProto_msg_init(), 4);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_MethodDescriptorProto_clear_client_streaming(google_protobuf_MethodDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodDescriptorProto_msg_init(), 5);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_MethodDescriptorProto_client_streaming(const google_protobuf_MethodDescriptorProto* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodDescriptorProto_msg_init(), 5);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_MethodDescriptorProto_has_client_streaming(const google_protobuf_MethodDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodDescriptorProto_msg_init(), 5);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_MethodDescriptorProto_clear_server_streaming(google_protobuf_MethodDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodDescriptorProto_msg_init(), 6);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_MethodDescriptorProto_server_streaming(const google_protobuf_MethodDescriptorProto* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodDescriptorProto_msg_init(), 6);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_MethodDescriptorProto_has_server_streaming(const google_protobuf_MethodDescriptorProto* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodDescriptorProto_msg_init(), 6);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+
+UPB_INLINE void google_protobuf_MethodDescriptorProto_set_name(google_protobuf_MethodDescriptorProto *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodDescriptorProto_msg_init(), 1);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_MethodDescriptorProto_set_input_type(google_protobuf_MethodDescriptorProto *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodDescriptorProto_msg_init(), 2);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_MethodDescriptorProto_set_output_type(google_protobuf_MethodDescriptorProto *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodDescriptorProto_msg_init(), 3);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_MethodDescriptorProto_set_options(google_protobuf_MethodDescriptorProto *msg, google_protobuf_MethodOptions* value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodDescriptorProto_msg_init(), 4);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE struct google_protobuf_MethodOptions* google_protobuf_MethodDescriptorProto_mutable_options(google_protobuf_MethodDescriptorProto* msg, upb_Arena* arena) {
+ struct google_protobuf_MethodOptions* sub = (struct google_protobuf_MethodOptions*)google_protobuf_MethodDescriptorProto_options(msg);
+ if (sub == NULL) {
+ sub = (struct google_protobuf_MethodOptions*)_upb_Message_New(google_protobuf_MethodOptions_msg_init(), arena);
+ if (sub) google_protobuf_MethodDescriptorProto_set_options(msg, sub);
+ }
+ return sub;
+}
+UPB_INLINE void google_protobuf_MethodDescriptorProto_set_client_streaming(google_protobuf_MethodDescriptorProto *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodDescriptorProto_msg_init(), 5);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_MethodDescriptorProto_set_server_streaming(google_protobuf_MethodDescriptorProto *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodDescriptorProto_msg_init(), 6);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+
+/* google.protobuf.FileOptions */
+
+UPB_INLINE google_protobuf_FileOptions* google_protobuf_FileOptions_new(upb_Arena* arena) {
+ return (google_protobuf_FileOptions*)_upb_Message_New(google_protobuf_FileOptions_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_FileOptions* google_protobuf_FileOptions_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_FileOptions* ret = google_protobuf_FileOptions_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_FileOptions_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_FileOptions* google_protobuf_FileOptions_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_FileOptions* ret = google_protobuf_FileOptions_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_FileOptions_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_FileOptions_serialize(const google_protobuf_FileOptions* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_FileOptions_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_FileOptions_serialize_ex(const google_protobuf_FileOptions* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_FileOptions_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_FileOptions_clear_java_package(google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_FileOptions_java_package(const google_protobuf_FileOptions* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 1);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileOptions_has_java_package(const google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 1);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileOptions_clear_java_outer_classname(google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 8);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_FileOptions_java_outer_classname(const google_protobuf_FileOptions* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 8);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileOptions_has_java_outer_classname(const google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 8);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileOptions_clear_optimize_for(google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 9);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t google_protobuf_FileOptions_optimize_for(const google_protobuf_FileOptions* msg) {
+ int32_t default_val = 1;
+ int32_t ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 9);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileOptions_has_optimize_for(const google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 9);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileOptions_clear_java_multiple_files(google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 10);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_FileOptions_java_multiple_files(const google_protobuf_FileOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 10);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileOptions_has_java_multiple_files(const google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 10);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileOptions_clear_go_package(google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 11);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_FileOptions_go_package(const google_protobuf_FileOptions* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 11);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileOptions_has_go_package(const google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 11);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileOptions_clear_cc_generic_services(google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 16);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_FileOptions_cc_generic_services(const google_protobuf_FileOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 16);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileOptions_has_cc_generic_services(const google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 16);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileOptions_clear_java_generic_services(google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 17);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_FileOptions_java_generic_services(const google_protobuf_FileOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 17);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileOptions_has_java_generic_services(const google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 17);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileOptions_clear_py_generic_services(google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 18);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_FileOptions_py_generic_services(const google_protobuf_FileOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 18);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileOptions_has_py_generic_services(const google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 18);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileOptions_clear_java_generate_equals_and_hash(google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 20);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_FileOptions_java_generate_equals_and_hash(const google_protobuf_FileOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 20);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileOptions_has_java_generate_equals_and_hash(const google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 20);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileOptions_clear_deprecated(google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 23);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_FileOptions_deprecated(const google_protobuf_FileOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 23);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileOptions_has_deprecated(const google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 23);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileOptions_clear_java_string_check_utf8(google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 27);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_FileOptions_java_string_check_utf8(const google_protobuf_FileOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 27);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileOptions_has_java_string_check_utf8(const google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 27);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileOptions_clear_cc_enable_arenas(google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 31);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_FileOptions_cc_enable_arenas(const google_protobuf_FileOptions* msg) {
+ bool default_val = true;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 31);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileOptions_has_cc_enable_arenas(const google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 31);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileOptions_clear_objc_class_prefix(google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 36);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_FileOptions_objc_class_prefix(const google_protobuf_FileOptions* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 36);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileOptions_has_objc_class_prefix(const google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 36);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileOptions_clear_csharp_namespace(google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 37);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_FileOptions_csharp_namespace(const google_protobuf_FileOptions* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 37);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileOptions_has_csharp_namespace(const google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 37);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileOptions_clear_swift_prefix(google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 39);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_FileOptions_swift_prefix(const google_protobuf_FileOptions* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 39);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileOptions_has_swift_prefix(const google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 39);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileOptions_clear_php_class_prefix(google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 40);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_FileOptions_php_class_prefix(const google_protobuf_FileOptions* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 40);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileOptions_has_php_class_prefix(const google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 40);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileOptions_clear_php_namespace(google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 41);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_FileOptions_php_namespace(const google_protobuf_FileOptions* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 41);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileOptions_has_php_namespace(const google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 41);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileOptions_clear_php_generic_services(google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 42);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_FileOptions_php_generic_services(const google_protobuf_FileOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 42);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileOptions_has_php_generic_services(const google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 42);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileOptions_clear_php_metadata_namespace(google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 44);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_FileOptions_php_metadata_namespace(const google_protobuf_FileOptions* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 44);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileOptions_has_php_metadata_namespace(const google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 44);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileOptions_clear_ruby_package(google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 45);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_FileOptions_ruby_package(const google_protobuf_FileOptions* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 45);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FileOptions_has_ruby_package(const google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 45);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FileOptions_clear_uninterpreted_option(google_protobuf_FileOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 999);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_UninterpretedOption* const* google_protobuf_FileOptions_uninterpreted_option(const google_protobuf_FileOptions* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 999);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_UninterpretedOption* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_FileOptions_has_uninterpreted_option(const google_protobuf_FileOptions* msg) {
+ size_t size;
+ google_protobuf_FileOptions_uninterpreted_option(msg, &size);
+ return size != 0;
+}
+
+UPB_INLINE void google_protobuf_FileOptions_set_java_package(google_protobuf_FileOptions *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 1);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FileOptions_set_java_outer_classname(google_protobuf_FileOptions *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 8);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FileOptions_set_optimize_for(google_protobuf_FileOptions *msg, int32_t value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 9);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FileOptions_set_java_multiple_files(google_protobuf_FileOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 10);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FileOptions_set_go_package(google_protobuf_FileOptions *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 11);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FileOptions_set_cc_generic_services(google_protobuf_FileOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 16);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FileOptions_set_java_generic_services(google_protobuf_FileOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 17);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FileOptions_set_py_generic_services(google_protobuf_FileOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 18);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FileOptions_set_java_generate_equals_and_hash(google_protobuf_FileOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 20);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FileOptions_set_deprecated(google_protobuf_FileOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 23);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FileOptions_set_java_string_check_utf8(google_protobuf_FileOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 27);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FileOptions_set_cc_enable_arenas(google_protobuf_FileOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 31);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FileOptions_set_objc_class_prefix(google_protobuf_FileOptions *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 36);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FileOptions_set_csharp_namespace(google_protobuf_FileOptions *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 37);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FileOptions_set_swift_prefix(google_protobuf_FileOptions *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 39);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FileOptions_set_php_class_prefix(google_protobuf_FileOptions *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 40);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FileOptions_set_php_namespace(google_protobuf_FileOptions *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 41);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FileOptions_set_php_generic_services(google_protobuf_FileOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 42);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FileOptions_set_php_metadata_namespace(google_protobuf_FileOptions *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 44);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FileOptions_set_ruby_package(google_protobuf_FileOptions *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 45);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_FileOptions_mutable_uninterpreted_option(google_protobuf_FileOptions* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 999);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_UninterpretedOption**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_FileOptions_resize_uninterpreted_option(google_protobuf_FileOptions* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 999);
+ return (google_protobuf_UninterpretedOption**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_UninterpretedOption* google_protobuf_FileOptions_add_uninterpreted_option(google_protobuf_FileOptions* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FileOptions_msg_init(), 999);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_UninterpretedOption* sub = (struct google_protobuf_UninterpretedOption*)_upb_Message_New(google_protobuf_UninterpretedOption_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+
+/* google.protobuf.MessageOptions */
+
+UPB_INLINE google_protobuf_MessageOptions* google_protobuf_MessageOptions_new(upb_Arena* arena) {
+ return (google_protobuf_MessageOptions*)_upb_Message_New(google_protobuf_MessageOptions_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_MessageOptions* google_protobuf_MessageOptions_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_MessageOptions* ret = google_protobuf_MessageOptions_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_MessageOptions_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_MessageOptions* google_protobuf_MessageOptions_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_MessageOptions* ret = google_protobuf_MessageOptions_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_MessageOptions_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_MessageOptions_serialize(const google_protobuf_MessageOptions* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_MessageOptions_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_MessageOptions_serialize_ex(const google_protobuf_MessageOptions* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_MessageOptions_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_MessageOptions_clear_message_set_wire_format(google_protobuf_MessageOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_MessageOptions_message_set_wire_format(const google_protobuf_MessageOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 1);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_MessageOptions_has_message_set_wire_format(const google_protobuf_MessageOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 1);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_MessageOptions_clear_no_standard_descriptor_accessor(google_protobuf_MessageOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 2);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_MessageOptions_no_standard_descriptor_accessor(const google_protobuf_MessageOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 2);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_MessageOptions_has_no_standard_descriptor_accessor(const google_protobuf_MessageOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 2);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_MessageOptions_clear_deprecated(google_protobuf_MessageOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 3);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_MessageOptions_deprecated(const google_protobuf_MessageOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 3);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_MessageOptions_has_deprecated(const google_protobuf_MessageOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 3);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_MessageOptions_clear_map_entry(google_protobuf_MessageOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 7);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_MessageOptions_map_entry(const google_protobuf_MessageOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 7);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_MessageOptions_has_map_entry(const google_protobuf_MessageOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 7);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_MessageOptions_clear_deprecated_legacy_json_field_conflicts(google_protobuf_MessageOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 11);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_MessageOptions_deprecated_legacy_json_field_conflicts(const google_protobuf_MessageOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 11);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_MessageOptions_has_deprecated_legacy_json_field_conflicts(const google_protobuf_MessageOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 11);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_MessageOptions_clear_uninterpreted_option(google_protobuf_MessageOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 999);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_UninterpretedOption* const* google_protobuf_MessageOptions_uninterpreted_option(const google_protobuf_MessageOptions* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 999);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_UninterpretedOption* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_MessageOptions_has_uninterpreted_option(const google_protobuf_MessageOptions* msg) {
+ size_t size;
+ google_protobuf_MessageOptions_uninterpreted_option(msg, &size);
+ return size != 0;
+}
+
+UPB_INLINE void google_protobuf_MessageOptions_set_message_set_wire_format(google_protobuf_MessageOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 1);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_MessageOptions_set_no_standard_descriptor_accessor(google_protobuf_MessageOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 2);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_MessageOptions_set_deprecated(google_protobuf_MessageOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 3);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_MessageOptions_set_map_entry(google_protobuf_MessageOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 7);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_MessageOptions_set_deprecated_legacy_json_field_conflicts(google_protobuf_MessageOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 11);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_MessageOptions_mutable_uninterpreted_option(google_protobuf_MessageOptions* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 999);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_UninterpretedOption**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_MessageOptions_resize_uninterpreted_option(google_protobuf_MessageOptions* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 999);
+ return (google_protobuf_UninterpretedOption**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_UninterpretedOption* google_protobuf_MessageOptions_add_uninterpreted_option(google_protobuf_MessageOptions* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MessageOptions_msg_init(), 999);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_UninterpretedOption* sub = (struct google_protobuf_UninterpretedOption*)_upb_Message_New(google_protobuf_UninterpretedOption_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+
+/* google.protobuf.FieldOptions */
+
+UPB_INLINE google_protobuf_FieldOptions* google_protobuf_FieldOptions_new(upb_Arena* arena) {
+ return (google_protobuf_FieldOptions*)_upb_Message_New(google_protobuf_FieldOptions_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_FieldOptions* google_protobuf_FieldOptions_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_FieldOptions* ret = google_protobuf_FieldOptions_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_FieldOptions_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_FieldOptions* google_protobuf_FieldOptions_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_FieldOptions* ret = google_protobuf_FieldOptions_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_FieldOptions_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_FieldOptions_serialize(const google_protobuf_FieldOptions* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_FieldOptions_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_FieldOptions_serialize_ex(const google_protobuf_FieldOptions* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_FieldOptions_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_FieldOptions_clear_ctype(google_protobuf_FieldOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t google_protobuf_FieldOptions_ctype(const google_protobuf_FieldOptions* msg) {
+ int32_t default_val = 0;
+ int32_t ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 1);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FieldOptions_has_ctype(const google_protobuf_FieldOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 1);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FieldOptions_clear_packed(google_protobuf_FieldOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 2);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_FieldOptions_packed(const google_protobuf_FieldOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 2);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FieldOptions_has_packed(const google_protobuf_FieldOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 2);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FieldOptions_clear_deprecated(google_protobuf_FieldOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 3);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_FieldOptions_deprecated(const google_protobuf_FieldOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 3);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FieldOptions_has_deprecated(const google_protobuf_FieldOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 3);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FieldOptions_clear_lazy(google_protobuf_FieldOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 5);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_FieldOptions_lazy(const google_protobuf_FieldOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 5);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FieldOptions_has_lazy(const google_protobuf_FieldOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 5);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FieldOptions_clear_jstype(google_protobuf_FieldOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 6);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t google_protobuf_FieldOptions_jstype(const google_protobuf_FieldOptions* msg) {
+ int32_t default_val = 0;
+ int32_t ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 6);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FieldOptions_has_jstype(const google_protobuf_FieldOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 6);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FieldOptions_clear_weak(google_protobuf_FieldOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 10);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_FieldOptions_weak(const google_protobuf_FieldOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 10);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FieldOptions_has_weak(const google_protobuf_FieldOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 10);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FieldOptions_clear_unverified_lazy(google_protobuf_FieldOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 15);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_FieldOptions_unverified_lazy(const google_protobuf_FieldOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 15);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FieldOptions_has_unverified_lazy(const google_protobuf_FieldOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 15);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FieldOptions_clear_debug_redact(google_protobuf_FieldOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 16);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_FieldOptions_debug_redact(const google_protobuf_FieldOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 16);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_FieldOptions_has_debug_redact(const google_protobuf_FieldOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 16);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_FieldOptions_clear_uninterpreted_option(google_protobuf_FieldOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 999);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_UninterpretedOption* const* google_protobuf_FieldOptions_uninterpreted_option(const google_protobuf_FieldOptions* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 999);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_UninterpretedOption* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_FieldOptions_has_uninterpreted_option(const google_protobuf_FieldOptions* msg) {
+ size_t size;
+ google_protobuf_FieldOptions_uninterpreted_option(msg, &size);
+ return size != 0;
+}
+
+UPB_INLINE void google_protobuf_FieldOptions_set_ctype(google_protobuf_FieldOptions *msg, int32_t value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 1);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FieldOptions_set_packed(google_protobuf_FieldOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 2);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FieldOptions_set_deprecated(google_protobuf_FieldOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 3);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FieldOptions_set_lazy(google_protobuf_FieldOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 5);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FieldOptions_set_jstype(google_protobuf_FieldOptions *msg, int32_t value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 6);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FieldOptions_set_weak(google_protobuf_FieldOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 10);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FieldOptions_set_unverified_lazy(google_protobuf_FieldOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 15);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_FieldOptions_set_debug_redact(google_protobuf_FieldOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 16);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_FieldOptions_mutable_uninterpreted_option(google_protobuf_FieldOptions* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 999);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_UninterpretedOption**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_FieldOptions_resize_uninterpreted_option(google_protobuf_FieldOptions* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 999);
+ return (google_protobuf_UninterpretedOption**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_UninterpretedOption* google_protobuf_FieldOptions_add_uninterpreted_option(google_protobuf_FieldOptions* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_FieldOptions_msg_init(), 999);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_UninterpretedOption* sub = (struct google_protobuf_UninterpretedOption*)_upb_Message_New(google_protobuf_UninterpretedOption_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+
+/* google.protobuf.OneofOptions */
+
+UPB_INLINE google_protobuf_OneofOptions* google_protobuf_OneofOptions_new(upb_Arena* arena) {
+ return (google_protobuf_OneofOptions*)_upb_Message_New(google_protobuf_OneofOptions_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_OneofOptions* google_protobuf_OneofOptions_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_OneofOptions* ret = google_protobuf_OneofOptions_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_OneofOptions_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_OneofOptions* google_protobuf_OneofOptions_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_OneofOptions* ret = google_protobuf_OneofOptions_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_OneofOptions_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_OneofOptions_serialize(const google_protobuf_OneofOptions* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_OneofOptions_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_OneofOptions_serialize_ex(const google_protobuf_OneofOptions* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_OneofOptions_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_OneofOptions_clear_uninterpreted_option(google_protobuf_OneofOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_OneofOptions_msg_init(), 999);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_UninterpretedOption* const* google_protobuf_OneofOptions_uninterpreted_option(const google_protobuf_OneofOptions* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_OneofOptions_msg_init(), 999);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_UninterpretedOption* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_OneofOptions_has_uninterpreted_option(const google_protobuf_OneofOptions* msg) {
+ size_t size;
+ google_protobuf_OneofOptions_uninterpreted_option(msg, &size);
+ return size != 0;
+}
+
+UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_OneofOptions_mutable_uninterpreted_option(google_protobuf_OneofOptions* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_OneofOptions_msg_init(), 999);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_UninterpretedOption**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_OneofOptions_resize_uninterpreted_option(google_protobuf_OneofOptions* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_OneofOptions_msg_init(), 999);
+ return (google_protobuf_UninterpretedOption**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_UninterpretedOption* google_protobuf_OneofOptions_add_uninterpreted_option(google_protobuf_OneofOptions* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_OneofOptions_msg_init(), 999);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_UninterpretedOption* sub = (struct google_protobuf_UninterpretedOption*)_upb_Message_New(google_protobuf_UninterpretedOption_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+
+/* google.protobuf.EnumOptions */
+
+UPB_INLINE google_protobuf_EnumOptions* google_protobuf_EnumOptions_new(upb_Arena* arena) {
+ return (google_protobuf_EnumOptions*)_upb_Message_New(google_protobuf_EnumOptions_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_EnumOptions* google_protobuf_EnumOptions_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_EnumOptions* ret = google_protobuf_EnumOptions_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_EnumOptions_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_EnumOptions* google_protobuf_EnumOptions_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_EnumOptions* ret = google_protobuf_EnumOptions_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_EnumOptions_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_EnumOptions_serialize(const google_protobuf_EnumOptions* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_EnumOptions_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_EnumOptions_serialize_ex(const google_protobuf_EnumOptions* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_EnumOptions_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_EnumOptions_clear_allow_alias(google_protobuf_EnumOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumOptions_msg_init(), 2);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_EnumOptions_allow_alias(const google_protobuf_EnumOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumOptions_msg_init(), 2);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_EnumOptions_has_allow_alias(const google_protobuf_EnumOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumOptions_msg_init(), 2);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_EnumOptions_clear_deprecated(google_protobuf_EnumOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumOptions_msg_init(), 3);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_EnumOptions_deprecated(const google_protobuf_EnumOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumOptions_msg_init(), 3);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_EnumOptions_has_deprecated(const google_protobuf_EnumOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumOptions_msg_init(), 3);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_EnumOptions_clear_deprecated_legacy_json_field_conflicts(google_protobuf_EnumOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumOptions_msg_init(), 6);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_EnumOptions_deprecated_legacy_json_field_conflicts(const google_protobuf_EnumOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumOptions_msg_init(), 6);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_EnumOptions_has_deprecated_legacy_json_field_conflicts(const google_protobuf_EnumOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumOptions_msg_init(), 6);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_EnumOptions_clear_uninterpreted_option(google_protobuf_EnumOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumOptions_msg_init(), 999);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_UninterpretedOption* const* google_protobuf_EnumOptions_uninterpreted_option(const google_protobuf_EnumOptions* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumOptions_msg_init(), 999);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_UninterpretedOption* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_EnumOptions_has_uninterpreted_option(const google_protobuf_EnumOptions* msg) {
+ size_t size;
+ google_protobuf_EnumOptions_uninterpreted_option(msg, &size);
+ return size != 0;
+}
+
+UPB_INLINE void google_protobuf_EnumOptions_set_allow_alias(google_protobuf_EnumOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumOptions_msg_init(), 2);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_EnumOptions_set_deprecated(google_protobuf_EnumOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumOptions_msg_init(), 3);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_EnumOptions_set_deprecated_legacy_json_field_conflicts(google_protobuf_EnumOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumOptions_msg_init(), 6);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_EnumOptions_mutable_uninterpreted_option(google_protobuf_EnumOptions* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumOptions_msg_init(), 999);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_UninterpretedOption**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_EnumOptions_resize_uninterpreted_option(google_protobuf_EnumOptions* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumOptions_msg_init(), 999);
+ return (google_protobuf_UninterpretedOption**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_UninterpretedOption* google_protobuf_EnumOptions_add_uninterpreted_option(google_protobuf_EnumOptions* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumOptions_msg_init(), 999);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_UninterpretedOption* sub = (struct google_protobuf_UninterpretedOption*)_upb_Message_New(google_protobuf_UninterpretedOption_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+
+/* google.protobuf.EnumValueOptions */
+
+UPB_INLINE google_protobuf_EnumValueOptions* google_protobuf_EnumValueOptions_new(upb_Arena* arena) {
+ return (google_protobuf_EnumValueOptions*)_upb_Message_New(google_protobuf_EnumValueOptions_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_EnumValueOptions* google_protobuf_EnumValueOptions_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_EnumValueOptions* ret = google_protobuf_EnumValueOptions_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_EnumValueOptions_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_EnumValueOptions* google_protobuf_EnumValueOptions_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_EnumValueOptions* ret = google_protobuf_EnumValueOptions_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_EnumValueOptions_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_EnumValueOptions_serialize(const google_protobuf_EnumValueOptions* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_EnumValueOptions_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_EnumValueOptions_serialize_ex(const google_protobuf_EnumValueOptions* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_EnumValueOptions_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_EnumValueOptions_clear_deprecated(google_protobuf_EnumValueOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumValueOptions_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_EnumValueOptions_deprecated(const google_protobuf_EnumValueOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumValueOptions_msg_init(), 1);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_EnumValueOptions_has_deprecated(const google_protobuf_EnumValueOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumValueOptions_msg_init(), 1);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_EnumValueOptions_clear_uninterpreted_option(google_protobuf_EnumValueOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumValueOptions_msg_init(), 999);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_UninterpretedOption* const* google_protobuf_EnumValueOptions_uninterpreted_option(const google_protobuf_EnumValueOptions* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumValueOptions_msg_init(), 999);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_UninterpretedOption* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_EnumValueOptions_has_uninterpreted_option(const google_protobuf_EnumValueOptions* msg) {
+ size_t size;
+ google_protobuf_EnumValueOptions_uninterpreted_option(msg, &size);
+ return size != 0;
+}
+
+UPB_INLINE void google_protobuf_EnumValueOptions_set_deprecated(google_protobuf_EnumValueOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumValueOptions_msg_init(), 1);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_EnumValueOptions_mutable_uninterpreted_option(google_protobuf_EnumValueOptions* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumValueOptions_msg_init(), 999);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_UninterpretedOption**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_EnumValueOptions_resize_uninterpreted_option(google_protobuf_EnumValueOptions* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumValueOptions_msg_init(), 999);
+ return (google_protobuf_UninterpretedOption**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_UninterpretedOption* google_protobuf_EnumValueOptions_add_uninterpreted_option(google_protobuf_EnumValueOptions* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_EnumValueOptions_msg_init(), 999);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_UninterpretedOption* sub = (struct google_protobuf_UninterpretedOption*)_upb_Message_New(google_protobuf_UninterpretedOption_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+
+/* google.protobuf.ServiceOptions */
+
+UPB_INLINE google_protobuf_ServiceOptions* google_protobuf_ServiceOptions_new(upb_Arena* arena) {
+ return (google_protobuf_ServiceOptions*)_upb_Message_New(google_protobuf_ServiceOptions_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_ServiceOptions* google_protobuf_ServiceOptions_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_ServiceOptions* ret = google_protobuf_ServiceOptions_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_ServiceOptions_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_ServiceOptions* google_protobuf_ServiceOptions_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_ServiceOptions* ret = google_protobuf_ServiceOptions_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_ServiceOptions_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_ServiceOptions_serialize(const google_protobuf_ServiceOptions* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_ServiceOptions_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_ServiceOptions_serialize_ex(const google_protobuf_ServiceOptions* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_ServiceOptions_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_ServiceOptions_clear_deprecated(google_protobuf_ServiceOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ServiceOptions_msg_init(), 33);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_ServiceOptions_deprecated(const google_protobuf_ServiceOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ServiceOptions_msg_init(), 33);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_ServiceOptions_has_deprecated(const google_protobuf_ServiceOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ServiceOptions_msg_init(), 33);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_ServiceOptions_clear_uninterpreted_option(google_protobuf_ServiceOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ServiceOptions_msg_init(), 999);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_UninterpretedOption* const* google_protobuf_ServiceOptions_uninterpreted_option(const google_protobuf_ServiceOptions* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ServiceOptions_msg_init(), 999);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_UninterpretedOption* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_ServiceOptions_has_uninterpreted_option(const google_protobuf_ServiceOptions* msg) {
+ size_t size;
+ google_protobuf_ServiceOptions_uninterpreted_option(msg, &size);
+ return size != 0;
+}
+
+UPB_INLINE void google_protobuf_ServiceOptions_set_deprecated(google_protobuf_ServiceOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ServiceOptions_msg_init(), 33);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_ServiceOptions_mutable_uninterpreted_option(google_protobuf_ServiceOptions* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ServiceOptions_msg_init(), 999);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_UninterpretedOption**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_ServiceOptions_resize_uninterpreted_option(google_protobuf_ServiceOptions* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ServiceOptions_msg_init(), 999);
+ return (google_protobuf_UninterpretedOption**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_UninterpretedOption* google_protobuf_ServiceOptions_add_uninterpreted_option(google_protobuf_ServiceOptions* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_ServiceOptions_msg_init(), 999);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_UninterpretedOption* sub = (struct google_protobuf_UninterpretedOption*)_upb_Message_New(google_protobuf_UninterpretedOption_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+
+/* google.protobuf.MethodOptions */
+
+UPB_INLINE google_protobuf_MethodOptions* google_protobuf_MethodOptions_new(upb_Arena* arena) {
+ return (google_protobuf_MethodOptions*)_upb_Message_New(google_protobuf_MethodOptions_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_MethodOptions* google_protobuf_MethodOptions_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_MethodOptions* ret = google_protobuf_MethodOptions_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_MethodOptions_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_MethodOptions* google_protobuf_MethodOptions_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_MethodOptions* ret = google_protobuf_MethodOptions_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_MethodOptions_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_MethodOptions_serialize(const google_protobuf_MethodOptions* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_MethodOptions_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_MethodOptions_serialize_ex(const google_protobuf_MethodOptions* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_MethodOptions_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_MethodOptions_clear_deprecated(google_protobuf_MethodOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodOptions_msg_init(), 33);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_MethodOptions_deprecated(const google_protobuf_MethodOptions* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodOptions_msg_init(), 33);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_MethodOptions_has_deprecated(const google_protobuf_MethodOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodOptions_msg_init(), 33);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_MethodOptions_clear_idempotency_level(google_protobuf_MethodOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodOptions_msg_init(), 34);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t google_protobuf_MethodOptions_idempotency_level(const google_protobuf_MethodOptions* msg) {
+ int32_t default_val = 0;
+ int32_t ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodOptions_msg_init(), 34);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_MethodOptions_has_idempotency_level(const google_protobuf_MethodOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodOptions_msg_init(), 34);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_MethodOptions_clear_uninterpreted_option(google_protobuf_MethodOptions* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodOptions_msg_init(), 999);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_UninterpretedOption* const* google_protobuf_MethodOptions_uninterpreted_option(const google_protobuf_MethodOptions* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodOptions_msg_init(), 999);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_UninterpretedOption* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_MethodOptions_has_uninterpreted_option(const google_protobuf_MethodOptions* msg) {
+ size_t size;
+ google_protobuf_MethodOptions_uninterpreted_option(msg, &size);
+ return size != 0;
+}
+
+UPB_INLINE void google_protobuf_MethodOptions_set_deprecated(google_protobuf_MethodOptions *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodOptions_msg_init(), 33);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_MethodOptions_set_idempotency_level(google_protobuf_MethodOptions *msg, int32_t value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodOptions_msg_init(), 34);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_MethodOptions_mutable_uninterpreted_option(google_protobuf_MethodOptions* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodOptions_msg_init(), 999);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_UninterpretedOption**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_MethodOptions_resize_uninterpreted_option(google_protobuf_MethodOptions* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodOptions_msg_init(), 999);
+ return (google_protobuf_UninterpretedOption**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_UninterpretedOption* google_protobuf_MethodOptions_add_uninterpreted_option(google_protobuf_MethodOptions* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_MethodOptions_msg_init(), 999);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_UninterpretedOption* sub = (struct google_protobuf_UninterpretedOption*)_upb_Message_New(google_protobuf_UninterpretedOption_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+
+/* google.protobuf.UninterpretedOption */
+
+UPB_INLINE google_protobuf_UninterpretedOption* google_protobuf_UninterpretedOption_new(upb_Arena* arena) {
+ return (google_protobuf_UninterpretedOption*)_upb_Message_New(google_protobuf_UninterpretedOption_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_UninterpretedOption* google_protobuf_UninterpretedOption_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_UninterpretedOption* ret = google_protobuf_UninterpretedOption_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_UninterpretedOption_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_UninterpretedOption* google_protobuf_UninterpretedOption_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_UninterpretedOption* ret = google_protobuf_UninterpretedOption_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_UninterpretedOption_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_UninterpretedOption_serialize(const google_protobuf_UninterpretedOption* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_UninterpretedOption_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_UninterpretedOption_serialize_ex(const google_protobuf_UninterpretedOption* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_UninterpretedOption_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_UninterpretedOption_clear_name(google_protobuf_UninterpretedOption* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 2);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_UninterpretedOption_NamePart* const* google_protobuf_UninterpretedOption_name(const google_protobuf_UninterpretedOption* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 2);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_UninterpretedOption_NamePart* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_UninterpretedOption_has_name(const google_protobuf_UninterpretedOption* msg) {
+ size_t size;
+ google_protobuf_UninterpretedOption_name(msg, &size);
+ return size != 0;
+}
+UPB_INLINE void google_protobuf_UninterpretedOption_clear_identifier_value(google_protobuf_UninterpretedOption* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 3);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_UninterpretedOption_identifier_value(const google_protobuf_UninterpretedOption* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 3);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_UninterpretedOption_has_identifier_value(const google_protobuf_UninterpretedOption* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 3);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_UninterpretedOption_clear_positive_int_value(google_protobuf_UninterpretedOption* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 4);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE uint64_t google_protobuf_UninterpretedOption_positive_int_value(const google_protobuf_UninterpretedOption* msg) {
+ uint64_t default_val = (uint64_t)0ull;
+ uint64_t ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 4);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_UninterpretedOption_has_positive_int_value(const google_protobuf_UninterpretedOption* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 4);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_UninterpretedOption_clear_negative_int_value(google_protobuf_UninterpretedOption* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 5);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int64_t google_protobuf_UninterpretedOption_negative_int_value(const google_protobuf_UninterpretedOption* msg) {
+ int64_t default_val = (int64_t)0ll;
+ int64_t ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 5);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_UninterpretedOption_has_negative_int_value(const google_protobuf_UninterpretedOption* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 5);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_UninterpretedOption_clear_double_value(google_protobuf_UninterpretedOption* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 6);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE double google_protobuf_UninterpretedOption_double_value(const google_protobuf_UninterpretedOption* msg) {
+ double default_val = 0;
+ double ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 6);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_UninterpretedOption_has_double_value(const google_protobuf_UninterpretedOption* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 6);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_UninterpretedOption_clear_string_value(google_protobuf_UninterpretedOption* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 7);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_UninterpretedOption_string_value(const google_protobuf_UninterpretedOption* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 7);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_UninterpretedOption_has_string_value(const google_protobuf_UninterpretedOption* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 7);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_UninterpretedOption_clear_aggregate_value(google_protobuf_UninterpretedOption* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 8);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_UninterpretedOption_aggregate_value(const google_protobuf_UninterpretedOption* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 8);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_UninterpretedOption_has_aggregate_value(const google_protobuf_UninterpretedOption* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 8);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+
+UPB_INLINE google_protobuf_UninterpretedOption_NamePart** google_protobuf_UninterpretedOption_mutable_name(google_protobuf_UninterpretedOption* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 2);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_UninterpretedOption_NamePart**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_UninterpretedOption_NamePart** google_protobuf_UninterpretedOption_resize_name(google_protobuf_UninterpretedOption* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 2);
+ return (google_protobuf_UninterpretedOption_NamePart**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_UninterpretedOption_NamePart* google_protobuf_UninterpretedOption_add_name(google_protobuf_UninterpretedOption* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 2);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_UninterpretedOption_NamePart* sub = (struct google_protobuf_UninterpretedOption_NamePart*)_upb_Message_New(google_protobuf_UninterpretedOption_NamePart_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+UPB_INLINE void google_protobuf_UninterpretedOption_set_identifier_value(google_protobuf_UninterpretedOption *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 3);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_UninterpretedOption_set_positive_int_value(google_protobuf_UninterpretedOption *msg, uint64_t value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 4);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_UninterpretedOption_set_negative_int_value(google_protobuf_UninterpretedOption *msg, int64_t value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 5);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_UninterpretedOption_set_double_value(google_protobuf_UninterpretedOption *msg, double value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 6);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_UninterpretedOption_set_string_value(google_protobuf_UninterpretedOption *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 7);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_UninterpretedOption_set_aggregate_value(google_protobuf_UninterpretedOption *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_msg_init(), 8);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+
+/* google.protobuf.UninterpretedOption.NamePart */
+
+UPB_INLINE google_protobuf_UninterpretedOption_NamePart* google_protobuf_UninterpretedOption_NamePart_new(upb_Arena* arena) {
+ return (google_protobuf_UninterpretedOption_NamePart*)_upb_Message_New(google_protobuf_UninterpretedOption_NamePart_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_UninterpretedOption_NamePart* google_protobuf_UninterpretedOption_NamePart_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_UninterpretedOption_NamePart* ret = google_protobuf_UninterpretedOption_NamePart_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_UninterpretedOption_NamePart_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_UninterpretedOption_NamePart* google_protobuf_UninterpretedOption_NamePart_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_UninterpretedOption_NamePart* ret = google_protobuf_UninterpretedOption_NamePart_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_UninterpretedOption_NamePart_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_UninterpretedOption_NamePart_serialize(const google_protobuf_UninterpretedOption_NamePart* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_UninterpretedOption_NamePart_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_UninterpretedOption_NamePart_serialize_ex(const google_protobuf_UninterpretedOption_NamePart* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_UninterpretedOption_NamePart_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_UninterpretedOption_NamePart_clear_name_part(google_protobuf_UninterpretedOption_NamePart* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_NamePart_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_UninterpretedOption_NamePart_name_part(const google_protobuf_UninterpretedOption_NamePart* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_NamePart_msg_init(), 1);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_UninterpretedOption_NamePart_has_name_part(const google_protobuf_UninterpretedOption_NamePart* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_NamePart_msg_init(), 1);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_UninterpretedOption_NamePart_clear_is_extension(google_protobuf_UninterpretedOption_NamePart* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_NamePart_msg_init(), 2);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE bool google_protobuf_UninterpretedOption_NamePart_is_extension(const google_protobuf_UninterpretedOption_NamePart* msg) {
+ bool default_val = false;
+ bool ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_NamePart_msg_init(), 2);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_UninterpretedOption_NamePart_has_is_extension(const google_protobuf_UninterpretedOption_NamePart* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_NamePart_msg_init(), 2);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+
+UPB_INLINE void google_protobuf_UninterpretedOption_NamePart_set_name_part(google_protobuf_UninterpretedOption_NamePart *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_NamePart_msg_init(), 1);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_UninterpretedOption_NamePart_set_is_extension(google_protobuf_UninterpretedOption_NamePart *msg, bool value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_UninterpretedOption_NamePart_msg_init(), 2);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+
+/* google.protobuf.SourceCodeInfo */
+
+UPB_INLINE google_protobuf_SourceCodeInfo* google_protobuf_SourceCodeInfo_new(upb_Arena* arena) {
+ return (google_protobuf_SourceCodeInfo*)_upb_Message_New(google_protobuf_SourceCodeInfo_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_SourceCodeInfo* google_protobuf_SourceCodeInfo_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_SourceCodeInfo* ret = google_protobuf_SourceCodeInfo_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_SourceCodeInfo_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_SourceCodeInfo* google_protobuf_SourceCodeInfo_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_SourceCodeInfo* ret = google_protobuf_SourceCodeInfo_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_SourceCodeInfo_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_SourceCodeInfo_serialize(const google_protobuf_SourceCodeInfo* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_SourceCodeInfo_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_SourceCodeInfo_serialize_ex(const google_protobuf_SourceCodeInfo* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_SourceCodeInfo_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_SourceCodeInfo_clear_location(google_protobuf_SourceCodeInfo* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_SourceCodeInfo_Location* const* google_protobuf_SourceCodeInfo_location(const google_protobuf_SourceCodeInfo* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_msg_init(), 1);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_SourceCodeInfo_Location* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_SourceCodeInfo_has_location(const google_protobuf_SourceCodeInfo* msg) {
+ size_t size;
+ google_protobuf_SourceCodeInfo_location(msg, &size);
+ return size != 0;
+}
+
+UPB_INLINE google_protobuf_SourceCodeInfo_Location** google_protobuf_SourceCodeInfo_mutable_location(google_protobuf_SourceCodeInfo* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_msg_init(), 1);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_SourceCodeInfo_Location**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_SourceCodeInfo_Location** google_protobuf_SourceCodeInfo_resize_location(google_protobuf_SourceCodeInfo* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_msg_init(), 1);
+ return (google_protobuf_SourceCodeInfo_Location**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_SourceCodeInfo_Location* google_protobuf_SourceCodeInfo_add_location(google_protobuf_SourceCodeInfo* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_msg_init(), 1);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_SourceCodeInfo_Location* sub = (struct google_protobuf_SourceCodeInfo_Location*)_upb_Message_New(google_protobuf_SourceCodeInfo_Location_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+
+/* google.protobuf.SourceCodeInfo.Location */
+
+UPB_INLINE google_protobuf_SourceCodeInfo_Location* google_protobuf_SourceCodeInfo_Location_new(upb_Arena* arena) {
+ return (google_protobuf_SourceCodeInfo_Location*)_upb_Message_New(google_protobuf_SourceCodeInfo_Location_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_SourceCodeInfo_Location* google_protobuf_SourceCodeInfo_Location_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_SourceCodeInfo_Location* ret = google_protobuf_SourceCodeInfo_Location_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_SourceCodeInfo_Location_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_SourceCodeInfo_Location* google_protobuf_SourceCodeInfo_Location_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_SourceCodeInfo_Location* ret = google_protobuf_SourceCodeInfo_Location_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_SourceCodeInfo_Location_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_SourceCodeInfo_Location_serialize(const google_protobuf_SourceCodeInfo_Location* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_SourceCodeInfo_Location_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_SourceCodeInfo_Location_serialize_ex(const google_protobuf_SourceCodeInfo_Location* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_SourceCodeInfo_Location_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_SourceCodeInfo_Location_clear_path(google_protobuf_SourceCodeInfo_Location* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_Location_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t const* google_protobuf_SourceCodeInfo_Location_path(const google_protobuf_SourceCodeInfo_Location* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_Location_msg_init(), 1);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (int32_t const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_SourceCodeInfo_Location_has_path(const google_protobuf_SourceCodeInfo_Location* msg) {
+ size_t size;
+ google_protobuf_SourceCodeInfo_Location_path(msg, &size);
+ return size != 0;
+}
+UPB_INLINE void google_protobuf_SourceCodeInfo_Location_clear_span(google_protobuf_SourceCodeInfo_Location* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_Location_msg_init(), 2);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t const* google_protobuf_SourceCodeInfo_Location_span(const google_protobuf_SourceCodeInfo_Location* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_Location_msg_init(), 2);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (int32_t const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_SourceCodeInfo_Location_has_span(const google_protobuf_SourceCodeInfo_Location* msg) {
+ size_t size;
+ google_protobuf_SourceCodeInfo_Location_span(msg, &size);
+ return size != 0;
+}
+UPB_INLINE void google_protobuf_SourceCodeInfo_Location_clear_leading_comments(google_protobuf_SourceCodeInfo_Location* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_Location_msg_init(), 3);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_SourceCodeInfo_Location_leading_comments(const google_protobuf_SourceCodeInfo_Location* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_Location_msg_init(), 3);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_SourceCodeInfo_Location_has_leading_comments(const google_protobuf_SourceCodeInfo_Location* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_Location_msg_init(), 3);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_SourceCodeInfo_Location_clear_trailing_comments(google_protobuf_SourceCodeInfo_Location* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_Location_msg_init(), 4);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_SourceCodeInfo_Location_trailing_comments(const google_protobuf_SourceCodeInfo_Location* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_Location_msg_init(), 4);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_SourceCodeInfo_Location_has_trailing_comments(const google_protobuf_SourceCodeInfo_Location* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_Location_msg_init(), 4);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_SourceCodeInfo_Location_clear_leading_detached_comments(google_protobuf_SourceCodeInfo_Location* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_Location_msg_init(), 6);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView const* google_protobuf_SourceCodeInfo_Location_leading_detached_comments(const google_protobuf_SourceCodeInfo_Location* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_Location_msg_init(), 6);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (upb_StringView const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_SourceCodeInfo_Location_has_leading_detached_comments(const google_protobuf_SourceCodeInfo_Location* msg) {
+ size_t size;
+ google_protobuf_SourceCodeInfo_Location_leading_detached_comments(msg, &size);
+ return size != 0;
+}
+
+UPB_INLINE int32_t* google_protobuf_SourceCodeInfo_Location_mutable_path(google_protobuf_SourceCodeInfo_Location* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_Location_msg_init(), 1);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (int32_t*)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE int32_t* google_protobuf_SourceCodeInfo_Location_resize_path(google_protobuf_SourceCodeInfo_Location* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_Location_msg_init(), 1);
+ return (int32_t*)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE bool google_protobuf_SourceCodeInfo_Location_add_path(google_protobuf_SourceCodeInfo_Location* msg, int32_t val, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_Location_msg_init(), 1);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return false;
+ }
+ _upb_Array_Set(arr, arr->size - 1, &val, sizeof(val));
+ return true;
+}
+UPB_INLINE int32_t* google_protobuf_SourceCodeInfo_Location_mutable_span(google_protobuf_SourceCodeInfo_Location* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_Location_msg_init(), 2);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (int32_t*)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE int32_t* google_protobuf_SourceCodeInfo_Location_resize_span(google_protobuf_SourceCodeInfo_Location* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_Location_msg_init(), 2);
+ return (int32_t*)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE bool google_protobuf_SourceCodeInfo_Location_add_span(google_protobuf_SourceCodeInfo_Location* msg, int32_t val, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_Location_msg_init(), 2);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return false;
+ }
+ _upb_Array_Set(arr, arr->size - 1, &val, sizeof(val));
+ return true;
+}
+UPB_INLINE void google_protobuf_SourceCodeInfo_Location_set_leading_comments(google_protobuf_SourceCodeInfo_Location *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_Location_msg_init(), 3);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_SourceCodeInfo_Location_set_trailing_comments(google_protobuf_SourceCodeInfo_Location *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_Location_msg_init(), 4);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE upb_StringView* google_protobuf_SourceCodeInfo_Location_mutable_leading_detached_comments(google_protobuf_SourceCodeInfo_Location* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_Location_msg_init(), 6);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (upb_StringView*)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE upb_StringView* google_protobuf_SourceCodeInfo_Location_resize_leading_detached_comments(google_protobuf_SourceCodeInfo_Location* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_Location_msg_init(), 6);
+ return (upb_StringView*)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE bool google_protobuf_SourceCodeInfo_Location_add_leading_detached_comments(google_protobuf_SourceCodeInfo_Location* msg, upb_StringView val, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_SourceCodeInfo_Location_msg_init(), 6);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return false;
+ }
+ _upb_Array_Set(arr, arr->size - 1, &val, sizeof(val));
+ return true;
+}
+
+/* google.protobuf.GeneratedCodeInfo */
+
+UPB_INLINE google_protobuf_GeneratedCodeInfo* google_protobuf_GeneratedCodeInfo_new(upb_Arena* arena) {
+ return (google_protobuf_GeneratedCodeInfo*)_upb_Message_New(google_protobuf_GeneratedCodeInfo_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_GeneratedCodeInfo* google_protobuf_GeneratedCodeInfo_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_GeneratedCodeInfo* ret = google_protobuf_GeneratedCodeInfo_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_GeneratedCodeInfo_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_GeneratedCodeInfo* google_protobuf_GeneratedCodeInfo_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_GeneratedCodeInfo* ret = google_protobuf_GeneratedCodeInfo_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_GeneratedCodeInfo_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_GeneratedCodeInfo_serialize(const google_protobuf_GeneratedCodeInfo* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_GeneratedCodeInfo_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_GeneratedCodeInfo_serialize_ex(const google_protobuf_GeneratedCodeInfo* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_GeneratedCodeInfo_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_GeneratedCodeInfo_clear_annotation(google_protobuf_GeneratedCodeInfo* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_GeneratedCodeInfo_Annotation* const* google_protobuf_GeneratedCodeInfo_annotation(const google_protobuf_GeneratedCodeInfo* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_msg_init(), 1);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_GeneratedCodeInfo_Annotation* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_GeneratedCodeInfo_has_annotation(const google_protobuf_GeneratedCodeInfo* msg) {
+ size_t size;
+ google_protobuf_GeneratedCodeInfo_annotation(msg, &size);
+ return size != 0;
+}
+
+UPB_INLINE google_protobuf_GeneratedCodeInfo_Annotation** google_protobuf_GeneratedCodeInfo_mutable_annotation(google_protobuf_GeneratedCodeInfo* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_msg_init(), 1);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_GeneratedCodeInfo_Annotation**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_GeneratedCodeInfo_Annotation** google_protobuf_GeneratedCodeInfo_resize_annotation(google_protobuf_GeneratedCodeInfo* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_msg_init(), 1);
+ return (google_protobuf_GeneratedCodeInfo_Annotation**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_GeneratedCodeInfo_Annotation* google_protobuf_GeneratedCodeInfo_add_annotation(google_protobuf_GeneratedCodeInfo* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_msg_init(), 1);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_GeneratedCodeInfo_Annotation* sub = (struct google_protobuf_GeneratedCodeInfo_Annotation*)_upb_Message_New(google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+
+/* google.protobuf.GeneratedCodeInfo.Annotation */
+
+UPB_INLINE google_protobuf_GeneratedCodeInfo_Annotation* google_protobuf_GeneratedCodeInfo_Annotation_new(upb_Arena* arena) {
+ return (google_protobuf_GeneratedCodeInfo_Annotation*)_upb_Message_New(google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_GeneratedCodeInfo_Annotation* google_protobuf_GeneratedCodeInfo_Annotation_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_GeneratedCodeInfo_Annotation* ret = google_protobuf_GeneratedCodeInfo_Annotation_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_GeneratedCodeInfo_Annotation* google_protobuf_GeneratedCodeInfo_Annotation_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_GeneratedCodeInfo_Annotation* ret = google_protobuf_GeneratedCodeInfo_Annotation_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_GeneratedCodeInfo_Annotation_serialize(const google_protobuf_GeneratedCodeInfo_Annotation* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_GeneratedCodeInfo_Annotation_serialize_ex(const google_protobuf_GeneratedCodeInfo_Annotation* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_GeneratedCodeInfo_Annotation_clear_path(google_protobuf_GeneratedCodeInfo_Annotation* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t const* google_protobuf_GeneratedCodeInfo_Annotation_path(const google_protobuf_GeneratedCodeInfo_Annotation* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), 1);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (int32_t const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_GeneratedCodeInfo_Annotation_has_path(const google_protobuf_GeneratedCodeInfo_Annotation* msg) {
+ size_t size;
+ google_protobuf_GeneratedCodeInfo_Annotation_path(msg, &size);
+ return size != 0;
+}
+UPB_INLINE void google_protobuf_GeneratedCodeInfo_Annotation_clear_source_file(google_protobuf_GeneratedCodeInfo_Annotation* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), 2);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_GeneratedCodeInfo_Annotation_source_file(const google_protobuf_GeneratedCodeInfo_Annotation* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), 2);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_GeneratedCodeInfo_Annotation_has_source_file(const google_protobuf_GeneratedCodeInfo_Annotation* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), 2);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_GeneratedCodeInfo_Annotation_clear_begin(google_protobuf_GeneratedCodeInfo_Annotation* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), 3);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t google_protobuf_GeneratedCodeInfo_Annotation_begin(const google_protobuf_GeneratedCodeInfo_Annotation* msg) {
+ int32_t default_val = (int32_t)0;
+ int32_t ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), 3);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_GeneratedCodeInfo_Annotation_has_begin(const google_protobuf_GeneratedCodeInfo_Annotation* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), 3);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_GeneratedCodeInfo_Annotation_clear_end(google_protobuf_GeneratedCodeInfo_Annotation* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), 4);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t google_protobuf_GeneratedCodeInfo_Annotation_end(const google_protobuf_GeneratedCodeInfo_Annotation* msg) {
+ int32_t default_val = (int32_t)0;
+ int32_t ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), 4);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_GeneratedCodeInfo_Annotation_has_end(const google_protobuf_GeneratedCodeInfo_Annotation* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), 4);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_GeneratedCodeInfo_Annotation_clear_semantic(google_protobuf_GeneratedCodeInfo_Annotation* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), 5);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t google_protobuf_GeneratedCodeInfo_Annotation_semantic(const google_protobuf_GeneratedCodeInfo_Annotation* msg) {
+ int32_t default_val = 0;
+ int32_t ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), 5);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_GeneratedCodeInfo_Annotation_has_semantic(const google_protobuf_GeneratedCodeInfo_Annotation* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), 5);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+
+UPB_INLINE int32_t* google_protobuf_GeneratedCodeInfo_Annotation_mutable_path(google_protobuf_GeneratedCodeInfo_Annotation* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), 1);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (int32_t*)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE int32_t* google_protobuf_GeneratedCodeInfo_Annotation_resize_path(google_protobuf_GeneratedCodeInfo_Annotation* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), 1);
+ return (int32_t*)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE bool google_protobuf_GeneratedCodeInfo_Annotation_add_path(google_protobuf_GeneratedCodeInfo_Annotation* msg, int32_t val, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), 1);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return false;
+ }
+ _upb_Array_Set(arr, arr->size - 1, &val, sizeof(val));
+ return true;
+}
+UPB_INLINE void google_protobuf_GeneratedCodeInfo_Annotation_set_source_file(google_protobuf_GeneratedCodeInfo_Annotation *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), 2);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_GeneratedCodeInfo_Annotation_set_begin(google_protobuf_GeneratedCodeInfo_Annotation *msg, int32_t value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), 3);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_GeneratedCodeInfo_Annotation_set_end(google_protobuf_GeneratedCodeInfo_Annotation *msg, int32_t value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), 4);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_GeneratedCodeInfo_Annotation_set_semantic(google_protobuf_GeneratedCodeInfo_Annotation *msg, int32_t value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_GeneratedCodeInfo_Annotation_msg_init(), 5);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+
+extern const upb_MiniTableFile google_protobuf_descriptor_proto_upb_file_layout;
+
+/* Max size 32 is google.protobuf.FileOptions */
+/* Max size 64 is google.protobuf.FileOptions */
+#define _UPB_MAXOPT_SIZE UPB_SIZE(104, 192)
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* GOOGLE_PROTOBUF_DESCRIPTOR_PROTO_UPB_H_ */
diff --git a/upb/upb/test/BUILD b/upb/upb/test/BUILD
new file mode 100644
index 0000000..b7cee46
--- /dev/null
+++ b/upb/upb/test/BUILD
@@ -0,0 +1,257 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load(
+ "//bazel:build_defs.bzl",
+ "UPB_DEFAULT_CPPOPTS",
+)
+load(
+ "//bazel:upb_proto_library.bzl",
+ "upb_proto_library",
+ "upb_proto_reflection_library",
+)
+
+# begin:google_only
+# package(default_applicable_licenses = ["//:license"])
+# end:google_only
+
+cc_library(
+ name = "parse_text_proto",
+ testonly = 1,
+ hdrs = ["parse_text_proto.h"],
+ visibility = ["//:__subpackages__"],
+ deps = [
+ "@com_google_googletest//:gtest",
+ "@com_google_protobuf//:protobuf",
+ ],
+)
+
+proto_library(
+ name = "empty_proto",
+ srcs = ["empty.proto"],
+)
+
+upb_proto_reflection_library(
+ name = "empty_upb_proto_reflection",
+ testonly = 1,
+ deps = [":empty_proto"],
+)
+
+proto_library(
+ name = "proto3_test_proto",
+ testonly = 1,
+ srcs = ["proto3_test.proto"],
+ deps = ["@com_google_protobuf//:descriptor_proto"],
+)
+
+upb_proto_library(
+ name = "proto3_test_upb_proto",
+ testonly = 1,
+ deps = [":proto3_test_proto"],
+)
+
+upb_proto_reflection_library(
+ name = "proto3_test_upb_proto_reflection",
+ testonly = 1,
+ deps = [":proto3_test_proto"],
+)
+
+proto_library(
+ name = "test_proto",
+ testonly = 1,
+ srcs = ["test.proto"],
+)
+
+upb_proto_library(
+ name = "test_upb_proto",
+ testonly = 1,
+ visibility = ["//:__subpackages__"],
+ deps = [":test_proto"],
+)
+
+proto_library(
+ name = "test_cpp_proto",
+ srcs = ["test_cpp.proto"],
+ deps = ["@com_google_protobuf//:timestamp_proto"],
+)
+
+upb_proto_library(
+ name = "test_cpp_upb_proto",
+ deps = ["test_cpp_proto"],
+)
+
+upb_proto_reflection_library(
+ name = "test_cpp_upb_proto_reflection",
+ deps = ["test_cpp_proto"],
+)
+
+upb_proto_library(
+ name = "test_messages_proto2_upb_proto",
+ testonly = 1,
+ visibility = ["//:__subpackages__"],
+ deps = ["@com_google_protobuf//src/google/protobuf:test_messages_proto2_proto"],
+)
+
+upb_proto_library(
+ name = "test_messages_proto3_upb_proto",
+ testonly = 1,
+ visibility = ["//:__subpackages__"],
+ deps = ["@com_google_protobuf//src/google/protobuf:test_messages_proto3_proto"],
+)
+
+upb_proto_library(
+ name = "timestamp_upb_proto",
+ deps = ["@com_google_protobuf//:timestamp_proto"],
+)
+
+upb_proto_reflection_library(
+ name = "timestamp_upb_proto_reflection",
+ deps = ["@com_google_protobuf//:timestamp_proto"],
+)
+
+cc_library(
+ name = "fuzz_util",
+ testonly = 1,
+ srcs = ["fuzz_util.cc"],
+ hdrs = ["fuzz_util.h"],
+ visibility = ["//:__subpackages__"],
+ deps = [
+ "//:base",
+ "//:message",
+ "//:mini_descriptor",
+ "//:mini_table",
+ "//:mini_table_internal",
+ "//:port",
+ ],
+)
+
+cc_test(
+ name = "proto3_test",
+ srcs = ["proto3_test.cc"],
+ copts = UPB_DEFAULT_CPPOPTS,
+ deps = [
+ ":proto3_test_upb_proto",
+ ":proto3_test_upb_proto_reflection",
+ "//:reflection",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "test_cpp",
+ srcs = ["test_cpp.cc"],
+ copts = UPB_DEFAULT_CPPOPTS,
+ deps = [
+ ":test_cpp_upb_proto",
+ ":test_cpp_upb_proto_reflection",
+ ":timestamp_upb_proto",
+ ":timestamp_upb_proto_reflection",
+ "//:json",
+ "//:port",
+ "//:reflection",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "test_generated_code",
+ srcs = [
+ "test_generated_code.cc",
+ ],
+ deps = [
+ ":empty_upb_proto_reflection",
+ ":test_messages_proto2_upb_proto",
+ ":test_messages_proto3_upb_proto",
+ ":test_upb_proto",
+ "//:base",
+ "//:collections",
+ "//:mem",
+ "//:port",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "test_mini_table_oneof",
+ srcs = [
+ "test_mini_table_oneof.cc",
+ ],
+ deps = [
+ ":empty_upb_proto_reflection",
+ ":test_messages_proto2_upb_proto",
+ ":test_messages_proto3_upb_proto",
+ ":test_upb_proto",
+ "//:mini_table",
+ "//:mini_table_internal",
+ "//:port",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+proto_library(
+ name = "empty_srcs_proto",
+ testonly = 1,
+ srcs = [],
+ deps = [":test_proto"],
+)
+
+proto_library(
+ name = "empty_srcs_2_proto",
+ testonly = 1,
+ srcs = [],
+ deps = [":test_cpp_proto"],
+)
+
+proto_library(
+ name = "empty_srcs_3_proto",
+ testonly = 1,
+ srcs = [],
+ deps = [
+ ":empty_srcs_2_proto",
+ ":empty_srcs_proto",
+ ],
+)
+
+proto_library(
+ name = "test_import_empty_srcs_proto",
+ testonly = 1,
+ srcs = ["test_import_empty_srcs.proto"],
+ deps = [":empty_srcs_3_proto"],
+)
+
+upb_proto_library(
+ name = "test_import_empty_srcs_upb_proto",
+ testonly = 1,
+ deps = [":test_import_empty_srcs_proto"],
+)
+
+cc_test(
+ name = "test_import_empty_srcs",
+ srcs = ["test_import_empty_srcs.cc"],
+ deps = [
+ ":test_import_empty_srcs_upb_proto",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
diff --git a/upb/upb/test/empty.proto b/upb/upb/test/empty.proto
new file mode 100644
index 0000000..a1ad52b
--- /dev/null
+++ b/upb/upb/test/empty.proto
@@ -0,0 +1,32 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto2";
+package upb_test;
diff --git a/upb/upb/test/fuzz_util.cc b/upb/upb/test/fuzz_util.cc
new file mode 100644
index 0000000..f29aa7f
--- /dev/null
+++ b/upb/upb/test/fuzz_util.cc
@@ -0,0 +1,191 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/test/fuzz_util.h"
+
+#include "upb/base/status.hpp"
+#include "upb/message/message.h"
+#include "upb/mini_descriptor/decode.h"
+#include "upb/mini_table/extension.h"
+#include "upb/mini_table/extension_registry.h"
+
+// Must be last
+#include "upb/port/def.inc"
+
+namespace upb {
+namespace fuzz {
+
+namespace {
+
+class Builder {
+ public:
+ Builder(const MiniTableFuzzInput& input, upb_Arena* arena)
+ : input_(&input), arena_(arena) {}
+
+ const upb_MiniTable* Build(upb_ExtensionRegistry** exts) {
+ BuildMessages();
+ BuildEnums();
+ BuildExtensions(exts);
+ if (!LinkMessages()) return nullptr;
+ return mini_tables_.empty() ? nullptr : mini_tables_.front();
+ }
+
+ private:
+ void BuildMessages();
+ void BuildEnums();
+ void BuildExtensions(upb_ExtensionRegistry** exts);
+ bool LinkExtension(upb_MiniTableExtension* ext);
+ bool LinkMessages();
+
+ size_t NextLink() {
+ if (input_->links.empty()) return 0;
+ if (link_ == input_->links.size()) link_ = 0;
+ return input_->links[link_++];
+ }
+
+ const upb_MiniTable* NextMiniTable() {
+ return mini_tables_.empty()
+ ? nullptr
+ : mini_tables_[NextLink() % mini_tables_.size()];
+ }
+
+ const upb_MiniTableEnum* NextEnumTable() {
+ return enum_tables_.empty()
+ ? nullptr
+ : enum_tables_[NextLink() % enum_tables_.size()];
+ }
+
+ const MiniTableFuzzInput* input_;
+ upb_Arena* arena_;
+ std::vector<const upb_MiniTable*> mini_tables_;
+ std::vector<const upb_MiniTableEnum*> enum_tables_;
+ size_t link_ = 0;
+};
+
+void Builder::BuildMessages() {
+ upb::Status status;
+ mini_tables_.reserve(input_->mini_descriptors.size());
+ for (const auto& d : input_->mini_descriptors) {
+ upb_MiniTable* table =
+ upb_MiniTable_Build(d.data(), d.size(), arena_, status.ptr());
+ if (table) mini_tables_.push_back(table);
+ }
+}
+
+void Builder::BuildEnums() {
+ upb::Status status;
+ enum_tables_.reserve(input_->enum_mini_descriptors.size());
+ for (const auto& d : input_->enum_mini_descriptors) {
+ upb_MiniTableEnum* enum_table =
+ upb_MiniTableEnum_Build(d.data(), d.size(), arena_, status.ptr());
+ if (enum_table) enum_tables_.push_back(enum_table);
+ }
+}
+
+bool Builder::LinkExtension(upb_MiniTableExtension* ext) {
+ upb_MiniTableField* field = &ext->field;
+ if (upb_MiniTableField_CType(field) == kUpb_CType_Message) {
+ auto mt = NextMiniTable();
+ if (!mt) field->UPB_PRIVATE(descriptortype) = kUpb_FieldType_Int32;
+ ext->sub.submsg = mt;
+ }
+ if (upb_MiniTableField_IsClosedEnum(field)) {
+ auto et = NextEnumTable();
+ if (!et) field->UPB_PRIVATE(descriptortype) = kUpb_FieldType_Int32;
+ ext->sub.subenum = et;
+ }
+ return true;
+}
+
+void Builder::BuildExtensions(upb_ExtensionRegistry** exts) {
+ upb::Status status;
+ if (input_->extensions.empty()) {
+ *exts = nullptr;
+ } else {
+ *exts = upb_ExtensionRegistry_New(arena_);
+ const char* ptr = input_->extensions.data();
+ const char* end = ptr + input_->extensions.size();
+ // Iterate through the buffer, building extensions as long as we can.
+ while (ptr < end) {
+ upb_MiniTableExtension* ext = reinterpret_cast<upb_MiniTableExtension*>(
+ upb_Arena_Malloc(arena_, sizeof(*ext)));
+ upb_MiniTableSub sub;
+ const upb_MiniTable* extendee = NextMiniTable();
+ if (!extendee) break;
+ ptr = upb_MiniTableExtension_Init(ptr, end - ptr, ext, extendee, sub,
+ status.ptr());
+ if (!ptr) break;
+ if (!LinkExtension(ext)) continue;
+ if (upb_ExtensionRegistry_Lookup(*exts, ext->extendee, ext->field.number))
+ continue;
+ upb_ExtensionRegistry_AddArray(
+ *exts, const_cast<const upb_MiniTableExtension**>(&ext), 1);
+ }
+ }
+}
+
+bool Builder::LinkMessages() {
+ for (auto* t : mini_tables_) {
+ upb_MiniTable* table = const_cast<upb_MiniTable*>(t);
+ // For each field that requires a sub-table, assign one as appropriate.
+ for (size_t i = 0; i < table->field_count; i++) {
+ upb_MiniTableField* field =
+ const_cast<upb_MiniTableField*>(&table->fields[i]);
+ if (link_ == input_->links.size()) link_ = 0;
+ if (upb_MiniTableField_CType(field) == kUpb_CType_Message &&
+ !upb_MiniTable_SetSubMessage(table, field, NextMiniTable())) {
+ return false;
+ }
+ if (upb_MiniTableField_IsClosedEnum(field)) {
+ auto* et = NextEnumTable();
+ if (et) {
+ if (!upb_MiniTable_SetSubEnum(table, field, et)) return false;
+ } else {
+ // We don't have any sub-enums. Override the field type so that it is
+ // not needed.
+ field->UPB_PRIVATE(descriptortype) = kUpb_FieldType_Int32;
+ }
+ }
+ }
+ }
+ return true;
+}
+
+} // namespace
+
+const upb_MiniTable* BuildMiniTable(const MiniTableFuzzInput& input,
+ upb_ExtensionRegistry** exts,
+ upb_Arena* arena) {
+ Builder builder(input, arena);
+ return builder.Build(exts);
+}
+
+} // namespace fuzz
+} // namespace upb
diff --git a/upb/upb/test/fuzz_util.h b/upb/upb/test/fuzz_util.h
new file mode 100644
index 0000000..c03d376
--- /dev/null
+++ b/upb/upb/test/fuzz_util.h
@@ -0,0 +1,84 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_TEST_FUZZ_UTIL_H_
+#define UPB_TEST_FUZZ_UTIL_H_
+
+#include <string>
+#include <vector>
+
+#include "upb/mini_table/extension_registry.h"
+// #include "upb/mini_table/types.h"
+
+namespace upb {
+namespace fuzz {
+
+struct MiniTableFuzzInput {
+ // MiniDescripotrs for N messages, in the format accepted by
+ // upb_MiniTable_Build().
+ std::vector<std::string> mini_descriptors;
+
+ // MiniDescripotrs for N enums, in the format accepted by
+ // upb_MiniTableEnum_Build().
+ std::vector<std::string> enum_mini_descriptors;
+
+ // A MiniDescriptor for N extensions, in the format accepted by
+ // upb_MiniTableExtension_Build().
+ std::string extensions;
+
+ // Integer indexes into the message or enum mini tables lists. These specify
+ // which message or enum to use for each sub-message or enum field. We mod
+ // by the total number of enums or messages so that any link value can be
+ // valid.
+ std::vector<uint32_t> links;
+};
+
+// Builds an arbitrary mini table corresponding to the random data in `input`.
+// This function should be capable of producing any mini table that can
+// successfully build, and any topology of messages and enums (including
+// cycles).
+//
+// As currently written, it effectively fuzzes the mini descriptor parser also,
+// and can therefore trigger any bugs in that parser. To better isolate these
+// two, we may want to change this implementation to use the mini descriptor
+// builder API so we are producing mini descriptors in a known good format. That
+// would mostly eliminate the chance of crashing the mini descriptor parser
+// itself.
+//
+// TODO: maps. If we give maps some space in the regular encoding instead of
+// using a separate function, we could get that for free.
+const upb_MiniTable* BuildMiniTable(const MiniTableFuzzInput& input,
+ upb_ExtensionRegistry** exts,
+ upb_Arena* arena);
+
+} // namespace fuzz
+} // namespace upb
+
+#endif // UPB_TEST_FUZZ_UTIL_H_
diff --git a/upb/upb/test/parse_text_proto.h b/upb/upb/test/parse_text_proto.h
new file mode 100644
index 0000000..866d156
--- /dev/null
+++ b/upb/upb/test/parse_text_proto.h
@@ -0,0 +1,67 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_UPB_TEST_PARSE_TEXT_PROTO_H_
+#define UPB_UPB_TEST_PARSE_TEXT_PROTO_H_
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "google/protobuf/message.h"
+#include "google/protobuf/text_format.h"
+
+namespace upb_test {
+
+// Replacement for Google ParseTextProtoOrDie.
+// Only to be used in unit tests.
+// Usage: MyMessage msg = ParseTextProtoOrDie(my_text_proto);
+class ParseTextProtoOrDie {
+ public:
+ explicit ParseTextProtoOrDie(absl::string_view text_proto)
+ : text_proto_(text_proto) {}
+
+ template <class T>
+ operator T() { // NOLINT: Needed to support parsing text proto as appropriate
+ // type.
+ T message;
+ if (!google::protobuf::TextFormat::ParseFromString(text_proto_, &message)) {
+ ADD_FAILURE() << "Failed to parse textproto: " << text_proto_;
+ abort();
+ }
+ return message;
+ }
+
+ private:
+ std::string text_proto_;
+};
+
+} // namespace upb_test
+
+#endif // UPB_UPB_TEST_PARSE_TEXT_PROTO_H_
diff --git a/upb/upb/test/proto3_test.cc b/upb/upb/test/proto3_test.cc
new file mode 100644
index 0000000..663da70
--- /dev/null
+++ b/upb/upb/test/proto3_test.cc
@@ -0,0 +1,40 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "gtest/gtest.h"
+#include "upb/reflection/def.hpp"
+#include "upb/test/proto3_test.upb.h"
+#include "upb/test/proto3_test.upbdefs.h"
+
+TEST(Proto3Test, SyntheticOneofExtension) {
+ upb::DefPool defpool;
+ upb::MessageDefPtr md(upb_test_TestMessage3_getmsgdef(defpool.ptr()));
+ ASSERT_EQ(md.field_count(), 6);
+}
diff --git a/upb/upb/test/proto3_test.proto b/upb/upb/test/proto3_test.proto
new file mode 100644
index 0000000..2f36a8e
--- /dev/null
+++ b/upb/upb/test/proto3_test.proto
@@ -0,0 +1,52 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto3";
+
+package upb.test;
+
+import "google/protobuf/descriptor.proto";
+
+extend google.protobuf.MessageOptions {
+ optional string my_option = 51235;
+}
+
+message MyMessage3 {
+ option (my_option) = "Hello world!";
+}
+
+message TestMessage3 {
+ optional int32 i32 = 1;
+ repeated int32 r_i32 = 2;
+ optional string str = 3;
+ repeated string r_str = 4;
+ optional TestMessage3 msg = 5;
+ repeated TestMessage3 r_msg = 6;
+}
diff --git a/upb/upb/test/test.proto b/upb/upb/test/test.proto
new file mode 100644
index 0000000..0031608
--- /dev/null
+++ b/upb/upb/test/test.proto
@@ -0,0 +1,125 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto2";
+
+package upb_test;
+
+message MapTest {
+ map<string, double> map_string_double = 1;
+}
+
+message MessageName {
+ optional int32 field1 = 1;
+ optional int32 field2 = 2;
+}
+
+message HelloRequest {
+ optional uint32 id = 1;
+ optional uint32 random_name_a0 = 2;
+ optional uint32 random_name_a1 = 3;
+ optional uint32 random_name_a2 = 4;
+ optional uint32 random_name_a3 = 5;
+ optional uint32 random_name_a4 = 6;
+ optional uint32 random_name_a5 = 7;
+ optional uint32 random_name_a6 = 8;
+ optional uint32 random_name_a7 = 9;
+ optional uint32 random_name_a8 = 10;
+ optional uint32 random_name_a9 = 11;
+ optional uint32 random_name_b0 = 12;
+ optional uint32 random_name_b1 = 13;
+ optional uint32 random_name_b2 = 14;
+ optional uint32 random_name_b3 = 15;
+ optional uint32 random_name_b4 = 16;
+ optional uint32 random_name_b5 = 17;
+ optional uint32 random_name_b6 = 18;
+ optional uint32 random_name_b7 = 19;
+ optional uint32 random_name_b8 = 20;
+ optional uint32 random_name_b9 = 21;
+ optional uint32 random_name_c0 = 22;
+ optional uint32 random_name_c1 = 23;
+ optional uint32 random_name_c2 = 24;
+ optional uint32 random_name_c3 = 25;
+ optional uint32 random_name_c4 = 26;
+ optional uint32 random_name_c5 = 27;
+ optional uint32 random_name_c6 = 28;
+ optional uint32 random_name_c7 = 29;
+ optional uint32 random_name_c8 = 30;
+ optional uint32 random_name_c9 = 31;
+ optional string version = 32;
+}
+
+message EmptyMessageWithExtensions {
+ // Reserved for unknown fields/extensions test.
+ reserved 1000 to max;
+}
+
+message ModelWithExtensions {
+ optional int32 random_int32 = 3;
+ optional string random_name = 4;
+ repeated int32 repeated_int32 = 5;
+ // Reserved for unknown fields/extensions test.
+ extensions 1000 to max;
+}
+
+message ModelExtension1 {
+ extend ModelWithExtensions {
+ optional ModelExtension1 model_ext = 1547;
+ }
+ optional string str = 25;
+}
+
+message ModelExtension2 {
+ extend ModelWithExtensions {
+ optional ModelExtension2 model_ext = 4135;
+ optional ModelExtension2 model_ext_2 = 4136;
+ optional ModelExtension2 model_ext_3 = 4137;
+ optional ModelExtension2 model_ext_4 = 4138;
+ optional ModelExtension2 model_ext_5 = 4139;
+ }
+ optional int32 i = 9;
+}
+
+message ModelWithSubMessages {
+ optional int32 id = 4;
+ optional ModelWithExtensions optional_child = 5;
+ repeated ModelWithExtensions items = 6;
+}
+
+message ModelWithMaps {
+ optional int32 id = 1;
+ map<string, string> map_ss = 3;
+ map<int32, int32> map_ii = 4;
+ map<int32, ModelWithExtensions> map_im = 5;
+}
+
+message ExtremeDefaults {
+ optional int64 int64_min = 1 [default = -9223372036854775808];
+}
diff --git a/upb/upb/test/test_cpp.cc b/upb/upb/test/test_cpp.cc
new file mode 100644
index 0000000..2b7d939
--- /dev/null
+++ b/upb/upb/test/test_cpp.cc
@@ -0,0 +1,131 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Tests for C++ wrappers.
+
+#include <string.h>
+
+#include <fstream>
+#include <iostream>
+#include <set>
+#include <sstream>
+
+#include "google/protobuf/timestamp.upb.h"
+#include "google/protobuf/timestamp.upbdefs.h"
+#include "gtest/gtest.h"
+#include "upb/json/decode.h"
+#include "upb/json/encode.h"
+#include "upb/reflection/def.h"
+#include "upb/reflection/def.hpp"
+#include "upb/test/test_cpp.upb.h"
+#include "upb/test/test_cpp.upbdefs.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+TEST(Cpp, Iteration) {
+ upb::DefPool defpool;
+ upb::MessageDefPtr md(upb_test_TestMessage_getmsgdef(defpool.ptr()));
+
+ // Test range-based for on both fields and oneofs (with the iterator adaptor).
+ int field_count = 0;
+ for (auto field : md.fields()) {
+ UPB_UNUSED(field);
+ field_count++;
+ }
+ EXPECT_EQ(field_count, md.field_count());
+
+ int oneof_count = 0;
+ for (auto oneof : md.oneofs()) {
+ UPB_UNUSED(oneof);
+ oneof_count++;
+ }
+ EXPECT_EQ(oneof_count, md.oneof_count());
+}
+
+TEST(Cpp, InlinedArena2) {
+ upb::InlinedArena<64> arena;
+ upb_Arena_Malloc(arena.ptr(), sizeof(int));
+}
+
+TEST(Cpp, Default) {
+ upb::DefPool defpool;
+ upb::Arena arena;
+ upb::MessageDefPtr md(upb_test_TestMessage_getmsgdef(defpool.ptr()));
+ upb_test_TestMessage* msg = upb_test_TestMessage_new(arena.ptr());
+ size_t size = upb_JsonEncode(msg, md.ptr(), NULL, 0, NULL, 0, NULL);
+ EXPECT_EQ(2, size); // "{}"
+}
+
+TEST(Cpp, JsonNull) {
+ upb::DefPool defpool;
+ upb::MessageDefPtr md(upb_test_TestMessage_getmsgdef(defpool.ptr()));
+ upb::FieldDefPtr i32_f = md.FindFieldByName("i32");
+ upb::FieldDefPtr str_f = md.FindFieldByName("str");
+ ASSERT_TRUE(i32_f);
+ ASSERT_TRUE(str_f);
+ EXPECT_EQ(5, i32_f.default_value().int32_val);
+ EXPECT_EQ(0, strcmp(str_f.default_value().str_val.data, "abc"));
+ EXPECT_EQ(3, str_f.default_value().str_val.size);
+}
+
+TEST(Cpp, TimestampEncoder) {
+ upb::DefPool defpool;
+ upb::Arena arena;
+ upb::MessageDefPtr md(google_protobuf_Timestamp_getmsgdef(defpool.ptr()));
+ google_protobuf_Timestamp* timestamp_upb =
+ google_protobuf_Timestamp_new(arena.ptr());
+ google_protobuf_Timestamp* timestamp_upb_decoded =
+ google_protobuf_Timestamp_new(arena.ptr());
+
+ int64_t timestamps[] = {
+ 253402300799, // 9999-12-31T23:59:59Z
+ 1641006000, // 2022-01-01T03:00:00Z
+ 0, // 1970-01-01T00:00:00Z
+ -31525200, // 1969-01-01T03:00:00Z
+ -2208988800, // 1900-01-01T00:00:00Z
+ -62135596800, // 0000-01-01T00:00:00Z
+ };
+
+ for (int64_t timestamp : timestamps) {
+ google_protobuf_Timestamp_set_seconds(timestamp_upb, timestamp);
+
+ char json[128];
+ size_t size = upb_JsonEncode(timestamp_upb, md.ptr(), NULL, 0, json,
+ sizeof(json), NULL);
+ bool result = upb_JsonDecode(json, size, timestamp_upb_decoded, md.ptr(),
+ NULL, 0, arena.ptr(), NULL);
+ const int64_t timestamp_decoded =
+ google_protobuf_Timestamp_seconds(timestamp_upb_decoded);
+
+ ASSERT_TRUE(result);
+ EXPECT_EQ(timestamp, timestamp_decoded);
+ }
+}
diff --git a/upb/upb/test/test_cpp.proto b/upb/upb/test/test_cpp.proto
new file mode 100644
index 0000000..19a4b30
--- /dev/null
+++ b/upb/upb/test/test_cpp.proto
@@ -0,0 +1,42 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto2";
+
+package upb.test;
+
+message TestMessage {
+ optional int32 i32 = 1 [default = 5];
+ repeated int32 r_i32 = 2;
+ optional string str = 3 [default = "abc"];
+ repeated string r_str = 4;
+ optional TestMessage msg = 5;
+ repeated TestMessage r_msg = 6;
+}
diff --git a/upb/upb/test/test_generated_code.cc b/upb/upb/test/test_generated_code.cc
new file mode 100644
index 0000000..98c1825
--- /dev/null
+++ b/upb/upb/test/test_generated_code.cc
@@ -0,0 +1,933 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+/* Test of generated code, with a special focus on features that are not used in
+ * descriptor.proto or conformance.proto (since these get some testing from
+ * upb/def.c and tests/conformance_upb.c, respectively).
+ */
+
+#include <cstddef>
+#include <cstdint>
+
+#include "gtest/gtest.h"
+#include "google/protobuf/test_messages_proto2.upb.h"
+#include "google/protobuf/test_messages_proto3.upb.h"
+#include "upb/base/status.h"
+#include "upb/base/string_view.h"
+#include "upb/collections/array.h"
+#include "upb/mem/arena.hpp"
+#include "upb/test/test.upb.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#if !defined(MIN)
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+#endif
+
+const char test_str[] = "abcdefg";
+const char test_str2[] = "12345678910";
+const char test_str3[] = "rstlnezxcvbnm";
+const char test_str4[] = "just another test string";
+
+const upb_StringView test_str_view = {test_str, sizeof(test_str) - 1};
+const upb_StringView test_str_view2 = {test_str2, sizeof(test_str2) - 1};
+const upb_StringView test_str_view3 = {test_str3, sizeof(test_str3) - 1};
+const upb_StringView test_str_view4 = {test_str4, sizeof(test_str4) - 1};
+
+const int32_t test_int32 = 10;
+const int32_t test_int32_2 = -20;
+const int32_t test_int32_3 = 30;
+const int32_t test_int32_4 = -40;
+
+TEST(GeneratedCode, ScalarsProto3) {
+ upb_Arena* arena = upb_Arena_New();
+ protobuf_test_messages_proto3_TestAllTypesProto3* msg =
+ protobuf_test_messages_proto3_TestAllTypesProto3_new(arena);
+ protobuf_test_messages_proto3_TestAllTypesProto3* msg2;
+ upb_StringView serialized;
+ upb_StringView val;
+
+ // Test serialization.
+ protobuf_test_messages_proto3_TestAllTypesProto3_set_optional_int32(msg, 10);
+ protobuf_test_messages_proto3_TestAllTypesProto3_set_optional_int64(msg, 20);
+ protobuf_test_messages_proto3_TestAllTypesProto3_set_optional_uint32(msg, 30);
+ protobuf_test_messages_proto3_TestAllTypesProto3_set_optional_uint64(msg, 40);
+ protobuf_test_messages_proto3_TestAllTypesProto3_set_optional_float(msg,
+ 50.5);
+ protobuf_test_messages_proto3_TestAllTypesProto3_set_optional_double(msg,
+ 60.6);
+ protobuf_test_messages_proto3_TestAllTypesProto3_set_optional_bool(msg, true);
+ protobuf_test_messages_proto3_TestAllTypesProto3_set_optional_string(
+ msg, test_str_view);
+
+ serialized.data = protobuf_test_messages_proto3_TestAllTypesProto3_serialize(
+ msg, arena, &serialized.size);
+
+ msg2 = protobuf_test_messages_proto3_TestAllTypesProto3_parse(
+ serialized.data, serialized.size, arena);
+
+ EXPECT_EQ(10, protobuf_test_messages_proto3_TestAllTypesProto3_optional_int32(
+ msg2));
+ EXPECT_EQ(20, protobuf_test_messages_proto3_TestAllTypesProto3_optional_int64(
+ msg2));
+ EXPECT_EQ(
+ 30,
+ protobuf_test_messages_proto3_TestAllTypesProto3_optional_uint32(msg2));
+ EXPECT_EQ(
+ 40,
+ protobuf_test_messages_proto3_TestAllTypesProto3_optional_uint64(msg2));
+ EXPECT_EQ(
+ 50.5,
+ protobuf_test_messages_proto3_TestAllTypesProto3_optional_float(msg2));
+ EXPECT_EQ(
+ 60.6,
+ protobuf_test_messages_proto3_TestAllTypesProto3_optional_double(msg2));
+ EXPECT_EQ(
+ true,
+ protobuf_test_messages_proto3_TestAllTypesProto3_optional_bool(msg2));
+ val = protobuf_test_messages_proto3_TestAllTypesProto3_optional_string(msg2);
+ EXPECT_TRUE(upb_StringView_IsEqual(val, test_str_view));
+
+ // Test clear.
+ protobuf_test_messages_proto3_TestAllTypesProto3_clear_optional_int32(msg);
+ EXPECT_EQ(
+ 0, protobuf_test_messages_proto3_TestAllTypesProto3_optional_int32(msg));
+ protobuf_test_messages_proto3_TestAllTypesProto3_clear_optional_int64(msg);
+ EXPECT_EQ(
+ 0, protobuf_test_messages_proto3_TestAllTypesProto3_optional_int64(msg));
+ protobuf_test_messages_proto3_TestAllTypesProto3_clear_optional_uint32(msg);
+ EXPECT_EQ(
+ 0, protobuf_test_messages_proto3_TestAllTypesProto3_optional_uint32(msg));
+ protobuf_test_messages_proto3_TestAllTypesProto3_clear_optional_uint64(msg);
+ EXPECT_EQ(
+ 0, protobuf_test_messages_proto3_TestAllTypesProto3_optional_uint64(msg));
+ protobuf_test_messages_proto3_TestAllTypesProto3_clear_optional_float(msg);
+ EXPECT_EQ(
+ 0.0f,
+ protobuf_test_messages_proto3_TestAllTypesProto3_optional_float(msg));
+ protobuf_test_messages_proto3_TestAllTypesProto3_clear_optional_double(msg);
+ EXPECT_EQ(
+ 0.0,
+ protobuf_test_messages_proto3_TestAllTypesProto3_optional_double(msg));
+ protobuf_test_messages_proto3_TestAllTypesProto3_clear_optional_bool(msg);
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto3_TestAllTypesProto3_optional_bool(msg));
+ protobuf_test_messages_proto3_TestAllTypesProto3_clear_optional_string(msg);
+ EXPECT_EQ(
+ 0, protobuf_test_messages_proto3_TestAllTypesProto3_optional_string(msg)
+ .size);
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, ScalarsProto2) {
+ upb_Arena* arena = upb_Arena_New();
+ protobuf_test_messages_proto2_TestAllTypesProto2* msg =
+ protobuf_test_messages_proto2_TestAllTypesProto2_new(arena);
+ protobuf_test_messages_proto2_TestAllTypesProto2* msg2;
+ upb_StringView serialized;
+
+ // Test hazzer and serialization.
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_int32(msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_set_optional_int32(msg, 10);
+ EXPECT_EQ(
+ true,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_int32(msg));
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_int64(msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_set_optional_int64(msg, 20);
+ EXPECT_EQ(
+ true,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_int64(msg));
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_uint32(
+ msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_set_optional_uint32(msg, 30);
+ EXPECT_EQ(
+ true,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_uint32(
+ msg));
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_uint64(
+ msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_set_optional_uint64(msg, 40);
+ EXPECT_EQ(
+ true,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_uint64(
+ msg));
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_sint32(
+ msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_set_optional_sint32(msg, 50);
+ EXPECT_EQ(
+ true,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_sint32(
+ msg));
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_sint64(
+ msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_set_optional_sint64(msg, 60);
+ EXPECT_EQ(
+ true,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_sint64(
+ msg));
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_fixed32(
+ msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_set_optional_fixed32(msg,
+ 70);
+ EXPECT_EQ(
+ true,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_fixed32(
+ msg));
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_fixed64(
+ msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_set_optional_fixed64(msg,
+ 80);
+ EXPECT_EQ(
+ true,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_fixed64(
+ msg));
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_sfixed32(
+ msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_set_optional_sfixed32(msg,
+ 90);
+ EXPECT_EQ(
+ true,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_sfixed32(
+ msg));
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_sfixed64(
+ msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_set_optional_sfixed64(msg,
+ 100);
+ EXPECT_EQ(
+ true,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_sfixed64(
+ msg));
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_float(msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_set_optional_float(msg,
+ 50.5);
+ EXPECT_EQ(
+ true,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_float(msg));
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_double(
+ msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_set_optional_double(msg,
+ 60.6);
+ EXPECT_EQ(
+ true,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_double(
+ msg));
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_bool(msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_set_optional_bool(msg, true);
+ EXPECT_EQ(
+ true,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_bool(msg));
+
+ serialized.data = protobuf_test_messages_proto2_TestAllTypesProto2_serialize(
+ msg, arena, &serialized.size);
+
+ msg2 = protobuf_test_messages_proto2_TestAllTypesProto2_parse(
+ serialized.data, serialized.size, arena);
+
+ EXPECT_EQ(10, protobuf_test_messages_proto2_TestAllTypesProto2_optional_int32(
+ msg2));
+ EXPECT_EQ(20, protobuf_test_messages_proto2_TestAllTypesProto2_optional_int64(
+ msg2));
+ EXPECT_EQ(
+ 30,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_uint32(msg2));
+ EXPECT_EQ(
+ 40,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_uint64(msg2));
+ EXPECT_EQ(
+ 50,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_sint32(msg2));
+ EXPECT_EQ(
+ 60,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_sint64(msg2));
+ EXPECT_EQ(
+ 70,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_fixed32(msg2));
+ EXPECT_EQ(
+ 80,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_fixed64(msg2));
+ EXPECT_EQ(
+ 90,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_sfixed32(msg2));
+ EXPECT_EQ(
+ 100,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_sfixed64(msg2));
+ EXPECT_EQ(
+ 50.5,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_float(msg2));
+ EXPECT_EQ(
+ 60.6,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_double(msg2));
+ EXPECT_EQ(
+ true,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_bool(msg2));
+
+ // Test clear.
+ protobuf_test_messages_proto2_TestAllTypesProto2_clear_optional_int32(msg);
+ EXPECT_EQ(
+ 0, protobuf_test_messages_proto2_TestAllTypesProto2_optional_int32(msg));
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_int32(msg));
+
+ protobuf_test_messages_proto2_TestAllTypesProto2_clear_optional_int64(msg);
+ EXPECT_EQ(
+ 0, protobuf_test_messages_proto2_TestAllTypesProto2_optional_int64(msg));
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_int64(msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_clear_optional_uint32(msg);
+ EXPECT_EQ(
+ 0, protobuf_test_messages_proto2_TestAllTypesProto2_optional_uint32(msg));
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_uint32(
+ msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_clear_optional_uint64(msg);
+ EXPECT_EQ(
+ 0, protobuf_test_messages_proto2_TestAllTypesProto2_optional_uint64(msg));
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_uint64(
+ msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_clear_optional_float(msg);
+ EXPECT_EQ(
+ 0.0f,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_float(msg));
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_float(msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_clear_optional_double(msg);
+ EXPECT_EQ(
+ 0.0,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_double(msg));
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_double(
+ msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_clear_optional_bool(msg);
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_bool(msg));
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_bool(msg));
+
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, RepeatedClear) {
+ upb_Arena* arena = upb_Arena_New();
+ protobuf_test_messages_proto2_TestAllTypesProto2* msg =
+ protobuf_test_messages_proto2_TestAllTypesProto2_new(arena);
+ size_t len = 0;
+ protobuf_test_messages_proto2_TestAllTypesProto2_repeated_int32(msg, &len);
+ EXPECT_EQ(0, len);
+ protobuf_test_messages_proto2_TestAllTypesProto2_add_repeated_int32(msg, 2,
+ arena);
+ protobuf_test_messages_proto2_TestAllTypesProto2_add_repeated_int32(msg, 3,
+ arena);
+ protobuf_test_messages_proto2_TestAllTypesProto2_add_repeated_int32(msg, 4,
+ arena);
+ protobuf_test_messages_proto2_TestAllTypesProto2_repeated_int32(msg, &len);
+ EXPECT_EQ(3, len);
+ protobuf_test_messages_proto2_TestAllTypesProto2_clear_repeated_int32(msg);
+ protobuf_test_messages_proto2_TestAllTypesProto2_repeated_int32(msg, &len);
+ EXPECT_EQ(0, len);
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, Clear) {
+ upb_Arena* arena = upb_Arena_New();
+ protobuf_test_messages_proto2_TestAllTypesProto2* msg =
+ protobuf_test_messages_proto2_TestAllTypesProto2_new(arena);
+ // Test clear.
+ protobuf_test_messages_proto2_TestAllTypesProto2_set_optional_int32(msg, 1);
+ EXPECT_TRUE(
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_int32(msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_clear_optional_int32(msg);
+ EXPECT_EQ(
+ 0, protobuf_test_messages_proto2_TestAllTypesProto2_optional_int32(msg));
+ EXPECT_FALSE(
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_int32(msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_clear_optional_int64(msg);
+ EXPECT_EQ(
+ 0, protobuf_test_messages_proto2_TestAllTypesProto2_optional_int64(msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_clear_optional_uint32(msg);
+ EXPECT_EQ(
+ 0, protobuf_test_messages_proto2_TestAllTypesProto2_optional_uint32(msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_clear_optional_uint64(msg);
+ EXPECT_EQ(
+ 0, protobuf_test_messages_proto2_TestAllTypesProto2_optional_uint64(msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_clear_optional_float(msg);
+ EXPECT_EQ(
+ 0.0f,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_float(msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_clear_optional_double(msg);
+ EXPECT_EQ(
+ 0.0,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_double(msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_clear_optional_bool(msg);
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_bool(msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_clear_optional_string(msg);
+ EXPECT_EQ(
+ 0, protobuf_test_messages_proto2_TestAllTypesProto2_optional_string(msg)
+ .size);
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, Bytes) {
+ upb_Arena* arena = upb_Arena_New();
+ protobuf_test_messages_proto2_TestAllTypesProto2* msg =
+ protobuf_test_messages_proto2_TestAllTypesProto2_new(arena);
+ protobuf_test_messages_proto2_TestAllTypesProto2* msg2;
+ upb_StringView serialized;
+ const char data[] = "ABCDEF";
+ upb_StringView bytes = upb_StringView_FromString(data);
+ upb_StringView val;
+
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_bytes(msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_set_optional_bytes(msg,
+ bytes);
+ EXPECT_EQ(
+ true,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_bytes(msg));
+
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_string(
+ msg));
+ protobuf_test_messages_proto2_TestAllTypesProto2_set_optional_string(
+ msg, test_str_view);
+ EXPECT_EQ(
+ true,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_string(
+ msg));
+
+ serialized.data = protobuf_test_messages_proto2_TestAllTypesProto2_serialize(
+ msg, arena, &serialized.size);
+
+ msg2 = protobuf_test_messages_proto2_TestAllTypesProto2_parse(
+ serialized.data, serialized.size, arena);
+
+ EXPECT_EQ(bytes.size,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_bytes(msg)
+ .size);
+ EXPECT_EQ(
+ 0, memcmp(bytes.data,
+ protobuf_test_messages_proto2_TestAllTypesProto2_optional_bytes(
+ msg)
+ .data,
+ bytes.size));
+ protobuf_test_messages_proto2_TestAllTypesProto2_clear_optional_bytes(msg);
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_bytes(msg));
+
+ val = protobuf_test_messages_proto2_TestAllTypesProto2_optional_string(msg2);
+ EXPECT_TRUE(upb_StringView_IsEqual(val, test_str_view));
+
+ protobuf_test_messages_proto2_TestAllTypesProto2_clear_optional_string(msg);
+ EXPECT_EQ(
+ 0, protobuf_test_messages_proto2_TestAllTypesProto2_optional_string(msg)
+ .size);
+ EXPECT_EQ(
+ false,
+ protobuf_test_messages_proto2_TestAllTypesProto2_has_optional_string(
+ msg));
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, UTF8) {
+ const char invalid_utf8[] = "\xff";
+ const upb_StringView invalid_utf8_view =
+ upb_StringView_FromDataAndSize(invalid_utf8, 1);
+ upb_Arena* arena = upb_Arena_New();
+ upb_StringView serialized;
+ protobuf_test_messages_proto3_TestAllTypesProto3* msg =
+ protobuf_test_messages_proto3_TestAllTypesProto3_new(arena);
+ protobuf_test_messages_proto3_TestAllTypesProto3* msg2;
+
+ protobuf_test_messages_proto3_TestAllTypesProto3_set_optional_string(
+ msg, invalid_utf8_view);
+
+ serialized.data = protobuf_test_messages_proto3_TestAllTypesProto3_serialize(
+ msg, arena, &serialized.size);
+
+ msg2 = protobuf_test_messages_proto3_TestAllTypesProto3_parse(
+ serialized.data, serialized.size, arena);
+ EXPECT_EQ(nullptr, msg2);
+
+ upb_Arena_Free(arena);
+}
+
+static void check_string_map_empty(
+ protobuf_test_messages_proto3_TestAllTypesProto3* msg) {
+ size_t iter = kUpb_Map_Begin;
+
+ EXPECT_EQ(
+ 0,
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_string_string_size(
+ msg));
+ EXPECT_FALSE(
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_string_string_next(
+ msg, &iter));
+}
+
+static void check_string_map_one_entry(
+ protobuf_test_messages_proto3_TestAllTypesProto3* msg) {
+ const protobuf_test_messages_proto3_TestAllTypesProto3_MapStringStringEntry*
+ const_ent;
+ size_t iter;
+ upb_StringView str;
+
+ EXPECT_EQ(
+ 1,
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_string_string_size(
+ msg));
+ EXPECT_TRUE(
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_string_string_get(
+ msg, test_str_view, &str));
+ EXPECT_TRUE(upb_StringView_IsEqual(str, test_str_view2));
+
+ EXPECT_FALSE(
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_string_string_get(
+ msg, test_str_view3, &str));
+
+ /* Test that iteration reveals a single k/v pair in the map. */
+ iter = kUpb_Map_Begin;
+ const_ent =
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_string_string_next(
+ msg, &iter);
+ ASSERT_NE(nullptr, const_ent);
+ EXPECT_TRUE(upb_StringView_IsEqual(
+ test_str_view,
+ protobuf_test_messages_proto3_TestAllTypesProto3_MapStringStringEntry_key(
+ const_ent)));
+ EXPECT_TRUE(upb_StringView_IsEqual(
+ test_str_view2,
+ protobuf_test_messages_proto3_TestAllTypesProto3_MapStringStringEntry_value(
+ const_ent)));
+
+ const_ent =
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_string_string_next(
+ msg, &iter);
+ EXPECT_EQ(nullptr, const_ent);
+}
+
+TEST(GeneratedCode, StringDoubleMap) {
+ upb_Arena* arena = upb_Arena_New();
+ upb_StringView serialized;
+ upb_test_MapTest* msg = upb_test_MapTest_new(arena);
+ upb_test_MapTest* msg2;
+ double val;
+
+ upb_test_MapTest_map_string_double_set(msg, test_str_view, 1.5, arena);
+ ASSERT_NE(nullptr, msg);
+ EXPECT_TRUE(upb_test_MapTest_map_string_double_get(msg, test_str_view, &val));
+ EXPECT_EQ(1.5, val);
+ val = 0;
+
+ serialized.data = upb_test_MapTest_serialize(msg, arena, &serialized.size);
+ EXPECT_NE(nullptr, serialized.data);
+
+ msg2 = upb_test_MapTest_parse(serialized.data, serialized.size, arena);
+ ASSERT_NE(nullptr, msg2);
+ EXPECT_TRUE(
+ upb_test_MapTest_map_string_double_get(msg2, test_str_view, &val));
+ EXPECT_EQ(1.5, val);
+
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, StringMap) {
+ upb_Arena* arena = upb_Arena_New();
+ protobuf_test_messages_proto3_TestAllTypesProto3* msg =
+ protobuf_test_messages_proto3_TestAllTypesProto3_new(arena);
+ const protobuf_test_messages_proto3_TestAllTypesProto3_MapStringStringEntry*
+ const_ent;
+ size_t iter, count;
+
+ check_string_map_empty(msg);
+
+ /* Set map[test_str_view] = test_str_view2 */
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_string_string_set(
+ msg, test_str_view, test_str_view2, arena);
+ check_string_map_one_entry(msg);
+
+ /* Deleting a non-existent key does nothing. */
+ EXPECT_FALSE(
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_string_string_delete(
+ msg, test_str_view3));
+ check_string_map_one_entry(msg);
+
+ /* Deleting the key sets the map back to empty. */
+ EXPECT_TRUE(
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_string_string_delete(
+ msg, test_str_view));
+ check_string_map_empty(msg);
+
+ /* Set two keys this time:
+ * map[test_str_view] = test_str_view2
+ * map[test_str_view3] = test_str_view4
+ */
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_string_string_set(
+ msg, test_str_view, test_str_view2, arena);
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_string_string_set(
+ msg, test_str_view3, test_str_view4, arena);
+
+ /* Test iteration */
+ iter = kUpb_Map_Begin;
+ count = 0;
+
+ while (
+ (const_ent =
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_string_string_next(
+ msg, &iter)) != nullptr) {
+ upb_StringView key =
+ protobuf_test_messages_proto3_TestAllTypesProto3_MapStringStringEntry_key(
+ const_ent);
+ upb_StringView val =
+ protobuf_test_messages_proto3_TestAllTypesProto3_MapStringStringEntry_value(
+ const_ent);
+
+ count++;
+ if (upb_StringView_IsEqual(key, test_str_view)) {
+ EXPECT_TRUE(upb_StringView_IsEqual(val, test_str_view2));
+ } else {
+ EXPECT_TRUE(upb_StringView_IsEqual(key, test_str_view3));
+ EXPECT_TRUE(upb_StringView_IsEqual(val, test_str_view4));
+ }
+ }
+
+ EXPECT_EQ(2, count);
+
+ /* Clearing the map goes back to empty. */
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_string_string_clear(msg);
+ check_string_map_empty(msg);
+
+ upb_Arena_Free(arena);
+}
+
+static void check_int32_map_empty(
+ protobuf_test_messages_proto3_TestAllTypesProto3* msg) {
+ size_t iter = kUpb_Map_Begin;
+
+ EXPECT_EQ(
+ 0, protobuf_test_messages_proto3_TestAllTypesProto3_map_int32_int32_size(
+ msg));
+ EXPECT_FALSE(
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_int32_int32_next(
+ msg, &iter));
+}
+
+static void check_int32_map_one_entry(
+ protobuf_test_messages_proto3_TestAllTypesProto3* msg) {
+ const protobuf_test_messages_proto3_TestAllTypesProto3_MapInt32Int32Entry*
+ const_ent;
+ size_t iter;
+ int32_t val;
+
+ EXPECT_EQ(
+ 1, protobuf_test_messages_proto3_TestAllTypesProto3_map_int32_int32_size(
+ msg));
+ EXPECT_TRUE(
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_int32_int32_get(
+ msg, test_int32, &val));
+ EXPECT_EQ(val, test_int32_2);
+
+ EXPECT_FALSE(
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_int32_int32_get(
+ msg, test_int32_3, &val));
+
+ /* Test that iteration reveals a single k/v pair in the map. */
+ iter = kUpb_Map_Begin;
+ const_ent =
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_int32_int32_next(
+ msg, &iter);
+ ASSERT_NE(nullptr, const_ent);
+ EXPECT_EQ(
+ test_int32,
+ protobuf_test_messages_proto3_TestAllTypesProto3_MapInt32Int32Entry_key(
+ const_ent));
+ EXPECT_EQ(
+ test_int32_2,
+ protobuf_test_messages_proto3_TestAllTypesProto3_MapInt32Int32Entry_value(
+ const_ent));
+
+ const_ent =
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_int32_int32_next(
+ msg, &iter);
+ EXPECT_EQ(nullptr, const_ent);
+}
+
+TEST(GeneratedCode, Int32Map) {
+ upb_Arena* arena = upb_Arena_New();
+ protobuf_test_messages_proto3_TestAllTypesProto3* msg =
+ protobuf_test_messages_proto3_TestAllTypesProto3_new(arena);
+ const protobuf_test_messages_proto3_TestAllTypesProto3_MapInt32Int32Entry*
+ const_ent;
+ size_t iter, count;
+
+ check_int32_map_empty(msg);
+
+ /* Set map[test_int32] = test_int32_2 */
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_int32_int32_set(
+ msg, test_int32, test_int32_2, arena);
+ check_int32_map_one_entry(msg);
+
+ /* Deleting a non-existent key does nothing. */
+ EXPECT_FALSE(
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_int32_int32_delete(
+ msg, test_int32_3));
+ check_int32_map_one_entry(msg);
+
+ /* Deleting the key sets the map back to empty. */
+ EXPECT_TRUE(
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_int32_int32_delete(
+ msg, test_int32));
+ check_int32_map_empty(msg);
+
+ /* Set two keys this time:
+ * map[test_int32] = test_int32_2
+ * map[test_int32_3] = test_int32_4
+ */
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_int32_int32_set(
+ msg, test_int32, test_int32_2, arena);
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_int32_int32_set(
+ msg, test_int32_3, test_int32_4, arena);
+
+ /* Test iteration */
+ iter = kUpb_Map_Begin;
+ count = 0;
+
+ while (
+ (const_ent =
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_int32_int32_next(
+ msg, &iter)) != nullptr) {
+ int32_t key =
+ protobuf_test_messages_proto3_TestAllTypesProto3_MapInt32Int32Entry_key(
+ const_ent);
+ int32_t val =
+ protobuf_test_messages_proto3_TestAllTypesProto3_MapInt32Int32Entry_value(
+ const_ent);
+
+ count++;
+ if (key == test_int32) {
+ EXPECT_EQ(val, test_int32_2);
+ } else {
+ EXPECT_EQ(key, test_int32_3);
+ EXPECT_EQ(val, test_int32_4);
+ }
+ }
+
+ EXPECT_EQ(2, count);
+
+ /* Clearing the map goes back to empty. */
+ protobuf_test_messages_proto3_TestAllTypesProto3_map_int32_int32_clear(msg);
+ check_int32_map_empty(msg);
+
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, TestRepeated) {
+ upb_Arena* arena = upb_Arena_New();
+ protobuf_test_messages_proto3_TestAllTypesProto3* msg =
+ protobuf_test_messages_proto3_TestAllTypesProto3_new(arena);
+ size_t size;
+ const int* elems;
+
+ EXPECT_EQ(
+ _protobuf_test_messages_proto3_TestAllTypesProto3_repeated_int32_upb_array(
+ msg, &size),
+ nullptr);
+
+ protobuf_test_messages_proto3_TestAllTypesProto3_add_repeated_int32(msg, 5,
+ arena);
+
+ EXPECT_NE(
+ _protobuf_test_messages_proto3_TestAllTypesProto3_repeated_int32_upb_array(
+ msg, &size),
+ nullptr);
+
+ elems = protobuf_test_messages_proto3_TestAllTypesProto3_repeated_int32(
+ msg, &size);
+
+ EXPECT_EQ(size, 1);
+ EXPECT_EQ(elems[0], 5);
+
+ const upb_Array* arr =
+ _protobuf_test_messages_proto3_TestAllTypesProto3_repeated_int32_upb_array(
+ msg, &size);
+ EXPECT_EQ(size, 1);
+ upb_Array* mutable_arr =
+ _protobuf_test_messages_proto3_TestAllTypesProto3_repeated_int32_mutable_upb_array(
+ msg, &size, arena);
+ EXPECT_EQ(mutable_arr, arr);
+ EXPECT_EQ(upb_Array_Size(arr), 1);
+ EXPECT_EQ(size, 1);
+
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, Issue9440) {
+ upb::Arena arena;
+ upb_test_HelloRequest* msg = upb_test_HelloRequest_new(arena.ptr());
+ upb_test_HelloRequest_set_id(msg, 8);
+ EXPECT_EQ(8, upb_test_HelloRequest_id(msg));
+ char str[] = "1";
+ upb_test_HelloRequest_set_version(msg, upb_StringView{str, strlen(str)});
+ EXPECT_EQ(8, upb_test_HelloRequest_id(msg));
+}
+
+TEST(GeneratedCode, NullDecodeBuffer) {
+ upb_Arena* arena = upb_Arena_New();
+ protobuf_test_messages_proto3_TestAllTypesProto3* msg =
+ protobuf_test_messages_proto3_TestAllTypesProto3_parse(nullptr, 0, arena);
+ size_t size;
+
+ ASSERT_NE(nullptr, msg);
+ protobuf_test_messages_proto3_TestAllTypesProto3_serialize(msg, arena, &size);
+ EXPECT_EQ(0, size);
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, StatusTruncation) {
+ int i, j;
+ upb_Status status;
+ upb_Status status2;
+ for (i = 0; i < _kUpb_Status_MaxMessage + 20; i++) {
+ char* msg = static_cast<char*>(malloc(i + 1));
+ int end;
+ char ch = (i % 96) + 33; /* Cycle through printable chars. */
+
+ for (j = 0; j < i; j++) {
+ msg[j] = ch;
+ }
+ msg[i] = '\0';
+
+ upb_Status_SetErrorMessage(&status, msg);
+ upb_Status_SetErrorFormat(&status2, "%s", msg);
+ end = MIN(i, _kUpb_Status_MaxMessage - 1);
+ EXPECT_EQ(end, strlen(status.msg));
+ EXPECT_EQ(end, strlen(status2.msg));
+
+ for (j = 0; j < end; j++) {
+ EXPECT_EQ(ch, status.msg[j]);
+ EXPECT_EQ(ch, status2.msg[j]);
+ }
+
+ free(msg);
+ }
+}
+
+TEST(GeneratedCode, ArenaUnaligned) {
+ char buf1[1024];
+ // Force the pointer to be unaligned.
+ uintptr_t low_bits = UPB_MALLOC_ALIGN - 1;
+ char* unaligned_buf_ptr = (char*)((uintptr_t)buf1 | low_bits);
+ upb_Arena* arena = upb_Arena_Init(
+ unaligned_buf_ptr, &buf1[sizeof(buf1)] - unaligned_buf_ptr, nullptr);
+ char* mem = static_cast<char*>(upb_Arena_Malloc(arena, 5));
+ EXPECT_EQ(0, reinterpret_cast<uintptr_t>(mem) & low_bits);
+ upb_Arena_Free(arena);
+
+ // Try the same, but with a size so small that aligning up will overflow.
+ arena = upb_Arena_Init(unaligned_buf_ptr, 5, &upb_alloc_global);
+ mem = static_cast<char*>(upb_Arena_Malloc(arena, 5));
+ EXPECT_EQ(0, reinterpret_cast<uintptr_t>(mem) & low_bits);
+ upb_Arena_Free(arena);
+}
+
+TEST(GeneratedCode, Extensions) {
+ upb::Arena arena;
+ upb_test_ModelExtension1* extension1 =
+ upb_test_ModelExtension1_new(arena.ptr());
+ upb_test_ModelExtension1_set_str(extension1,
+ upb_StringView_FromString("Hello"));
+
+ upb_test_ModelExtension2* extension2 =
+ upb_test_ModelExtension2_new(arena.ptr());
+ upb_test_ModelExtension2_set_i(extension2, 5);
+
+ upb_test_ModelWithExtensions* msg1 =
+ upb_test_ModelWithExtensions_new(arena.ptr());
+ upb_test_ModelWithExtensions* msg2 =
+ upb_test_ModelWithExtensions_new(arena.ptr());
+
+ // msg1: [extension1, extension2]
+ upb_test_ModelExtension1_set_model_ext(msg1, extension1, arena.ptr());
+ upb_test_ModelExtension2_set_model_ext(msg1, extension2, arena.ptr());
+
+ // msg2: [extension2, extension1]
+ upb_test_ModelExtension2_set_model_ext(msg2, extension2, arena.ptr());
+ upb_test_ModelExtension1_set_model_ext(msg2, extension1, arena.ptr());
+
+ size_t size1, size2;
+ int opts = kUpb_EncodeOption_Deterministic;
+ char* pb1 = upb_test_ModelWithExtensions_serialize_ex(msg1, opts, arena.ptr(),
+ &size1);
+ char* pb2 = upb_test_ModelWithExtensions_serialize_ex(msg2, opts, arena.ptr(),
+ &size2);
+
+ ASSERT_EQ(size1, size2);
+ ASSERT_EQ(0, memcmp(pb1, pb2, size1));
+}
diff --git a/upb/upb/test/test_import_empty_srcs.cc b/upb/upb/test/test_import_empty_srcs.cc
new file mode 100644
index 0000000..941e8d0
--- /dev/null
+++ b/upb/upb/test/test_import_empty_srcs.cc
@@ -0,0 +1,37 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "gtest/gtest.h"
+#include "upb/test/test_import_empty_srcs.upb.h"
+
+TEST(Test, Reexport) {
+ // This test really just ensures that compilation succeeds.
+ ASSERT_GT(sizeof(upb_test_ContainsImported_msg_init), 0);
+}
diff --git a/upb/upb/test/test_import_empty_srcs.proto b/upb/upb/test/test_import_empty_srcs.proto
new file mode 100644
index 0000000..78794e9
--- /dev/null
+++ b/upb/upb/test/test_import_empty_srcs.proto
@@ -0,0 +1,41 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto2";
+
+package upb_test;
+
+import "upb/test/test.proto";
+import "upb/test/test_cpp.proto";
+
+message ContainsImported {
+ optional MessageName message_name = 1;
+ optional upb.test.TestMessage test_message = 2;
+}
diff --git a/upb/upb/test/test_mini_table_oneof.cc b/upb/upb/test/test_mini_table_oneof.cc
new file mode 100644
index 0000000..f741d62
--- /dev/null
+++ b/upb/upb/test/test_mini_table_oneof.cc
@@ -0,0 +1,67 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "gtest/gtest.h"
+#include "google/protobuf/test_messages_proto2.upb.h"
+#include "google/protobuf/test_messages_proto3.upb.h"
+#include "upb/mini_table/field.h"
+#include "upb/mini_table/message.h"
+#include "upb/test/test.upb.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+TEST(MiniTableOneofTest, OneOfIteratorProto2) {
+ constexpr int oneof_first_field_number = 111;
+ constexpr int oneof_test_field_number = 116;
+
+ const upb_MiniTable* google_protobuf_table =
+ &protobuf_test_messages_proto2_TestAllTypesProto2_msg_init;
+ const upb_MiniTableField* field =
+ upb_MiniTable_FindFieldByNumber(google_protobuf_table, oneof_test_field_number);
+ ASSERT_TRUE(field != nullptr);
+ const upb_MiniTableField* ptr = upb_MiniTable_GetOneof(google_protobuf_table, field);
+ int field_num = oneof_first_field_number;
+ do {
+ EXPECT_EQ(ptr->number, field_num++);
+ } while (upb_MiniTable_NextOneofField(google_protobuf_table, &ptr));
+}
+
+TEST(MiniTableOneofTest, InitialFieldNotOneOf) {
+ constexpr int test_field_number = 1; // optional int that is not a oneof
+ const upb_MiniTable* google_protobuf_table =
+ &protobuf_test_messages_proto2_TestAllTypesProto2_msg_init;
+ const upb_MiniTableField* field =
+ upb_MiniTable_FindFieldByNumber(google_protobuf_table, test_field_number);
+ ASSERT_TRUE(field != nullptr);
+ const upb_MiniTableField* first_field =
+ upb_MiniTable_GetOneof(google_protobuf_table, field);
+ EXPECT_EQ(first_field, nullptr);
+}
diff --git a/upb/upb/text/BUILD b/upb/upb/text/BUILD
new file mode 100644
index 0000000..1c71597
--- /dev/null
+++ b/upb/upb/text/BUILD
@@ -0,0 +1,65 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load("//bazel:build_defs.bzl", "UPB_DEFAULT_COPTS")
+
+cc_library(
+ name = "text",
+ srcs = [
+ "encode.c",
+ ],
+ hdrs = [
+ "encode.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ "//:collections",
+ "//:collections_internal",
+ "//:eps_copy_input_stream",
+ "//:lex",
+ "//:port",
+ "//:reflection",
+ "//:wire",
+ "//:wire_reader",
+ "//:wire_types",
+ ],
+)
+
+# begin:github_only
+filegroup(
+ name = "source_files",
+ srcs = glob(
+ [
+ "**/*.c",
+ "**/*.h",
+ ],
+ ),
+ visibility = [
+ "//cmake:__pkg__",
+ "//python/dist:__pkg__",
+ ]
+)
+# end:github_only
diff --git a/upb/upb/text/encode.c b/upb/upb/text/encode.c
new file mode 100644
index 0000000..0e9fee7
--- /dev/null
+++ b/upb/upb/text/encode.c
@@ -0,0 +1,480 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/text/encode.h"
+
+#include <ctype.h>
+#include <float.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <string.h>
+
+#include "upb/collections/internal/map_sorter.h"
+#include "upb/collections/map.h"
+#include "upb/lex/round_trip.h"
+#include "upb/port/vsnprintf_compat.h"
+#include "upb/reflection/message.h"
+#include "upb/wire/eps_copy_input_stream.h"
+#include "upb/wire/reader.h"
+#include "upb/wire/types.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+typedef struct {
+ char *buf, *ptr, *end;
+ size_t overflow;
+ int indent_depth;
+ int options;
+ const upb_DefPool* ext_pool;
+ _upb_mapsorter sorter;
+} txtenc;
+
+static void txtenc_msg(txtenc* e, const upb_Message* msg,
+ const upb_MessageDef* m);
+
+static void txtenc_putbytes(txtenc* e, const void* data, size_t len) {
+ size_t have = e->end - e->ptr;
+ if (UPB_LIKELY(have >= len)) {
+ memcpy(e->ptr, data, len);
+ e->ptr += len;
+ } else {
+ if (have) {
+ memcpy(e->ptr, data, have);
+ e->ptr += have;
+ }
+ e->overflow += (len - have);
+ }
+}
+
+static void txtenc_putstr(txtenc* e, const char* str) {
+ txtenc_putbytes(e, str, strlen(str));
+}
+
+static void txtenc_printf(txtenc* e, const char* fmt, ...) {
+ size_t n;
+ size_t have = e->end - e->ptr;
+ va_list args;
+
+ va_start(args, fmt);
+ n = _upb_vsnprintf(e->ptr, have, fmt, args);
+ va_end(args);
+
+ if (UPB_LIKELY(have > n)) {
+ e->ptr += n;
+ } else {
+ e->ptr = UPB_PTRADD(e->ptr, have);
+ e->overflow += (n - have);
+ }
+}
+
+static void txtenc_indent(txtenc* e) {
+ if ((e->options & UPB_TXTENC_SINGLELINE) == 0) {
+ int i = e->indent_depth;
+ while (i-- > 0) {
+ txtenc_putstr(e, " ");
+ }
+ }
+}
+
+static void txtenc_endfield(txtenc* e) {
+ if (e->options & UPB_TXTENC_SINGLELINE) {
+ txtenc_putstr(e, " ");
+ } else {
+ txtenc_putstr(e, "\n");
+ }
+}
+
+static void txtenc_enum(int32_t val, const upb_FieldDef* f, txtenc* e) {
+ const upb_EnumDef* e_def = upb_FieldDef_EnumSubDef(f);
+ const upb_EnumValueDef* ev = upb_EnumDef_FindValueByNumber(e_def, val);
+
+ if (ev) {
+ txtenc_printf(e, "%s", upb_EnumValueDef_Name(ev));
+ } else {
+ txtenc_printf(e, "%" PRId32, val);
+ }
+}
+
+static void txtenc_string(txtenc* e, upb_StringView str, bool bytes) {
+ const char* ptr = str.data;
+ const char* end = ptr + str.size;
+ txtenc_putstr(e, "\"");
+
+ while (ptr < end) {
+ switch (*ptr) {
+ case '\n':
+ txtenc_putstr(e, "\\n");
+ break;
+ case '\r':
+ txtenc_putstr(e, "\\r");
+ break;
+ case '\t':
+ txtenc_putstr(e, "\\t");
+ break;
+ case '\"':
+ txtenc_putstr(e, "\\\"");
+ break;
+ case '\'':
+ txtenc_putstr(e, "\\'");
+ break;
+ case '\\':
+ txtenc_putstr(e, "\\\\");
+ break;
+ default:
+ if ((bytes || (uint8_t)*ptr < 0x80) && !isprint(*ptr)) {
+ txtenc_printf(e, "\\%03o", (int)(uint8_t)*ptr);
+ } else {
+ txtenc_putbytes(e, ptr, 1);
+ }
+ break;
+ }
+ ptr++;
+ }
+
+ txtenc_putstr(e, "\"");
+}
+
+static void txtenc_field(txtenc* e, upb_MessageValue val,
+ const upb_FieldDef* f) {
+ txtenc_indent(e);
+ const upb_CType type = upb_FieldDef_CType(f);
+ const bool is_ext = upb_FieldDef_IsExtension(f);
+ const char* full = upb_FieldDef_FullName(f);
+ const char* name = upb_FieldDef_Name(f);
+
+ if (type == kUpb_CType_Message) {
+ if (is_ext) {
+ txtenc_printf(e, "[%s] {", full);
+ } else {
+ txtenc_printf(e, "%s {", name);
+ }
+ txtenc_endfield(e);
+ e->indent_depth++;
+ txtenc_msg(e, val.msg_val, upb_FieldDef_MessageSubDef(f));
+ e->indent_depth--;
+ txtenc_indent(e);
+ txtenc_putstr(e, "}");
+ txtenc_endfield(e);
+ return;
+ }
+
+ if (is_ext) {
+ txtenc_printf(e, "[%s]: ", full);
+ } else {
+ txtenc_printf(e, "%s: ", name);
+ }
+
+ switch (type) {
+ case kUpb_CType_Bool:
+ txtenc_putstr(e, val.bool_val ? "true" : "false");
+ break;
+ case kUpb_CType_Float: {
+ char buf[32];
+ _upb_EncodeRoundTripFloat(val.float_val, buf, sizeof(buf));
+ txtenc_putstr(e, buf);
+ break;
+ }
+ case kUpb_CType_Double: {
+ char buf[32];
+ _upb_EncodeRoundTripDouble(val.double_val, buf, sizeof(buf));
+ txtenc_putstr(e, buf);
+ break;
+ }
+ case kUpb_CType_Int32:
+ txtenc_printf(e, "%" PRId32, val.int32_val);
+ break;
+ case kUpb_CType_UInt32:
+ txtenc_printf(e, "%" PRIu32, val.uint32_val);
+ break;
+ case kUpb_CType_Int64:
+ txtenc_printf(e, "%" PRId64, val.int64_val);
+ break;
+ case kUpb_CType_UInt64:
+ txtenc_printf(e, "%" PRIu64, val.uint64_val);
+ break;
+ case kUpb_CType_String:
+ txtenc_string(e, val.str_val, false);
+ break;
+ case kUpb_CType_Bytes:
+ txtenc_string(e, val.str_val, true);
+ break;
+ case kUpb_CType_Enum:
+ txtenc_enum(val.int32_val, f, e);
+ break;
+ default:
+ UPB_UNREACHABLE();
+ }
+
+ txtenc_endfield(e);
+}
+
+/*
+ * Arrays print as simple repeated elements, eg.
+ *
+ * foo_field: 1
+ * foo_field: 2
+ * foo_field: 3
+ */
+static void txtenc_array(txtenc* e, const upb_Array* arr,
+ const upb_FieldDef* f) {
+ size_t i;
+ size_t size = upb_Array_Size(arr);
+
+ for (i = 0; i < size; i++) {
+ txtenc_field(e, upb_Array_Get(arr, i), f);
+ }
+}
+
+static void txtenc_mapentry(txtenc* e, upb_MessageValue key,
+ upb_MessageValue val, const upb_FieldDef* f) {
+ const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f);
+ const upb_FieldDef* key_f = upb_MessageDef_Field(entry, 0);
+ const upb_FieldDef* val_f = upb_MessageDef_Field(entry, 1);
+ txtenc_indent(e);
+ txtenc_printf(e, "%s {", upb_FieldDef_Name(f));
+ txtenc_endfield(e);
+ e->indent_depth++;
+
+ txtenc_field(e, key, key_f);
+ txtenc_field(e, val, val_f);
+
+ e->indent_depth--;
+ txtenc_indent(e);
+ txtenc_putstr(e, "}");
+ txtenc_endfield(e);
+}
+
+/*
+ * Maps print as messages of key/value, etc.
+ *
+ * foo_map: {
+ * key: "abc"
+ * value: 123
+ * }
+ * foo_map: {
+ * key: "def"
+ * value: 456
+ * }
+ */
+static void txtenc_map(txtenc* e, const upb_Map* map, const upb_FieldDef* f) {
+ if (e->options & UPB_TXTENC_NOSORT) {
+ size_t iter = kUpb_Map_Begin;
+ upb_MessageValue key, val;
+ while (upb_Map_Next(map, &key, &val, &iter)) {
+ txtenc_mapentry(e, key, val, f);
+ }
+ } else {
+ const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f);
+ const upb_FieldDef* key_f = upb_MessageDef_Field(entry, 0);
+ _upb_sortedmap sorted;
+ upb_MapEntry ent;
+
+ _upb_mapsorter_pushmap(&e->sorter, upb_FieldDef_Type(key_f), map, &sorted);
+ while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) {
+ upb_MessageValue key, val;
+ memcpy(&key, &ent.data.k, sizeof(key));
+ memcpy(&val, &ent.data.v, sizeof(val));
+ txtenc_mapentry(e, key, val, f);
+ }
+ _upb_mapsorter_popmap(&e->sorter, &sorted);
+ }
+}
+
+#define CHK(x) \
+ do { \
+ if (!(x)) { \
+ return false; \
+ } \
+ } while (0)
+
+/*
+ * Unknown fields are printed by number.
+ *
+ * 1001: 123
+ * 1002: "hello"
+ * 1006: 0xdeadbeef
+ * 1003: {
+ * 1: 111
+ * }
+ */
+static const char* txtenc_unknown(txtenc* e, const char* ptr,
+ upb_EpsCopyInputStream* stream,
+ int groupnum) {
+ // We are guaranteed that the unknown data is valid wire format, and will not
+ // contain tag zero.
+ uint32_t end_group = groupnum > 0
+ ? ((groupnum << kUpb_WireReader_WireTypeBits) |
+ kUpb_WireType_EndGroup)
+ : 0;
+
+ while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) {
+ uint32_t tag;
+ CHK(ptr = upb_WireReader_ReadTag(ptr, &tag));
+ if (tag == end_group) return ptr;
+
+ txtenc_indent(e);
+ txtenc_printf(e, "%d: ", (int)upb_WireReader_GetFieldNumber(tag));
+
+ switch (upb_WireReader_GetWireType(tag)) {
+ case kUpb_WireType_Varint: {
+ uint64_t val;
+ CHK(ptr = upb_WireReader_ReadVarint(ptr, &val));
+ txtenc_printf(e, "%" PRIu64, val);
+ break;
+ }
+ case kUpb_WireType_32Bit: {
+ uint32_t val;
+ ptr = upb_WireReader_ReadFixed32(ptr, &val);
+ txtenc_printf(e, "0x%08" PRIu32, val);
+ break;
+ }
+ case kUpb_WireType_64Bit: {
+ uint64_t val;
+ ptr = upb_WireReader_ReadFixed64(ptr, &val);
+ txtenc_printf(e, "0x%016" PRIu64, val);
+ break;
+ }
+ case kUpb_WireType_Delimited: {
+ int size;
+ char* start = e->ptr;
+ size_t start_overflow = e->overflow;
+ CHK(ptr = upb_WireReader_ReadSize(ptr, &size));
+ CHK(upb_EpsCopyInputStream_CheckDataSizeAvailable(stream, ptr, size));
+
+ // Speculatively try to parse as message.
+ txtenc_putstr(e, "{");
+ txtenc_endfield(e);
+
+ // EpsCopyInputStream can't back up, so create a sub-stream for the
+ // speculative parse.
+ upb_EpsCopyInputStream sub_stream;
+ const char* sub_ptr = upb_EpsCopyInputStream_GetAliasedPtr(stream, ptr);
+ upb_EpsCopyInputStream_Init(&sub_stream, &sub_ptr, size, true);
+
+ e->indent_depth++;
+ if (txtenc_unknown(e, sub_ptr, &sub_stream, -1)) {
+ ptr = upb_EpsCopyInputStream_Skip(stream, ptr, size);
+ e->indent_depth--;
+ txtenc_indent(e);
+ txtenc_putstr(e, "}");
+ } else {
+ // Didn't work out, print as raw bytes.
+ e->indent_depth--;
+ e->ptr = start;
+ e->overflow = start_overflow;
+ const char* str = ptr;
+ ptr = upb_EpsCopyInputStream_ReadString(stream, &str, size, NULL);
+ assert(ptr);
+ txtenc_string(e, (upb_StringView){.data = str, .size = size}, true);
+ }
+ break;
+ }
+ case kUpb_WireType_StartGroup:
+ txtenc_putstr(e, "{");
+ txtenc_endfield(e);
+ e->indent_depth++;
+ CHK(ptr = txtenc_unknown(e, ptr, stream,
+ upb_WireReader_GetFieldNumber(tag)));
+ e->indent_depth--;
+ txtenc_indent(e);
+ txtenc_putstr(e, "}");
+ break;
+ default:
+ return NULL;
+ }
+ txtenc_endfield(e);
+ }
+
+ return end_group == 0 && !upb_EpsCopyInputStream_IsError(stream) ? ptr : NULL;
+}
+
+#undef CHK
+
+static void txtenc_msg(txtenc* e, const upb_Message* msg,
+ const upb_MessageDef* m) {
+ size_t iter = kUpb_Message_Begin;
+ const upb_FieldDef* f;
+ upb_MessageValue val;
+
+ while (upb_Message_Next(msg, m, e->ext_pool, &f, &val, &iter)) {
+ if (upb_FieldDef_IsMap(f)) {
+ txtenc_map(e, val.map_val, f);
+ } else if (upb_FieldDef_IsRepeated(f)) {
+ txtenc_array(e, val.array_val, f);
+ } else {
+ txtenc_field(e, val, f);
+ }
+ }
+
+ if ((e->options & UPB_TXTENC_SKIPUNKNOWN) == 0) {
+ size_t size;
+ const char* ptr = upb_Message_GetUnknown(msg, &size);
+ if (size != 0) {
+ char* start = e->ptr;
+ upb_EpsCopyInputStream stream;
+ upb_EpsCopyInputStream_Init(&stream, &ptr, size, true);
+ if (!txtenc_unknown(e, ptr, &stream, -1)) {
+ /* Unknown failed to parse, back up and don't print it at all. */
+ e->ptr = start;
+ }
+ }
+ }
+}
+
+size_t txtenc_nullz(txtenc* e, size_t size) {
+ size_t ret = e->ptr - e->buf + e->overflow;
+
+ if (size > 0) {
+ if (e->ptr == e->end) e->ptr--;
+ *e->ptr = '\0';
+ }
+
+ return ret;
+}
+
+size_t upb_TextEncode(const upb_Message* msg, const upb_MessageDef* m,
+ const upb_DefPool* ext_pool, int options, char* buf,
+ size_t size) {
+ txtenc e;
+
+ e.buf = buf;
+ e.ptr = buf;
+ e.end = UPB_PTRADD(buf, size);
+ e.overflow = 0;
+ e.indent_depth = 0;
+ e.options = options;
+ e.ext_pool = ext_pool;
+ _upb_mapsorter_init(&e.sorter);
+
+ txtenc_msg(&e, msg, m);
+ _upb_mapsorter_destroy(&e.sorter);
+ return txtenc_nullz(&e, size);
+}
diff --git a/upb/upb/text/encode.h b/upb/upb/text/encode.h
new file mode 100644
index 0000000..2c0bcd7
--- /dev/null
+++ b/upb/upb/text/encode.h
@@ -0,0 +1,72 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_TEXT_ENCODE_H_
+#define UPB_TEXT_ENCODE_H_
+
+#include "upb/reflection/def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum {
+ // When set, prints everything on a single line.
+ UPB_TXTENC_SINGLELINE = 1,
+
+ // When set, unknown fields are not printed.
+ UPB_TXTENC_SKIPUNKNOWN = 2,
+
+ // When set, maps are *not* sorted (this avoids allocating tmp mem).
+ UPB_TXTENC_NOSORT = 4
+};
+
+/* Encodes the given |msg| to text format. The message's reflection is given in
+ * |m|. The symtab in |symtab| is used to find extensions (if NULL, extensions
+ * will not be printed).
+ *
+ * Output is placed in the given buffer, and always NULL-terminated. The output
+ * size (excluding NULL) is returned. This means that a return value >= |size|
+ * implies that the output was truncated. (These are the same semantics as
+ * snprintf()). */
+size_t upb_TextEncode(const upb_Message* msg, const upb_MessageDef* m,
+ const upb_DefPool* ext_pool, int options, char* buf,
+ size_t size);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_TEXT_ENCODE_H_ */
diff --git a/upb/upb/upb.hpp b/upb/upb/upb.hpp
new file mode 100644
index 0000000..3ee4b63
--- /dev/null
+++ b/upb/upb/upb.hpp
@@ -0,0 +1,41 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Temporary hack to fix gRPC. Do not use.
+
+#ifndef UPB_HPP_
+#define UPB_HPP_
+
+// IWYU pragma: begin_exports
+#include "upb/base/status.hpp"
+#include "upb/mem/arena.hpp"
+// IWYU pragma: end_exports
+
+#endif // UPB_HPP_
diff --git a/upb/upb/upb_so.c b/upb/upb/upb_so.c
new file mode 100644
index 0000000..fd7fb87
--- /dev/null
+++ b/upb/upb/upb_so.c
@@ -0,0 +1,37 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// These headers form a spanning tree for the upb defs needed by FFI layers.
+
+#include "upb/collections/array_split64.h"
+#include "upb/collections/map.h"
+#include "upb/message/accessors_split64.h"
+#include "upb/message/message.h"
+#include "upb/mini_descriptor/decode.h"
diff --git a/upb/upb/util/BUILD b/upb/upb/util/BUILD
new file mode 100644
index 0000000..201a035
--- /dev/null
+++ b/upb/upb/util/BUILD
@@ -0,0 +1,178 @@
+load(
+ "//bazel:upb_proto_library.bzl",
+ "upb_proto_library",
+ "upb_proto_reflection_library",
+)
+
+# begin:google_only
+# package(default_applicable_licenses = ["//:license"])
+# end:google_only
+
+licenses(["notice"])
+
+# Def to Proto
+
+cc_library(
+ name = "def_to_proto",
+ srcs = ["def_to_proto.c"],
+ hdrs = ["def_to_proto.h"],
+ visibility = ["//visibility:public"],
+ deps = [
+ "//:port",
+ "//:reflection",
+ "//:reflection_internal",
+ ],
+)
+
+proto_library(
+ name = "def_to_proto_test_proto",
+ srcs = [
+ "def_to_proto_public_import_test.proto",
+ "def_to_proto_regular_import_test.proto",
+ "def_to_proto_test.proto",
+ ],
+)
+
+upb_proto_library(
+ name = "def_to_proto_test_upb_proto",
+ deps = ["def_to_proto_test_proto"],
+)
+
+upb_proto_reflection_library(
+ name = "def_to_proto_test_upb_proto_reflection",
+ deps = ["def_to_proto_test_proto"],
+)
+
+cc_library(
+ name = "def_to_proto_test_lib",
+ testonly = 1,
+ hdrs = ["def_to_proto_test.h"],
+ deps = [
+ ":def_to_proto",
+ "//:base",
+ "//:descriptor_upb_proto",
+ "//:mem",
+ "//:reflection_internal",
+ "@com_google_googletest//:gtest",
+ "@com_google_protobuf//:protobuf",
+ ],
+)
+
+cc_test(
+ name = "def_to_proto_test",
+ srcs = ["def_to_proto_test.cc"],
+ deps = [
+ ":def_to_proto",
+ ":def_to_proto_test_lib",
+ ":def_to_proto_test_upb_proto",
+ ":def_to_proto_test_upb_proto_reflection",
+ "//:descriptor_upb_proto_reflection",
+ "//:mem",
+ "//:reflection",
+ "//upb/test:parse_text_proto",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest_main",
+ "@com_google_protobuf//:protobuf",
+ ],
+)
+
+# begin:google_only
+# cc_test(
+# name = "def_to_proto_fuzz_test",
+# srcs = ["def_to_proto_fuzz_test.cc"],
+# tags = ["clang_only"],
+# deps = [
+# ":def_to_proto_test_lib",
+# "//testing/fuzzing:fuzztest",
+# "@com_google_googletest//:gtest_main",
+# ],
+# )
+# end:google_only
+
+# Required fields
+
+cc_library(
+ name = "required_fields",
+ srcs = ["required_fields.c"],
+ hdrs = ["required_fields.h"],
+ visibility = ["//visibility:public"],
+ deps = [
+ "//:collections",
+ "//:port",
+ "//:reflection",
+ ],
+)
+
+proto_library(
+ name = "required_fields_test_proto",
+ srcs = ["required_fields_test.proto"],
+)
+
+upb_proto_library(
+ name = "required_fields_test_upb_proto",
+ deps = ["required_fields_test_proto"],
+)
+
+upb_proto_reflection_library(
+ name = "required_fields_test_upb_proto_reflection",
+ deps = ["required_fields_test_proto"],
+)
+
+cc_test(
+ name = "required_fields_test",
+ srcs = ["required_fields_test.cc"],
+ deps = [
+ ":required_fields",
+ ":required_fields_test_upb_proto",
+ ":required_fields_test_upb_proto_reflection",
+ "//:base",
+ "//:json",
+ "//:mem",
+ "//:reflection",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+# Compare
+
+cc_library(
+ name = "compare",
+ srcs = ["compare.c"],
+ hdrs = ["compare.h"],
+ visibility = ["//visibility:public"],
+ deps = [
+ "//:base",
+ "//:eps_copy_input_stream",
+ "//:port",
+ "//:wire_reader",
+ "//:wire_types",
+ ],
+)
+
+cc_test(
+ name = "compare_test",
+ srcs = ["compare_test.cc"],
+ deps = [
+ ":compare",
+ "//:wire_internal",
+ "//:wire_types",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+# begin:github_only
+filegroup(
+ name = "source_files",
+ srcs = [
+ "compare.c",
+ "compare.h",
+ "def_to_proto.c",
+ "def_to_proto.h",
+ "required_fields.c",
+ "required_fields.h",
+ ],
+ visibility = ["//python/dist:__pkg__"],
+)
+# end:github_only
diff --git a/upb/upb/util/README.md b/upb/upb/util/README.md
new file mode 100644
index 0000000..efcdacb
--- /dev/null
+++ b/upb/upb/util/README.md
@@ -0,0 +1,7 @@
+
+# upb util library
+
+The libraries in this directory contain useful functionality that is layered
+on top of the main upb APIs. In other words, the APIs in this directory have
+no special access to upb internals; you could easily implement the same things
+yourself.
diff --git a/upb/upb/util/compare.c b/upb/upb/util/compare.c
new file mode 100644
index 0000000..b96b94d
--- /dev/null
+++ b/upb/upb/util/compare.c
@@ -0,0 +1,311 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/util/compare.h"
+
+#include <stdlib.h>
+
+#include "upb/base/string_view.h"
+#include "upb/wire/eps_copy_input_stream.h"
+#include "upb/wire/reader.h"
+#include "upb/wire/types.h"
+// Must be last.
+#include "upb/port/def.inc"
+
+struct upb_UnknownFields;
+typedef struct upb_UnknownFields upb_UnknownFields;
+
+typedef struct {
+ uint32_t tag;
+ union {
+ uint64_t varint;
+ uint64_t uint64;
+ uint32_t uint32;
+ upb_StringView delimited;
+ upb_UnknownFields* group;
+ } data;
+} upb_UnknownField;
+
+struct upb_UnknownFields {
+ size_t size;
+ size_t capacity;
+ upb_UnknownField* fields;
+};
+
+typedef struct {
+ upb_EpsCopyInputStream stream;
+ upb_Arena* arena;
+ upb_UnknownField* tmp;
+ size_t tmp_size;
+ int depth;
+ upb_UnknownCompareResult status;
+ jmp_buf err;
+} upb_UnknownField_Context;
+
+UPB_NORETURN static void upb_UnknownFields_OutOfMemory(
+ upb_UnknownField_Context* ctx) {
+ ctx->status = kUpb_UnknownCompareResult_OutOfMemory;
+ UPB_LONGJMP(ctx->err, 1);
+}
+
+static void upb_UnknownFields_Grow(upb_UnknownField_Context* ctx,
+ upb_UnknownField** base,
+ upb_UnknownField** ptr,
+ upb_UnknownField** end) {
+ size_t old = (*ptr - *base);
+ size_t new = UPB_MAX(4, old * 2);
+
+ *base = upb_Arena_Realloc(ctx->arena, *base, old * sizeof(**base),
+ new * sizeof(**base));
+ if (!*base) upb_UnknownFields_OutOfMemory(ctx);
+
+ *ptr = *base + old;
+ *end = *base + new;
+}
+
+// We have to implement our own sort here, since qsort() is not an in-order
+// sort. Here we use merge sort, the simplest in-order sort.
+static void upb_UnknownFields_Merge(upb_UnknownField* arr, size_t start,
+ size_t mid, size_t end,
+ upb_UnknownField* tmp) {
+ memcpy(tmp, &arr[start], (end - start) * sizeof(*tmp));
+
+ upb_UnknownField* ptr1 = tmp;
+ upb_UnknownField* end1 = &tmp[mid - start];
+ upb_UnknownField* ptr2 = &tmp[mid - start];
+ upb_UnknownField* end2 = &tmp[end - start];
+ upb_UnknownField* out = &arr[start];
+
+ while (ptr1 < end1 && ptr2 < end2) {
+ if (ptr1->tag <= ptr2->tag) {
+ *out++ = *ptr1++;
+ } else {
+ *out++ = *ptr2++;
+ }
+ }
+
+ if (ptr1 < end1) {
+ memcpy(out, ptr1, (end1 - ptr1) * sizeof(*out));
+ } else if (ptr2 < end2) {
+ memcpy(out, ptr1, (end2 - ptr2) * sizeof(*out));
+ }
+}
+
+static void upb_UnknownFields_SortRecursive(upb_UnknownField* arr, size_t start,
+ size_t end, upb_UnknownField* tmp) {
+ if (end - start > 1) {
+ size_t mid = start + ((end - start) / 2);
+ upb_UnknownFields_SortRecursive(arr, start, mid, tmp);
+ upb_UnknownFields_SortRecursive(arr, mid, end, tmp);
+ upb_UnknownFields_Merge(arr, start, mid, end, tmp);
+ }
+}
+
+static void upb_UnknownFields_Sort(upb_UnknownField_Context* ctx,
+ upb_UnknownFields* fields) {
+ if (ctx->tmp_size < fields->size) {
+ ctx->tmp_size = UPB_MAX(8, ctx->tmp_size);
+ while (ctx->tmp_size < fields->size) ctx->tmp_size *= 2;
+ ctx->tmp = realloc(ctx->tmp, ctx->tmp_size * sizeof(*ctx->tmp));
+ }
+ upb_UnknownFields_SortRecursive(fields->fields, 0, fields->size, ctx->tmp);
+}
+
+static upb_UnknownFields* upb_UnknownFields_DoBuild(
+ upb_UnknownField_Context* ctx, const char** buf) {
+ upb_UnknownField* arr_base = NULL;
+ upb_UnknownField* arr_ptr = NULL;
+ upb_UnknownField* arr_end = NULL;
+ const char* ptr = *buf;
+ uint32_t last_tag = 0;
+ bool sorted = true;
+ while (!upb_EpsCopyInputStream_IsDone(&ctx->stream, &ptr)) {
+ uint32_t tag;
+ ptr = upb_WireReader_ReadTag(ptr, &tag);
+ UPB_ASSERT(tag <= UINT32_MAX);
+ int wire_type = upb_WireReader_GetWireType(tag);
+ if (wire_type == kUpb_WireType_EndGroup) break;
+ if (tag < last_tag) sorted = false;
+ last_tag = tag;
+
+ if (arr_ptr == arr_end) {
+ upb_UnknownFields_Grow(ctx, &arr_base, &arr_ptr, &arr_end);
+ }
+ upb_UnknownField* field = arr_ptr;
+ field->tag = tag;
+ arr_ptr++;
+
+ switch (wire_type) {
+ case kUpb_WireType_Varint:
+ ptr = upb_WireReader_ReadVarint(ptr, &field->data.varint);
+ break;
+ case kUpb_WireType_64Bit:
+ ptr = upb_WireReader_ReadFixed64(ptr, &field->data.uint64);
+ break;
+ case kUpb_WireType_32Bit:
+ ptr = upb_WireReader_ReadFixed32(ptr, &field->data.uint32);
+ break;
+ case kUpb_WireType_Delimited: {
+ int size;
+ ptr = upb_WireReader_ReadSize(ptr, &size);
+ const char* s_ptr = ptr;
+ ptr = upb_EpsCopyInputStream_ReadStringAliased(&ctx->stream, &s_ptr,
+ size);
+ field->data.delimited.data = s_ptr;
+ field->data.delimited.size = size;
+ break;
+ }
+ case kUpb_WireType_StartGroup:
+ if (--ctx->depth == 0) {
+ ctx->status = kUpb_UnknownCompareResult_MaxDepthExceeded;
+ UPB_LONGJMP(ctx->err, 1);
+ }
+ field->data.group = upb_UnknownFields_DoBuild(ctx, &ptr);
+ ctx->depth++;
+ break;
+ default:
+ UPB_UNREACHABLE();
+ }
+ }
+
+ *buf = ptr;
+ upb_UnknownFields* ret = upb_Arena_Malloc(ctx->arena, sizeof(*ret));
+ if (!ret) upb_UnknownFields_OutOfMemory(ctx);
+ ret->fields = arr_base;
+ ret->size = arr_ptr - arr_base;
+ ret->capacity = arr_end - arr_base;
+ if (!sorted) {
+ upb_UnknownFields_Sort(ctx, ret);
+ }
+ return ret;
+}
+
+// Builds a upb_UnknownFields data structure from the binary data in buf.
+static upb_UnknownFields* upb_UnknownFields_Build(upb_UnknownField_Context* ctx,
+ const char* ptr,
+ size_t size) {
+ upb_EpsCopyInputStream_Init(&ctx->stream, &ptr, size, true);
+ upb_UnknownFields* fields = upb_UnknownFields_DoBuild(ctx, &ptr);
+ UPB_ASSERT(upb_EpsCopyInputStream_IsDone(&ctx->stream, &ptr) &&
+ !upb_EpsCopyInputStream_IsError(&ctx->stream));
+ return fields;
+}
+
+// Compares two sorted upb_UnknownFields structures for equality.
+static bool upb_UnknownFields_IsEqual(const upb_UnknownFields* uf1,
+ const upb_UnknownFields* uf2) {
+ if (uf1->size != uf2->size) return false;
+ for (size_t i = 0, n = uf1->size; i < n; i++) {
+ upb_UnknownField* f1 = &uf1->fields[i];
+ upb_UnknownField* f2 = &uf2->fields[i];
+ if (f1->tag != f2->tag) return false;
+ int wire_type = f1->tag & 7;
+ switch (wire_type) {
+ case kUpb_WireType_Varint:
+ if (f1->data.varint != f2->data.varint) return false;
+ break;
+ case kUpb_WireType_64Bit:
+ if (f1->data.uint64 != f2->data.uint64) return false;
+ break;
+ case kUpb_WireType_32Bit:
+ if (f1->data.uint32 != f2->data.uint32) return false;
+ break;
+ case kUpb_WireType_Delimited:
+ if (!upb_StringView_IsEqual(f1->data.delimited, f2->data.delimited)) {
+ return false;
+ }
+ break;
+ case kUpb_WireType_StartGroup:
+ if (!upb_UnknownFields_IsEqual(f1->data.group, f2->data.group)) {
+ return false;
+ }
+ break;
+ default:
+ UPB_UNREACHABLE();
+ }
+ }
+ return true;
+}
+
+static upb_UnknownCompareResult upb_UnknownField_DoCompare(
+ upb_UnknownField_Context* ctx, const char* buf1, size_t size1,
+ const char* buf2, size_t size2) {
+ upb_UnknownCompareResult ret;
+ // First build both unknown fields into a sorted data structure (similar
+ // to the UnknownFieldSet in C++).
+ upb_UnknownFields* uf1 = upb_UnknownFields_Build(ctx, buf1, size1);
+ upb_UnknownFields* uf2 = upb_UnknownFields_Build(ctx, buf2, size2);
+
+ // Now perform the equality check on the sorted structures.
+ if (upb_UnknownFields_IsEqual(uf1, uf2)) {
+ ret = kUpb_UnknownCompareResult_Equal;
+ } else {
+ ret = kUpb_UnknownCompareResult_NotEqual;
+ }
+ return ret;
+}
+
+static upb_UnknownCompareResult upb_UnknownField_Compare(
+ upb_UnknownField_Context* const ctx, const char* const buf1,
+ const size_t size1, const char* const buf2, const size_t size2) {
+ upb_UnknownCompareResult ret;
+ if (UPB_SETJMP(ctx->err) == 0) {
+ ret = upb_UnknownField_DoCompare(ctx, buf1, size1, buf2, size2);
+ } else {
+ ret = ctx->status;
+ UPB_ASSERT(ret != kUpb_UnknownCompareResult_Equal);
+ }
+
+ upb_Arena_Free(ctx->arena);
+ free(ctx->tmp);
+ return ret;
+}
+
+upb_UnknownCompareResult upb_Message_UnknownFieldsAreEqual(const char* buf1,
+ size_t size1,
+ const char* buf2,
+ size_t size2,
+ int max_depth) {
+ if (size1 == 0 && size2 == 0) return kUpb_UnknownCompareResult_Equal;
+ if (size1 == 0 || size2 == 0) return kUpb_UnknownCompareResult_NotEqual;
+ if (memcmp(buf1, buf2, size1) == 0) return kUpb_UnknownCompareResult_Equal;
+
+ upb_UnknownField_Context ctx = {
+ .arena = upb_Arena_New(),
+ .depth = max_depth,
+ .tmp = NULL,
+ .tmp_size = 0,
+ .status = kUpb_UnknownCompareResult_Equal,
+ };
+
+ if (!ctx.arena) return kUpb_UnknownCompareResult_OutOfMemory;
+
+ return upb_UnknownField_Compare(&ctx, buf1, size1, buf2, size2);
+}
diff --git a/upb/upb/util/compare.h b/upb/upb/util/compare.h
new file mode 100644
index 0000000..2a0bf9e
--- /dev/null
+++ b/upb/upb/util/compare.h
@@ -0,0 +1,69 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_UTIL_COMPARE_H_
+#define UPB_UTIL_COMPARE_H_
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Returns true if unknown fields from the two messages are equal when sorted
+// and varints are made canonical.
+//
+// This function is discouraged, as the comparison is inherently lossy without
+// schema data:
+//
+// 1. We don't know whether delimited fields are sub-messages. Unknown
+// sub-messages will therefore not have their fields sorted and varints
+// canonicalized.
+// 2. We don't know about oneof/non-repeated fields, which should semantically
+// discard every value except the last.
+
+typedef enum {
+ kUpb_UnknownCompareResult_Equal = 0,
+ kUpb_UnknownCompareResult_NotEqual = 1,
+ kUpb_UnknownCompareResult_OutOfMemory = 2,
+ kUpb_UnknownCompareResult_MaxDepthExceeded = 3,
+} upb_UnknownCompareResult;
+
+upb_UnknownCompareResult upb_Message_UnknownFieldsAreEqual(const char* buf1,
+ size_t size1,
+ const char* buf2,
+ size_t size2,
+ int max_depth);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* UPB_UTIL_COMPARE_H_ */
diff --git a/upb/upb/util/compare_test.cc b/upb/upb/util/compare_test.cc
new file mode 100644
index 0000000..4a37412
--- /dev/null
+++ b/upb/upb/util/compare_test.cc
@@ -0,0 +1,197 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/util/compare.h"
+
+#include <stdint.h>
+
+#include <initializer_list>
+#include <string>
+#include <variant>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "upb/wire/internal/swap.h"
+#include "upb/wire/types.h"
+
+struct UnknownField;
+
+using UnknownFields = std::vector<UnknownField>;
+
+struct Varint {
+ explicit Varint(uint64_t _val) : val(_val) {}
+ uint64_t val;
+};
+struct LongVarint {
+ explicit LongVarint(uint64_t _val) : val(_val) {}
+ uint64_t val; // Over-encoded.
+};
+struct Delimited {
+ explicit Delimited(std::string _val) : val(_val) {}
+ std::string val;
+};
+struct Fixed64 {
+ explicit Fixed64(uint64_t _val) : val(_val) {}
+ uint64_t val;
+};
+struct Fixed32 {
+ explicit Fixed32(uint32_t _val) : val(_val) {}
+ uint32_t val;
+};
+struct Group {
+ Group(std::initializer_list<UnknownField> _val);
+ UnknownFields val;
+};
+
+struct UnknownField {
+ uint32_t field_number;
+ std::variant<Varint, LongVarint, Delimited, Fixed64, Fixed32, Group> value;
+};
+
+Group::Group(std::initializer_list<UnknownField> _val) : val(_val) {}
+
+void EncodeVarint(uint64_t val, std::string* str) {
+ do {
+ char byte = val & 0x7fU;
+ val >>= 7;
+ if (val) byte |= 0x80U;
+ str->push_back(byte);
+ } while (val);
+}
+
+std::string ToBinaryPayload(const UnknownFields& fields) {
+ std::string ret;
+
+ for (const auto& field : fields) {
+ if (const auto* val = std::get_if<Varint>(&field.value)) {
+ EncodeVarint(field.field_number << 3 | kUpb_WireType_Varint, &ret);
+ EncodeVarint(val->val, &ret);
+ } else if (const auto* val = std::get_if<LongVarint>(&field.value)) {
+ EncodeVarint(field.field_number << 3 | kUpb_WireType_Varint, &ret);
+ EncodeVarint(val->val, &ret);
+ ret.back() |= 0x80;
+ ret.push_back(0);
+ } else if (const auto* val = std::get_if<Delimited>(&field.value)) {
+ EncodeVarint(field.field_number << 3 | kUpb_WireType_Delimited, &ret);
+ EncodeVarint(val->val.size(), &ret);
+ ret.append(val->val);
+ } else if (const auto* val = std::get_if<Fixed64>(&field.value)) {
+ EncodeVarint(field.field_number << 3 | kUpb_WireType_64Bit, &ret);
+ uint64_t swapped = _upb_BigEndian_Swap64(val->val);
+ ret.append(reinterpret_cast<const char*>(&swapped), sizeof(swapped));
+ } else if (const auto* val = std::get_if<Fixed32>(&field.value)) {
+ EncodeVarint(field.field_number << 3 | kUpb_WireType_32Bit, &ret);
+ uint32_t swapped = _upb_BigEndian_Swap32(val->val);
+ ret.append(reinterpret_cast<const char*>(&swapped), sizeof(swapped));
+ } else if (const auto* val = std::get_if<Group>(&field.value)) {
+ EncodeVarint(field.field_number << 3 | kUpb_WireType_StartGroup, &ret);
+ ret.append(ToBinaryPayload(val->val));
+ EncodeVarint(field.field_number << 3 | kUpb_WireType_EndGroup, &ret);
+ }
+ }
+
+ return ret;
+}
+
+upb_UnknownCompareResult CompareUnknownWithMaxDepth(UnknownFields uf1,
+ UnknownFields uf2,
+ int max_depth) {
+ std::string buf1 = ToBinaryPayload(uf1);
+ std::string buf2 = ToBinaryPayload(uf2);
+ return upb_Message_UnknownFieldsAreEqual(buf1.data(), buf1.size(),
+ buf2.data(), buf2.size(), max_depth);
+}
+
+upb_UnknownCompareResult CompareUnknown(UnknownFields uf1, UnknownFields uf2) {
+ return CompareUnknownWithMaxDepth(uf1, uf2, 64);
+}
+
+TEST(CompareTest, UnknownFieldsReflexive) {
+ EXPECT_EQ(kUpb_UnknownCompareResult_Equal, CompareUnknown({}, {}));
+ EXPECT_EQ(kUpb_UnknownCompareResult_Equal,
+ CompareUnknown({{1, Varint(123)}, {2, Fixed32(456)}},
+ {{1, Varint(123)}, {2, Fixed32(456)}}));
+ EXPECT_EQ(
+ kUpb_UnknownCompareResult_Equal,
+ CompareUnknown(
+ {{1, Group({{2, Group({{3, Fixed32(456)}, {4, Fixed64(123)}})}})}},
+ {{1, Group({{2, Group({{3, Fixed32(456)}, {4, Fixed64(123)}})}})}}));
+}
+
+TEST(CompareTest, UnknownFieldsOrdering) {
+ EXPECT_EQ(kUpb_UnknownCompareResult_Equal,
+ CompareUnknown({{1, Varint(111)},
+ {2, Delimited("ABC")},
+ {3, Fixed32(456)},
+ {4, Fixed64(123)},
+ {5, Group({})}},
+ {{5, Group({})},
+ {4, Fixed64(123)},
+ {3, Fixed32(456)},
+ {2, Delimited("ABC")},
+ {1, Varint(111)}}));
+ EXPECT_EQ(kUpb_UnknownCompareResult_NotEqual,
+ CompareUnknown({{1, Varint(111)},
+ {2, Delimited("ABC")},
+ {3, Fixed32(456)},
+ {4, Fixed64(123)},
+ {5, Group({})}},
+ {{5, Group({})},
+ {4, Fixed64(123)},
+ {3, Fixed32(455)}, // Small difference.
+ {2, Delimited("ABC")},
+ {1, Varint(111)}}));
+ EXPECT_EQ(kUpb_UnknownCompareResult_Equal,
+ CompareUnknown({{3, Fixed32(456)}, {4, Fixed64(123)}},
+ {{4, Fixed64(123)}, {3, Fixed32(456)}}));
+ EXPECT_EQ(
+ kUpb_UnknownCompareResult_Equal,
+ CompareUnknown(
+ {{1, Group({{2, Group({{3, Fixed32(456)}, {4, Fixed64(123)}})}})}},
+ {{1, Group({{2, Group({{4, Fixed64(123)}, {3, Fixed32(456)}})}})}}));
+}
+
+TEST(CompareTest, LongVarint) {
+ EXPECT_EQ(kUpb_UnknownCompareResult_Equal,
+ CompareUnknown({{1, LongVarint(123)}, {2, LongVarint(456)}},
+ {{1, Varint(123)}, {2, Varint(456)}}));
+ EXPECT_EQ(kUpb_UnknownCompareResult_Equal,
+ CompareUnknown({{2, LongVarint(456)}, {1, LongVarint(123)}},
+ {{1, Varint(123)}, {2, Varint(456)}}));
+}
+
+TEST(CompareTest, MaxDepth) {
+ EXPECT_EQ(
+ kUpb_UnknownCompareResult_MaxDepthExceeded,
+ CompareUnknownWithMaxDepth(
+ {{1, Group({{2, Group({{3, Fixed32(456)}, {4, Fixed64(123)}})}})}},
+ {{1, Group({{2, Group({{4, Fixed64(123)}, {3, Fixed32(456)}})}})}},
+ 2));
+}
diff --git a/upb/upb/util/def_to_proto.c b/upb/upb/util/def_to_proto.c
new file mode 100644
index 0000000..64a1d2e
--- /dev/null
+++ b/upb/upb/util/def_to_proto.c
@@ -0,0 +1,693 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/util/def_to_proto.h"
+
+#include <inttypes.h>
+#include <math.h>
+
+#include "upb/port/vsnprintf_compat.h"
+#include "upb/reflection/enum_reserved_range.h"
+#include "upb/reflection/extension_range.h"
+#include "upb/reflection/internal/field_def.h"
+#include "upb/reflection/internal/file_def.h"
+#include "upb/reflection/message.h"
+#include "upb/reflection/message_reserved_range.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+typedef struct {
+ upb_Arena* arena;
+ jmp_buf err;
+} upb_ToProto_Context;
+
+#define CHK_OOM(val) \
+ if (!(val)) UPB_LONGJMP(ctx->err, 1);
+
+// We want to copy the options verbatim into the destination options proto.
+// We use serialize+parse as our deep copy.
+#define SET_OPTIONS(proto, desc_type, options_type, src) \
+ { \
+ size_t size; \
+ /* MEM: could use a temporary arena here instead. */ \
+ char* pb = google_protobuf_##options_type##_serialize(src, ctx->arena, &size); \
+ CHK_OOM(pb); \
+ google_protobuf_##options_type* dst = \
+ google_protobuf_##options_type##_parse(pb, size, ctx->arena); \
+ CHK_OOM(dst); \
+ google_protobuf_##desc_type##_set_options(proto, dst); \
+ }
+
+static upb_StringView strviewdup2(upb_ToProto_Context* ctx,
+ upb_StringView str) {
+ char* p = upb_Arena_Malloc(ctx->arena, str.size);
+ CHK_OOM(p);
+ memcpy(p, str.data, str.size);
+ return (upb_StringView){.data = p, .size = str.size};
+}
+
+static upb_StringView strviewdup(upb_ToProto_Context* ctx, const char* s) {
+ return strviewdup2(ctx, (upb_StringView){.data = s, .size = strlen(s)});
+}
+
+static upb_StringView qual_dup(upb_ToProto_Context* ctx, const char* s) {
+ size_t n = strlen(s);
+ char* p = upb_Arena_Malloc(ctx->arena, n + 1);
+ CHK_OOM(p);
+ p[0] = '.';
+ memcpy(p + 1, s, n);
+ return (upb_StringView){.data = p, .size = n + 1};
+}
+
+UPB_PRINTF(2, 3)
+static upb_StringView printf_dup(upb_ToProto_Context* ctx, const char* fmt,
+ ...) {
+ const size_t max = 32;
+ char* p = upb_Arena_Malloc(ctx->arena, max);
+ CHK_OOM(p);
+ va_list args;
+ va_start(args, fmt);
+ size_t n = _upb_vsnprintf(p, max, fmt, args);
+ va_end(args);
+ UPB_ASSERT(n < max);
+ return (upb_StringView){.data = p, .size = n};
+}
+
+static bool upb_isprint(char ch) { return ch >= 0x20 && ch <= 0x7f; }
+
+static int special_escape(char ch) {
+ switch (ch) {
+ // This is the same set of special escapes recognized by
+ // absl::CEscape().
+ case '\n':
+ return 'n';
+ case '\r':
+ return 'r';
+ case '\t':
+ return 't';
+ case '\\':
+ return '\\';
+ case '\'':
+ return '\'';
+ case '"':
+ return '"';
+ default:
+ return -1;
+ }
+}
+
+static upb_StringView default_bytes(upb_ToProto_Context* ctx,
+ upb_StringView val) {
+ size_t n = 0;
+ for (size_t i = 0; i < val.size; i++) {
+ char ch = val.data[i];
+ if (special_escape(ch) >= 0)
+ n += 2; // '\C'
+ else if (upb_isprint(ch))
+ n += 1;
+ else
+ n += 4; // '\123'
+ }
+ char* p = upb_Arena_Malloc(ctx->arena, n);
+ CHK_OOM(p);
+ char* dst = p;
+ const char* src = val.data;
+ const char* end = src + val.size;
+ while (src < end) {
+ unsigned char ch = *src++;
+ if (special_escape(ch) >= 0) {
+ *dst++ = '\\';
+ *dst++ = (char)special_escape(ch);
+ } else if (upb_isprint(ch)) {
+ *dst++ = ch;
+ } else {
+ *dst++ = '\\';
+ *dst++ = '0' + (ch >> 6);
+ *dst++ = '0' + ((ch >> 3) & 0x7);
+ *dst++ = '0' + (ch & 0x7);
+ }
+ }
+ return (upb_StringView){.data = p, .size = n};
+}
+
+static upb_StringView default_string(upb_ToProto_Context* ctx,
+ const upb_FieldDef* f) {
+ upb_MessageValue d = upb_FieldDef_Default(f);
+ upb_CType type = upb_FieldDef_CType(f);
+
+ if (type == kUpb_CType_Float || type == kUpb_CType_Double) {
+ double val = type == kUpb_CType_Float ? d.float_val : d.double_val;
+ if (val == INFINITY) {
+ return strviewdup(ctx, "inf");
+ } else if (val == -INFINITY) {
+ return strviewdup(ctx, "-inf");
+ } else if (val != val) {
+ return strviewdup(ctx, "nan");
+ }
+ }
+
+ switch (upb_FieldDef_CType(f)) {
+ case kUpb_CType_Bool:
+ return strviewdup(ctx, d.bool_val ? "true" : "false");
+ case kUpb_CType_Enum: {
+ const upb_EnumDef* e = upb_FieldDef_EnumSubDef(f);
+ const upb_EnumValueDef* ev =
+ upb_EnumDef_FindValueByNumber(e, d.int32_val);
+ return strviewdup(ctx, upb_EnumValueDef_Name(ev));
+ }
+ case kUpb_CType_Int64:
+ return printf_dup(ctx, "%" PRId64, d.int64_val);
+ case kUpb_CType_UInt64:
+ return printf_dup(ctx, "%" PRIu64, d.uint64_val);
+ case kUpb_CType_Int32:
+ return printf_dup(ctx, "%" PRId32, d.int32_val);
+ case kUpb_CType_UInt32:
+ return printf_dup(ctx, "%" PRIu32, d.uint32_val);
+ case kUpb_CType_Float:
+ return printf_dup(ctx, "%.9g", d.float_val);
+ case kUpb_CType_Double:
+ return printf_dup(ctx, "%.17g", d.double_val);
+ case kUpb_CType_String:
+ return strviewdup2(ctx, d.str_val);
+ case kUpb_CType_Bytes:
+ return default_bytes(ctx, d.str_val);
+ default:
+ UPB_UNREACHABLE();
+ }
+}
+
+static google_protobuf_DescriptorProto_ReservedRange* resrange_toproto(
+ upb_ToProto_Context* ctx, const upb_MessageReservedRange* r) {
+ google_protobuf_DescriptorProto_ReservedRange* proto =
+ google_protobuf_DescriptorProto_ReservedRange_new(ctx->arena);
+ CHK_OOM(proto);
+
+ google_protobuf_DescriptorProto_ReservedRange_set_start(
+ proto, upb_MessageReservedRange_Start(r));
+ google_protobuf_DescriptorProto_ReservedRange_set_end(proto,
+ upb_MessageReservedRange_End(r));
+
+ return proto;
+}
+
+static google_protobuf_EnumDescriptorProto_EnumReservedRange* enumresrange_toproto(
+ upb_ToProto_Context* ctx, const upb_EnumReservedRange* r) {
+ google_protobuf_EnumDescriptorProto_EnumReservedRange* proto =
+ google_protobuf_EnumDescriptorProto_EnumReservedRange_new(ctx->arena);
+ CHK_OOM(proto);
+
+ google_protobuf_EnumDescriptorProto_EnumReservedRange_set_start(
+ proto, upb_EnumReservedRange_Start(r));
+ google_protobuf_EnumDescriptorProto_EnumReservedRange_set_end(
+ proto, upb_EnumReservedRange_End(r));
+
+ return proto;
+}
+
+static google_protobuf_FieldDescriptorProto* fielddef_toproto(upb_ToProto_Context* ctx,
+ const upb_FieldDef* f) {
+ google_protobuf_FieldDescriptorProto* proto =
+ google_protobuf_FieldDescriptorProto_new(ctx->arena);
+ CHK_OOM(proto);
+
+ google_protobuf_FieldDescriptorProto_set_name(proto,
+ strviewdup(ctx, upb_FieldDef_Name(f)));
+ google_protobuf_FieldDescriptorProto_set_number(proto, upb_FieldDef_Number(f));
+ google_protobuf_FieldDescriptorProto_set_label(proto, upb_FieldDef_Label(f));
+ google_protobuf_FieldDescriptorProto_set_type(proto, upb_FieldDef_Type(f));
+
+ if (upb_FieldDef_HasJsonName(f)) {
+ google_protobuf_FieldDescriptorProto_set_json_name(
+ proto, strviewdup(ctx, upb_FieldDef_JsonName(f)));
+ }
+
+ if (upb_FieldDef_IsSubMessage(f)) {
+ google_protobuf_FieldDescriptorProto_set_type_name(
+ proto,
+ qual_dup(ctx, upb_MessageDef_FullName(upb_FieldDef_MessageSubDef(f))));
+ } else if (upb_FieldDef_CType(f) == kUpb_CType_Enum) {
+ google_protobuf_FieldDescriptorProto_set_type_name(
+ proto, qual_dup(ctx, upb_EnumDef_FullName(upb_FieldDef_EnumSubDef(f))));
+ }
+
+ if (upb_FieldDef_IsExtension(f)) {
+ google_protobuf_FieldDescriptorProto_set_extendee(
+ proto,
+ qual_dup(ctx, upb_MessageDef_FullName(upb_FieldDef_ContainingType(f))));
+ }
+
+ if (upb_FieldDef_HasDefault(f)) {
+ google_protobuf_FieldDescriptorProto_set_default_value(proto,
+ default_string(ctx, f));
+ }
+
+ const upb_OneofDef* o = upb_FieldDef_ContainingOneof(f);
+ if (o) {
+ google_protobuf_FieldDescriptorProto_set_oneof_index(proto, upb_OneofDef_Index(o));
+ }
+
+ if (_upb_FieldDef_IsProto3Optional(f)) {
+ google_protobuf_FieldDescriptorProto_set_proto3_optional(proto, true);
+ }
+
+ if (upb_FieldDef_HasOptions(f)) {
+ SET_OPTIONS(proto, FieldDescriptorProto, FieldOptions,
+ upb_FieldDef_Options(f));
+ }
+
+ return proto;
+}
+
+static google_protobuf_OneofDescriptorProto* oneofdef_toproto(upb_ToProto_Context* ctx,
+ const upb_OneofDef* o) {
+ google_protobuf_OneofDescriptorProto* proto =
+ google_protobuf_OneofDescriptorProto_new(ctx->arena);
+ CHK_OOM(proto);
+
+ google_protobuf_OneofDescriptorProto_set_name(proto,
+ strviewdup(ctx, upb_OneofDef_Name(o)));
+
+ if (upb_OneofDef_HasOptions(o)) {
+ SET_OPTIONS(proto, OneofDescriptorProto, OneofOptions,
+ upb_OneofDef_Options(o));
+ }
+
+ return proto;
+}
+
+static google_protobuf_EnumValueDescriptorProto* enumvaldef_toproto(
+ upb_ToProto_Context* ctx, const upb_EnumValueDef* e) {
+ google_protobuf_EnumValueDescriptorProto* proto =
+ google_protobuf_EnumValueDescriptorProto_new(ctx->arena);
+ CHK_OOM(proto);
+
+ google_protobuf_EnumValueDescriptorProto_set_name(
+ proto, strviewdup(ctx, upb_EnumValueDef_Name(e)));
+ google_protobuf_EnumValueDescriptorProto_set_number(proto, upb_EnumValueDef_Number(e));
+
+ if (upb_EnumValueDef_HasOptions(e)) {
+ SET_OPTIONS(proto, EnumValueDescriptorProto, EnumValueOptions,
+ upb_EnumValueDef_Options(e));
+ }
+
+ return proto;
+}
+
+static google_protobuf_EnumDescriptorProto* enumdef_toproto(upb_ToProto_Context* ctx,
+ const upb_EnumDef* e) {
+ google_protobuf_EnumDescriptorProto* proto =
+ google_protobuf_EnumDescriptorProto_new(ctx->arena);
+ CHK_OOM(proto);
+
+ google_protobuf_EnumDescriptorProto_set_name(proto,
+ strviewdup(ctx, upb_EnumDef_Name(e)));
+
+ int n = upb_EnumDef_ValueCount(e);
+ google_protobuf_EnumValueDescriptorProto** vals =
+ google_protobuf_EnumDescriptorProto_resize_value(proto, n, ctx->arena);
+ CHK_OOM(vals);
+ for (int i = 0; i < n; i++) {
+ vals[i] = enumvaldef_toproto(ctx, upb_EnumDef_Value(e, i));
+ }
+
+ n = upb_EnumDef_ReservedRangeCount(e);
+ google_protobuf_EnumDescriptorProto_EnumReservedRange** res_ranges =
+ google_protobuf_EnumDescriptorProto_resize_reserved_range(proto, n, ctx->arena);
+ for (int i = 0; i < n; i++) {
+ res_ranges[i] = enumresrange_toproto(ctx, upb_EnumDef_ReservedRange(e, i));
+ }
+
+ n = upb_EnumDef_ReservedNameCount(e);
+ upb_StringView* res_names =
+ google_protobuf_EnumDescriptorProto_resize_reserved_name(proto, n, ctx->arena);
+ for (int i = 0; i < n; i++) {
+ res_names[i] = upb_EnumDef_ReservedName(e, i);
+ }
+
+ if (upb_EnumDef_HasOptions(e)) {
+ SET_OPTIONS(proto, EnumDescriptorProto, EnumOptions,
+ upb_EnumDef_Options(e));
+ }
+
+ return proto;
+}
+
+static google_protobuf_DescriptorProto_ExtensionRange* extrange_toproto(
+ upb_ToProto_Context* ctx, const upb_ExtensionRange* e) {
+ google_protobuf_DescriptorProto_ExtensionRange* proto =
+ google_protobuf_DescriptorProto_ExtensionRange_new(ctx->arena);
+ CHK_OOM(proto);
+
+ google_protobuf_DescriptorProto_ExtensionRange_set_start(proto,
+ upb_ExtensionRange_Start(e));
+ google_protobuf_DescriptorProto_ExtensionRange_set_end(proto,
+ upb_ExtensionRange_End(e));
+
+ if (upb_ExtensionRange_HasOptions(e)) {
+ SET_OPTIONS(proto, DescriptorProto_ExtensionRange, ExtensionRangeOptions,
+ upb_ExtensionRange_Options(e));
+ }
+
+ return proto;
+}
+
+static google_protobuf_DescriptorProto* msgdef_toproto(upb_ToProto_Context* ctx,
+ const upb_MessageDef* m) {
+ google_protobuf_DescriptorProto* proto = google_protobuf_DescriptorProto_new(ctx->arena);
+ CHK_OOM(proto);
+
+ google_protobuf_DescriptorProto_set_name(proto,
+ strviewdup(ctx, upb_MessageDef_Name(m)));
+
+ int n;
+
+ n = upb_MessageDef_FieldCount(m);
+ google_protobuf_FieldDescriptorProto** fields =
+ google_protobuf_DescriptorProto_resize_field(proto, n, ctx->arena);
+ CHK_OOM(fields);
+ for (int i = 0; i < n; i++) {
+ fields[i] = fielddef_toproto(ctx, upb_MessageDef_Field(m, i));
+ }
+
+ n = upb_MessageDef_OneofCount(m);
+ google_protobuf_OneofDescriptorProto** oneofs =
+ google_protobuf_DescriptorProto_resize_oneof_decl(proto, n, ctx->arena);
+ for (int i = 0; i < n; i++) {
+ oneofs[i] = oneofdef_toproto(ctx, upb_MessageDef_Oneof(m, i));
+ }
+
+ n = upb_MessageDef_NestedMessageCount(m);
+ google_protobuf_DescriptorProto** nested_msgs =
+ google_protobuf_DescriptorProto_resize_nested_type(proto, n, ctx->arena);
+ for (int i = 0; i < n; i++) {
+ nested_msgs[i] = msgdef_toproto(ctx, upb_MessageDef_NestedMessage(m, i));
+ }
+
+ n = upb_MessageDef_NestedEnumCount(m);
+ google_protobuf_EnumDescriptorProto** nested_enums =
+ google_protobuf_DescriptorProto_resize_enum_type(proto, n, ctx->arena);
+ for (int i = 0; i < n; i++) {
+ nested_enums[i] = enumdef_toproto(ctx, upb_MessageDef_NestedEnum(m, i));
+ }
+
+ n = upb_MessageDef_NestedExtensionCount(m);
+ google_protobuf_FieldDescriptorProto** nested_exts =
+ google_protobuf_DescriptorProto_resize_extension(proto, n, ctx->arena);
+ for (int i = 0; i < n; i++) {
+ nested_exts[i] =
+ fielddef_toproto(ctx, upb_MessageDef_NestedExtension(m, i));
+ }
+
+ n = upb_MessageDef_ExtensionRangeCount(m);
+ google_protobuf_DescriptorProto_ExtensionRange** ext_ranges =
+ google_protobuf_DescriptorProto_resize_extension_range(proto, n, ctx->arena);
+ for (int i = 0; i < n; i++) {
+ ext_ranges[i] = extrange_toproto(ctx, upb_MessageDef_ExtensionRange(m, i));
+ }
+
+ n = upb_MessageDef_ReservedRangeCount(m);
+ google_protobuf_DescriptorProto_ReservedRange** res_ranges =
+ google_protobuf_DescriptorProto_resize_reserved_range(proto, n, ctx->arena);
+ for (int i = 0; i < n; i++) {
+ res_ranges[i] = resrange_toproto(ctx, upb_MessageDef_ReservedRange(m, i));
+ }
+
+ n = upb_MessageDef_ReservedNameCount(m);
+ upb_StringView* res_names =
+ google_protobuf_DescriptorProto_resize_reserved_name(proto, n, ctx->arena);
+ for (int i = 0; i < n; i++) {
+ res_names[i] = upb_MessageDef_ReservedName(m, i);
+ }
+
+ if (upb_MessageDef_HasOptions(m)) {
+ SET_OPTIONS(proto, DescriptorProto, MessageOptions,
+ upb_MessageDef_Options(m));
+ }
+
+ return proto;
+}
+
+static google_protobuf_MethodDescriptorProto* methoddef_toproto(upb_ToProto_Context* ctx,
+ const upb_MethodDef* m) {
+ google_protobuf_MethodDescriptorProto* proto =
+ google_protobuf_MethodDescriptorProto_new(ctx->arena);
+ CHK_OOM(proto);
+
+ google_protobuf_MethodDescriptorProto_set_name(proto,
+ strviewdup(ctx, upb_MethodDef_Name(m)));
+
+ google_protobuf_MethodDescriptorProto_set_input_type(
+ proto,
+ qual_dup(ctx, upb_MessageDef_FullName(upb_MethodDef_InputType(m))));
+ google_protobuf_MethodDescriptorProto_set_output_type(
+ proto,
+ qual_dup(ctx, upb_MessageDef_FullName(upb_MethodDef_OutputType(m))));
+
+ if (upb_MethodDef_ClientStreaming(m)) {
+ google_protobuf_MethodDescriptorProto_set_client_streaming(proto, true);
+ }
+
+ if (upb_MethodDef_ServerStreaming(m)) {
+ google_protobuf_MethodDescriptorProto_set_server_streaming(proto, true);
+ }
+
+ if (upb_MethodDef_HasOptions(m)) {
+ SET_OPTIONS(proto, MethodDescriptorProto, MethodOptions,
+ upb_MethodDef_Options(m));
+ }
+
+ return proto;
+}
+
+static google_protobuf_ServiceDescriptorProto* servicedef_toproto(
+ upb_ToProto_Context* ctx, const upb_ServiceDef* s) {
+ google_protobuf_ServiceDescriptorProto* proto =
+ google_protobuf_ServiceDescriptorProto_new(ctx->arena);
+ CHK_OOM(proto);
+
+ google_protobuf_ServiceDescriptorProto_set_name(
+ proto, strviewdup(ctx, upb_ServiceDef_Name(s)));
+
+ size_t n = upb_ServiceDef_MethodCount(s);
+ google_protobuf_MethodDescriptorProto** methods =
+ google_protobuf_ServiceDescriptorProto_resize_method(proto, n, ctx->arena);
+ for (size_t i = 0; i < n; i++) {
+ methods[i] = methoddef_toproto(ctx, upb_ServiceDef_Method(s, i));
+ }
+
+ if (upb_ServiceDef_HasOptions(s)) {
+ SET_OPTIONS(proto, ServiceDescriptorProto, ServiceOptions,
+ upb_ServiceDef_Options(s));
+ }
+
+ return proto;
+}
+
+static google_protobuf_FileDescriptorProto* filedef_toproto(upb_ToProto_Context* ctx,
+ const upb_FileDef* f) {
+ google_protobuf_FileDescriptorProto* proto =
+ google_protobuf_FileDescriptorProto_new(ctx->arena);
+ CHK_OOM(proto);
+
+ google_protobuf_FileDescriptorProto_set_name(proto,
+ strviewdup(ctx, upb_FileDef_Name(f)));
+
+ const char* package = upb_FileDef_Package(f);
+ if (package) {
+ size_t n = strlen(package);
+ if (n) {
+ google_protobuf_FileDescriptorProto_set_package(proto, strviewdup(ctx, package));
+ }
+ }
+
+ const char* edition = upb_FileDef_Edition(f);
+ if (edition != NULL) {
+ size_t n = strlen(edition);
+ if (n != 0) {
+ google_protobuf_FileDescriptorProto_set_edition(proto, strviewdup(ctx, edition));
+ }
+ }
+
+ if (upb_FileDef_Syntax(f) == kUpb_Syntax_Proto3) {
+ google_protobuf_FileDescriptorProto_set_syntax(proto, strviewdup(ctx, "proto3"));
+ }
+
+ size_t n;
+ n = upb_FileDef_DependencyCount(f);
+ upb_StringView* deps =
+ google_protobuf_FileDescriptorProto_resize_dependency(proto, n, ctx->arena);
+ for (size_t i = 0; i < n; i++) {
+ deps[i] = strviewdup(ctx, upb_FileDef_Name(upb_FileDef_Dependency(f, i)));
+ }
+
+ n = upb_FileDef_PublicDependencyCount(f);
+ int32_t* public_deps =
+ google_protobuf_FileDescriptorProto_resize_public_dependency(proto, n, ctx->arena);
+ const int32_t* public_dep_nums = _upb_FileDef_PublicDependencyIndexes(f);
+ if (n) memcpy(public_deps, public_dep_nums, n * sizeof(int32_t));
+
+ n = upb_FileDef_WeakDependencyCount(f);
+ int32_t* weak_deps =
+ google_protobuf_FileDescriptorProto_resize_weak_dependency(proto, n, ctx->arena);
+ const int32_t* weak_dep_nums = _upb_FileDef_WeakDependencyIndexes(f);
+ if (n) memcpy(weak_deps, weak_dep_nums, n * sizeof(int32_t));
+
+ n = upb_FileDef_TopLevelMessageCount(f);
+ google_protobuf_DescriptorProto** msgs =
+ google_protobuf_FileDescriptorProto_resize_message_type(proto, n, ctx->arena);
+ for (size_t i = 0; i < n; i++) {
+ msgs[i] = msgdef_toproto(ctx, upb_FileDef_TopLevelMessage(f, i));
+ }
+
+ n = upb_FileDef_TopLevelEnumCount(f);
+ google_protobuf_EnumDescriptorProto** enums =
+ google_protobuf_FileDescriptorProto_resize_enum_type(proto, n, ctx->arena);
+ for (size_t i = 0; i < n; i++) {
+ enums[i] = enumdef_toproto(ctx, upb_FileDef_TopLevelEnum(f, i));
+ }
+
+ n = upb_FileDef_ServiceCount(f);
+ google_protobuf_ServiceDescriptorProto** services =
+ google_protobuf_FileDescriptorProto_resize_service(proto, n, ctx->arena);
+ for (size_t i = 0; i < n; i++) {
+ services[i] = servicedef_toproto(ctx, upb_FileDef_Service(f, i));
+ }
+
+ n = upb_FileDef_TopLevelExtensionCount(f);
+ google_protobuf_FieldDescriptorProto** exts =
+ google_protobuf_FileDescriptorProto_resize_extension(proto, n, ctx->arena);
+ for (size_t i = 0; i < n; i++) {
+ exts[i] = fielddef_toproto(ctx, upb_FileDef_TopLevelExtension(f, i));
+ }
+
+ if (upb_FileDef_HasOptions(f)) {
+ SET_OPTIONS(proto, FileDescriptorProto, FileOptions,
+ upb_FileDef_Options(f));
+ }
+
+ return proto;
+}
+
+static google_protobuf_DescriptorProto* upb_ToProto_ConvertMessageDef(
+ upb_ToProto_Context* const ctx, const upb_MessageDef* const m) {
+ if (UPB_SETJMP(ctx->err)) return NULL;
+ return msgdef_toproto(ctx, m);
+}
+
+google_protobuf_DescriptorProto* upb_MessageDef_ToProto(const upb_MessageDef* m,
+ upb_Arena* a) {
+ upb_ToProto_Context ctx = {a};
+ return upb_ToProto_ConvertMessageDef(&ctx, m);
+}
+
+google_protobuf_EnumDescriptorProto* upb_ToProto_ConvertEnumDef(
+ upb_ToProto_Context* const ctx, const upb_EnumDef* const e) {
+ if (UPB_SETJMP(ctx->err)) return NULL;
+ return enumdef_toproto(ctx, e);
+}
+
+google_protobuf_EnumDescriptorProto* upb_EnumDef_ToProto(const upb_EnumDef* e,
+ upb_Arena* a) {
+ upb_ToProto_Context ctx = {a};
+ return upb_ToProto_ConvertEnumDef(&ctx, e);
+}
+
+google_protobuf_EnumValueDescriptorProto* upb_ToProto_ConvertEnumValueDef(
+ upb_ToProto_Context* const ctx, const upb_EnumValueDef* e) {
+ if (UPB_SETJMP(ctx->err)) return NULL;
+ return enumvaldef_toproto(ctx, e);
+}
+
+google_protobuf_EnumValueDescriptorProto* upb_EnumValueDef_ToProto(
+ const upb_EnumValueDef* e, upb_Arena* a) {
+ upb_ToProto_Context ctx = {a};
+ return upb_ToProto_ConvertEnumValueDef(&ctx, e);
+}
+
+google_protobuf_FieldDescriptorProto* upb_ToProto_ConvertFieldDef(
+ upb_ToProto_Context* const ctx, const upb_FieldDef* f) {
+ if (UPB_SETJMP(ctx->err)) return NULL;
+ return fielddef_toproto(ctx, f);
+}
+
+google_protobuf_FieldDescriptorProto* upb_FieldDef_ToProto(const upb_FieldDef* f,
+ upb_Arena* a) {
+ upb_ToProto_Context ctx = {a};
+ return upb_ToProto_ConvertFieldDef(&ctx, f);
+}
+
+google_protobuf_OneofDescriptorProto* upb_ToProto_ConvertOneofDef(
+ upb_ToProto_Context* const ctx, const upb_OneofDef* o) {
+ if (UPB_SETJMP(ctx->err)) return NULL;
+ return oneofdef_toproto(ctx, o);
+}
+
+google_protobuf_OneofDescriptorProto* upb_OneofDef_ToProto(const upb_OneofDef* o,
+ upb_Arena* a) {
+ upb_ToProto_Context ctx = {a};
+ return upb_ToProto_ConvertOneofDef(&ctx, o);
+}
+
+google_protobuf_FileDescriptorProto* upb_ToProto_ConvertFileDef(
+ upb_ToProto_Context* const ctx, const upb_FileDef* const f) {
+ if (UPB_SETJMP(ctx->err)) return NULL;
+ return filedef_toproto(ctx, f);
+}
+
+google_protobuf_FileDescriptorProto* upb_FileDef_ToProto(const upb_FileDef* f,
+ upb_Arena* a) {
+ upb_ToProto_Context ctx = {a};
+ return upb_ToProto_ConvertFileDef(&ctx, f);
+}
+
+google_protobuf_MethodDescriptorProto* upb_ToProto_ConvertMethodDef(
+ upb_ToProto_Context* const ctx, const upb_MethodDef* m) {
+ if (UPB_SETJMP(ctx->err)) return NULL;
+ return methoddef_toproto(ctx, m);
+}
+
+google_protobuf_MethodDescriptorProto* upb_MethodDef_ToProto(
+ const upb_MethodDef* const m, upb_Arena* a) {
+ upb_ToProto_Context ctx = {a};
+ return upb_ToProto_ConvertMethodDef(&ctx, m);
+}
+
+google_protobuf_ServiceDescriptorProto* upb_ToProto_ConvertServiceDef(
+ upb_ToProto_Context* const ctx, const upb_ServiceDef* const s) {
+ if (UPB_SETJMP(ctx->err)) return NULL;
+ return servicedef_toproto(ctx, s);
+}
+
+google_protobuf_ServiceDescriptorProto* upb_ServiceDef_ToProto(const upb_ServiceDef* s,
+ upb_Arena* a) {
+ upb_ToProto_Context ctx = {a};
+ return upb_ToProto_ConvertServiceDef(&ctx, s);
+}
diff --git a/upb/upb/util/def_to_proto.h b/upb/upb/util/def_to_proto.h
new file mode 100644
index 0000000..ba41db5
--- /dev/null
+++ b/upb/upb/util/def_to_proto.h
@@ -0,0 +1,65 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_UTIL_DEF_TO_PROTO_H_
+#define UPB_UTIL_DEF_TO_PROTO_H_
+
+#include "upb/reflection/def.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Functions for converting defs back to the equivalent descriptor proto.
+// Ultimately the goal is that a round-trip proto->def->proto is lossless. Each
+// function returns a new proto created in arena `a`, or NULL if memory
+// allocation failed.
+google_protobuf_DescriptorProto* upb_MessageDef_ToProto(const upb_MessageDef* m,
+ upb_Arena* a);
+google_protobuf_EnumDescriptorProto* upb_EnumDef_ToProto(const upb_EnumDef* e,
+ upb_Arena* a);
+google_protobuf_EnumValueDescriptorProto* upb_EnumValueDef_ToProto(
+ const upb_EnumValueDef* e, upb_Arena* a);
+google_protobuf_FieldDescriptorProto* upb_FieldDef_ToProto(
+ const upb_FieldDef* f, upb_Arena* a);
+google_protobuf_OneofDescriptorProto* upb_OneofDef_ToProto(
+ const upb_OneofDef* o, upb_Arena* a);
+google_protobuf_FileDescriptorProto* upb_FileDef_ToProto(const upb_FileDef* f,
+ upb_Arena* a);
+google_protobuf_MethodDescriptorProto* upb_MethodDef_ToProto(
+ const upb_MethodDef* m, upb_Arena* a);
+google_protobuf_ServiceDescriptorProto* upb_ServiceDef_ToProto(
+ const upb_ServiceDef* s, upb_Arena* a);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* UPB_UTIL_DEF_TO_PROTO_H_ */
diff --git a/upb/upb/util/def_to_proto_fuzz_test.cc b/upb/upb/util/def_to_proto_fuzz_test.cc
new file mode 100644
index 0000000..833bcc6
--- /dev/null
+++ b/upb/upb/util/def_to_proto_fuzz_test.cc
@@ -0,0 +1,55 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <string>
+
+#include "google/protobuf/descriptor.proto.h"
+#include "gtest/gtest.h"
+#include "testing/fuzzing/fuzztest.h"
+#include "upb/util/def_to_proto_test.h"
+
+namespace upb_test {
+
+FUZZ_TEST(FuzzTest, RoundTripDescriptor)
+ .WithDomains(
+ ::fuzztest::Arbitrary<google::protobuf::FileDescriptorSet>().WithProtobufField(
+ "file",
+ ::fuzztest::Arbitrary<google::protobuf::FileDescriptorProto>()
+ // upb_FileDef_ToProto() does not attempt to preserve
+ // source_code_info.
+ .WithFieldUnset("source_code_info")
+ .WithProtobufField(
+ "service",
+ ::fuzztest::Arbitrary<google::protobuf::ServiceDescriptorProto>()
+ // streams are google3-only, and we do not currently
+ // attempt to preserve them.
+ .WithFieldUnset("stream"))));
+
+} // namespace upb_test
diff --git a/upb/upb/util/def_to_proto_public_import_test.proto b/upb/upb/util/def_to_proto_public_import_test.proto
new file mode 100644
index 0000000..6491ff0
--- /dev/null
+++ b/upb/upb/util/def_to_proto_public_import_test.proto
@@ -0,0 +1,35 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto3";
+
+package pkg;
+
+message PublicImportMessage {}
diff --git a/upb/upb/util/def_to_proto_regular_import_test.proto b/upb/upb/util/def_to_proto_regular_import_test.proto
new file mode 100644
index 0000000..4c29eac
--- /dev/null
+++ b/upb/upb/util/def_to_proto_regular_import_test.proto
@@ -0,0 +1,39 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto3";
+
+package pkg;
+
+message RegularImportMessage {}
+
+enum Proto3Enum {
+ PROTO3_ENUM_ZERO = 0;
+}
diff --git a/upb/upb/util/def_to_proto_test.cc b/upb/upb/util/def_to_proto_test.cc
new file mode 100644
index 0000000..cf7b968
--- /dev/null
+++ b/upb/upb/util/def_to_proto_test.cc
@@ -0,0 +1,337 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/util/def_to_proto.h"
+
+#include <memory>
+#include <string>
+
+#include "google/protobuf/descriptor.pb.h"
+#include "google/protobuf/descriptor.upbdefs.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "google/protobuf/dynamic_message.h"
+#include "google/protobuf/util/message_differencer.h"
+#include "upb/mem/arena.hpp"
+#include "upb/reflection/def.hpp"
+#include "upb/test/parse_text_proto.h"
+#include "upb/util/def_to_proto_test.h"
+#include "upb/util/def_to_proto_test.upbdefs.h"
+
+namespace upb_test {
+
+// Loads and retrieves a descriptor for `msgdef` into the given `pool`.
+const google::protobuf::Descriptor* AddMessageDescriptor(
+ upb::MessageDefPtr msgdef, google::protobuf::DescriptorPool* pool) {
+ upb::Arena tmp_arena;
+ upb::FileDefPtr file = msgdef.file();
+ google_protobuf_FileDescriptorProto* upb_proto =
+ upb_FileDef_ToProto(file.ptr(), tmp_arena.ptr());
+ size_t size;
+ const char* buf = google_protobuf_FileDescriptorProto_serialize(
+ upb_proto, tmp_arena.ptr(), &size);
+ google::protobuf::FileDescriptorProto google_proto;
+ google_proto.ParseFromArray(buf, size);
+ const google::protobuf::FileDescriptor* file_desc =
+ pool->BuildFile(google_proto);
+ EXPECT_TRUE(file_desc != nullptr);
+ return pool->FindMessageTypeByName(msgdef.full_name());
+}
+
+// Converts a upb `msg` (with type `msgdef`) into a protobuf Message object from
+// the given factory and descriptor.
+std::unique_ptr<google::protobuf::Message> ToProto(
+ const upb_Message* msg, const upb_MessageDef* msgdef,
+ const google::protobuf::Descriptor* desc,
+ google::protobuf::MessageFactory* factory) {
+ upb::Arena arena;
+ EXPECT_TRUE(desc != nullptr);
+ std::unique_ptr<google::protobuf::Message> google_msg(
+ factory->GetPrototype(desc)->New());
+ char* buf;
+ size_t size;
+ upb_EncodeStatus status = upb_Encode(msg, upb_MessageDef_MiniTable(msgdef), 0,
+ arena.ptr(), &buf, &size);
+ EXPECT_EQ(status, kUpb_EncodeStatus_Ok);
+ google_msg->ParseFromArray(buf, size);
+ return google_msg;
+}
+
+// A gtest matcher that verifies that a proto is equal to `proto`. Both `proto`
+// and `arg` must be messages of type `msgdef_func` (a .upbdefs.h function that
+// loads a known msgdef into the given defpool).
+MATCHER_P2(EqualsUpbProto, proto, msgdef_func,
+ negation ? "are not equal" : "are equal") {
+ upb::DefPool defpool;
+ google::protobuf::DescriptorPool pool;
+ google::protobuf::DynamicMessageFactory factory;
+ upb::MessageDefPtr msgdef(msgdef_func(defpool.ptr()));
+ EXPECT_TRUE(msgdef.ptr() != nullptr);
+ const google::protobuf::Descriptor* desc =
+ AddMessageDescriptor(msgdef, &pool);
+ EXPECT_TRUE(desc != nullptr);
+ std::unique_ptr<google::protobuf::Message> m1(
+ ToProto(proto, msgdef.ptr(), desc, &factory));
+ std::unique_ptr<google::protobuf::Message> m2(
+ ToProto(arg, msgdef.ptr(), desc, &factory));
+ std::string differences;
+ google::protobuf::util::MessageDifferencer differencer;
+ differencer.ReportDifferencesToString(&differences);
+ bool eq = differencer.Compare(*m2, *m1);
+ if (!eq) {
+ *result_listener << differences;
+ }
+ return eq;
+}
+
+// Verifies that the given upb FileDef can be converted to a proto that matches
+// `proto`.
+void CheckFile(const upb::FileDefPtr file,
+ const google_protobuf_FileDescriptorProto* proto) {
+ upb::Arena arena;
+ google_protobuf_FileDescriptorProto* proto2 =
+ upb_FileDef_ToProto(file.ptr(), arena.ptr());
+ ASSERT_THAT(
+ proto,
+ EqualsUpbProto(proto2, google_protobuf_FileDescriptorProto_getmsgdef));
+}
+
+// Verifies that upb/util/def_to_proto_test.proto can round-trip:
+// serialized descriptor -> upb def -> serialized descriptor
+TEST(DefToProto, Test) {
+ upb::Arena arena;
+ upb::DefPool defpool;
+ upb_StringView test_file_desc =
+ upb_util_def_to_proto_test_proto_upbdefinit.descriptor;
+ const auto* file_desc = google_protobuf_FileDescriptorProto_parse(
+ test_file_desc.data, test_file_desc.size, arena.ptr());
+
+ upb::MessageDefPtr msgdef(pkg_Message_getmsgdef(defpool.ptr()));
+ upb::FileDefPtr file = msgdef.file();
+ CheckFile(file, file_desc);
+}
+
+// Like the previous test, but uses a message layout built at runtime.
+TEST(DefToProto, TestRuntimeReflection) {
+ upb::Arena arena;
+ upb::DefPool defpool;
+ upb_StringView test_file_desc =
+ upb_util_def_to_proto_test_proto_upbdefinit.descriptor;
+ const auto* file_desc = google_protobuf_FileDescriptorProto_parse(
+ test_file_desc.data, test_file_desc.size, arena.ptr());
+
+ _upb_DefPool_LoadDefInitEx(
+ defpool.ptr(),
+ &upb_util_def_to_proto_test_proto_upbdefinit, true);
+ upb::FileDefPtr file = defpool.FindFileByName(
+ upb_util_def_to_proto_test_proto_upbdefinit.filename);
+ CheckFile(file, file_desc);
+}
+
+// Fuzz test regressions.
+
+TEST(FuzzTest, EmptyPackage) {
+ RoundTripDescriptor(ParseTextProtoOrDie(R"pb(file { package: "" })pb"));
+}
+
+TEST(FuzzTest, EmptyName) {
+ RoundTripDescriptor(ParseTextProtoOrDie(R"pb(file { name: "" })pb"));
+}
+
+TEST(FuzzTest, EmptyPackage2) {
+ RoundTripDescriptor(
+ ParseTextProtoOrDie(R"pb(file { name: "n" package: "" })pb"));
+}
+
+TEST(FuzzTest, FileNameEmbeddedNull) {
+ RoundTripDescriptor(ParseTextProtoOrDie(R"pb(file { name: "\000" })pb"));
+}
+
+TEST(FuzzTest, EditionEmbeddedNull) {
+ RoundTripDescriptor(
+ ParseTextProtoOrDie(R"pb(file { name: "n" edition: "\000" })pb"));
+}
+
+TEST(FuzzTest, DuplicateOneofIndex) {
+ RoundTripDescriptor(ParseTextProtoOrDie(
+ R"pb(file {
+ name: "F"
+ message_type {
+ name: "M"
+ oneof_decl { name: "O" }
+ field { name: "f1" number: 1 type: TYPE_INT32 oneof_index: 0 }
+ field { name: "f2" number: 1 type: TYPE_INT32 oneof_index: 0 }
+ }
+ })pb"));
+}
+
+TEST(FuzzTest, NanValue) {
+ RoundTripDescriptor(ParseTextProtoOrDie(
+ R"pb(file {
+ enum_type {
+ value {
+ number: 0
+ options { uninterpreted_option { double_value: nan } }
+ }
+ }
+ })pb"));
+}
+
+TEST(FuzzTest, EnumValueEmbeddedNull) {
+ RoundTripDescriptor(ParseTextProtoOrDie(
+ R"pb(file {
+ name: "\035"
+ enum_type {
+ name: "f"
+ value { name: "\000" number: 0 }
+ }
+ })pb"));
+}
+
+TEST(FuzzTest, EnumValueNoNumber) {
+ RoundTripDescriptor(ParseTextProtoOrDie(
+ R"pb(file {
+ name: "\035"
+ enum_type {
+ name: "f"
+ value { name: "abc" }
+ }
+ })pb"));
+}
+
+TEST(FuzzTest, DefaultWithUnterminatedHex) {
+ RoundTripDescriptor(ParseTextProtoOrDie(
+ R"pb(file {
+ name: "\035"
+ message_type {
+ name: "A"
+ field {
+ name: "f"
+ number: 1
+ label: LABEL_OPTIONAL
+ type: TYPE_BYTES
+ default_value: "\\x"
+ }
+ }
+ })pb"));
+}
+
+TEST(FuzzTest, DefaultWithValidHexEscape) {
+ RoundTripDescriptor(ParseTextProtoOrDie(
+ R"pb(file {
+ name: "\035"
+ message_type {
+ name: "A"
+ field {
+ name: "f"
+ number: 1
+ label: LABEL_OPTIONAL
+ type: TYPE_BYTES
+ default_value: "\\x03"
+ }
+ }
+ })pb"));
+}
+
+TEST(FuzzTest, DefaultWithValidHexEscapePrintable) {
+ RoundTripDescriptor(ParseTextProtoOrDie(
+ R"pb(file {
+ name: "\035"
+ message_type {
+ name: "A"
+ field {
+ name: "f"
+ number: 1
+ label: LABEL_OPTIONAL
+ type: TYPE_BYTES
+ default_value: "\\x23" # 0x32 = '#'
+ }
+ }
+ })pb"));
+}
+
+// begin:google_only
+// TEST(FuzzTest, DependencyWithEmbeddedNull) {
+// RoundTripDescriptor(ParseTextProtoOrDie(R"pb(file {
+// name: "a"
+// dependency: "a\000"
+// options { cc_api_version: 0 }
+// weak_dependency: 0
+// })pb"));
+// }
+//
+// TEST(FuzzTest, NanInOptions) {
+// RoundTripDescriptor(
+// ParseTextProtoOrDie(R"pb(file {
+// name: ""
+// service {
+// name: "A"
+// options { failure_detection_delay: nan }
+// }
+// })pb"));
+// }
+// end:google_only
+
+TEST(FuzzTest, PackageStartsWithNumber) {
+ RoundTripDescriptor(
+ ParseTextProtoOrDie(R"pb(file { name: "" package: "0" })pb"));
+}
+
+TEST(FuzzTest, RoundTripDescriptorRegression) {
+ RoundTripDescriptor(ParseTextProtoOrDie(R"pb(file {
+ name: ""
+ message_type {
+ name: "A"
+ field {
+ name: "B"
+ number: 1
+ type: TYPE_BYTES
+ default_value: "\007"
+ }
+ }
+ })pb"));
+}
+
+// Multiple oneof fields which have the same name.
+TEST(FuzzTest, RoundTripDescriptorRegressionOneofSameName) {
+ RoundTripDescriptor(ParseTextProtoOrDie(
+ R"pb(file {
+ name: "N"
+ package: ""
+ message_type {
+ name: "b"
+ field { name: "W" number: 1 type: TYPE_BYTES oneof_index: 0 }
+ field { name: "W" number: 17 type: TYPE_UINT32 oneof_index: 0 }
+ oneof_decl { name: "k" }
+ }
+ })pb"));
+}
+
+} // namespace upb_test
diff --git a/upb/upb/util/def_to_proto_test.h b/upb/upb/util/def_to_proto_test.h
new file mode 100644
index 0000000..b006336
--- /dev/null
+++ b/upb/upb/util/def_to_proto_test.h
@@ -0,0 +1,148 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_UTIL_DEF_TO_PROTO_TEST_H_
+#define UPB_UTIL_DEF_TO_PROTO_TEST_H_
+
+#include <string>
+
+#include "google/protobuf/descriptor.pb.h"
+#include "google/protobuf/descriptor.upb.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "google/protobuf/descriptor.h"
+#include "google/protobuf/dynamic_message.h"
+#include "google/protobuf/util/field_comparator.h"
+#include "upb/base/status.hpp"
+#include "upb/mem/arena.hpp"
+#include "upb/reflection/def.hpp"
+#include "upb/util/def_to_proto.h"
+
+namespace upb_test {
+
+// A gtest matcher that verifies that a proto is equal to `proto`. Both `proto`
+// and `arg` must be messages of type `msgdef_func` (a .upbdefs.h function that
+// loads a known msgdef into the given defpool).
+MATCHER_P(EqualsProtoTreatNansAsEqual, proto,
+ negation ? "are not equal" : "are equal") {
+ upb::DefPool defpool;
+ google::protobuf::DescriptorPool pool;
+ google::protobuf::DynamicMessageFactory factory;
+ std::string differences;
+ google::protobuf::util::DefaultFieldComparator comparator;
+ comparator.set_treat_nan_as_equal(true);
+ google::protobuf::util::MessageDifferencer differencer;
+ differencer.set_field_comparator(&comparator);
+ differencer.ReportDifferencesToString(&differences);
+ bool eq = differencer.Compare(proto, arg);
+ if (!eq) {
+ *result_listener << differences;
+ }
+ return eq;
+}
+
+class NullErrorCollector : public google::protobuf::DescriptorPool::ErrorCollector {
+ void AddError(const std::string& filename, const std::string& element_name,
+ const google::protobuf::Message* descriptor, ErrorLocation location,
+ const std::string& message) override {}
+ void RecordWarning(absl::string_view filename, absl::string_view element_name,
+ const google::protobuf::Message* descriptor, ErrorLocation location,
+ absl::string_view message) override {}
+};
+
+static void AddFile(google::protobuf::FileDescriptorProto& file, upb::DefPool* pool,
+ google::protobuf::DescriptorPool* desc_pool) {
+ NullErrorCollector collector;
+ const google::protobuf::FileDescriptor* file_desc =
+ desc_pool->BuildFileCollectingErrors(file, &collector);
+
+ if (file_desc != nullptr) {
+ // The file descriptor was valid according to proto2.
+ google::protobuf::FileDescriptorProto normalized_file;
+ file_desc->CopyTo(&normalized_file);
+ std::string serialized;
+ normalized_file.SerializeToString(&serialized);
+ upb::Arena arena;
+ upb::Status status;
+ google_protobuf_FileDescriptorProto* proto = google_protobuf_FileDescriptorProto_parse(
+ serialized.data(), serialized.size(), arena.ptr());
+ ASSERT_NE(proto, nullptr);
+ upb::FileDefPtr file_def = pool->AddFile(proto, &status);
+
+ // Ideally we could assert that file_def is present here. After all, any
+ // descriptor accepted by C++ should be by definition valid. However C++
+ // performs some of its validation at the .proto file parser level instead
+ // of when validating descriptors. As as result, C++ will accept some
+ // unreasonable descriptors like:
+ // file { name: "" package: "0" }
+ //
+ // There is no .proto file that will produce this descriptor, but
+ // BuildFile() accepts it. We should probably clean up these cases so C++
+ // will reject them too.
+ if (!file_def) return;
+
+ ASSERT_TRUE(status.ok()) << status.error_message();
+ google_protobuf_FileDescriptorProto* upb_proto =
+ upb_FileDef_ToProto(file_def.ptr(), arena.ptr());
+ size_t size;
+ const char* buf =
+ google_protobuf_FileDescriptorProto_serialize(upb_proto, arena.ptr(), &size);
+ google::protobuf::FileDescriptorProto google_proto;
+ bool ok = google_proto.ParseFromArray(buf, size);
+ ASSERT_TRUE(ok);
+ EXPECT_THAT(google_proto, EqualsProtoTreatNansAsEqual(normalized_file));
+ } else {
+ // This file was invalid according to proto2. When we parse it with upb,
+ // it may or may not be accepted, since upb does not perform as much
+ // validation as proto2. However it must not crash.
+ std::string serialized;
+ file.SerializeToString(&serialized);
+ upb::Arena arena;
+ upb::Status status;
+ google_protobuf_FileDescriptorProto* proto = google_protobuf_FileDescriptorProto_parse(
+ serialized.data(), serialized.size(), arena.ptr());
+ ASSERT_NE(proto, nullptr);
+ pool->AddFile(proto, &status);
+ }
+}
+
+inline void RoundTripDescriptor(const google::protobuf::FileDescriptorSet& set) {
+ upb::DefPool defpool;
+ google::protobuf::DescriptorPool desc_pool;
+ desc_pool.EnforceWeakDependencies(true);
+ for (const auto& file : set.file()) {
+ google::protobuf::FileDescriptorProto mutable_file(file);
+ AddFile(mutable_file, &defpool, &desc_pool);
+ }
+}
+
+} // namespace upb_test
+
+#endif // UPB_UTIL_DEF_TO_PROTO_TEST_H_
diff --git a/upb/upb/util/def_to_proto_test.proto b/upb/upb/util/def_to_proto_test.proto
new file mode 100644
index 0000000..65a76b0
--- /dev/null
+++ b/upb/upb/util/def_to_proto_test.proto
@@ -0,0 +1,133 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto2";
+
+package pkg;
+
+import public "upb/util/def_to_proto_public_import_test.proto";
+
+import "upb/util/def_to_proto_regular_import_test.proto";
+
+option optimize_for = CODE_SIZE;
+option go_package = "foo_go_package";
+option java_package = "bar_java_package";
+option java_outer_classname = "baz_java_outer_classname";
+option csharp_namespace = "quux_csharp_namespace";
+option objc_class_prefix = "the_objc_prefix";
+option cc_enable_arenas = true;
+
+message Message {
+ optional int32 a = 1 [default = 3];
+ oneof foo {
+ string oneof_string = 2 [default = "abc\n"];
+ string oneof_bool = 3 [default = "true"];
+ bytes oneof_bytes = 4 [default = "abc\xef\xfe"];
+ }
+ optional pkg.RegularImportMessage regular_import_message = 6;
+ optional pkg.PublicImportMessage public_import_message = 7;
+ optional pkg.Proto3Enum proto3_enum = 8;
+ extensions 1000 to max;
+ extend Message {
+ optional int32 ext = 1000;
+ }
+
+ message NestedMessage {}
+ message NestedEnum {}
+
+ // TODO: support reserved ranges in defs.
+ // (At the moment they are ignored and will not round-trip through defs).
+ // reserved 4, 6 to 8;
+}
+
+enum Enum {
+ ZERO = 0;
+ ONE = 1;
+ NEGATIVE_ONE = -1;
+}
+
+enum EnumUpper32Value {
+ UPPER32_VALUE = 40;
+}
+
+enum HasDuplicateValues {
+ option allow_alias = true;
+
+ A = 0;
+ B = 1;
+ C = 120;
+ D = 130;
+
+ G = 120;
+ F = 1;
+ E = 0;
+ H = 121;
+ I = 121;
+ J = 121;
+ K = 121;
+}
+
+service Service {
+ rpc Bar(Message) returns (Message);
+}
+
+extend Message {
+ optional int32 ext = 1001;
+}
+
+enum Has31 {
+ VALUE_31 = 31;
+}
+
+message PretendMessageSet {
+ option message_set_wire_format = true;
+
+ // Since this is message_set_wire_format, "max" here means INT32_MAX.
+ // (For normal messages "max" would mean 2**29 - 1).
+ extensions 4 to 529999999;
+ extensions 530000000 to max
+ [declaration = {
+ number: 2147483646
+ full_name: ".pkg.MessageSetItem.message_set_extension"
+ type: ".pkg.MessageSetItem"
+ }];
+}
+
+message MessageSetItem {
+ extend PretendMessageSet {
+ // Since max is exclusive, this is INT32_MAX-1, not INT32_MAX.
+ optional MessageSetItem message_set_extension = 2147483646;
+ }
+}
+
+message UnusualDefaults {
+ optional bytes foo = 1 [default = "\\X"];
+ optional string bar = 2 [default = "\\X"];
+}
diff --git a/upb/upb/util/def_to_proto_weak_import_test.proto b/upb/upb/util/def_to_proto_weak_import_test.proto
new file mode 100644
index 0000000..1b123a7
--- /dev/null
+++ b/upb/upb/util/def_to_proto_weak_import_test.proto
@@ -0,0 +1,31 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto3";
diff --git a/upb/upb/util/def_to_proto_wweak_import_test.proto b/upb/upb/util/def_to_proto_wweak_import_test.proto
new file mode 100644
index 0000000..1b123a7
--- /dev/null
+++ b/upb/upb/util/def_to_proto_wweak_import_test.proto
@@ -0,0 +1,31 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto3";
diff --git a/upb/upb/util/required_fields.c b/upb/upb/util/required_fields.c
new file mode 100644
index 0000000..e0462c0
--- /dev/null
+++ b/upb/upb/util/required_fields.c
@@ -0,0 +1,313 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/util/required_fields.h"
+
+#include <inttypes.h>
+#include <stdarg.h>
+
+#include "upb/collections/map.h"
+#include "upb/port/vsnprintf_compat.h"
+#include "upb/reflection/message.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+////////////////////////////////////////////////////////////////////////////////
+// upb_FieldPath_ToText()
+////////////////////////////////////////////////////////////////////////////////
+
+typedef struct {
+ char* buf;
+ char* ptr;
+ char* end;
+ size_t overflow;
+} upb_PrintfAppender;
+
+UPB_PRINTF(2, 3)
+static void upb_FieldPath_Printf(upb_PrintfAppender* a, const char* fmt, ...) {
+ size_t n;
+ size_t have = a->end - a->ptr;
+ va_list args;
+
+ va_start(args, fmt);
+ n = _upb_vsnprintf(a->ptr, have, fmt, args);
+ va_end(args);
+
+ if (UPB_LIKELY(have > n)) {
+ // We can't end up here if the user passed (NULL, 0), therefore ptr is known
+ // to be non-NULL, and UPB_PTRADD() is not necessary.
+ assert(a->ptr);
+ a->ptr += n;
+ } else {
+ a->ptr = UPB_PTRADD(a->ptr, have);
+ a->overflow += (n - have);
+ }
+}
+
+static size_t upb_FieldPath_NullTerminate(upb_PrintfAppender* d, size_t size) {
+ size_t ret = d->ptr - d->buf + d->overflow;
+
+ if (size > 0) {
+ if (d->ptr == d->end) d->ptr--;
+ *d->ptr = '\0';
+ }
+
+ return ret;
+}
+
+static void upb_FieldPath_PutMapKey(upb_PrintfAppender* a,
+ upb_MessageValue map_key,
+ const upb_FieldDef* key_f) {
+ switch (upb_FieldDef_CType(key_f)) {
+ case kUpb_CType_Int32:
+ upb_FieldPath_Printf(a, "[%" PRId32 "]", map_key.int32_val);
+ break;
+ case kUpb_CType_Int64:
+ upb_FieldPath_Printf(a, "[%" PRId64 "]", map_key.int64_val);
+ break;
+ case kUpb_CType_UInt32:
+ upb_FieldPath_Printf(a, "[%" PRIu32 "]", map_key.uint32_val);
+ break;
+ case kUpb_CType_UInt64:
+ upb_FieldPath_Printf(a, "[%" PRIu64 "]", map_key.uint64_val);
+ break;
+ case kUpb_CType_Bool:
+ upb_FieldPath_Printf(a, "[%s]", map_key.bool_val ? "true" : "false");
+ break;
+ case kUpb_CType_String:
+ upb_FieldPath_Printf(a, "[\"");
+ for (size_t i = 0; i < map_key.str_val.size; i++) {
+ char ch = map_key.str_val.data[i];
+ if (ch == '"') {
+ upb_FieldPath_Printf(a, "\\\"");
+ } else {
+ upb_FieldPath_Printf(a, "%c", ch);
+ }
+ }
+ upb_FieldPath_Printf(a, "\"]");
+ break;
+ default:
+ UPB_UNREACHABLE(); // Other types can't be map keys.
+ }
+}
+
+size_t upb_FieldPath_ToText(upb_FieldPathEntry** path, char* buf, size_t size) {
+ upb_FieldPathEntry* ptr = *path;
+ upb_PrintfAppender appender;
+ appender.buf = buf;
+ appender.ptr = buf;
+ appender.end = UPB_PTRADD(buf, size);
+ appender.overflow = 0;
+ bool first = true;
+
+ while (ptr->field) {
+ const upb_FieldDef* f = ptr->field;
+
+ upb_FieldPath_Printf(&appender, first ? "%s" : ".%s", upb_FieldDef_Name(f));
+ first = false;
+ ptr++;
+
+ if (upb_FieldDef_IsMap(f)) {
+ const upb_FieldDef* key_f =
+ upb_MessageDef_Field(upb_FieldDef_MessageSubDef(f), 0);
+ upb_FieldPath_PutMapKey(&appender, ptr->map_key, key_f);
+ ptr++;
+ } else if (upb_FieldDef_IsRepeated(f)) {
+ upb_FieldPath_Printf(&appender, "[%zu]", ptr->array_index);
+ ptr++;
+ }
+ }
+
+ // Advance beyond terminating NULL.
+ ptr++;
+ *path = ptr;
+ return upb_FieldPath_NullTerminate(&appender, size);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// upb_util_HasUnsetRequired()
+////////////////////////////////////////////////////////////////////////////////
+
+typedef struct {
+ upb_FieldPathEntry* path;
+ size_t size;
+ size_t cap;
+} upb_FieldPathVector;
+
+typedef struct {
+ upb_FieldPathVector stack;
+ upb_FieldPathVector out_fields;
+ const upb_DefPool* ext_pool;
+ jmp_buf err;
+ bool has_unset_required;
+ bool save_paths;
+} upb_FindContext;
+
+static void upb_FieldPathVector_Init(upb_FieldPathVector* vec) {
+ vec->path = NULL;
+ vec->size = 0;
+ vec->cap = 0;
+}
+
+static void upb_FieldPathVector_Reserve(upb_FindContext* ctx,
+ upb_FieldPathVector* vec,
+ size_t elems) {
+ if (vec->cap - vec->size < elems) {
+ size_t need = vec->size + elems;
+ vec->cap = UPB_MAX(4, vec->cap);
+ while (vec->cap < need) vec->cap *= 2;
+ vec->path = realloc(vec->path, vec->cap * sizeof(*vec->path));
+ if (!vec->path) {
+ UPB_LONGJMP(ctx->err, 1);
+ }
+ }
+}
+
+static void upb_FindContext_Push(upb_FindContext* ctx, upb_FieldPathEntry ent) {
+ if (!ctx->save_paths) return;
+ upb_FieldPathVector_Reserve(ctx, &ctx->stack, 1);
+ ctx->stack.path[ctx->stack.size++] = ent;
+}
+
+static void upb_FindContext_Pop(upb_FindContext* ctx) {
+ if (!ctx->save_paths) return;
+ assert(ctx->stack.size != 0);
+ ctx->stack.size--;
+}
+
+static void upb_util_FindUnsetInMessage(upb_FindContext* ctx,
+ const upb_Message* msg,
+ const upb_MessageDef* m) {
+ // Iterate over all fields to see if any required fields are missing.
+ for (int i = 0, n = upb_MessageDef_FieldCount(m); i < n; i++) {
+ const upb_FieldDef* f = upb_MessageDef_Field(m, i);
+ if (upb_FieldDef_Label(f) != kUpb_Label_Required) continue;
+
+ if (!msg || !upb_Message_HasFieldByDef(msg, f)) {
+ // A required field is missing.
+ ctx->has_unset_required = true;
+
+ if (ctx->save_paths) {
+ // Append the contents of the stack to the out array, then
+ // NULL-terminate.
+ upb_FieldPathVector_Reserve(ctx, &ctx->out_fields, ctx->stack.size + 2);
+ if (ctx->stack.size) {
+ memcpy(&ctx->out_fields.path[ctx->out_fields.size], ctx->stack.path,
+ ctx->stack.size * sizeof(*ctx->stack.path));
+ }
+ ctx->out_fields.size += ctx->stack.size;
+ ctx->out_fields.path[ctx->out_fields.size++] =
+ (upb_FieldPathEntry){.field = f};
+ ctx->out_fields.path[ctx->out_fields.size++] =
+ (upb_FieldPathEntry){.field = NULL};
+ }
+ }
+ }
+}
+
+static void upb_util_FindUnsetRequiredInternal(upb_FindContext* ctx,
+ const upb_Message* msg,
+ const upb_MessageDef* m) {
+ // OPT: add markers in the schema for where we can avoid iterating:
+ // 1. messages with no required fields.
+ // 2. messages that cannot possibly reach any required fields.
+
+ upb_util_FindUnsetInMessage(ctx, msg, m);
+ if (!msg) return;
+
+ // Iterate over all present fields to find sub-messages that might be missing
+ // required fields. This may revisit some of the fields already inspected
+ // in the previous loop. We do this separately because this loop will also
+ // find present extensions, which the previous loop will not.
+ //
+ // TODO(haberman): consider changing upb_Message_Next() to be capable of
+ // visiting extensions only, for example with a kUpb_Message_BeginEXT
+ // constant.
+ size_t iter = kUpb_Message_Begin;
+ const upb_FieldDef* f;
+ upb_MessageValue val;
+ while (upb_Message_Next(msg, m, ctx->ext_pool, &f, &val, &iter)) {
+ // Skip non-submessage fields.
+ if (!upb_FieldDef_IsSubMessage(f)) continue;
+
+ upb_FindContext_Push(ctx, (upb_FieldPathEntry){.field = f});
+ const upb_MessageDef* sub_m = upb_FieldDef_MessageSubDef(f);
+
+ if (upb_FieldDef_IsMap(f)) {
+ // Map field.
+ const upb_FieldDef* val_f = upb_MessageDef_Field(sub_m, 1);
+ const upb_MessageDef* val_m = upb_FieldDef_MessageSubDef(val_f);
+ if (!val_m) continue;
+ const upb_Map* map = val.map_val;
+ size_t iter = kUpb_Map_Begin;
+ upb_MessageValue key, map_val;
+ while (upb_Map_Next(map, &key, &map_val, &iter)) {
+ upb_FindContext_Push(ctx, (upb_FieldPathEntry){.map_key = key});
+ upb_util_FindUnsetRequiredInternal(ctx, map_val.msg_val, val_m);
+ upb_FindContext_Pop(ctx);
+ }
+ } else if (upb_FieldDef_IsRepeated(f)) {
+ // Repeated field.
+ const upb_Array* arr = val.array_val;
+ for (size_t i = 0, n = upb_Array_Size(arr); i < n; i++) {
+ upb_MessageValue elem = upb_Array_Get(arr, i);
+ upb_FindContext_Push(ctx, (upb_FieldPathEntry){.array_index = i});
+ upb_util_FindUnsetRequiredInternal(ctx, elem.msg_val, sub_m);
+ upb_FindContext_Pop(ctx);
+ }
+ } else {
+ // Scalar sub-message field.
+ upb_util_FindUnsetRequiredInternal(ctx, val.msg_val, sub_m);
+ }
+
+ upb_FindContext_Pop(ctx);
+ }
+}
+
+bool upb_util_HasUnsetRequired(const upb_Message* msg, const upb_MessageDef* m,
+ const upb_DefPool* ext_pool,
+ upb_FieldPathEntry** fields) {
+ upb_FindContext ctx;
+ ctx.has_unset_required = false;
+ ctx.save_paths = fields != NULL;
+ ctx.ext_pool = ext_pool;
+ upb_FieldPathVector_Init(&ctx.stack);
+ upb_FieldPathVector_Init(&ctx.out_fields);
+ upb_util_FindUnsetRequiredInternal(&ctx, msg, m);
+ free(ctx.stack.path);
+ if (fields) {
+ upb_FieldPathVector_Reserve(&ctx, &ctx.out_fields, 1);
+ ctx.out_fields.path[ctx.out_fields.size] =
+ (upb_FieldPathEntry){.field = NULL};
+ *fields = ctx.out_fields.path;
+ }
+ return ctx.has_unset_required;
+}
diff --git a/upb/upb/util/required_fields.h b/upb/upb/util/required_fields.h
new file mode 100644
index 0000000..68e4fcb
--- /dev/null
+++ b/upb/upb/util/required_fields.h
@@ -0,0 +1,97 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_UTIL_REQUIRED_FIELDS_H_
+#define UPB_UTIL_REQUIRED_FIELDS_H_
+
+#include "upb/reflection/def.h"
+#include "upb/reflection/message.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// A FieldPath can be encoded as an array of upb_FieldPathEntry, in the
+// following format:
+// { {.field = f1}, {.field = f2} } # f1.f2
+// { {.field = f1}, {.index = 5}, {.field = f2} } # f1[5].f2
+// { {.field = f1}, {.key = "abc"}, {.field = f2} } # f1["abc"].f2
+//
+// Users must look at the type of `field` to know if an index or map key
+// follows.
+//
+// A field path may be NULL-terminated, in which case a NULL field indicates
+// the end of the field path.
+typedef union {
+ const upb_FieldDef* field;
+ size_t array_index;
+ upb_MessageValue map_key;
+} upb_FieldPathEntry;
+
+// Writes a string representing `*path` to `buf` in the following textual
+// format:
+// foo.bar # Regular fields
+// repeated_baz[2].bar # Repeated field
+// int32_msg_map[5].bar # Integer-keyed map
+// string_msg_map["abc"] # String-keyed map
+// bool_msg_map[true] # Bool-keyed map
+//
+// The input array `*path` must be NULL-terminated. The pointer `*path` will be
+// updated to point to one past the terminating NULL pointer of the input array.
+//
+// The output buffer `buf` will always be NULL-terminated. If the output data
+// (including NULL terminator) exceeds `size`, the result will be truncated.
+// Returns the string length of the data we attempted to write, excluding the
+// terminating NULL.
+size_t upb_FieldPath_ToText(upb_FieldPathEntry** path, char* buf, size_t size);
+
+// Checks whether `msg` or any of its children has unset required fields,
+// returning `true` if any are found. `msg` may be NULL, in which case the
+// message will be treated as empty.
+//
+// When this function returns true, `fields` is updated (if non-NULL) to point
+// to a heap-allocated array encoding the field paths of the required fields
+// that are missing. Each path is terminated with {.field = NULL}, and a final
+// {.field = NULL} terminates the list of paths. The caller is responsible for
+// freeing this array.
+bool upb_util_HasUnsetRequired(const upb_Message* msg, const upb_MessageDef* m,
+ const upb_DefPool* ext_pool,
+ upb_FieldPathEntry** fields);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_UTIL_REQUIRED_FIELDS_H_ */
diff --git a/upb/upb/util/required_fields_test.cc b/upb/upb/util/required_fields_test.cc
new file mode 100644
index 0000000..99d9ea1
--- /dev/null
+++ b/upb/upb/util/required_fields_test.cc
@@ -0,0 +1,206 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/util/required_fields.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "upb/base/status.hpp"
+#include "upb/json/decode.h"
+#include "upb/mem/arena.hpp"
+#include "upb/reflection/def.hpp"
+#include "upb/util/required_fields_test.upb.h"
+#include "upb/util/required_fields_test.upbdefs.h"
+
+std::vector<std::string> PathsToText(upb_FieldPathEntry* entry) {
+ std::vector<std::string> ret;
+ char buf[1024]; // Larger than anything we'll use in this test.
+ while (entry->field) {
+ upb_FieldPathEntry* before = entry;
+ size_t len = upb_FieldPath_ToText(&entry, buf, sizeof(buf));
+ EXPECT_LT(len, sizeof(buf));
+ assert(len <= sizeof(buf));
+ ret.push_back(buf);
+
+ // Ensure that we can have a short buffer and that it will be
+ // NULL-terminated.
+ char shortbuf[4];
+ size_t len2 = upb_FieldPath_ToText(&before, shortbuf, sizeof(shortbuf));
+ EXPECT_EQ(len, len2);
+ EXPECT_EQ(ret.back().substr(0, sizeof(shortbuf) - 1),
+ std::string(shortbuf));
+ }
+ return ret;
+}
+
+void CheckRequired(absl::string_view json,
+ const std::vector<std::string>& missing) {
+ upb::Arena arena;
+ upb::DefPool defpool;
+ upb_util_test_TestRequiredFields* test_msg =
+ upb_util_test_TestRequiredFields_new(arena.ptr());
+ upb::MessageDefPtr m(
+ upb_util_test_TestRequiredFields_getmsgdef(defpool.ptr()));
+ upb::Status status;
+ EXPECT_TRUE(upb_JsonDecode(json.data(), json.size(), test_msg, m.ptr(),
+ defpool.ptr(), 0, arena.ptr(), status.ptr()))
+ << status.error_message();
+ upb_FieldPathEntry* entries;
+ EXPECT_EQ(!missing.empty(), upb_util_HasUnsetRequired(
+ test_msg, m.ptr(), defpool.ptr(), &entries));
+ EXPECT_EQ(missing, PathsToText(entries));
+ free(entries);
+
+ // Verify that we can pass a NULL pointer to entries when we don't care about
+ // them.
+ EXPECT_EQ(!missing.empty(),
+ upb_util_HasUnsetRequired(test_msg, m.ptr(), defpool.ptr(), NULL));
+}
+
+// message HasRequiredField {
+// required int32 required_int32 = 1;
+// }
+//
+// message TestRequiredFields {
+// required EmptyMessage required_message = 1;
+// optional TestRequiredFields optional_message = 2;
+// repeated HasRequiredField repeated_message = 3;
+// map<int32, HasRequiredField> map_int32_message = 4;
+// }
+TEST(RequiredFieldsTest, TestRequired) {
+ CheckRequired(R"json({})json", {"required_message"});
+ CheckRequired(R"json({"required_message": {}}")json", {});
+ CheckRequired(
+ R"json(
+ {
+ "optional_message": {}
+ }
+ )json",
+ {"required_message", "optional_message.required_message"});
+
+ // Repeated field.
+ CheckRequired(
+ R"json(
+ {
+ "optional_message": {
+ "repeated_message": [
+ {"required_int32": 1},
+ {},
+ {"required_int32": 2}
+ ]
+ }
+ }
+ )json",
+ {"required_message", "optional_message.required_message",
+ "optional_message.repeated_message[1].required_int32"});
+
+ // Int32 map key.
+ CheckRequired(
+ R"json(
+ {
+ "required_message": {},
+ "map_int32_message": {
+ "1": {"required_int32": 1},
+ "5": {},
+ "9": {"required_int32": 1}
+ }
+ }
+ )json",
+ {"map_int32_message[5].required_int32"});
+
+ // Int64 map key.
+ CheckRequired(
+ R"json(
+ {
+ "required_message": {},
+ "map_int64_message": {
+ "1": {"required_int32": 1},
+ "5": {},
+ "9": {"required_int32": 1}
+ }
+ }
+ )json",
+ {"map_int64_message[5].required_int32"});
+
+ // Uint32 map key.
+ CheckRequired(
+ R"json(
+ {
+ "required_message": {},
+ "map_uint32_message": {
+ "1": {"required_int32": 1},
+ "5": {},
+ "9": {"required_int32": 1}
+ }
+ }
+ )json",
+ {"map_uint32_message[5].required_int32"});
+
+ // Uint64 map key.
+ CheckRequired(
+ R"json(
+ {
+ "required_message": {},
+ "map_uint64_message": {
+ "1": {"required_int32": 1},
+ "5": {},
+ "9": {"required_int32": 1}
+ }
+ }
+ )json",
+ {"map_uint64_message[5].required_int32"});
+
+ // Bool map key.
+ CheckRequired(
+ R"json(
+ {
+ "required_message": {},
+ "map_bool_message": {
+ "false": {"required_int32": 1},
+ "true": {}
+ }
+ }
+ )json",
+ {"map_bool_message[true].required_int32"});
+
+ // String map key.
+ CheckRequired(
+ R"json(
+ {
+ "required_message": {},
+ "map_string_message": {
+ "abc": {"required_int32": 1},
+ "d\"ef": {}
+ }
+ }
+ )json",
+ {R"(map_string_message["d\"ef"].required_int32)"});
+}
diff --git a/upb/upb/util/required_fields_test.proto b/upb/upb/util/required_fields_test.proto
new file mode 100644
index 0000000..9fb4940
--- /dev/null
+++ b/upb/upb/util/required_fields_test.proto
@@ -0,0 +1,51 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto2";
+
+package upb_util_test;
+
+message EmptyMessage {}
+
+message HasRequiredField {
+ required int32 required_int32 = 1;
+}
+
+message TestRequiredFields {
+ required EmptyMessage required_message = 1;
+ optional TestRequiredFields optional_message = 2;
+ repeated HasRequiredField repeated_message = 3;
+ map<int32, HasRequiredField> map_int32_message = 4;
+ map<int64, HasRequiredField> map_int64_message = 5;
+ map<uint32, HasRequiredField> map_uint32_message = 6;
+ map<uint64, HasRequiredField> map_uint64_message = 7;
+ map<bool, HasRequiredField> map_bool_message = 8;
+ map<string, HasRequiredField> map_string_message = 9;
+}
diff --git a/upb/upb/wire/BUILD b/upb/upb/wire/BUILD
new file mode 100644
index 0000000..008777f
--- /dev/null
+++ b/upb/upb/wire/BUILD
@@ -0,0 +1,139 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load("//bazel:build_defs.bzl", "UPB_DEFAULT_COPTS")
+
+cc_library(
+ name = "wire",
+ srcs = [
+ ],
+ hdrs = [
+ "decode.h",
+ "encode.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":internal",
+ ":types",
+ "//:mem",
+ "//:message",
+ "//:mini_table",
+ "//:port",
+ ],
+)
+
+cc_library(
+ name = "internal",
+ srcs = [
+ "decode.c",
+ "decode.h",
+ "decode_fast.c",
+ "encode.c",
+ "encode.h",
+ ],
+ hdrs = [
+ "decode_fast.h",
+ "internal/common.h",
+ "internal/decode.h",
+ "internal/swap.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":eps_copy_input_stream",
+ ":reader",
+ ":types",
+ "//:base",
+ "//:collections_internal",
+ "//:mem",
+ "//:mem_internal",
+ "//:message",
+ "//:message_accessors_internal",
+ "//:message_internal",
+ "//:message_rep_internal",
+ "//:mini_table",
+ "//:port",
+ "@utf8_range",
+ ],
+)
+
+cc_library(
+ name = "reader",
+ srcs = [
+ "internal/swap.h",
+ "reader.c",
+ ],
+ hdrs = ["reader.h"],
+ visibility = ["//visibility:public"],
+ deps = [
+ ":eps_copy_input_stream",
+ ":types",
+ "//:port",
+ ],
+)
+
+cc_library(
+ name = "types",
+ hdrs = ["types.h"],
+ visibility = ["//visibility:public"],
+)
+
+cc_library(
+ name = "eps_copy_input_stream",
+ srcs = ["eps_copy_input_stream.c"],
+ hdrs = ["eps_copy_input_stream.h"],
+ visibility = ["//visibility:public"],
+ deps = [
+ "//:mem",
+ "//:port",
+ ],
+)
+
+cc_test(
+ name = "eps_copy_input_stream_test",
+ srcs = ["eps_copy_input_stream_test.cc"],
+ deps = [
+ ":eps_copy_input_stream",
+ "//:mem",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+# begin:github_only
+filegroup(
+ name = "source_files",
+ srcs = glob(
+ [
+ "**/*.c",
+ "**/*.h",
+ ],
+ ),
+ visibility = [
+ "//cmake:__pkg__",
+ "//python/dist:__pkg__",
+ ]
+)
+# end:github_only
diff --git a/upb/upb/wire/decode.c b/upb/upb/wire/decode.c
new file mode 100644
index 0000000..1f6e601
--- /dev/null
+++ b/upb/upb/wire/decode.c
@@ -0,0 +1,1383 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/wire/decode.h"
+
+#include <string.h>
+
+#include "upb/base/descriptor_constants.h"
+#include "upb/collections/internal/array.h"
+#include "upb/collections/internal/map.h"
+#include "upb/mem/internal/arena.h"
+#include "upb/message/internal/accessors.h"
+#include "upb/message/internal/map_entry.h"
+#include "upb/mini_table/sub.h"
+#include "upb/port/atomic.h"
+#include "upb/wire/encode.h"
+#include "upb/wire/eps_copy_input_stream.h"
+#include "upb/wire/internal/common.h"
+#include "upb/wire/internal/decode.h"
+#include "upb/wire/internal/swap.h"
+#include "upb/wire/reader.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+// A few fake field types for our tables.
+enum {
+ kUpb_FakeFieldType_FieldNotFound = 0,
+ kUpb_FakeFieldType_MessageSetItem = 19,
+};
+
+// DecodeOp: an action to be performed for a wire-type/field-type combination.
+enum {
+ // Special ops: we don't write data to regular fields for these.
+ kUpb_DecodeOp_UnknownField = -1,
+ kUpb_DecodeOp_MessageSetItem = -2,
+
+ // Scalar-only ops.
+ kUpb_DecodeOp_Scalar1Byte = 0,
+ kUpb_DecodeOp_Scalar4Byte = 2,
+ kUpb_DecodeOp_Scalar8Byte = 3,
+ kUpb_DecodeOp_Enum = 1,
+
+ // Scalar/repeated ops.
+ kUpb_DecodeOp_String = 4,
+ kUpb_DecodeOp_Bytes = 5,
+ kUpb_DecodeOp_SubMessage = 6,
+
+ // Repeated-only ops (also see macros below).
+ kUpb_DecodeOp_PackedEnum = 13,
+};
+
+// For packed fields it is helpful to be able to recover the lg2 of the data
+// size from the op.
+#define OP_FIXPCK_LG2(n) (n + 5) /* n in [2, 3] => op in [7, 8] */
+#define OP_VARPCK_LG2(n) (n + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */
+
+typedef union {
+ bool bool_val;
+ uint32_t uint32_val;
+ uint64_t uint64_val;
+ uint32_t size;
+} wireval;
+
+static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr,
+ upb_Message* msg,
+ const upb_MiniTable* layout);
+
+UPB_NORETURN static void* _upb_Decoder_ErrorJmp(upb_Decoder* d,
+ upb_DecodeStatus status) {
+ assert(status != kUpb_DecodeStatus_Ok);
+ d->status = status;
+ UPB_LONGJMP(d->err, 1);
+}
+
+const char* _upb_FastDecoder_ErrorJmp(upb_Decoder* d, int status) {
+ assert(status != kUpb_DecodeStatus_Ok);
+ d->status = status;
+ UPB_LONGJMP(d->err, 1);
+ return NULL;
+}
+
+static void _upb_Decoder_VerifyUtf8(upb_Decoder* d, const char* buf, int len) {
+ if (!_upb_Decoder_VerifyUtf8Inline(buf, len)) {
+ _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8);
+ }
+}
+
+static bool _upb_Decoder_Reserve(upb_Decoder* d, upb_Array* arr, size_t elem) {
+ bool need_realloc = arr->capacity - arr->size < elem;
+ if (need_realloc && !_upb_array_realloc(arr, arr->size + elem, &d->arena)) {
+ _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
+ }
+ return need_realloc;
+}
+
+typedef struct {
+ const char* ptr;
+ uint64_t val;
+} _upb_DecodeLongVarintReturn;
+
+UPB_NOINLINE
+static _upb_DecodeLongVarintReturn _upb_Decoder_DecodeLongVarint(
+ const char* ptr, uint64_t val) {
+ _upb_DecodeLongVarintReturn ret = {NULL, 0};
+ uint64_t byte;
+ int i;
+ for (i = 1; i < 10; i++) {
+ byte = (uint8_t)ptr[i];
+ val += (byte - 1) << (i * 7);
+ if (!(byte & 0x80)) {
+ ret.ptr = ptr + i + 1;
+ ret.val = val;
+ return ret;
+ }
+ }
+ return ret;
+}
+
+UPB_FORCEINLINE
+static const char* _upb_Decoder_DecodeVarint(upb_Decoder* d, const char* ptr,
+ uint64_t* val) {
+ uint64_t byte = (uint8_t)*ptr;
+ if (UPB_LIKELY((byte & 0x80) == 0)) {
+ *val = byte;
+ return ptr + 1;
+ } else {
+ _upb_DecodeLongVarintReturn res = _upb_Decoder_DecodeLongVarint(ptr, byte);
+ if (!res.ptr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
+ *val = res.val;
+ return res.ptr;
+ }
+}
+
+UPB_FORCEINLINE
+static const char* _upb_Decoder_DecodeTag(upb_Decoder* d, const char* ptr,
+ uint32_t* val) {
+ uint64_t byte = (uint8_t)*ptr;
+ if (UPB_LIKELY((byte & 0x80) == 0)) {
+ *val = byte;
+ return ptr + 1;
+ } else {
+ const char* start = ptr;
+ _upb_DecodeLongVarintReturn res = _upb_Decoder_DecodeLongVarint(ptr, byte);
+ if (!res.ptr || res.ptr - start > 5 || res.val > UINT32_MAX) {
+ _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
+ }
+ *val = res.val;
+ return res.ptr;
+ }
+}
+
+UPB_FORCEINLINE
+static const char* upb_Decoder_DecodeSize(upb_Decoder* d, const char* ptr,
+ uint32_t* size) {
+ uint64_t size64;
+ ptr = _upb_Decoder_DecodeVarint(d, ptr, &size64);
+ if (size64 >= INT32_MAX ||
+ !upb_EpsCopyInputStream_CheckSize(&d->input, ptr, (int)size64)) {
+ _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
+ }
+ *size = size64;
+ return ptr;
+}
+
+static void _upb_Decoder_MungeInt32(wireval* val) {
+ if (!_upb_IsLittleEndian()) {
+ /* The next stage will memcpy(dst, &val, 4) */
+ val->uint32_val = val->uint64_val;
+ }
+}
+
+static void _upb_Decoder_Munge(int type, wireval* val) {
+ switch (type) {
+ case kUpb_FieldType_Bool:
+ val->bool_val = val->uint64_val != 0;
+ break;
+ case kUpb_FieldType_SInt32: {
+ uint32_t n = val->uint64_val;
+ val->uint32_val = (n >> 1) ^ -(int32_t)(n & 1);
+ break;
+ }
+ case kUpb_FieldType_SInt64: {
+ uint64_t n = val->uint64_val;
+ val->uint64_val = (n >> 1) ^ -(int64_t)(n & 1);
+ break;
+ }
+ case kUpb_FieldType_Int32:
+ case kUpb_FieldType_UInt32:
+ case kUpb_FieldType_Enum:
+ _upb_Decoder_MungeInt32(val);
+ break;
+ }
+}
+
+static upb_Message* _upb_Decoder_NewSubMessage(upb_Decoder* d,
+ const upb_MiniTableSub* subs,
+ const upb_MiniTableField* field,
+ upb_TaggedMessagePtr* target) {
+ const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg;
+ UPB_ASSERT(subl);
+ upb_Message* msg = _upb_Message_New(subl, &d->arena);
+ if (!msg) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
+
+ // Extensions should not be unlinked. A message extension should not be
+ // registered until its sub-message type is available to be linked.
+ bool is_empty = subl == &_kUpb_MiniTable_Empty;
+ bool is_extension = field->mode & kUpb_LabelFlags_IsExtension;
+ UPB_ASSERT(!(is_empty && is_extension));
+
+ if (is_empty && !(d->options & kUpb_DecodeOption_ExperimentalAllowUnlinked)) {
+ _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_UnlinkedSubMessage);
+ }
+
+ upb_TaggedMessagePtr tagged = _upb_TaggedMessagePtr_Pack(msg, is_empty);
+ memcpy(target, &tagged, sizeof(tagged));
+ return msg;
+}
+
+static upb_Message* _upb_Decoder_ReuseSubMessage(
+ upb_Decoder* d, const upb_MiniTableSub* subs,
+ const upb_MiniTableField* field, upb_TaggedMessagePtr* target) {
+ upb_TaggedMessagePtr tagged = *target;
+ const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg;
+ UPB_ASSERT(subl);
+ if (!upb_TaggedMessagePtr_IsEmpty(tagged) || subl == &_kUpb_MiniTable_Empty) {
+ return _upb_TaggedMessagePtr_GetMessage(tagged);
+ }
+
+ // We found an empty message from a previous parse that was performed before
+ // this field was linked. But it is linked now, so we want to allocate a new
+ // message of the correct type and promote data into it before continuing.
+ upb_Message* existing = _upb_TaggedMessagePtr_GetEmptyMessage(tagged);
+ upb_Message* promoted = _upb_Decoder_NewSubMessage(d, subs, field, target);
+ size_t size;
+ const char* unknown = upb_Message_GetUnknown(existing, &size);
+ upb_DecodeStatus status = upb_Decode(unknown, size, promoted, subl, d->extreg,
+ d->options, &d->arena);
+ if (status != kUpb_DecodeStatus_Ok) _upb_Decoder_ErrorJmp(d, status);
+ return promoted;
+}
+
+static const char* _upb_Decoder_ReadString(upb_Decoder* d, const char* ptr,
+ int size, upb_StringView* str) {
+ const char* str_ptr = ptr;
+ ptr = upb_EpsCopyInputStream_ReadString(&d->input, &str_ptr, size, &d->arena);
+ if (!ptr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
+ str->data = str_ptr;
+ str->size = size;
+ return ptr;
+}
+
+UPB_FORCEINLINE
+static const char* _upb_Decoder_RecurseSubMessage(upb_Decoder* d,
+ const char* ptr,
+ upb_Message* submsg,
+ const upb_MiniTable* subl,
+ uint32_t expected_end_group) {
+ if (--d->depth < 0) {
+ _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded);
+ }
+ ptr = _upb_Decoder_DecodeMessage(d, ptr, submsg, subl);
+ d->depth++;
+ if (d->end_group != expected_end_group) {
+ _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
+ }
+ return ptr;
+}
+
+UPB_FORCEINLINE
+static const char* _upb_Decoder_DecodeSubMessage(
+ upb_Decoder* d, const char* ptr, upb_Message* submsg,
+ const upb_MiniTableSub* subs, const upb_MiniTableField* field, int size) {
+ int saved_delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, size);
+ const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg;
+ UPB_ASSERT(subl);
+ ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, DECODE_NOGROUP);
+ upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_delta);
+ return ptr;
+}
+
+UPB_FORCEINLINE
+static const char* _upb_Decoder_DecodeGroup(upb_Decoder* d, const char* ptr,
+ upb_Message* submsg,
+ const upb_MiniTable* subl,
+ uint32_t number) {
+ if (_upb_Decoder_IsDone(d, &ptr)) {
+ _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
+ }
+ ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, number);
+ d->end_group = DECODE_NOGROUP;
+ return ptr;
+}
+
+UPB_FORCEINLINE
+static const char* _upb_Decoder_DecodeUnknownGroup(upb_Decoder* d,
+ const char* ptr,
+ uint32_t number) {
+ return _upb_Decoder_DecodeGroup(d, ptr, NULL, NULL, number);
+}
+
+UPB_FORCEINLINE
+static const char* _upb_Decoder_DecodeKnownGroup(
+ upb_Decoder* d, const char* ptr, upb_Message* submsg,
+ const upb_MiniTableSub* subs, const upb_MiniTableField* field) {
+ const upb_MiniTable* subl = subs[field->UPB_PRIVATE(submsg_index)].submsg;
+ UPB_ASSERT(subl);
+ return _upb_Decoder_DecodeGroup(d, ptr, submsg, subl, field->number);
+}
+
+static char* upb_Decoder_EncodeVarint32(uint32_t val, char* ptr) {
+ do {
+ uint8_t byte = val & 0x7fU;
+ val >>= 7;
+ if (val) byte |= 0x80U;
+ *(ptr++) = byte;
+ } while (val);
+ return ptr;
+}
+
+static void _upb_Decoder_AddUnknownVarints(upb_Decoder* d, upb_Message* msg,
+ uint32_t val1, uint32_t val2) {
+ char buf[20];
+ char* end = buf;
+ end = upb_Decoder_EncodeVarint32(val1, end);
+ end = upb_Decoder_EncodeVarint32(val2, end);
+
+ if (!_upb_Message_AddUnknown(msg, buf, end - buf, &d->arena)) {
+ _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
+ }
+}
+
+UPB_NOINLINE
+static bool _upb_Decoder_CheckEnumSlow(upb_Decoder* d, const char* ptr,
+ upb_Message* msg,
+ const upb_MiniTableEnum* e,
+ const upb_MiniTableField* field,
+ uint32_t v) {
+ if (_upb_MiniTable_CheckEnumValueSlow(e, v)) return true;
+
+ // Unrecognized enum goes into unknown fields.
+ // For packed fields the tag could be arbitrarily far in the past, so we
+ // just re-encode the tag and value here.
+ uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Varint;
+ upb_Message* unknown_msg =
+ field->mode & kUpb_LabelFlags_IsExtension ? d->unknown_msg : msg;
+ _upb_Decoder_AddUnknownVarints(d, unknown_msg, tag, v);
+ return false;
+}
+
+UPB_FORCEINLINE
+static bool _upb_Decoder_CheckEnum(upb_Decoder* d, const char* ptr,
+ upb_Message* msg, const upb_MiniTableEnum* e,
+ const upb_MiniTableField* field,
+ wireval* val) {
+ uint32_t v = val->uint32_val;
+
+ _kUpb_FastEnumCheck_Status status = _upb_MiniTable_CheckEnumValueFast(e, v);
+ if (UPB_LIKELY(status == _kUpb_FastEnumCheck_ValueIsInEnum)) return true;
+ return _upb_Decoder_CheckEnumSlow(d, ptr, msg, e, field, v);
+}
+
+UPB_NOINLINE
+static const char* _upb_Decoder_DecodeEnumArray(upb_Decoder* d, const char* ptr,
+ upb_Message* msg,
+ upb_Array* arr,
+ const upb_MiniTableSub* subs,
+ const upb_MiniTableField* field,
+ wireval* val) {
+ const upb_MiniTableEnum* e = subs[field->UPB_PRIVATE(submsg_index)].subenum;
+ if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, val)) return ptr;
+ void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void);
+ arr->size++;
+ memcpy(mem, val, 4);
+ return ptr;
+}
+
+UPB_FORCEINLINE
+static const char* _upb_Decoder_DecodeFixedPacked(
+ upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val,
+ const upb_MiniTableField* field, int lg2) {
+ int mask = (1 << lg2) - 1;
+ size_t count = val->size >> lg2;
+ if ((val->size & mask) != 0) {
+ // Length isn't a round multiple of elem size.
+ _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
+ }
+ _upb_Decoder_Reserve(d, arr, count);
+ void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void);
+ arr->size += count;
+ // Note: if/when the decoder supports multi-buffer input, we will need to
+ // handle buffer seams here.
+ if (_upb_IsLittleEndian()) {
+ ptr = upb_EpsCopyInputStream_Copy(&d->input, ptr, mem, val->size);
+ } else {
+ int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size);
+ char* dst = mem;
+ while (!_upb_Decoder_IsDone(d, &ptr)) {
+ if (lg2 == 2) {
+ ptr = upb_WireReader_ReadFixed32(ptr, dst);
+ dst += 4;
+ } else {
+ UPB_ASSERT(lg2 == 3);
+ ptr = upb_WireReader_ReadFixed64(ptr, dst);
+ dst += 8;
+ }
+ }
+ upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta);
+ }
+
+ return ptr;
+}
+
+UPB_FORCEINLINE
+static const char* _upb_Decoder_DecodeVarintPacked(
+ upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val,
+ const upb_MiniTableField* field, int lg2) {
+ int scale = 1 << lg2;
+ int saved_limit = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size);
+ char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void);
+ while (!_upb_Decoder_IsDone(d, &ptr)) {
+ wireval elem;
+ ptr = _upb_Decoder_DecodeVarint(d, ptr, &elem.uint64_val);
+ _upb_Decoder_Munge(field->UPB_PRIVATE(descriptortype), &elem);
+ if (_upb_Decoder_Reserve(d, arr, 1)) {
+ out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void);
+ }
+ arr->size++;
+ memcpy(out, &elem, scale);
+ out += scale;
+ }
+ upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_limit);
+ return ptr;
+}
+
+UPB_NOINLINE
+static const char* _upb_Decoder_DecodeEnumPacked(
+ upb_Decoder* d, const char* ptr, upb_Message* msg, upb_Array* arr,
+ const upb_MiniTableSub* subs, const upb_MiniTableField* field,
+ wireval* val) {
+ const upb_MiniTableEnum* e = subs[field->UPB_PRIVATE(submsg_index)].subenum;
+ int saved_limit = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size);
+ char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void);
+ while (!_upb_Decoder_IsDone(d, &ptr)) {
+ wireval elem;
+ ptr = _upb_Decoder_DecodeVarint(d, ptr, &elem.uint64_val);
+ _upb_Decoder_MungeInt32(&elem);
+ if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, &elem)) {
+ continue;
+ }
+ if (_upb_Decoder_Reserve(d, arr, 1)) {
+ out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void);
+ }
+ arr->size++;
+ memcpy(out, &elem, 4);
+ out += 4;
+ }
+ upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_limit);
+ return ptr;
+}
+
+upb_Array* _upb_Decoder_CreateArray(upb_Decoder* d,
+ const upb_MiniTableField* field) {
+ /* Maps descriptor type -> elem_size_lg2. */
+ static const uint8_t kElemSizeLg2[] = {
+ [0] = -1, // invalid descriptor type
+ [kUpb_FieldType_Double] = 3,
+ [kUpb_FieldType_Float] = 2,
+ [kUpb_FieldType_Int64] = 3,
+ [kUpb_FieldType_UInt64] = 3,
+ [kUpb_FieldType_Int32] = 2,
+ [kUpb_FieldType_Fixed64] = 3,
+ [kUpb_FieldType_Fixed32] = 2,
+ [kUpb_FieldType_Bool] = 0,
+ [kUpb_FieldType_String] = UPB_SIZE(3, 4),
+ [kUpb_FieldType_Group] = UPB_SIZE(2, 3),
+ [kUpb_FieldType_Message] = UPB_SIZE(2, 3),
+ [kUpb_FieldType_Bytes] = UPB_SIZE(3, 4),
+ [kUpb_FieldType_UInt32] = 2,
+ [kUpb_FieldType_Enum] = 2,
+ [kUpb_FieldType_SFixed32] = 2,
+ [kUpb_FieldType_SFixed64] = 3,
+ [kUpb_FieldType_SInt32] = 2,
+ [kUpb_FieldType_SInt64] = 3,
+ };
+
+ size_t lg2 = kElemSizeLg2[field->UPB_PRIVATE(descriptortype)];
+ upb_Array* ret = _upb_Array_New(&d->arena, 4, lg2);
+ if (!ret) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
+ return ret;
+}
+
+static const char* _upb_Decoder_DecodeToArray(upb_Decoder* d, const char* ptr,
+ upb_Message* msg,
+ const upb_MiniTableSub* subs,
+ const upb_MiniTableField* field,
+ wireval* val, int op) {
+ upb_Array** arrp = UPB_PTR_AT(msg, field->offset, void);
+ upb_Array* arr = *arrp;
+ void* mem;
+
+ if (arr) {
+ _upb_Decoder_Reserve(d, arr, 1);
+ } else {
+ arr = _upb_Decoder_CreateArray(d, field);
+ *arrp = arr;
+ }
+
+ switch (op) {
+ case kUpb_DecodeOp_Scalar1Byte:
+ case kUpb_DecodeOp_Scalar4Byte:
+ case kUpb_DecodeOp_Scalar8Byte:
+ /* Append scalar value. */
+ mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << op, void);
+ arr->size++;
+ memcpy(mem, val, 1 << op);
+ return ptr;
+ case kUpb_DecodeOp_String:
+ _upb_Decoder_VerifyUtf8(d, ptr, val->size);
+ /* Fallthrough. */
+ case kUpb_DecodeOp_Bytes: {
+ /* Append bytes. */
+ upb_StringView* str = (upb_StringView*)_upb_array_ptr(arr) + arr->size;
+ arr->size++;
+ return _upb_Decoder_ReadString(d, ptr, val->size, str);
+ }
+ case kUpb_DecodeOp_SubMessage: {
+ /* Append submessage / group. */
+ upb_TaggedMessagePtr* target = UPB_PTR_AT(
+ _upb_array_ptr(arr), arr->size * sizeof(void*), upb_TaggedMessagePtr);
+ upb_Message* submsg = _upb_Decoder_NewSubMessage(d, subs, field, target);
+ arr->size++;
+ if (UPB_UNLIKELY(field->UPB_PRIVATE(descriptortype) ==
+ kUpb_FieldType_Group)) {
+ return _upb_Decoder_DecodeKnownGroup(d, ptr, submsg, subs, field);
+ } else {
+ return _upb_Decoder_DecodeSubMessage(d, ptr, submsg, subs, field,
+ val->size);
+ }
+ }
+ case OP_FIXPCK_LG2(2):
+ case OP_FIXPCK_LG2(3):
+ return _upb_Decoder_DecodeFixedPacked(d, ptr, arr, val, field,
+ op - OP_FIXPCK_LG2(0));
+ case OP_VARPCK_LG2(0):
+ case OP_VARPCK_LG2(2):
+ case OP_VARPCK_LG2(3):
+ return _upb_Decoder_DecodeVarintPacked(d, ptr, arr, val, field,
+ op - OP_VARPCK_LG2(0));
+ case kUpb_DecodeOp_Enum:
+ return _upb_Decoder_DecodeEnumArray(d, ptr, msg, arr, subs, field, val);
+ case kUpb_DecodeOp_PackedEnum:
+ return _upb_Decoder_DecodeEnumPacked(d, ptr, msg, arr, subs, field, val);
+ default:
+ UPB_UNREACHABLE();
+ }
+}
+
+upb_Map* _upb_Decoder_CreateMap(upb_Decoder* d, const upb_MiniTable* entry) {
+ /* Maps descriptor type -> upb map size. */
+ static const uint8_t kSizeInMap[] = {
+ [0] = -1, // invalid descriptor type */
+ [kUpb_FieldType_Double] = 8,
+ [kUpb_FieldType_Float] = 4,
+ [kUpb_FieldType_Int64] = 8,
+ [kUpb_FieldType_UInt64] = 8,
+ [kUpb_FieldType_Int32] = 4,
+ [kUpb_FieldType_Fixed64] = 8,
+ [kUpb_FieldType_Fixed32] = 4,
+ [kUpb_FieldType_Bool] = 1,
+ [kUpb_FieldType_String] = UPB_MAPTYPE_STRING,
+ [kUpb_FieldType_Group] = sizeof(void*),
+ [kUpb_FieldType_Message] = sizeof(void*),
+ [kUpb_FieldType_Bytes] = UPB_MAPTYPE_STRING,
+ [kUpb_FieldType_UInt32] = 4,
+ [kUpb_FieldType_Enum] = 4,
+ [kUpb_FieldType_SFixed32] = 4,
+ [kUpb_FieldType_SFixed64] = 8,
+ [kUpb_FieldType_SInt32] = 4,
+ [kUpb_FieldType_SInt64] = 8,
+ };
+
+ const upb_MiniTableField* key_field = &entry->fields[0];
+ const upb_MiniTableField* val_field = &entry->fields[1];
+ char key_size = kSizeInMap[key_field->UPB_PRIVATE(descriptortype)];
+ char val_size = kSizeInMap[val_field->UPB_PRIVATE(descriptortype)];
+ UPB_ASSERT(key_field->offset == offsetof(upb_MapEntryData, k));
+ UPB_ASSERT(val_field->offset == offsetof(upb_MapEntryData, v));
+ upb_Map* ret = _upb_Map_New(&d->arena, key_size, val_size);
+ if (!ret) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
+ return ret;
+}
+
+static const char* _upb_Decoder_DecodeToMap(upb_Decoder* d, const char* ptr,
+ upb_Message* msg,
+ const upb_MiniTableSub* subs,
+ const upb_MiniTableField* field,
+ wireval* val) {
+ upb_Map** map_p = UPB_PTR_AT(msg, field->offset, upb_Map*);
+ upb_Map* map = *map_p;
+ upb_MapEntry ent;
+ UPB_ASSERT(upb_MiniTableField_Type(field) == kUpb_FieldType_Message);
+ const upb_MiniTable* entry = subs[field->UPB_PRIVATE(submsg_index)].submsg;
+
+ UPB_ASSERT(entry);
+ UPB_ASSERT(entry->field_count == 2);
+ UPB_ASSERT(!upb_IsRepeatedOrMap(&entry->fields[0]));
+ UPB_ASSERT(!upb_IsRepeatedOrMap(&entry->fields[1]));
+
+ if (!map) {
+ map = _upb_Decoder_CreateMap(d, entry);
+ *map_p = map;
+ }
+
+ // Parse map entry.
+ memset(&ent, 0, sizeof(ent));
+
+ if (entry->fields[1].UPB_PRIVATE(descriptortype) == kUpb_FieldType_Message ||
+ entry->fields[1].UPB_PRIVATE(descriptortype) == kUpb_FieldType_Group) {
+ // Create proactively to handle the case where it doesn't appear.
+ upb_TaggedMessagePtr msg;
+ _upb_Decoder_NewSubMessage(d, entry->subs, &entry->fields[1], &msg);
+ ent.data.v.val = upb_value_uintptr(msg);
+ }
+
+ ptr =
+ _upb_Decoder_DecodeSubMessage(d, ptr, &ent.data, subs, field, val->size);
+ // check if ent had any unknown fields
+ size_t size;
+ upb_Message_GetUnknown(&ent.data, &size);
+ if (size != 0) {
+ char* buf;
+ size_t size;
+ uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Delimited;
+ upb_EncodeStatus status =
+ upb_Encode(&ent.data, entry, 0, &d->arena, &buf, &size);
+ if (status != kUpb_EncodeStatus_Ok) {
+ _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
+ }
+ _upb_Decoder_AddUnknownVarints(d, msg, tag, size);
+ if (!_upb_Message_AddUnknown(msg, buf, size, &d->arena)) {
+ _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
+ }
+ } else {
+ if (_upb_Map_Insert(map, &ent.data.k, map->key_size, &ent.data.v,
+ map->val_size,
+ &d->arena) == kUpb_MapInsertStatus_OutOfMemory) {
+ _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
+ }
+ }
+ return ptr;
+}
+
+static const char* _upb_Decoder_DecodeToSubMessage(
+ upb_Decoder* d, const char* ptr, upb_Message* msg,
+ const upb_MiniTableSub* subs, const upb_MiniTableField* field, wireval* val,
+ int op) {
+ void* mem = UPB_PTR_AT(msg, field->offset, void);
+ int type = field->UPB_PRIVATE(descriptortype);
+
+ if (UPB_UNLIKELY(op == kUpb_DecodeOp_Enum) &&
+ !_upb_Decoder_CheckEnum(d, ptr, msg,
+ subs[field->UPB_PRIVATE(submsg_index)].subenum,
+ field, val)) {
+ return ptr;
+ }
+
+ /* Set presence if necessary. */
+ if (field->presence > 0) {
+ _upb_sethas_field(msg, field);
+ } else if (field->presence < 0) {
+ /* Oneof case */
+ uint32_t* oneof_case = _upb_oneofcase_field(msg, field);
+ if (op == kUpb_DecodeOp_SubMessage && *oneof_case != field->number) {
+ memset(mem, 0, sizeof(void*));
+ }
+ *oneof_case = field->number;
+ }
+
+ /* Store into message. */
+ switch (op) {
+ case kUpb_DecodeOp_SubMessage: {
+ upb_TaggedMessagePtr* submsgp = mem;
+ upb_Message* submsg;
+ if (*submsgp) {
+ submsg = _upb_Decoder_ReuseSubMessage(d, subs, field, submsgp);
+ } else {
+ submsg = _upb_Decoder_NewSubMessage(d, subs, field, submsgp);
+ }
+ if (UPB_UNLIKELY(type == kUpb_FieldType_Group)) {
+ ptr = _upb_Decoder_DecodeKnownGroup(d, ptr, submsg, subs, field);
+ } else {
+ ptr = _upb_Decoder_DecodeSubMessage(d, ptr, submsg, subs, field,
+ val->size);
+ }
+ break;
+ }
+ case kUpb_DecodeOp_String:
+ _upb_Decoder_VerifyUtf8(d, ptr, val->size);
+ /* Fallthrough. */
+ case kUpb_DecodeOp_Bytes:
+ return _upb_Decoder_ReadString(d, ptr, val->size, mem);
+ case kUpb_DecodeOp_Scalar8Byte:
+ memcpy(mem, val, 8);
+ break;
+ case kUpb_DecodeOp_Enum:
+ case kUpb_DecodeOp_Scalar4Byte:
+ memcpy(mem, val, 4);
+ break;
+ case kUpb_DecodeOp_Scalar1Byte:
+ memcpy(mem, val, 1);
+ break;
+ default:
+ UPB_UNREACHABLE();
+ }
+
+ return ptr;
+}
+
+UPB_NOINLINE
+const char* _upb_Decoder_CheckRequired(upb_Decoder* d, const char* ptr,
+ const upb_Message* msg,
+ const upb_MiniTable* l) {
+ assert(l->required_count);
+ if (UPB_LIKELY((d->options & kUpb_DecodeOption_CheckRequired) == 0)) {
+ return ptr;
+ }
+ uint64_t msg_head;
+ memcpy(&msg_head, msg, 8);
+ msg_head = _upb_BigEndian_Swap64(msg_head);
+ if (upb_MiniTable_requiredmask(l) & ~msg_head) {
+ d->missing_required = true;
+ }
+ return ptr;
+}
+
+UPB_FORCEINLINE
+static bool _upb_Decoder_TryFastDispatch(upb_Decoder* d, const char** ptr,
+ upb_Message* msg,
+ const upb_MiniTable* layout) {
+#if UPB_FASTTABLE
+ if (layout && layout->table_mask != (unsigned char)-1) {
+ uint16_t tag = _upb_FastDecoder_LoadTag(*ptr);
+ intptr_t table = decode_totable(layout);
+ *ptr = _upb_FastDecoder_TagDispatch(d, *ptr, msg, table, 0, tag);
+ return true;
+ }
+#endif
+ return false;
+}
+
+static const char* upb_Decoder_SkipField(upb_Decoder* d, const char* ptr,
+ uint32_t tag) {
+ int field_number = tag >> 3;
+ int wire_type = tag & 7;
+ switch (wire_type) {
+ case kUpb_WireType_Varint: {
+ uint64_t val;
+ return _upb_Decoder_DecodeVarint(d, ptr, &val);
+ }
+ case kUpb_WireType_64Bit:
+ return ptr + 8;
+ case kUpb_WireType_32Bit:
+ return ptr + 4;
+ case kUpb_WireType_Delimited: {
+ uint32_t size;
+ ptr = upb_Decoder_DecodeSize(d, ptr, &size);
+ return ptr + size;
+ }
+ case kUpb_WireType_StartGroup:
+ return _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number);
+ default:
+ _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
+ }
+}
+
+enum {
+ kStartItemTag = ((kUpb_MsgSet_Item << 3) | kUpb_WireType_StartGroup),
+ kEndItemTag = ((kUpb_MsgSet_Item << 3) | kUpb_WireType_EndGroup),
+ kTypeIdTag = ((kUpb_MsgSet_TypeId << 3) | kUpb_WireType_Varint),
+ kMessageTag = ((kUpb_MsgSet_Message << 3) | kUpb_WireType_Delimited),
+};
+
+static void upb_Decoder_AddKnownMessageSetItem(
+ upb_Decoder* d, upb_Message* msg, const upb_MiniTableExtension* item_mt,
+ const char* data, uint32_t size) {
+ upb_Message_Extension* ext =
+ _upb_Message_GetOrCreateExtension(msg, item_mt, &d->arena);
+ if (UPB_UNLIKELY(!ext)) {
+ _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
+ }
+ upb_Message* submsg = _upb_Decoder_NewSubMessage(
+ d, &ext->ext->sub, &ext->ext->field, (upb_TaggedMessagePtr*)&ext->data);
+ upb_DecodeStatus status = upb_Decode(data, size, submsg, item_mt->sub.submsg,
+ d->extreg, d->options, &d->arena);
+ if (status != kUpb_DecodeStatus_Ok) _upb_Decoder_ErrorJmp(d, status);
+}
+
+static void upb_Decoder_AddUnknownMessageSetItem(upb_Decoder* d,
+ upb_Message* msg,
+ uint32_t type_id,
+ const char* message_data,
+ uint32_t message_size) {
+ char buf[60];
+ char* ptr = buf;
+ ptr = upb_Decoder_EncodeVarint32(kStartItemTag, ptr);
+ ptr = upb_Decoder_EncodeVarint32(kTypeIdTag, ptr);
+ ptr = upb_Decoder_EncodeVarint32(type_id, ptr);
+ ptr = upb_Decoder_EncodeVarint32(kMessageTag, ptr);
+ ptr = upb_Decoder_EncodeVarint32(message_size, ptr);
+ char* split = ptr;
+
+ ptr = upb_Decoder_EncodeVarint32(kEndItemTag, ptr);
+ char* end = ptr;
+
+ if (!_upb_Message_AddUnknown(msg, buf, split - buf, &d->arena) ||
+ !_upb_Message_AddUnknown(msg, message_data, message_size, &d->arena) ||
+ !_upb_Message_AddUnknown(msg, split, end - split, &d->arena)) {
+ _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
+ }
+}
+
+static void upb_Decoder_AddMessageSetItem(upb_Decoder* d, upb_Message* msg,
+ const upb_MiniTable* t,
+ uint32_t type_id, const char* data,
+ uint32_t size) {
+ const upb_MiniTableExtension* item_mt =
+ upb_ExtensionRegistry_Lookup(d->extreg, t, type_id);
+ if (item_mt) {
+ upb_Decoder_AddKnownMessageSetItem(d, msg, item_mt, data, size);
+ } else {
+ upb_Decoder_AddUnknownMessageSetItem(d, msg, type_id, data, size);
+ }
+}
+
+static const char* upb_Decoder_DecodeMessageSetItem(
+ upb_Decoder* d, const char* ptr, upb_Message* msg,
+ const upb_MiniTable* layout) {
+ uint32_t type_id = 0;
+ upb_StringView preserved = {NULL, 0};
+ typedef enum {
+ kUpb_HaveId = 1 << 0,
+ kUpb_HavePayload = 1 << 1,
+ } StateMask;
+ StateMask state_mask = 0;
+ while (!_upb_Decoder_IsDone(d, &ptr)) {
+ uint32_t tag;
+ ptr = _upb_Decoder_DecodeTag(d, ptr, &tag);
+ switch (tag) {
+ case kEndItemTag:
+ return ptr;
+ case kTypeIdTag: {
+ uint64_t tmp;
+ ptr = _upb_Decoder_DecodeVarint(d, ptr, &tmp);
+ if (state_mask & kUpb_HaveId) break; // Ignore dup.
+ state_mask |= kUpb_HaveId;
+ type_id = tmp;
+ if (state_mask & kUpb_HavePayload) {
+ upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, preserved.data,
+ preserved.size);
+ }
+ break;
+ }
+ case kMessageTag: {
+ uint32_t size;
+ ptr = upb_Decoder_DecodeSize(d, ptr, &size);
+ const char* data = ptr;
+ ptr += size;
+ if (state_mask & kUpb_HavePayload) break; // Ignore dup.
+ state_mask |= kUpb_HavePayload;
+ if (state_mask & kUpb_HaveId) {
+ upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, data, size);
+ } else {
+ // Out of order, we must preserve the payload.
+ preserved.data = data;
+ preserved.size = size;
+ }
+ break;
+ }
+ default:
+ // We do not preserve unexpected fields inside a message set item.
+ ptr = upb_Decoder_SkipField(d, ptr, tag);
+ break;
+ }
+ }
+ _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
+}
+
+static const upb_MiniTableField* _upb_Decoder_FindField(upb_Decoder* d,
+ const upb_MiniTable* t,
+ uint32_t field_number,
+ int* last_field_index) {
+ static upb_MiniTableField none = {
+ 0, 0, 0, 0, kUpb_FakeFieldType_FieldNotFound, 0};
+ if (t == NULL) return &none;
+
+ size_t idx = ((size_t)field_number) - 1; // 0 wraps to SIZE_MAX
+ if (idx < t->dense_below) {
+ /* Fastest case: index into dense fields. */
+ goto found;
+ }
+
+ if (t->dense_below < t->field_count) {
+ /* Linear search non-dense fields. Resume scanning from last_field_index
+ * since fields are usually in order. */
+ size_t last = *last_field_index;
+ for (idx = last; idx < t->field_count; idx++) {
+ if (t->fields[idx].number == field_number) {
+ goto found;
+ }
+ }
+
+ for (idx = t->dense_below; idx < last; idx++) {
+ if (t->fields[idx].number == field_number) {
+ goto found;
+ }
+ }
+ }
+
+ if (d->extreg) {
+ switch (t->ext) {
+ case kUpb_ExtMode_Extendable: {
+ const upb_MiniTableExtension* ext =
+ upb_ExtensionRegistry_Lookup(d->extreg, t, field_number);
+ if (ext) return &ext->field;
+ break;
+ }
+ case kUpb_ExtMode_IsMessageSet:
+ if (field_number == kUpb_MsgSet_Item) {
+ static upb_MiniTableField item = {
+ 0, 0, 0, 0, kUpb_FakeFieldType_MessageSetItem, 0};
+ return &item;
+ }
+ break;
+ }
+ }
+
+ return &none; /* Unknown field. */
+
+found:
+ UPB_ASSERT(t->fields[idx].number == field_number);
+ *last_field_index = idx;
+ return &t->fields[idx];
+}
+
+int _upb_Decoder_GetVarintOp(const upb_MiniTableField* field) {
+ static const int8_t kVarintOps[] = {
+ [kUpb_FakeFieldType_FieldNotFound] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_Double] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_Float] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_Int64] = kUpb_DecodeOp_Scalar8Byte,
+ [kUpb_FieldType_UInt64] = kUpb_DecodeOp_Scalar8Byte,
+ [kUpb_FieldType_Int32] = kUpb_DecodeOp_Scalar4Byte,
+ [kUpb_FieldType_Fixed64] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_Fixed32] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_Bool] = kUpb_DecodeOp_Scalar1Byte,
+ [kUpb_FieldType_String] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_Group] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_Message] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_Bytes] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_UInt32] = kUpb_DecodeOp_Scalar4Byte,
+ [kUpb_FieldType_Enum] = kUpb_DecodeOp_Enum,
+ [kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_SInt32] = kUpb_DecodeOp_Scalar4Byte,
+ [kUpb_FieldType_SInt64] = kUpb_DecodeOp_Scalar8Byte,
+ [kUpb_FakeFieldType_MessageSetItem] = kUpb_DecodeOp_UnknownField,
+ };
+
+ return kVarintOps[field->UPB_PRIVATE(descriptortype)];
+}
+
+UPB_FORCEINLINE
+static void _upb_Decoder_CheckUnlinked(upb_Decoder* d, const upb_MiniTable* mt,
+ const upb_MiniTableField* field,
+ int* op) {
+ // If sub-message is not linked, treat as unknown.
+ if (field->mode & kUpb_LabelFlags_IsExtension) return;
+ const upb_MiniTableSub* sub = &mt->subs[field->UPB_PRIVATE(submsg_index)];
+ if ((d->options & kUpb_DecodeOption_ExperimentalAllowUnlinked) ||
+ sub->submsg != &_kUpb_MiniTable_Empty) {
+ return;
+ }
+#ifndef NDEBUG
+ const upb_MiniTableField* oneof = upb_MiniTable_GetOneof(mt, field);
+ if (oneof) {
+ // All other members of the oneof must be message fields that are also
+ // unlinked.
+ do {
+ assert(upb_MiniTableField_CType(oneof) == kUpb_CType_Message);
+ const upb_MiniTableSub* oneof_sub =
+ &mt->subs[oneof->UPB_PRIVATE(submsg_index)];
+ assert(!oneof_sub);
+ } while (upb_MiniTable_NextOneofField(mt, &oneof));
+ }
+#endif // NDEBUG
+ *op = kUpb_DecodeOp_UnknownField;
+}
+
+int _upb_Decoder_GetDelimitedOp(upb_Decoder* d, const upb_MiniTable* mt,
+ const upb_MiniTableField* field) {
+ enum { kRepeatedBase = 19 };
+
+ static const int8_t kDelimitedOps[] = {
+ /* For non-repeated field type. */
+ [kUpb_FakeFieldType_FieldNotFound] =
+ kUpb_DecodeOp_UnknownField, // Field not found.
+ [kUpb_FieldType_Double] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_Float] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_Int64] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_UInt64] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_Int32] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_Fixed64] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_Fixed32] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_Bool] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_String] = kUpb_DecodeOp_String,
+ [kUpb_FieldType_Group] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_Message] = kUpb_DecodeOp_SubMessage,
+ [kUpb_FieldType_Bytes] = kUpb_DecodeOp_Bytes,
+ [kUpb_FieldType_UInt32] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_Enum] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_SInt32] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FieldType_SInt64] = kUpb_DecodeOp_UnknownField,
+ [kUpb_FakeFieldType_MessageSetItem] = kUpb_DecodeOp_UnknownField,
+ // For repeated field type. */
+ [kRepeatedBase + kUpb_FieldType_Double] = OP_FIXPCK_LG2(3),
+ [kRepeatedBase + kUpb_FieldType_Float] = OP_FIXPCK_LG2(2),
+ [kRepeatedBase + kUpb_FieldType_Int64] = OP_VARPCK_LG2(3),
+ [kRepeatedBase + kUpb_FieldType_UInt64] = OP_VARPCK_LG2(3),
+ [kRepeatedBase + kUpb_FieldType_Int32] = OP_VARPCK_LG2(2),
+ [kRepeatedBase + kUpb_FieldType_Fixed64] = OP_FIXPCK_LG2(3),
+ [kRepeatedBase + kUpb_FieldType_Fixed32] = OP_FIXPCK_LG2(2),
+ [kRepeatedBase + kUpb_FieldType_Bool] = OP_VARPCK_LG2(0),
+ [kRepeatedBase + kUpb_FieldType_String] = kUpb_DecodeOp_String,
+ [kRepeatedBase + kUpb_FieldType_Group] = kUpb_DecodeOp_SubMessage,
+ [kRepeatedBase + kUpb_FieldType_Message] = kUpb_DecodeOp_SubMessage,
+ [kRepeatedBase + kUpb_FieldType_Bytes] = kUpb_DecodeOp_Bytes,
+ [kRepeatedBase + kUpb_FieldType_UInt32] = OP_VARPCK_LG2(2),
+ [kRepeatedBase + kUpb_FieldType_Enum] = kUpb_DecodeOp_PackedEnum,
+ [kRepeatedBase + kUpb_FieldType_SFixed32] = OP_FIXPCK_LG2(2),
+ [kRepeatedBase + kUpb_FieldType_SFixed64] = OP_FIXPCK_LG2(3),
+ [kRepeatedBase + kUpb_FieldType_SInt32] = OP_VARPCK_LG2(2),
+ [kRepeatedBase + kUpb_FieldType_SInt64] = OP_VARPCK_LG2(3),
+ // Omitting kUpb_FakeFieldType_MessageSetItem, because we never emit a
+ // repeated msgset type
+ };
+
+ int ndx = field->UPB_PRIVATE(descriptortype);
+ if (upb_FieldMode_Get(field) == kUpb_FieldMode_Array) ndx += kRepeatedBase;
+ int op = kDelimitedOps[ndx];
+
+ if (op == kUpb_DecodeOp_SubMessage) {
+ _upb_Decoder_CheckUnlinked(d, mt, field, &op);
+ }
+
+ return op;
+}
+
+UPB_FORCEINLINE
+static const char* _upb_Decoder_DecodeWireValue(upb_Decoder* d, const char* ptr,
+ const upb_MiniTable* mt,
+ const upb_MiniTableField* field,
+ int wire_type, wireval* val,
+ int* op) {
+ static const unsigned kFixed32OkMask = (1 << kUpb_FieldType_Float) |
+ (1 << kUpb_FieldType_Fixed32) |
+ (1 << kUpb_FieldType_SFixed32);
+
+ static const unsigned kFixed64OkMask = (1 << kUpb_FieldType_Double) |
+ (1 << kUpb_FieldType_Fixed64) |
+ (1 << kUpb_FieldType_SFixed64);
+
+ switch (wire_type) {
+ case kUpb_WireType_Varint:
+ ptr = _upb_Decoder_DecodeVarint(d, ptr, &val->uint64_val);
+ *op = _upb_Decoder_GetVarintOp(field);
+ _upb_Decoder_Munge(field->UPB_PRIVATE(descriptortype), val);
+ return ptr;
+ case kUpb_WireType_32Bit:
+ *op = kUpb_DecodeOp_Scalar4Byte;
+ if (((1 << field->UPB_PRIVATE(descriptortype)) & kFixed32OkMask) == 0) {
+ *op = kUpb_DecodeOp_UnknownField;
+ }
+ return upb_WireReader_ReadFixed32(ptr, &val->uint32_val);
+ case kUpb_WireType_64Bit:
+ *op = kUpb_DecodeOp_Scalar8Byte;
+ if (((1 << field->UPB_PRIVATE(descriptortype)) & kFixed64OkMask) == 0) {
+ *op = kUpb_DecodeOp_UnknownField;
+ }
+ return upb_WireReader_ReadFixed64(ptr, &val->uint64_val);
+ case kUpb_WireType_Delimited:
+ ptr = upb_Decoder_DecodeSize(d, ptr, &val->size);
+ *op = _upb_Decoder_GetDelimitedOp(d, mt, field);
+ return ptr;
+ case kUpb_WireType_StartGroup:
+ val->uint32_val = field->number;
+ if (field->UPB_PRIVATE(descriptortype) == kUpb_FieldType_Group) {
+ *op = kUpb_DecodeOp_SubMessage;
+ _upb_Decoder_CheckUnlinked(d, mt, field, op);
+ } else if (field->UPB_PRIVATE(descriptortype) ==
+ kUpb_FakeFieldType_MessageSetItem) {
+ *op = kUpb_DecodeOp_MessageSetItem;
+ } else {
+ *op = kUpb_DecodeOp_UnknownField;
+ }
+ return ptr;
+ default:
+ break;
+ }
+ _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
+}
+
+UPB_FORCEINLINE
+static const char* _upb_Decoder_DecodeKnownField(
+ upb_Decoder* d, const char* ptr, upb_Message* msg,
+ const upb_MiniTable* layout, const upb_MiniTableField* field, int op,
+ wireval* val) {
+ const upb_MiniTableSub* subs = layout->subs;
+ uint8_t mode = field->mode;
+
+ if (UPB_UNLIKELY(mode & kUpb_LabelFlags_IsExtension)) {
+ const upb_MiniTableExtension* ext_layout =
+ (const upb_MiniTableExtension*)field;
+ upb_Message_Extension* ext =
+ _upb_Message_GetOrCreateExtension(msg, ext_layout, &d->arena);
+ if (UPB_UNLIKELY(!ext)) {
+ _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
+ }
+ d->unknown_msg = msg;
+ msg = &ext->data;
+ subs = &ext->ext->sub;
+ }
+
+ switch (mode & kUpb_FieldMode_Mask) {
+ case kUpb_FieldMode_Array:
+ return _upb_Decoder_DecodeToArray(d, ptr, msg, subs, field, val, op);
+ case kUpb_FieldMode_Map:
+ return _upb_Decoder_DecodeToMap(d, ptr, msg, subs, field, val);
+ case kUpb_FieldMode_Scalar:
+ return _upb_Decoder_DecodeToSubMessage(d, ptr, msg, subs, field, val, op);
+ default:
+ UPB_UNREACHABLE();
+ }
+}
+
+static const char* _upb_Decoder_ReverseSkipVarint(const char* ptr,
+ uint32_t val) {
+ uint32_t seen = 0;
+ do {
+ ptr--;
+ seen <<= 7;
+ seen |= *ptr & 0x7f;
+ } while (seen != val);
+ return ptr;
+}
+
+static const char* _upb_Decoder_DecodeUnknownField(upb_Decoder* d,
+ const char* ptr,
+ upb_Message* msg,
+ int field_number,
+ int wire_type, wireval val) {
+ if (field_number == 0) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
+
+ // Since unknown fields are the uncommon case, we do a little extra work here
+ // to walk backwards through the buffer to find the field start. This frees
+ // up a register in the fast paths (when the field is known), which leads to
+ // significant speedups in benchmarks.
+ const char* start = ptr;
+
+ if (wire_type == kUpb_WireType_Delimited) ptr += val.size;
+ if (msg) {
+ switch (wire_type) {
+ case kUpb_WireType_Varint:
+ case kUpb_WireType_Delimited:
+ start--;
+ while (start[-1] & 0x80) start--;
+ break;
+ case kUpb_WireType_32Bit:
+ start -= 4;
+ break;
+ case kUpb_WireType_64Bit:
+ start -= 8;
+ break;
+ default:
+ break;
+ }
+
+ assert(start == d->debug_valstart);
+ uint32_t tag = ((uint32_t)field_number << 3) | wire_type;
+ start = _upb_Decoder_ReverseSkipVarint(start, tag);
+ assert(start == d->debug_tagstart);
+
+ if (wire_type == kUpb_WireType_StartGroup) {
+ d->unknown = start;
+ d->unknown_msg = msg;
+ ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number);
+ start = d->unknown;
+ d->unknown = NULL;
+ }
+ if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) {
+ _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
+ }
+ } else if (wire_type == kUpb_WireType_StartGroup) {
+ ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number);
+ }
+ return ptr;
+}
+
+UPB_NOINLINE
+static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr,
+ upb_Message* msg,
+ const upb_MiniTable* layout) {
+ int last_field_index = 0;
+
+#if UPB_FASTTABLE
+ // The first time we want to skip fast dispatch, because we may have just been
+ // invoked by the fast parser to handle a case that it bailed on.
+ if (!_upb_Decoder_IsDone(d, &ptr)) goto nofast;
+#endif
+
+ while (!_upb_Decoder_IsDone(d, &ptr)) {
+ uint32_t tag;
+ const upb_MiniTableField* field;
+ int field_number;
+ int wire_type;
+ wireval val;
+ int op;
+
+ if (_upb_Decoder_TryFastDispatch(d, &ptr, msg, layout)) break;
+
+#if UPB_FASTTABLE
+ nofast:
+#endif
+
+#ifndef NDEBUG
+ d->debug_tagstart = ptr;
+#endif
+
+ UPB_ASSERT(ptr < d->input.limit_ptr);
+ ptr = _upb_Decoder_DecodeTag(d, ptr, &tag);
+ field_number = tag >> 3;
+ wire_type = tag & 7;
+
+#ifndef NDEBUG
+ d->debug_valstart = ptr;
+#endif
+
+ if (wire_type == kUpb_WireType_EndGroup) {
+ d->end_group = field_number;
+ return ptr;
+ }
+
+ field = _upb_Decoder_FindField(d, layout, field_number, &last_field_index);
+ ptr = _upb_Decoder_DecodeWireValue(d, ptr, layout, field, wire_type, &val,
+ &op);
+
+ if (op >= 0) {
+ ptr = _upb_Decoder_DecodeKnownField(d, ptr, msg, layout, field, op, &val);
+ } else {
+ switch (op) {
+ case kUpb_DecodeOp_UnknownField:
+ ptr = _upb_Decoder_DecodeUnknownField(d, ptr, msg, field_number,
+ wire_type, val);
+ break;
+ case kUpb_DecodeOp_MessageSetItem:
+ ptr = upb_Decoder_DecodeMessageSetItem(d, ptr, msg, layout);
+ break;
+ }
+ }
+ }
+
+ return UPB_UNLIKELY(layout && layout->required_count)
+ ? _upb_Decoder_CheckRequired(d, ptr, msg, layout)
+ : ptr;
+}
+
+const char* _upb_FastDecoder_DecodeGeneric(struct upb_Decoder* d,
+ const char* ptr, upb_Message* msg,
+ intptr_t table, uint64_t hasbits,
+ uint64_t data) {
+ (void)data;
+ *(uint32_t*)msg |= hasbits;
+ return _upb_Decoder_DecodeMessage(d, ptr, msg, decode_totablep(table));
+}
+
+static upb_DecodeStatus _upb_Decoder_DecodeTop(struct upb_Decoder* d,
+ const char* buf, void* msg,
+ const upb_MiniTable* l) {
+ if (!_upb_Decoder_TryFastDispatch(d, &buf, msg, l)) {
+ _upb_Decoder_DecodeMessage(d, buf, msg, l);
+ }
+ if (d->end_group != DECODE_NOGROUP) return kUpb_DecodeStatus_Malformed;
+ if (d->missing_required) return kUpb_DecodeStatus_MissingRequired;
+ return kUpb_DecodeStatus_Ok;
+}
+
+UPB_NOINLINE
+const char* _upb_Decoder_IsDoneFallback(upb_EpsCopyInputStream* e,
+ const char* ptr, int overrun) {
+ return _upb_EpsCopyInputStream_IsDoneFallbackInline(
+ e, ptr, overrun, _upb_Decoder_BufferFlipCallback);
+}
+
+static upb_DecodeStatus upb_Decoder_Decode(upb_Decoder* const decoder,
+ const char* const buf,
+ void* const msg,
+ const upb_MiniTable* const l,
+ upb_Arena* const arena) {
+ if (UPB_SETJMP(decoder->err) == 0) {
+ decoder->status = _upb_Decoder_DecodeTop(decoder, buf, msg, l);
+ } else {
+ UPB_ASSERT(decoder->status != kUpb_DecodeStatus_Ok);
+ }
+
+ _upb_MemBlock* blocks =
+ upb_Atomic_Load(&decoder->arena.blocks, memory_order_relaxed);
+ arena->head = decoder->arena.head;
+ upb_Atomic_Store(&arena->blocks, blocks, memory_order_relaxed);
+ return decoder->status;
+}
+
+upb_DecodeStatus upb_Decode(const char* buf, size_t size, void* msg,
+ const upb_MiniTable* l,
+ const upb_ExtensionRegistry* extreg, int options,
+ upb_Arena* arena) {
+ upb_Decoder decoder;
+ unsigned depth = (unsigned)options >> 16;
+
+ upb_EpsCopyInputStream_Init(&decoder.input, &buf, size,
+ options & kUpb_DecodeOption_AliasString);
+
+ decoder.extreg = extreg;
+ decoder.unknown = NULL;
+ decoder.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit;
+ decoder.end_group = DECODE_NOGROUP;
+ decoder.options = (uint16_t)options;
+ decoder.missing_required = false;
+ decoder.status = kUpb_DecodeStatus_Ok;
+
+ // Violating the encapsulation of the arena for performance reasons.
+ // This is a temporary arena that we swap into and swap out of when we are
+ // done. The temporary arena only needs to be able to handle allocation,
+ // not fuse or free, so it does not need many of the members to be initialized
+ // (particularly parent_or_count).
+ _upb_MemBlock* blocks = upb_Atomic_Load(&arena->blocks, memory_order_relaxed);
+ decoder.arena.head = arena->head;
+ decoder.arena.block_alloc = arena->block_alloc;
+ upb_Atomic_Init(&decoder.arena.blocks, blocks);
+
+ return upb_Decoder_Decode(&decoder, buf, msg, l, arena);
+}
+
+#undef OP_FIXPCK_LG2
+#undef OP_VARPCK_LG2
diff --git a/upb/upb/wire/decode.h b/upb/upb/wire/decode.h
new file mode 100644
index 0000000..4bd6eb8
--- /dev/null
+++ b/upb/upb/wire/decode.h
@@ -0,0 +1,152 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// upb_decode: parsing into a upb_Message using a upb_MiniTable.
+
+#ifndef UPB_WIRE_DECODE_H_
+#define UPB_WIRE_DECODE_H_
+
+#include "upb/mem/arena.h"
+#include "upb/message/message.h"
+#include "upb/mini_table/extension_registry.h"
+#include "upb/wire/types.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum {
+ /* If set, strings will alias the input buffer instead of copying into the
+ * arena. */
+ kUpb_DecodeOption_AliasString = 1,
+
+ /* If set, the parse will return failure if any message is missing any
+ * required fields when the message data ends. The parse will still continue,
+ * and the failure will only be reported at the end.
+ *
+ * IMPORTANT CAVEATS:
+ *
+ * 1. This can throw a false positive failure if an incomplete message is seen
+ * on the wire but is later completed when the sub-message occurs again.
+ * For this reason, a second pass is required to verify a failure, to be
+ * truly robust.
+ *
+ * 2. This can return a false success if you are decoding into a message that
+ * already has some sub-message fields present. If the sub-message does
+ * not occur in the binary payload, we will never visit it and discover the
+ * incomplete sub-message. For this reason, this check is only useful for
+ * implemting ParseFromString() semantics. For MergeFromString(), a
+ * post-parse validation step will always be necessary. */
+ kUpb_DecodeOption_CheckRequired = 2,
+
+ /* EXPERIMENTAL:
+ *
+ * If set, the parser will allow parsing of sub-message fields that were not
+ * previously linked using upb_MiniTable_SetSubMessage(). The data will be
+ * parsed into an internal "empty" message type that cannot be accessed
+ * directly, but can be later promoted into the true message type if the
+ * sub-message fields are linked at a later time.
+ *
+ * Users should set this option if they intend to perform dynamic tree shaking
+ * and promoting using the interfaces in message/promote.h. If this option is
+ * enabled, it is important that the resulting messages are only accessed by
+ * code that is aware of promotion rules:
+ *
+ * 1. Message pointers in upb_Message, upb_Array, and upb_Map are represented
+ * by a tagged pointer upb_TaggedMessagePointer. The tag indicates whether
+ * the message uses the internal "empty" type.
+ *
+ * 2. Any code *reading* these message pointers must test whether the "empty"
+ * tag bit is set, using the interfaces in mini_table/types.h. However
+ * writing of message pointers should always use plain upb_Message*, since
+ * users are not allowed to create "empty" messages.
+ *
+ * 3. It is always safe to test whether a field is present or test the array
+ * length; these interfaces will reflect that empty messages are present,
+ * even though their data cannot be accessed without promoting first.
+ *
+ * 4. If a message pointer is indeed tagged as empty, the message may not be
+ * accessed directly, only promoted through the interfaces in
+ * message/promote.h.
+ *
+ * 5. Tagged/empty messages may never be created by the user. They may only
+ * be created by the parser or the message-copying logic in message/copy.h.
+ */
+ kUpb_DecodeOption_ExperimentalAllowUnlinked = 4,
+};
+
+UPB_INLINE uint32_t upb_DecodeOptions_MaxDepth(uint16_t depth) {
+ return (uint32_t)depth << 16;
+}
+
+UPB_INLINE uint16_t upb_DecodeOptions_GetMaxDepth(uint32_t options) {
+ return options >> 16;
+}
+
+// Enforce an upper bound on recursion depth.
+UPB_INLINE int upb_Decode_LimitDepth(uint32_t decode_options, uint32_t limit) {
+ uint32_t max_depth = upb_DecodeOptions_GetMaxDepth(decode_options);
+ if (max_depth > limit) max_depth = limit;
+ return upb_DecodeOptions_MaxDepth(max_depth) | (decode_options & 0xffff);
+}
+
+typedef enum {
+ kUpb_DecodeStatus_Ok = 0,
+ kUpb_DecodeStatus_Malformed = 1, // Wire format was corrupt
+ kUpb_DecodeStatus_OutOfMemory = 2, // Arena alloc failed
+ kUpb_DecodeStatus_BadUtf8 = 3, // String field had bad UTF-8
+ kUpb_DecodeStatus_MaxDepthExceeded =
+ 4, // Exceeded upb_DecodeOptions_MaxDepth
+
+ // kUpb_DecodeOption_CheckRequired failed (see above), but the parse otherwise
+ // succeeded.
+ kUpb_DecodeStatus_MissingRequired = 5,
+
+ // Unlinked sub-message field was present, but
+ // kUpb_DecodeOptions_ExperimentalAllowUnlinked was not specified in the list
+ // of options.
+ kUpb_DecodeStatus_UnlinkedSubMessage = 6,
+} upb_DecodeStatus;
+
+UPB_API upb_DecodeStatus upb_Decode(const char* buf, size_t size,
+ upb_Message* msg, const upb_MiniTable* l,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_WIRE_DECODE_H_ */
diff --git a/upb/upb/wire/decode_fast.c b/upb/upb/wire/decode_fast.c
new file mode 100644
index 0000000..1a8b006
--- /dev/null
+++ b/upb/upb/wire/decode_fast.c
@@ -0,0 +1,1017 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Fast decoder: ~3x the speed of decode.c, but requires x86-64/ARM64.
+// Also the table size grows by 2x.
+//
+// Could potentially be ported to other 64-bit archs that pass at least six
+// arguments in registers and have 8 unused high bits in pointers.
+//
+// The overall design is to create specialized functions for every possible
+// field type (eg. oneof boolean field with a 1 byte tag) and then dispatch
+// to the specialized function as quickly as possible.
+
+#include "upb/wire/decode_fast.h"
+
+#include "upb/collections/internal/array.h"
+#include "upb/wire/internal/decode.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#if UPB_FASTTABLE
+
+// The standard set of arguments passed to each parsing function.
+// Thanks to x86-64 calling conventions, these will stay in registers.
+#define UPB_PARSE_PARAMS \
+ upb_Decoder *d, const char *ptr, upb_Message *msg, intptr_t table, \
+ uint64_t hasbits, uint64_t data
+
+#define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data
+
+#define RETURN_GENERIC(m) \
+ /* Uncomment either of these for debugging purposes. */ \
+ /* fprintf(stderr, m); */ \
+ /*__builtin_trap(); */ \
+ return _upb_FastDecoder_DecodeGeneric(d, ptr, msg, table, hasbits, 0);
+
+typedef enum {
+ CARD_s = 0, /* Singular (optional, non-repeated) */
+ CARD_o = 1, /* Oneof */
+ CARD_r = 2, /* Repeated */
+ CARD_p = 3 /* Packed Repeated */
+} upb_card;
+
+UPB_NOINLINE
+static const char* fastdecode_isdonefallback(UPB_PARSE_PARAMS) {
+ int overrun = data;
+ ptr = _upb_EpsCopyInputStream_IsDoneFallbackInline(
+ &d->input, ptr, overrun, _upb_Decoder_BufferFlipCallback);
+ data = _upb_FastDecoder_LoadTag(ptr);
+ UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS);
+}
+
+UPB_FORCEINLINE
+static const char* fastdecode_dispatch(UPB_PARSE_PARAMS) {
+ int overrun;
+ switch (upb_EpsCopyInputStream_IsDoneStatus(&d->input, ptr, &overrun)) {
+ case kUpb_IsDoneStatus_Done:
+ *(uint32_t*)msg |= hasbits; // Sync hasbits.
+ const upb_MiniTable* l = decode_totablep(table);
+ return UPB_UNLIKELY(l->required_count)
+ ? _upb_Decoder_CheckRequired(d, ptr, msg, l)
+ : ptr;
+ case kUpb_IsDoneStatus_NotDone:
+ break;
+ case kUpb_IsDoneStatus_NeedFallback:
+ data = overrun;
+ UPB_MUSTTAIL return fastdecode_isdonefallback(UPB_PARSE_ARGS);
+ }
+
+ // Read two bytes of tag data (for a one-byte tag, the high byte is junk).
+ data = _upb_FastDecoder_LoadTag(ptr);
+ UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS);
+}
+
+UPB_FORCEINLINE
+static bool fastdecode_checktag(uint16_t data, int tagbytes) {
+ if (tagbytes == 1) {
+ return (data & 0xff) == 0;
+ } else {
+ return data == 0;
+ }
+}
+
+UPB_FORCEINLINE
+static const char* fastdecode_longsize(const char* ptr, int* size) {
+ int i;
+ UPB_ASSERT(*size & 0x80);
+ *size &= 0xff;
+ for (i = 0; i < 3; i++) {
+ ptr++;
+ size_t byte = (uint8_t)ptr[-1];
+ *size += (byte - 1) << (7 + 7 * i);
+ if (UPB_LIKELY((byte & 0x80) == 0)) return ptr;
+ }
+ ptr++;
+ size_t byte = (uint8_t)ptr[-1];
+ // len is limited by 2gb not 4gb, hence 8 and not 16 as normally expected
+ // for a 32 bit varint.
+ if (UPB_UNLIKELY(byte >= 8)) return NULL;
+ *size += (byte - 1) << 28;
+ return ptr;
+}
+
+UPB_FORCEINLINE
+static const char* fastdecode_delimited(
+ upb_Decoder* d, const char* ptr,
+ upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) {
+ ptr++;
+
+ // Sign-extend so varint greater than one byte becomes negative, causing
+ // fast delimited parse to fail.
+ int len = (int8_t)ptr[-1];
+
+ if (!upb_EpsCopyInputStream_TryParseDelimitedFast(&d->input, &ptr, len, func,
+ ctx)) {
+ // Slow case: Sub-message is >=128 bytes and/or exceeds the current buffer.
+ // If it exceeds the buffer limit, limit/limit_ptr will change during
+ // sub-message parsing, so we need to preserve delta, not limit.
+ if (UPB_UNLIKELY(len & 0x80)) {
+ // Size varint >1 byte (length >= 128).
+ ptr = fastdecode_longsize(ptr, &len);
+ if (!ptr) {
+ // Corrupt wire format: size exceeded INT_MAX.
+ return NULL;
+ }
+ }
+ if (!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, len)) {
+ // Corrupt wire format: invalid limit.
+ return NULL;
+ }
+ int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, len);
+ ptr = func(&d->input, ptr, ctx);
+ upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta);
+ }
+ return ptr;
+}
+
+/* singular, oneof, repeated field handling ***********************************/
+
+typedef struct {
+ upb_Array* arr;
+ void* end;
+} fastdecode_arr;
+
+typedef enum {
+ FD_NEXT_ATLIMIT,
+ FD_NEXT_SAMEFIELD,
+ FD_NEXT_OTHERFIELD
+} fastdecode_next;
+
+typedef struct {
+ void* dst;
+ fastdecode_next next;
+ uint32_t tag;
+} fastdecode_nextret;
+
+UPB_FORCEINLINE
+static void* fastdecode_resizearr(upb_Decoder* d, void* dst,
+ fastdecode_arr* farr, int valbytes) {
+ if (UPB_UNLIKELY(dst == farr->end)) {
+ size_t old_size = farr->arr->capacity;
+ size_t old_bytes = old_size * valbytes;
+ size_t new_size = old_size * 2;
+ size_t new_bytes = new_size * valbytes;
+ char* old_ptr = _upb_array_ptr(farr->arr);
+ char* new_ptr = upb_Arena_Realloc(&d->arena, old_ptr, old_bytes, new_bytes);
+ uint8_t elem_size_lg2 = __builtin_ctz(valbytes);
+ farr->arr->capacity = new_size;
+ farr->arr->data = _upb_array_tagptr(new_ptr, elem_size_lg2);
+ dst = (void*)(new_ptr + (old_size * valbytes));
+ farr->end = (void*)(new_ptr + (new_size * valbytes));
+ }
+ return dst;
+}
+
+UPB_FORCEINLINE
+static bool fastdecode_tagmatch(uint32_t tag, uint64_t data, int tagbytes) {
+ if (tagbytes == 1) {
+ return (uint8_t)tag == (uint8_t)data;
+ } else {
+ return (uint16_t)tag == (uint16_t)data;
+ }
+}
+
+UPB_FORCEINLINE
+static void fastdecode_commitarr(void* dst, fastdecode_arr* farr,
+ int valbytes) {
+ farr->arr->size =
+ (size_t)((char*)dst - (char*)_upb_array_ptr(farr->arr)) / valbytes;
+}
+
+UPB_FORCEINLINE
+static fastdecode_nextret fastdecode_nextrepeated(upb_Decoder* d, void* dst,
+ const char** ptr,
+ fastdecode_arr* farr,
+ uint64_t data, int tagbytes,
+ int valbytes) {
+ fastdecode_nextret ret;
+ dst = (char*)dst + valbytes;
+
+ if (UPB_LIKELY(!_upb_Decoder_IsDone(d, ptr))) {
+ ret.tag = _upb_FastDecoder_LoadTag(*ptr);
+ if (fastdecode_tagmatch(ret.tag, data, tagbytes)) {
+ ret.next = FD_NEXT_SAMEFIELD;
+ } else {
+ fastdecode_commitarr(dst, farr, valbytes);
+ ret.next = FD_NEXT_OTHERFIELD;
+ }
+ } else {
+ fastdecode_commitarr(dst, farr, valbytes);
+ ret.next = FD_NEXT_ATLIMIT;
+ }
+
+ ret.dst = dst;
+ return ret;
+}
+
+UPB_FORCEINLINE
+static void* fastdecode_fieldmem(upb_Message* msg, uint64_t data) {
+ size_t ofs = data >> 48;
+ return (char*)msg + ofs;
+}
+
+UPB_FORCEINLINE
+static void* fastdecode_getfield(upb_Decoder* d, const char* ptr,
+ upb_Message* msg, uint64_t* data,
+ uint64_t* hasbits, fastdecode_arr* farr,
+ int valbytes, upb_card card) {
+ switch (card) {
+ case CARD_s: {
+ uint8_t hasbit_index = *data >> 24;
+ // Set hasbit and return pointer to scalar field.
+ *hasbits |= 1ull << hasbit_index;
+ return fastdecode_fieldmem(msg, *data);
+ }
+ case CARD_o: {
+ uint16_t case_ofs = *data >> 32;
+ uint32_t* oneof_case = UPB_PTR_AT(msg, case_ofs, uint32_t);
+ uint8_t field_number = *data >> 24;
+ *oneof_case = field_number;
+ return fastdecode_fieldmem(msg, *data);
+ }
+ case CARD_r: {
+ // Get pointer to upb_Array and allocate/expand if necessary.
+ uint8_t elem_size_lg2 = __builtin_ctz(valbytes);
+ upb_Array** arr_p = fastdecode_fieldmem(msg, *data);
+ char* begin;
+ *(uint32_t*)msg |= *hasbits;
+ *hasbits = 0;
+ if (UPB_LIKELY(!*arr_p)) {
+ farr->arr = _upb_Array_New(&d->arena, 8, elem_size_lg2);
+ *arr_p = farr->arr;
+ } else {
+ farr->arr = *arr_p;
+ }
+ begin = _upb_array_ptr(farr->arr);
+ farr->end = begin + (farr->arr->capacity * valbytes);
+ *data = _upb_FastDecoder_LoadTag(ptr);
+ return begin + (farr->arr->size * valbytes);
+ }
+ default:
+ UPB_UNREACHABLE();
+ }
+}
+
+UPB_FORCEINLINE
+static bool fastdecode_flippacked(uint64_t* data, int tagbytes) {
+ *data ^= (0x2 ^ 0x0); // Patch data to match packed wiretype.
+ return fastdecode_checktag(*data, tagbytes);
+}
+
+#define FASTDECODE_CHECKPACKED(tagbytes, card, func) \
+ if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \
+ if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { \
+ UPB_MUSTTAIL return func(UPB_PARSE_ARGS); \
+ } \
+ RETURN_GENERIC("packed check tag mismatch\n"); \
+ }
+
+/* varint fields **************************************************************/
+
+UPB_FORCEINLINE
+static uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigzag) {
+ if (valbytes == 1) {
+ return val != 0;
+ } else if (zigzag) {
+ if (valbytes == 4) {
+ uint32_t n = val;
+ return (n >> 1) ^ -(int32_t)(n & 1);
+ } else if (valbytes == 8) {
+ return (val >> 1) ^ -(int64_t)(val & 1);
+ }
+ UPB_UNREACHABLE();
+ }
+ return val;
+}
+
+UPB_FORCEINLINE
+static const char* fastdecode_varint64(const char* ptr, uint64_t* val) {
+ ptr++;
+ *val = (uint8_t)ptr[-1];
+ if (UPB_UNLIKELY(*val & 0x80)) {
+ int i;
+ for (i = 0; i < 8; i++) {
+ ptr++;
+ uint64_t byte = (uint8_t)ptr[-1];
+ *val += (byte - 1) << (7 + 7 * i);
+ if (UPB_LIKELY((byte & 0x80) == 0)) goto done;
+ }
+ ptr++;
+ uint64_t byte = (uint8_t)ptr[-1];
+ if (byte > 1) {
+ return NULL;
+ }
+ *val += (byte - 1) << 63;
+ }
+done:
+ UPB_ASSUME(ptr != NULL);
+ return ptr;
+}
+
+#define FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \
+ valbytes, card, zigzag, packed) \
+ uint64_t val; \
+ void* dst; \
+ fastdecode_arr farr; \
+ \
+ FASTDECODE_CHECKPACKED(tagbytes, card, packed); \
+ \
+ dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \
+ card); \
+ if (card == CARD_r) { \
+ if (UPB_UNLIKELY(!dst)) { \
+ RETURN_GENERIC("need array resize\n"); \
+ } \
+ } \
+ \
+ again: \
+ if (card == CARD_r) { \
+ dst = fastdecode_resizearr(d, dst, &farr, valbytes); \
+ } \
+ \
+ ptr += tagbytes; \
+ ptr = fastdecode_varint64(ptr, &val); \
+ if (ptr == NULL) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \
+ val = fastdecode_munge(val, valbytes, zigzag); \
+ memcpy(dst, &val, valbytes); \
+ \
+ if (card == CARD_r) { \
+ fastdecode_nextret ret = fastdecode_nextrepeated( \
+ d, dst, &ptr, &farr, data, tagbytes, valbytes); \
+ switch (ret.next) { \
+ case FD_NEXT_SAMEFIELD: \
+ dst = ret.dst; \
+ goto again; \
+ case FD_NEXT_OTHERFIELD: \
+ data = ret.tag; \
+ UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \
+ case FD_NEXT_ATLIMIT: \
+ return ptr; \
+ } \
+ } \
+ \
+ UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
+
+typedef struct {
+ uint8_t valbytes;
+ bool zigzag;
+ void* dst;
+ fastdecode_arr farr;
+} fastdecode_varintdata;
+
+UPB_FORCEINLINE
+static const char* fastdecode_topackedvarint(upb_EpsCopyInputStream* e,
+ const char* ptr, void* ctx) {
+ upb_Decoder* d = (upb_Decoder*)e;
+ fastdecode_varintdata* data = ctx;
+ void* dst = data->dst;
+ uint64_t val;
+
+ while (!_upb_Decoder_IsDone(d, &ptr)) {
+ dst = fastdecode_resizearr(d, dst, &data->farr, data->valbytes);
+ ptr = fastdecode_varint64(ptr, &val);
+ if (ptr == NULL) return NULL;
+ val = fastdecode_munge(val, data->valbytes, data->zigzag);
+ memcpy(dst, &val, data->valbytes);
+ dst = (char*)dst + data->valbytes;
+ }
+
+ fastdecode_commitarr(dst, &data->farr, data->valbytes);
+ return ptr;
+}
+
+#define FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \
+ valbytes, zigzag, unpacked) \
+ fastdecode_varintdata ctx = {valbytes, zigzag}; \
+ \
+ FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked); \
+ \
+ ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr, \
+ valbytes, CARD_r); \
+ if (UPB_UNLIKELY(!ctx.dst)) { \
+ RETURN_GENERIC("need array resize\n"); \
+ } \
+ \
+ ptr += tagbytes; \
+ ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx); \
+ \
+ if (UPB_UNLIKELY(ptr == NULL)) { \
+ _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \
+ } \
+ \
+ UPB_MUSTTAIL return fastdecode_dispatch(d, ptr, msg, table, hasbits, 0);
+
+#define FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, \
+ valbytes, card, zigzag, unpacked, packed) \
+ if (card == CARD_p) { \
+ FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \
+ valbytes, zigzag, unpacked); \
+ } else { \
+ FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \
+ valbytes, card, zigzag, packed); \
+ }
+
+#define z_ZZ true
+#define b_ZZ false
+#define v_ZZ false
+
+/* Generate all combinations:
+ * {s,o,r,p} x {b1,v4,z4,v8,z8} x {1bt,2bt} */
+
+#define F(card, type, valbytes, tagbytes) \
+ UPB_NOINLINE \
+ const char* upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
+ FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \
+ CARD_##card, type##_ZZ, \
+ upb_pr##type##valbytes##_##tagbytes##bt, \
+ upb_pp##type##valbytes##_##tagbytes##bt); \
+ }
+
+#define TYPES(card, tagbytes) \
+ F(card, b, 1, tagbytes) \
+ F(card, v, 4, tagbytes) \
+ F(card, v, 8, tagbytes) \
+ F(card, z, 4, tagbytes) \
+ F(card, z, 8, tagbytes)
+
+#define TAGBYTES(card) \
+ TYPES(card, 1) \
+ TYPES(card, 2)
+
+TAGBYTES(s)
+TAGBYTES(o)
+TAGBYTES(r)
+TAGBYTES(p)
+
+#undef z_ZZ
+#undef b_ZZ
+#undef v_ZZ
+#undef o_ONEOF
+#undef s_ONEOF
+#undef r_ONEOF
+#undef F
+#undef TYPES
+#undef TAGBYTES
+#undef FASTDECODE_UNPACKEDVARINT
+#undef FASTDECODE_PACKEDVARINT
+#undef FASTDECODE_VARINT
+
+/* fixed fields ***************************************************************/
+
+#define FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \
+ valbytes, card, packed) \
+ void* dst; \
+ fastdecode_arr farr; \
+ \
+ FASTDECODE_CHECKPACKED(tagbytes, card, packed) \
+ \
+ dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \
+ card); \
+ if (card == CARD_r) { \
+ if (UPB_UNLIKELY(!dst)) { \
+ RETURN_GENERIC("couldn't allocate array in arena\n"); \
+ } \
+ } \
+ \
+ again: \
+ if (card == CARD_r) { \
+ dst = fastdecode_resizearr(d, dst, &farr, valbytes); \
+ } \
+ \
+ ptr += tagbytes; \
+ memcpy(dst, ptr, valbytes); \
+ ptr += valbytes; \
+ \
+ if (card == CARD_r) { \
+ fastdecode_nextret ret = fastdecode_nextrepeated( \
+ d, dst, &ptr, &farr, data, tagbytes, valbytes); \
+ switch (ret.next) { \
+ case FD_NEXT_SAMEFIELD: \
+ dst = ret.dst; \
+ goto again; \
+ case FD_NEXT_OTHERFIELD: \
+ data = ret.tag; \
+ UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \
+ case FD_NEXT_ATLIMIT: \
+ return ptr; \
+ } \
+ } \
+ \
+ UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
+
+#define FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \
+ valbytes, unpacked) \
+ FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked) \
+ \
+ ptr += tagbytes; \
+ int size = (uint8_t)ptr[0]; \
+ ptr++; \
+ if (size & 0x80) { \
+ ptr = fastdecode_longsize(ptr, &size); \
+ } \
+ \
+ if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckDataSizeAvailable( \
+ &d->input, ptr, size) || \
+ (size % valbytes) != 0)) { \
+ _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \
+ } \
+ \
+ upb_Array** arr_p = fastdecode_fieldmem(msg, data); \
+ upb_Array* arr = *arr_p; \
+ uint8_t elem_size_lg2 = __builtin_ctz(valbytes); \
+ int elems = size / valbytes; \
+ \
+ if (UPB_LIKELY(!arr)) { \
+ *arr_p = arr = _upb_Array_New(&d->arena, elems, elem_size_lg2); \
+ if (!arr) { \
+ _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \
+ } \
+ } else { \
+ _upb_Array_ResizeUninitialized(arr, elems, &d->arena); \
+ } \
+ \
+ char* dst = _upb_array_ptr(arr); \
+ memcpy(dst, ptr, size); \
+ arr->size = elems; \
+ \
+ ptr += size; \
+ UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
+
+#define FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, \
+ valbytes, card, unpacked, packed) \
+ if (card == CARD_p) { \
+ FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \
+ valbytes, unpacked); \
+ } else { \
+ FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \
+ valbytes, card, packed); \
+ }
+
+/* Generate all combinations:
+ * {s,o,r,p} x {f4,f8} x {1bt,2bt} */
+
+#define F(card, valbytes, tagbytes) \
+ UPB_NOINLINE \
+ const char* upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
+ FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \
+ CARD_##card, upb_ppf##valbytes##_##tagbytes##bt, \
+ upb_prf##valbytes##_##tagbytes##bt); \
+ }
+
+#define TYPES(card, tagbytes) \
+ F(card, 4, tagbytes) \
+ F(card, 8, tagbytes)
+
+#define TAGBYTES(card) \
+ TYPES(card, 1) \
+ TYPES(card, 2)
+
+TAGBYTES(s)
+TAGBYTES(o)
+TAGBYTES(r)
+TAGBYTES(p)
+
+#undef F
+#undef TYPES
+#undef TAGBYTES
+#undef FASTDECODE_UNPACKEDFIXED
+#undef FASTDECODE_PACKEDFIXED
+
+/* string fields **************************************************************/
+
+typedef const char* fastdecode_copystr_func(struct upb_Decoder* d,
+ const char* ptr, upb_Message* msg,
+ const upb_MiniTable* table,
+ uint64_t hasbits,
+ upb_StringView* dst);
+
+UPB_NOINLINE
+static const char* fastdecode_verifyutf8(upb_Decoder* d, const char* ptr,
+ upb_Message* msg, intptr_t table,
+ uint64_t hasbits, uint64_t data) {
+ upb_StringView* dst = (upb_StringView*)data;
+ if (!_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) {
+ _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8);
+ }
+ UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
+}
+
+#define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \
+ int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */ \
+ ptr++; \
+ if (size & 0x80) { \
+ ptr = fastdecode_longsize(ptr, &size); \
+ } \
+ \
+ if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, size))) { \
+ dst->size = 0; \
+ _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \
+ } \
+ \
+ const char* s_ptr = ptr; \
+ ptr = upb_EpsCopyInputStream_ReadString(&d->input, &s_ptr, size, &d->arena); \
+ if (!ptr) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); \
+ dst->data = s_ptr; \
+ dst->size = size; \
+ \
+ if (validate_utf8) { \
+ data = (uint64_t)dst; \
+ UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \
+ } else { \
+ UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \
+ }
+
+UPB_NOINLINE
+static const char* fastdecode_longstring_utf8(struct upb_Decoder* d,
+ const char* ptr, upb_Message* msg,
+ intptr_t table, uint64_t hasbits,
+ uint64_t data) {
+ upb_StringView* dst = (upb_StringView*)data;
+ FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, true);
+}
+
+UPB_NOINLINE
+static const char* fastdecode_longstring_noutf8(
+ struct upb_Decoder* d, const char* ptr, upb_Message* msg, intptr_t table,
+ uint64_t hasbits, uint64_t data) {
+ upb_StringView* dst = (upb_StringView*)data;
+ FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, false);
+}
+
+UPB_FORCEINLINE
+static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size,
+ int copy, char* data, upb_StringView* dst) {
+ d->arena.head.ptr += copy;
+ dst->data = data;
+ UPB_UNPOISON_MEMORY_REGION(data, copy);
+ memcpy(data, ptr, copy);
+ UPB_POISON_MEMORY_REGION(data + size, copy - size);
+}
+
+#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \
+ card, validate_utf8) \
+ upb_StringView* dst; \
+ fastdecode_arr farr; \
+ int64_t size; \
+ size_t arena_has; \
+ size_t common_has; \
+ char* buf; \
+ \
+ UPB_ASSERT(!upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0)); \
+ UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \
+ \
+ dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \
+ sizeof(upb_StringView), card); \
+ \
+ again: \
+ if (card == CARD_r) { \
+ dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \
+ } \
+ \
+ size = (uint8_t)ptr[tagbytes]; \
+ ptr += tagbytes + 1; \
+ dst->size = size; \
+ \
+ buf = d->arena.head.ptr; \
+ arena_has = _upb_ArenaHas(&d->arena); \
+ common_has = UPB_MIN(arena_has, \
+ upb_EpsCopyInputStream_BytesAvailable(&d->input, ptr)); \
+ \
+ if (UPB_LIKELY(size <= 15 - tagbytes)) { \
+ if (arena_has < 16) goto longstr; \
+ d->arena.head.ptr += 16; \
+ memcpy(buf, ptr - tagbytes - 1, 16); \
+ dst->data = buf + tagbytes + 1; \
+ } else if (UPB_LIKELY(size <= 32)) { \
+ if (UPB_UNLIKELY(common_has < 32)) goto longstr; \
+ fastdecode_docopy(d, ptr, size, 32, buf, dst); \
+ } else if (UPB_LIKELY(size <= 64)) { \
+ if (UPB_UNLIKELY(common_has < 64)) goto longstr; \
+ fastdecode_docopy(d, ptr, size, 64, buf, dst); \
+ } else if (UPB_LIKELY(size < 128)) { \
+ if (UPB_UNLIKELY(common_has < 128)) goto longstr; \
+ fastdecode_docopy(d, ptr, size, 128, buf, dst); \
+ } else { \
+ goto longstr; \
+ } \
+ \
+ ptr += size; \
+ \
+ if (card == CARD_r) { \
+ if (validate_utf8 && \
+ !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \
+ _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \
+ } \
+ fastdecode_nextret ret = fastdecode_nextrepeated( \
+ d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \
+ switch (ret.next) { \
+ case FD_NEXT_SAMEFIELD: \
+ dst = ret.dst; \
+ goto again; \
+ case FD_NEXT_OTHERFIELD: \
+ data = ret.tag; \
+ UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \
+ case FD_NEXT_ATLIMIT: \
+ return ptr; \
+ } \
+ } \
+ \
+ if (card != CARD_r && validate_utf8) { \
+ data = (uint64_t)dst; \
+ UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \
+ } \
+ \
+ UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \
+ \
+ longstr: \
+ if (card == CARD_r) { \
+ fastdecode_commitarr(dst + 1, &farr, sizeof(upb_StringView)); \
+ } \
+ ptr--; \
+ if (validate_utf8) { \
+ UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \
+ hasbits, (uint64_t)dst); \
+ } else { \
+ UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \
+ hasbits, (uint64_t)dst); \
+ }
+
+#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \
+ copyfunc, validate_utf8) \
+ upb_StringView* dst; \
+ fastdecode_arr farr; \
+ int64_t size; \
+ \
+ if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \
+ RETURN_GENERIC("string field tag mismatch\n"); \
+ } \
+ \
+ if (UPB_UNLIKELY( \
+ !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0))) { \
+ UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \
+ } \
+ \
+ dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \
+ sizeof(upb_StringView), card); \
+ \
+ again: \
+ if (card == CARD_r) { \
+ dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \
+ } \
+ \
+ size = (int8_t)ptr[tagbytes]; \
+ ptr += tagbytes + 1; \
+ \
+ if (UPB_UNLIKELY( \
+ !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, size))) { \
+ ptr--; \
+ if (validate_utf8) { \
+ return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \
+ (uint64_t)dst); \
+ } else { \
+ return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \
+ (uint64_t)dst); \
+ } \
+ } \
+ \
+ dst->data = ptr; \
+ dst->size = size; \
+ ptr = upb_EpsCopyInputStream_ReadStringAliased(&d->input, &dst->data, \
+ dst->size); \
+ \
+ if (card == CARD_r) { \
+ if (validate_utf8 && \
+ !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \
+ _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \
+ } \
+ fastdecode_nextret ret = fastdecode_nextrepeated( \
+ d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \
+ switch (ret.next) { \
+ case FD_NEXT_SAMEFIELD: \
+ dst = ret.dst; \
+ goto again; \
+ case FD_NEXT_OTHERFIELD: \
+ data = ret.tag; \
+ UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \
+ case FD_NEXT_ATLIMIT: \
+ return ptr; \
+ } \
+ } \
+ \
+ if (card != CARD_r && validate_utf8) { \
+ data = (uint64_t)dst; \
+ UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \
+ } \
+ \
+ UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
+
+/* Generate all combinations:
+ * {p,c} x {s,o,r} x {s, b} x {1bt,2bt} */
+
+#define s_VALIDATE true
+#define b_VALIDATE false
+
+#define F(card, tagbytes, type) \
+ UPB_NOINLINE \
+ const char* upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
+ FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \
+ CARD_##card, type##_VALIDATE); \
+ } \
+ const char* upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
+ FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, \
+ CARD_##card, upb_c##card##type##_##tagbytes##bt, \
+ type##_VALIDATE); \
+ }
+
+#define UTF8(card, tagbytes) \
+ F(card, tagbytes, s) \
+ F(card, tagbytes, b)
+
+#define TAGBYTES(card) \
+ UTF8(card, 1) \
+ UTF8(card, 2)
+
+TAGBYTES(s)
+TAGBYTES(o)
+TAGBYTES(r)
+
+#undef s_VALIDATE
+#undef b_VALIDATE
+#undef F
+#undef TAGBYTES
+#undef FASTDECODE_LONGSTRING
+#undef FASTDECODE_COPYSTRING
+#undef FASTDECODE_STRING
+
+/* message fields *************************************************************/
+
+UPB_INLINE
+upb_Message* decode_newmsg_ceil(upb_Decoder* d, const upb_MiniTable* l,
+ int msg_ceil_bytes) {
+ size_t size = l->size + sizeof(upb_Message_Internal);
+ char* msg_data;
+ if (UPB_LIKELY(msg_ceil_bytes > 0 &&
+ _upb_ArenaHas(&d->arena) >= msg_ceil_bytes)) {
+ UPB_ASSERT(size <= (size_t)msg_ceil_bytes);
+ msg_data = d->arena.head.ptr;
+ d->arena.head.ptr += size;
+ UPB_UNPOISON_MEMORY_REGION(msg_data, msg_ceil_bytes);
+ memset(msg_data, 0, msg_ceil_bytes);
+ UPB_POISON_MEMORY_REGION(msg_data + size, msg_ceil_bytes - size);
+ } else {
+ msg_data = (char*)upb_Arena_Malloc(&d->arena, size);
+ memset(msg_data, 0, size);
+ }
+ return msg_data + sizeof(upb_Message_Internal);
+}
+
+typedef struct {
+ intptr_t table;
+ upb_Message* msg;
+} fastdecode_submsgdata;
+
+UPB_FORCEINLINE
+static const char* fastdecode_tosubmsg(upb_EpsCopyInputStream* e,
+ const char* ptr, void* ctx) {
+ upb_Decoder* d = (upb_Decoder*)e;
+ fastdecode_submsgdata* submsg = ctx;
+ ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0, 0);
+ UPB_ASSUME(ptr != NULL);
+ return ptr;
+}
+
+#define FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, \
+ msg_ceil_bytes, card) \
+ \
+ if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \
+ RETURN_GENERIC("submessage field tag mismatch\n"); \
+ } \
+ \
+ if (--d->depth == 0) { \
+ _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); \
+ } \
+ \
+ upb_Message** dst; \
+ uint32_t submsg_idx = (data >> 16) & 0xff; \
+ const upb_MiniTable* tablep = decode_totablep(table); \
+ const upb_MiniTable* subtablep = tablep->subs[submsg_idx].submsg; \
+ fastdecode_submsgdata submsg = {decode_totable(subtablep)}; \
+ fastdecode_arr farr; \
+ \
+ if (subtablep->table_mask == (uint8_t)-1) { \
+ RETURN_GENERIC("submessage doesn't have fast tables."); \
+ } \
+ \
+ dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \
+ sizeof(upb_Message*), card); \
+ \
+ if (card == CARD_s) { \
+ *(uint32_t*)msg |= hasbits; \
+ hasbits = 0; \
+ } \
+ \
+ again: \
+ if (card == CARD_r) { \
+ dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_Message*)); \
+ } \
+ \
+ submsg.msg = *dst; \
+ \
+ if (card == CARD_r || UPB_LIKELY(!submsg.msg)) { \
+ *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); \
+ } \
+ \
+ ptr += tagbytes; \
+ ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg); \
+ \
+ if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) { \
+ _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \
+ } \
+ \
+ if (card == CARD_r) { \
+ fastdecode_nextret ret = fastdecode_nextrepeated( \
+ d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_Message*)); \
+ switch (ret.next) { \
+ case FD_NEXT_SAMEFIELD: \
+ dst = ret.dst; \
+ goto again; \
+ case FD_NEXT_OTHERFIELD: \
+ d->depth++; \
+ data = ret.tag; \
+ UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \
+ case FD_NEXT_ATLIMIT: \
+ d->depth++; \
+ return ptr; \
+ } \
+ } \
+ \
+ d->depth++; \
+ UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
+
+#define F(card, tagbytes, size_ceil, ceil_arg) \
+ const char* upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \
+ UPB_PARSE_PARAMS) { \
+ FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, ceil_arg, \
+ CARD_##card); \
+ }
+
+#define SIZES(card, tagbytes) \
+ F(card, tagbytes, 64, 64) \
+ F(card, tagbytes, 128, 128) \
+ F(card, tagbytes, 192, 192) \
+ F(card, tagbytes, 256, 256) \
+ F(card, tagbytes, max, -1)
+
+#define TAGBYTES(card) \
+ SIZES(card, 1) \
+ SIZES(card, 2)
+
+TAGBYTES(s)
+TAGBYTES(o)
+TAGBYTES(r)
+
+#undef TAGBYTES
+#undef SIZES
+#undef F
+#undef FASTDECODE_SUBMSG
+
+#endif /* UPB_FASTTABLE */
diff --git a/upb/upb/wire/decode_fast.h b/upb/upb/wire/decode_fast.h
new file mode 100644
index 0000000..cc494d4
--- /dev/null
+++ b/upb/upb/wire/decode_fast.h
@@ -0,0 +1,170 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// These are the specialized field parser functions for the fast parser.
+// Generated tables will refer to these by name.
+//
+// The function names are encoded with names like:
+//
+// // 123 4
+// upb_pss_1bt(); // Parse singular string, 1 byte tag.
+//
+// In position 1:
+// - 'p' for parse, most function use this
+// - 'c' for copy, for when we are copying strings instead of aliasing
+//
+// In position 2 (cardinality):
+// - 's' for singular, with or without hasbit
+// - 'o' for oneof
+// - 'r' for non-packed repeated
+// - 'p' for packed repeated
+//
+// In position 3 (type):
+// - 'b1' for bool
+// - 'v4' for 4-byte varint
+// - 'v8' for 8-byte varint
+// - 'z4' for zig-zag-encoded 4-byte varint
+// - 'z8' for zig-zag-encoded 8-byte varint
+// - 'f4' for 4-byte fixed
+// - 'f8' for 8-byte fixed
+// - 'm' for sub-message
+// - 's' for string (validate UTF-8)
+// - 'b' for bytes
+//
+// In position 4 (tag length):
+// - '1' for one-byte tags (field numbers 1-15)
+// - '2' for two-byte tags (field numbers 16-2048)
+
+#ifndef UPB_WIRE_DECODE_FAST_H_
+#define UPB_WIRE_DECODE_FAST_H_
+
+#include "upb/message/message.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct upb_Decoder;
+
+// The fallback, generic parsing function that can handle any field type.
+// This just uses the regular (non-fast) parser to parse a single field.
+const char* _upb_FastDecoder_DecodeGeneric(struct upb_Decoder* d,
+ const char* ptr, upb_Message* msg,
+ intptr_t table, uint64_t hasbits,
+ uint64_t data);
+
+#define UPB_PARSE_PARAMS \
+ struct upb_Decoder *d, const char *ptr, upb_Message *msg, intptr_t table, \
+ uint64_t hasbits, uint64_t data
+
+/* primitive fields ***********************************************************/
+
+#define F(card, type, valbytes, tagbytes) \
+ const char* upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS);
+
+#define TYPES(card, tagbytes) \
+ F(card, b, 1, tagbytes) \
+ F(card, v, 4, tagbytes) \
+ F(card, v, 8, tagbytes) \
+ F(card, z, 4, tagbytes) \
+ F(card, z, 8, tagbytes) \
+ F(card, f, 4, tagbytes) \
+ F(card, f, 8, tagbytes)
+
+#define TAGBYTES(card) \
+ TYPES(card, 1) \
+ TYPES(card, 2)
+
+TAGBYTES(s)
+TAGBYTES(o)
+TAGBYTES(r)
+TAGBYTES(p)
+
+#undef F
+#undef TYPES
+#undef TAGBYTES
+
+/* string fields **************************************************************/
+
+#define F(card, tagbytes, type) \
+ const char* upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS); \
+ const char* upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS);
+
+#define UTF8(card, tagbytes) \
+ F(card, tagbytes, s) \
+ F(card, tagbytes, b)
+
+#define TAGBYTES(card) \
+ UTF8(card, 1) \
+ UTF8(card, 2)
+
+TAGBYTES(s)
+TAGBYTES(o)
+TAGBYTES(r)
+
+#undef F
+#undef TAGBYTES
+
+/* sub-message fields *********************************************************/
+
+#define F(card, tagbytes, size_ceil, ceil_arg) \
+ const char* upb_p##card##m_##tagbytes##bt_max##size_ceil##b(UPB_PARSE_PARAMS);
+
+#define SIZES(card, tagbytes) \
+ F(card, tagbytes, 64, 64) \
+ F(card, tagbytes, 128, 128) \
+ F(card, tagbytes, 192, 192) \
+ F(card, tagbytes, 256, 256) \
+ F(card, tagbytes, max, -1)
+
+#define TAGBYTES(card) \
+ SIZES(card, 1) \
+ SIZES(card, 2)
+
+TAGBYTES(s)
+TAGBYTES(o)
+TAGBYTES(r)
+
+#undef TAGBYTES
+#undef SIZES
+#undef F
+
+#undef UPB_PARSE_PARAMS
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_WIRE_DECODE_FAST_H_ */
diff --git a/upb/upb/wire/encode.c b/upb/upb/wire/encode.c
new file mode 100644
index 0000000..82429dd
--- /dev/null
+++ b/upb/upb/wire/encode.c
@@ -0,0 +1,643 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// We encode backwards, to avoid pre-computing lengths (one-pass encode).
+
+#include "upb/wire/encode.h"
+
+#include <string.h>
+
+#include "upb/collections/internal/array.h"
+#include "upb/collections/internal/map_sorter.h"
+#include "upb/message/internal/accessors.h"
+#include "upb/message/internal/extension.h"
+#include "upb/mini_table/sub.h"
+#include "upb/wire/internal/common.h"
+#include "upb/wire/internal/swap.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#define UPB_PB_VARINT_MAX_LEN 10
+
+UPB_NOINLINE
+static size_t encode_varint64(uint64_t val, char* buf) {
+ size_t i = 0;
+ do {
+ uint8_t byte = val & 0x7fU;
+ val >>= 7;
+ if (val) byte |= 0x80U;
+ buf[i++] = byte;
+ } while (val);
+ return i;
+}
+
+static uint32_t encode_zz32(int32_t n) {
+ return ((uint32_t)n << 1) ^ (n >> 31);
+}
+static uint64_t encode_zz64(int64_t n) {
+ return ((uint64_t)n << 1) ^ (n >> 63);
+}
+
+typedef struct {
+ upb_EncodeStatus status;
+ jmp_buf err;
+ upb_Arena* arena;
+ char *buf, *ptr, *limit;
+ int options;
+ int depth;
+ _upb_mapsorter sorter;
+} upb_encstate;
+
+static size_t upb_roundup_pow2(size_t bytes) {
+ size_t ret = 128;
+ while (ret < bytes) {
+ ret *= 2;
+ }
+ return ret;
+}
+
+UPB_NORETURN static void encode_err(upb_encstate* e, upb_EncodeStatus s) {
+ UPB_ASSERT(s != kUpb_EncodeStatus_Ok);
+ e->status = s;
+ UPB_LONGJMP(e->err, 1);
+}
+
+UPB_NOINLINE
+static void encode_growbuffer(upb_encstate* e, size_t bytes) {
+ size_t old_size = e->limit - e->buf;
+ size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr));
+ char* new_buf = upb_Arena_Realloc(e->arena, e->buf, old_size, new_size);
+
+ if (!new_buf) encode_err(e, kUpb_EncodeStatus_OutOfMemory);
+
+ // We want previous data at the end, realloc() put it at the beginning.
+ // TODO(salo): This is somewhat inefficient since we are copying twice.
+ // Maybe create a realloc() that copies to the end of the new buffer?
+ if (old_size > 0) {
+ memmove(new_buf + new_size - old_size, e->buf, old_size);
+ }
+
+ e->ptr = new_buf + new_size - (e->limit - e->ptr);
+ e->limit = new_buf + new_size;
+ e->buf = new_buf;
+
+ e->ptr -= bytes;
+}
+
+/* Call to ensure that at least "bytes" bytes are available for writing at
+ * e->ptr. Returns false if the bytes could not be allocated. */
+UPB_FORCEINLINE
+static void encode_reserve(upb_encstate* e, size_t bytes) {
+ if ((size_t)(e->ptr - e->buf) < bytes) {
+ encode_growbuffer(e, bytes);
+ return;
+ }
+
+ e->ptr -= bytes;
+}
+
+/* Writes the given bytes to the buffer, handling reserve/advance. */
+static void encode_bytes(upb_encstate* e, const void* data, size_t len) {
+ if (len == 0) return; /* memcpy() with zero size is UB */
+ encode_reserve(e, len);
+ memcpy(e->ptr, data, len);
+}
+
+static void encode_fixed64(upb_encstate* e, uint64_t val) {
+ val = _upb_BigEndian_Swap64(val);
+ encode_bytes(e, &val, sizeof(uint64_t));
+}
+
+static void encode_fixed32(upb_encstate* e, uint32_t val) {
+ val = _upb_BigEndian_Swap32(val);
+ encode_bytes(e, &val, sizeof(uint32_t));
+}
+
+UPB_NOINLINE
+static void encode_longvarint(upb_encstate* e, uint64_t val) {
+ size_t len;
+ char* start;
+
+ encode_reserve(e, UPB_PB_VARINT_MAX_LEN);
+ len = encode_varint64(val, e->ptr);
+ start = e->ptr + UPB_PB_VARINT_MAX_LEN - len;
+ memmove(start, e->ptr, len);
+ e->ptr = start;
+}
+
+UPB_FORCEINLINE
+static void encode_varint(upb_encstate* e, uint64_t val) {
+ if (val < 128 && e->ptr != e->buf) {
+ --e->ptr;
+ *e->ptr = val;
+ } else {
+ encode_longvarint(e, val);
+ }
+}
+
+static void encode_double(upb_encstate* e, double d) {
+ uint64_t u64;
+ UPB_ASSERT(sizeof(double) == sizeof(uint64_t));
+ memcpy(&u64, &d, sizeof(uint64_t));
+ encode_fixed64(e, u64);
+}
+
+static void encode_float(upb_encstate* e, float d) {
+ uint32_t u32;
+ UPB_ASSERT(sizeof(float) == sizeof(uint32_t));
+ memcpy(&u32, &d, sizeof(uint32_t));
+ encode_fixed32(e, u32);
+}
+
+static void encode_tag(upb_encstate* e, uint32_t field_number,
+ uint8_t wire_type) {
+ encode_varint(e, (field_number << 3) | wire_type);
+}
+
+static void encode_fixedarray(upb_encstate* e, const upb_Array* arr,
+ size_t elem_size, uint32_t tag) {
+ size_t bytes = arr->size * elem_size;
+ const char* data = _upb_array_constptr(arr);
+ const char* ptr = data + bytes - elem_size;
+
+ if (tag || !_upb_IsLittleEndian()) {
+ while (true) {
+ if (elem_size == 4) {
+ uint32_t val;
+ memcpy(&val, ptr, sizeof(val));
+ val = _upb_BigEndian_Swap32(val);
+ encode_bytes(e, &val, elem_size);
+ } else {
+ UPB_ASSERT(elem_size == 8);
+ uint64_t val;
+ memcpy(&val, ptr, sizeof(val));
+ val = _upb_BigEndian_Swap64(val);
+ encode_bytes(e, &val, elem_size);
+ }
+
+ if (tag) encode_varint(e, tag);
+ if (ptr == data) break;
+ ptr -= elem_size;
+ }
+ } else {
+ encode_bytes(e, data, bytes);
+ }
+}
+
+static void encode_message(upb_encstate* e, const upb_Message* msg,
+ const upb_MiniTable* m, size_t* size);
+
+static void encode_TaggedMessagePtr(upb_encstate* e,
+ upb_TaggedMessagePtr tagged,
+ const upb_MiniTable* m, size_t* size) {
+ if (upb_TaggedMessagePtr_IsEmpty(tagged)) {
+ m = &_kUpb_MiniTable_Empty;
+ }
+ encode_message(e, _upb_TaggedMessagePtr_GetMessage(tagged), m, size);
+}
+
+static void encode_scalar(upb_encstate* e, const void* _field_mem,
+ const upb_MiniTableSub* subs,
+ const upb_MiniTableField* f) {
+ const char* field_mem = _field_mem;
+ int wire_type;
+
+#define CASE(ctype, type, wtype, encodeval) \
+ { \
+ ctype val = *(ctype*)field_mem; \
+ encode_##type(e, encodeval); \
+ wire_type = wtype; \
+ break; \
+ }
+
+ switch (f->UPB_PRIVATE(descriptortype)) {
+ case kUpb_FieldType_Double:
+ CASE(double, double, kUpb_WireType_64Bit, val);
+ case kUpb_FieldType_Float:
+ CASE(float, float, kUpb_WireType_32Bit, val);
+ case kUpb_FieldType_Int64:
+ case kUpb_FieldType_UInt64:
+ CASE(uint64_t, varint, kUpb_WireType_Varint, val);
+ case kUpb_FieldType_UInt32:
+ CASE(uint32_t, varint, kUpb_WireType_Varint, val);
+ case kUpb_FieldType_Int32:
+ case kUpb_FieldType_Enum:
+ CASE(int32_t, varint, kUpb_WireType_Varint, (int64_t)val);
+ case kUpb_FieldType_SFixed64:
+ case kUpb_FieldType_Fixed64:
+ CASE(uint64_t, fixed64, kUpb_WireType_64Bit, val);
+ case kUpb_FieldType_Fixed32:
+ case kUpb_FieldType_SFixed32:
+ CASE(uint32_t, fixed32, kUpb_WireType_32Bit, val);
+ case kUpb_FieldType_Bool:
+ CASE(bool, varint, kUpb_WireType_Varint, val);
+ case kUpb_FieldType_SInt32:
+ CASE(int32_t, varint, kUpb_WireType_Varint, encode_zz32(val));
+ case kUpb_FieldType_SInt64:
+ CASE(int64_t, varint, kUpb_WireType_Varint, encode_zz64(val));
+ case kUpb_FieldType_String:
+ case kUpb_FieldType_Bytes: {
+ upb_StringView view = *(upb_StringView*)field_mem;
+ encode_bytes(e, view.data, view.size);
+ encode_varint(e, view.size);
+ wire_type = kUpb_WireType_Delimited;
+ break;
+ }
+ case kUpb_FieldType_Group: {
+ size_t size;
+ upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem;
+ const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg;
+ if (submsg == 0) {
+ return;
+ }
+ if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded);
+ encode_tag(e, f->number, kUpb_WireType_EndGroup);
+ encode_TaggedMessagePtr(e, submsg, subm, &size);
+ wire_type = kUpb_WireType_StartGroup;
+ e->depth++;
+ break;
+ }
+ case kUpb_FieldType_Message: {
+ size_t size;
+ upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem;
+ const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg;
+ if (submsg == 0) {
+ return;
+ }
+ if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded);
+ encode_TaggedMessagePtr(e, submsg, subm, &size);
+ encode_varint(e, size);
+ wire_type = kUpb_WireType_Delimited;
+ e->depth++;
+ break;
+ }
+ default:
+ UPB_UNREACHABLE();
+ }
+#undef CASE
+
+ encode_tag(e, f->number, wire_type);
+}
+
+static void encode_array(upb_encstate* e, const upb_Message* msg,
+ const upb_MiniTableSub* subs,
+ const upb_MiniTableField* f) {
+ const upb_Array* arr = *UPB_PTR_AT(msg, f->offset, upb_Array*);
+ bool packed = f->mode & kUpb_LabelFlags_IsPacked;
+ size_t pre_len = e->limit - e->ptr;
+
+ if (arr == NULL || arr->size == 0) {
+ return;
+ }
+
+#define VARINT_CASE(ctype, encode) \
+ { \
+ const ctype* start = _upb_array_constptr(arr); \
+ const ctype* ptr = start + arr->size; \
+ uint32_t tag = packed ? 0 : (f->number << 3) | kUpb_WireType_Varint; \
+ do { \
+ ptr--; \
+ encode_varint(e, encode); \
+ if (tag) encode_varint(e, tag); \
+ } while (ptr != start); \
+ } \
+ break;
+
+#define TAG(wire_type) (packed ? 0 : (f->number << 3 | wire_type))
+
+ switch (f->UPB_PRIVATE(descriptortype)) {
+ case kUpb_FieldType_Double:
+ encode_fixedarray(e, arr, sizeof(double), TAG(kUpb_WireType_64Bit));
+ break;
+ case kUpb_FieldType_Float:
+ encode_fixedarray(e, arr, sizeof(float), TAG(kUpb_WireType_32Bit));
+ break;
+ case kUpb_FieldType_SFixed64:
+ case kUpb_FieldType_Fixed64:
+ encode_fixedarray(e, arr, sizeof(uint64_t), TAG(kUpb_WireType_64Bit));
+ break;
+ case kUpb_FieldType_Fixed32:
+ case kUpb_FieldType_SFixed32:
+ encode_fixedarray(e, arr, sizeof(uint32_t), TAG(kUpb_WireType_32Bit));
+ break;
+ case kUpb_FieldType_Int64:
+ case kUpb_FieldType_UInt64:
+ VARINT_CASE(uint64_t, *ptr);
+ case kUpb_FieldType_UInt32:
+ VARINT_CASE(uint32_t, *ptr);
+ case kUpb_FieldType_Int32:
+ case kUpb_FieldType_Enum:
+ VARINT_CASE(int32_t, (int64_t)*ptr);
+ case kUpb_FieldType_Bool:
+ VARINT_CASE(bool, *ptr);
+ case kUpb_FieldType_SInt32:
+ VARINT_CASE(int32_t, encode_zz32(*ptr));
+ case kUpb_FieldType_SInt64:
+ VARINT_CASE(int64_t, encode_zz64(*ptr));
+ case kUpb_FieldType_String:
+ case kUpb_FieldType_Bytes: {
+ const upb_StringView* start = _upb_array_constptr(arr);
+ const upb_StringView* ptr = start + arr->size;
+ do {
+ ptr--;
+ encode_bytes(e, ptr->data, ptr->size);
+ encode_varint(e, ptr->size);
+ encode_tag(e, f->number, kUpb_WireType_Delimited);
+ } while (ptr != start);
+ return;
+ }
+ case kUpb_FieldType_Group: {
+ const upb_TaggedMessagePtr* start = _upb_array_constptr(arr);
+ const upb_TaggedMessagePtr* ptr = start + arr->size;
+ const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg;
+ if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded);
+ do {
+ size_t size;
+ ptr--;
+ encode_tag(e, f->number, kUpb_WireType_EndGroup);
+ encode_TaggedMessagePtr(e, *ptr, subm, &size);
+ encode_tag(e, f->number, kUpb_WireType_StartGroup);
+ } while (ptr != start);
+ e->depth++;
+ return;
+ }
+ case kUpb_FieldType_Message: {
+ const upb_TaggedMessagePtr* start = _upb_array_constptr(arr);
+ const upb_TaggedMessagePtr* ptr = start + arr->size;
+ const upb_MiniTable* subm = subs[f->UPB_PRIVATE(submsg_index)].submsg;
+ if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded);
+ do {
+ size_t size;
+ ptr--;
+ encode_TaggedMessagePtr(e, *ptr, subm, &size);
+ encode_varint(e, size);
+ encode_tag(e, f->number, kUpb_WireType_Delimited);
+ } while (ptr != start);
+ e->depth++;
+ return;
+ }
+ }
+#undef VARINT_CASE
+
+ if (packed) {
+ encode_varint(e, e->limit - e->ptr - pre_len);
+ encode_tag(e, f->number, kUpb_WireType_Delimited);
+ }
+}
+
+static void encode_mapentry(upb_encstate* e, uint32_t number,
+ const upb_MiniTable* layout,
+ const upb_MapEntry* ent) {
+ const upb_MiniTableField* key_field = &layout->fields[0];
+ const upb_MiniTableField* val_field = &layout->fields[1];
+ size_t pre_len = e->limit - e->ptr;
+ size_t size;
+ encode_scalar(e, &ent->data.v, layout->subs, val_field);
+ encode_scalar(e, &ent->data.k, layout->subs, key_field);
+ size = (e->limit - e->ptr) - pre_len;
+ encode_varint(e, size);
+ encode_tag(e, number, kUpb_WireType_Delimited);
+}
+
+static void encode_map(upb_encstate* e, const upb_Message* msg,
+ const upb_MiniTableSub* subs,
+ const upb_MiniTableField* f) {
+ const upb_Map* map = *UPB_PTR_AT(msg, f->offset, const upb_Map*);
+ const upb_MiniTable* layout = subs[f->UPB_PRIVATE(submsg_index)].submsg;
+ UPB_ASSERT(layout->field_count == 2);
+
+ if (map == NULL) return;
+
+ if (e->options & kUpb_EncodeOption_Deterministic) {
+ _upb_sortedmap sorted;
+ _upb_mapsorter_pushmap(&e->sorter,
+ layout->fields[0].UPB_PRIVATE(descriptortype), map,
+ &sorted);
+ upb_MapEntry ent;
+ while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) {
+ encode_mapentry(e, f->number, layout, &ent);
+ }
+ _upb_mapsorter_popmap(&e->sorter, &sorted);
+ } else {
+ intptr_t iter = UPB_STRTABLE_BEGIN;
+ upb_StringView key;
+ upb_value val;
+ while (upb_strtable_next2(&map->table, &key, &val, &iter)) {
+ upb_MapEntry ent;
+ _upb_map_fromkey(key, &ent.data.k, map->key_size);
+ _upb_map_fromvalue(val, &ent.data.v, map->val_size);
+ encode_mapentry(e, f->number, layout, &ent);
+ }
+ }
+}
+
+static bool encode_shouldencode(upb_encstate* e, const upb_Message* msg,
+ const upb_MiniTableSub* subs,
+ const upb_MiniTableField* f) {
+ if (f->presence == 0) {
+ /* Proto3 presence or map/array. */
+ const void* mem = UPB_PTR_AT(msg, f->offset, void);
+ switch (_upb_MiniTableField_GetRep(f)) {
+ case kUpb_FieldRep_1Byte: {
+ char ch;
+ memcpy(&ch, mem, 1);
+ return ch != 0;
+ }
+ case kUpb_FieldRep_4Byte: {
+ uint32_t u32;
+ memcpy(&u32, mem, 4);
+ return u32 != 0;
+ }
+ case kUpb_FieldRep_8Byte: {
+ uint64_t u64;
+ memcpy(&u64, mem, 8);
+ return u64 != 0;
+ }
+ case kUpb_FieldRep_StringView: {
+ const upb_StringView* str = (const upb_StringView*)mem;
+ return str->size != 0;
+ }
+ default:
+ UPB_UNREACHABLE();
+ }
+ } else if (f->presence > 0) {
+ /* Proto2 presence: hasbit. */
+ return _upb_hasbit_field(msg, f);
+ } else {
+ /* Field is in a oneof. */
+ return _upb_getoneofcase_field(msg, f) == f->number;
+ }
+}
+
+static void encode_field(upb_encstate* e, const upb_Message* msg,
+ const upb_MiniTableSub* subs,
+ const upb_MiniTableField* field) {
+ switch (upb_FieldMode_Get(field)) {
+ case kUpb_FieldMode_Array:
+ encode_array(e, msg, subs, field);
+ break;
+ case kUpb_FieldMode_Map:
+ encode_map(e, msg, subs, field);
+ break;
+ case kUpb_FieldMode_Scalar:
+ encode_scalar(e, UPB_PTR_AT(msg, field->offset, void), subs, field);
+ break;
+ default:
+ UPB_UNREACHABLE();
+ }
+}
+
+static void encode_msgset_item(upb_encstate* e,
+ const upb_Message_Extension* ext) {
+ size_t size;
+ encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_EndGroup);
+ encode_message(e, ext->data.ptr, ext->ext->sub.submsg, &size);
+ encode_varint(e, size);
+ encode_tag(e, kUpb_MsgSet_Message, kUpb_WireType_Delimited);
+ encode_varint(e, ext->ext->field.number);
+ encode_tag(e, kUpb_MsgSet_TypeId, kUpb_WireType_Varint);
+ encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_StartGroup);
+}
+
+static void encode_ext(upb_encstate* e, const upb_Message_Extension* ext,
+ bool is_message_set) {
+ if (UPB_UNLIKELY(is_message_set)) {
+ encode_msgset_item(e, ext);
+ } else {
+ encode_field(e, &ext->data, &ext->ext->sub, &ext->ext->field);
+ }
+}
+
+static void encode_message(upb_encstate* e, const upb_Message* msg,
+ const upb_MiniTable* m, size_t* size) {
+ size_t pre_len = e->limit - e->ptr;
+
+ if ((e->options & kUpb_EncodeOption_CheckRequired) && m->required_count) {
+ uint64_t msg_head;
+ memcpy(&msg_head, msg, 8);
+ msg_head = _upb_BigEndian_Swap64(msg_head);
+ if (upb_MiniTable_requiredmask(m) & ~msg_head) {
+ encode_err(e, kUpb_EncodeStatus_MissingRequired);
+ }
+ }
+
+ if ((e->options & kUpb_EncodeOption_SkipUnknown) == 0) {
+ size_t unknown_size;
+ const char* unknown = upb_Message_GetUnknown(msg, &unknown_size);
+
+ if (unknown) {
+ encode_bytes(e, unknown, unknown_size);
+ }
+ }
+
+ if (m->ext != kUpb_ExtMode_NonExtendable) {
+ /* Encode all extensions together. Unlike C++, we do not attempt to keep
+ * these in field number order relative to normal fields or even to each
+ * other. */
+ size_t ext_count;
+ const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &ext_count);
+ if (ext_count) {
+ if (e->options & kUpb_EncodeOption_Deterministic) {
+ _upb_sortedmap sorted;
+ _upb_mapsorter_pushexts(&e->sorter, ext, ext_count, &sorted);
+ while (_upb_sortedmap_nextext(&e->sorter, &sorted, &ext)) {
+ encode_ext(e, ext, m->ext == kUpb_ExtMode_IsMessageSet);
+ }
+ _upb_mapsorter_popmap(&e->sorter, &sorted);
+ } else {
+ const upb_Message_Extension* end = ext + ext_count;
+ for (; ext != end; ext++) {
+ encode_ext(e, ext, m->ext == kUpb_ExtMode_IsMessageSet);
+ }
+ }
+ }
+ }
+
+ if (m->field_count) {
+ const upb_MiniTableField* f = &m->fields[m->field_count];
+ const upb_MiniTableField* first = &m->fields[0];
+ while (f != first) {
+ f--;
+ if (encode_shouldencode(e, msg, m->subs, f)) {
+ encode_field(e, msg, m->subs, f);
+ }
+ }
+ }
+
+ *size = (e->limit - e->ptr) - pre_len;
+}
+
+static upb_EncodeStatus upb_Encoder_Encode(upb_encstate* const encoder,
+ const void* const msg,
+ const upb_MiniTable* const l,
+ char** const buf,
+ size_t* const size) {
+ // Unfortunately we must continue to perform hackery here because there are
+ // code paths which blindly copy the returned pointer without bothering to
+ // check for errors until much later (b/235839510). So we still set *buf to
+ // NULL on error and we still set it to non-NULL on a successful empty result.
+ if (UPB_SETJMP(encoder->err) == 0) {
+ encode_message(encoder, msg, l, size);
+ *size = encoder->limit - encoder->ptr;
+ if (*size == 0) {
+ static char ch;
+ *buf = &ch;
+ } else {
+ UPB_ASSERT(encoder->ptr);
+ *buf = encoder->ptr;
+ }
+ } else {
+ UPB_ASSERT(encoder->status != kUpb_EncodeStatus_Ok);
+ *buf = NULL;
+ *size = 0;
+ }
+
+ _upb_mapsorter_destroy(&encoder->sorter);
+ return encoder->status;
+}
+
+upb_EncodeStatus upb_Encode(const void* msg, const upb_MiniTable* l,
+ int options, upb_Arena* arena, char** buf,
+ size_t* size) {
+ upb_encstate e;
+ unsigned depth = (unsigned)options >> 16;
+
+ e.status = kUpb_EncodeStatus_Ok;
+ e.arena = arena;
+ e.buf = NULL;
+ e.limit = NULL;
+ e.ptr = NULL;
+ e.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit;
+ e.options = options;
+ _upb_mapsorter_init(&e.sorter);
+
+ return upb_Encoder_Encode(&e, msg, l, buf, size);
+}
diff --git a/upb/upb/wire/encode.h b/upb/upb/wire/encode.h
new file mode 100644
index 0000000..8212b87
--- /dev/null
+++ b/upb/upb/wire/encode.h
@@ -0,0 +1,96 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// upb_Encode: parsing from a upb_Message using a upb_MiniTable.
+
+#ifndef UPB_WIRE_ENCODE_H_
+#define UPB_WIRE_ENCODE_H_
+
+#include "upb/message/message.h"
+#include "upb/wire/types.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum {
+ /* If set, the results of serializing will be deterministic across all
+ * instances of this binary. There are no guarantees across different
+ * binary builds.
+ *
+ * If your proto contains maps, the encoder will need to malloc()/free()
+ * memory during encode. */
+ kUpb_EncodeOption_Deterministic = 1,
+
+ // When set, unknown fields are not printed.
+ kUpb_EncodeOption_SkipUnknown = 2,
+
+ // When set, the encode will fail if any required fields are missing.
+ kUpb_EncodeOption_CheckRequired = 4,
+};
+
+typedef enum {
+ kUpb_EncodeStatus_Ok = 0,
+ kUpb_EncodeStatus_OutOfMemory = 1, // Arena alloc failed
+ kUpb_EncodeStatus_MaxDepthExceeded = 2,
+
+ // kUpb_EncodeOption_CheckRequired failed but the parse otherwise succeeded.
+ kUpb_EncodeStatus_MissingRequired = 3,
+} upb_EncodeStatus;
+
+UPB_INLINE uint32_t upb_EncodeOptions_MaxDepth(uint16_t depth) {
+ return (uint32_t)depth << 16;
+}
+
+UPB_INLINE uint16_t upb_EncodeOptions_GetMaxDepth(uint32_t options) {
+ return options >> 16;
+}
+
+// Enforce an upper bound on recursion depth.
+UPB_INLINE int upb_Encode_LimitDepth(uint32_t encode_options, uint32_t limit) {
+ uint32_t max_depth = upb_EncodeOptions_GetMaxDepth(encode_options);
+ if (max_depth > limit) max_depth = limit;
+ return upb_EncodeOptions_MaxDepth(max_depth) | (encode_options & 0xffff);
+}
+
+UPB_API upb_EncodeStatus upb_Encode(const void* msg, const upb_MiniTable* l,
+ int options, upb_Arena* arena, char** buf,
+ size_t* size);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_WIRE_ENCODE_H_ */
diff --git a/upb/upb/wire/eps_copy_input_stream.c b/upb/upb/wire/eps_copy_input_stream.c
new file mode 100644
index 0000000..b46a50d
--- /dev/null
+++ b/upb/upb/wire/eps_copy_input_stream.c
@@ -0,0 +1,42 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/wire/eps_copy_input_stream.h"
+
+static const char* _upb_EpsCopyInputStream_NoOpCallback(
+ upb_EpsCopyInputStream* e, const char* old_end, const char* new_start) {
+ return new_start;
+}
+
+const char* _upb_EpsCopyInputStream_IsDoneFallbackNoCallback(
+ upb_EpsCopyInputStream* e, const char* ptr, int overrun) {
+ return _upb_EpsCopyInputStream_IsDoneFallbackInline(
+ e, ptr, overrun, _upb_EpsCopyInputStream_NoOpCallback);
+}
diff --git a/upb/upb/wire/eps_copy_input_stream.h b/upb/upb/wire/eps_copy_input_stream.h
new file mode 100644
index 0000000..515b9bb
--- /dev/null
+++ b/upb/upb/wire/eps_copy_input_stream.h
@@ -0,0 +1,428 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_WIRE_EPS_COPY_INPUT_STREAM_H_
+#define UPB_WIRE_EPS_COPY_INPUT_STREAM_H_
+
+#include <string.h>
+
+#include "upb/mem/arena.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// The maximum number of bytes a single protobuf field can take up in the
+// wire format. We only want to do one bounds check per field, so the input
+// stream guarantees that after upb_EpsCopyInputStream_IsDone() is called,
+// the decoder can read this many bytes without performing another bounds
+// check. The stream will copy into a patch buffer as necessary to guarantee
+// this invariant.
+#define kUpb_EpsCopyInputStream_SlopBytes 16
+
+enum {
+ kUpb_EpsCopyInputStream_NoAliasing = 0,
+ kUpb_EpsCopyInputStream_OnPatch = 1,
+ kUpb_EpsCopyInputStream_NoDelta = 2
+};
+
+typedef struct {
+ const char* end; // Can read up to SlopBytes bytes beyond this.
+ const char* limit_ptr; // For bounds checks, = end + UPB_MIN(limit, 0)
+ uintptr_t aliasing;
+ int limit; // Submessage limit relative to end
+ bool error; // To distinguish between EOF and error.
+ char patch[kUpb_EpsCopyInputStream_SlopBytes * 2];
+} upb_EpsCopyInputStream;
+
+// Returns true if the stream is in the error state. A stream enters the error
+// state when the user reads past a limit (caught in IsDone()) or the
+// ZeroCopyInputStream returns an error.
+UPB_INLINE bool upb_EpsCopyInputStream_IsError(upb_EpsCopyInputStream* e) {
+ return e->error;
+}
+
+typedef const char* upb_EpsCopyInputStream_BufferFlipCallback(
+ upb_EpsCopyInputStream* e, const char* old_end, const char* new_start);
+
+typedef const char* upb_EpsCopyInputStream_IsDoneFallbackFunc(
+ upb_EpsCopyInputStream* e, const char* ptr, int overrun);
+
+// Initializes a upb_EpsCopyInputStream using the contents of the buffer
+// [*ptr, size]. Updates `*ptr` as necessary to guarantee that at least
+// kUpb_EpsCopyInputStream_SlopBytes are available to read.
+UPB_INLINE void upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream* e,
+ const char** ptr, size_t size,
+ bool enable_aliasing) {
+ if (size <= kUpb_EpsCopyInputStream_SlopBytes) {
+ memset(&e->patch, 0, 32);
+ if (size) memcpy(&e->patch, *ptr, size);
+ e->aliasing = enable_aliasing ? (uintptr_t)*ptr - (uintptr_t)e->patch
+ : kUpb_EpsCopyInputStream_NoAliasing;
+ *ptr = e->patch;
+ e->end = *ptr + size;
+ e->limit = 0;
+ } else {
+ e->end = *ptr + size - kUpb_EpsCopyInputStream_SlopBytes;
+ e->limit = kUpb_EpsCopyInputStream_SlopBytes;
+ e->aliasing = enable_aliasing ? kUpb_EpsCopyInputStream_NoDelta
+ : kUpb_EpsCopyInputStream_NoAliasing;
+ }
+ e->limit_ptr = e->end;
+ e->error = false;
+}
+
+typedef enum {
+ // The current stream position is at a limit.
+ kUpb_IsDoneStatus_Done,
+
+ // The current stream position is not at a limit.
+ kUpb_IsDoneStatus_NotDone,
+
+ // The current stream position is not at a limit, and the stream needs to
+ // be flipped to a new buffer before more data can be read.
+ kUpb_IsDoneStatus_NeedFallback,
+} upb_IsDoneStatus;
+
+// Returns the status of the current stream position. This is a low-level
+// function, it is simpler to call upb_EpsCopyInputStream_IsDone() if possible.
+UPB_INLINE upb_IsDoneStatus upb_EpsCopyInputStream_IsDoneStatus(
+ upb_EpsCopyInputStream* e, const char* ptr, int* overrun) {
+ *overrun = ptr - e->end;
+ if (UPB_LIKELY(ptr < e->limit_ptr)) {
+ return kUpb_IsDoneStatus_NotDone;
+ } else if (UPB_LIKELY(*overrun == e->limit)) {
+ return kUpb_IsDoneStatus_Done;
+ } else {
+ return kUpb_IsDoneStatus_NeedFallback;
+ }
+}
+
+// Returns true if the stream has hit a limit, either the current delimited
+// limit or the overall end-of-stream. As a side effect, this function may flip
+// the pointer to a new buffer if there are less than
+// kUpb_EpsCopyInputStream_SlopBytes of data to be read in the current buffer.
+//
+// Postcondition: if the function returns false, there are at least
+// kUpb_EpsCopyInputStream_SlopBytes of data available to read at *ptr.
+UPB_INLINE bool upb_EpsCopyInputStream_IsDoneWithCallback(
+ upb_EpsCopyInputStream* e, const char** ptr,
+ upb_EpsCopyInputStream_IsDoneFallbackFunc* func) {
+ int overrun;
+ switch (upb_EpsCopyInputStream_IsDoneStatus(e, *ptr, &overrun)) {
+ case kUpb_IsDoneStatus_Done:
+ return true;
+ case kUpb_IsDoneStatus_NotDone:
+ return false;
+ case kUpb_IsDoneStatus_NeedFallback:
+ *ptr = func(e, *ptr, overrun);
+ return *ptr == NULL;
+ }
+ UPB_UNREACHABLE();
+}
+
+const char* _upb_EpsCopyInputStream_IsDoneFallbackNoCallback(
+ upb_EpsCopyInputStream* e, const char* ptr, int overrun);
+
+// A simpler version of IsDoneWithCallback() that does not support a buffer flip
+// callback. Useful in cases where we do not need to insert custom logic at
+// every buffer flip.
+//
+// If this returns true, the user must call upb_EpsCopyInputStream_IsError()
+// to distinguish between EOF and error.
+UPB_INLINE bool upb_EpsCopyInputStream_IsDone(upb_EpsCopyInputStream* e,
+ const char** ptr) {
+ return upb_EpsCopyInputStream_IsDoneWithCallback(
+ e, ptr, _upb_EpsCopyInputStream_IsDoneFallbackNoCallback);
+}
+
+// Returns the total number of bytes that are safe to read from the current
+// buffer without reading uninitialized or unallocated memory.
+//
+// Note that this check does not respect any semantic limits on the stream,
+// either limits from PushLimit() or the overall stream end, so some of these
+// bytes may have unpredictable, nonsense values in them. The guarantee is only
+// that the bytes are valid to read from the perspective of the C language
+// (ie. you can read without triggering UBSAN or ASAN).
+UPB_INLINE size_t upb_EpsCopyInputStream_BytesAvailable(
+ upb_EpsCopyInputStream* e, const char* ptr) {
+ return (e->end - ptr) + kUpb_EpsCopyInputStream_SlopBytes;
+}
+
+// Returns true if the given delimited field size is valid (it does not extend
+// beyond any previously-pushed limits). `ptr` should point to the beginning
+// of the field data, after the delimited size.
+//
+// Note that this does *not* guarantee that all of the data for this field is in
+// the current buffer.
+UPB_INLINE bool upb_EpsCopyInputStream_CheckSize(
+ const upb_EpsCopyInputStream* e, const char* ptr, int size) {
+ UPB_ASSERT(size >= 0);
+ return ptr - e->end + size <= e->limit;
+}
+
+UPB_INLINE bool _upb_EpsCopyInputStream_CheckSizeAvailable(
+ upb_EpsCopyInputStream* e, const char* ptr, int size, bool submessage) {
+ // This is one extra branch compared to the more normal:
+ // return (size_t)(end - ptr) < size;
+ // However it is one less computation if we are just about to use "ptr + len":
+ // https://godbolt.org/z/35YGPz
+ // In microbenchmarks this shows a small improvement.
+ uintptr_t uptr = (uintptr_t)ptr;
+ uintptr_t uend = (uintptr_t)e->limit_ptr;
+ uintptr_t res = uptr + (size_t)size;
+ if (!submessage) uend += kUpb_EpsCopyInputStream_SlopBytes;
+ // NOTE: this check depends on having a linear address space. This is not
+ // technically guaranteed by uintptr_t.
+ bool ret = res >= uptr && res <= uend;
+ if (size < 0) UPB_ASSERT(!ret);
+ return ret;
+}
+
+// Returns true if the given delimited field size is valid (it does not extend
+// beyond any previously-pushed limited) *and* all of the data for this field is
+// available to be read in the current buffer.
+//
+// If the size is negative, this function will always return false. This
+// property can be useful in some cases.
+UPB_INLINE bool upb_EpsCopyInputStream_CheckDataSizeAvailable(
+ upb_EpsCopyInputStream* e, const char* ptr, int size) {
+ return _upb_EpsCopyInputStream_CheckSizeAvailable(e, ptr, size, false);
+}
+
+// Returns true if the given sub-message size is valid (it does not extend
+// beyond any previously-pushed limited) *and* all of the data for this
+// sub-message is available to be parsed in the current buffer.
+//
+// This implies that all fields from the sub-message can be parsed from the
+// current buffer while maintaining the invariant that we always have at least
+// kUpb_EpsCopyInputStream_SlopBytes of data available past the beginning of
+// any individual field start.
+//
+// If the size is negative, this function will always return false. This
+// property can be useful in some cases.
+UPB_INLINE bool upb_EpsCopyInputStream_CheckSubMessageSizeAvailable(
+ upb_EpsCopyInputStream* e, const char* ptr, int size) {
+ return _upb_EpsCopyInputStream_CheckSizeAvailable(e, ptr, size, true);
+}
+
+// Returns true if aliasing_enabled=true was passed to
+// upb_EpsCopyInputStream_Init() when this stream was initialized.
+UPB_INLINE bool upb_EpsCopyInputStream_AliasingEnabled(
+ upb_EpsCopyInputStream* e) {
+ return e->aliasing != kUpb_EpsCopyInputStream_NoAliasing;
+}
+
+// Returns true if aliasing_enabled=true was passed to
+// upb_EpsCopyInputStream_Init() when this stream was initialized *and* we can
+// alias into the region [ptr, size] in an input buffer.
+UPB_INLINE bool upb_EpsCopyInputStream_AliasingAvailable(
+ upb_EpsCopyInputStream* e, const char* ptr, size_t size) {
+ // When EpsCopyInputStream supports streaming, this will need to become a
+ // runtime check.
+ return upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size) &&
+ e->aliasing >= kUpb_EpsCopyInputStream_NoDelta;
+}
+
+// Returns a pointer into an input buffer that corresponds to the parsing
+// pointer `ptr`. The returned pointer may be the same as `ptr`, but also may
+// be different if we are currently parsing out of the patch buffer.
+//
+// REQUIRES: Aliasing must be available for the given pointer. If the input is a
+// flat buffer and aliasing is enabled, then aliasing will always be available.
+UPB_INLINE const char* upb_EpsCopyInputStream_GetAliasedPtr(
+ upb_EpsCopyInputStream* e, const char* ptr) {
+ UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, ptr, 0));
+ uintptr_t delta =
+ e->aliasing == kUpb_EpsCopyInputStream_NoDelta ? 0 : e->aliasing;
+ return (const char*)((uintptr_t)ptr + delta);
+}
+
+// Reads string data from the input, aliasing into the input buffer instead of
+// copying. The parsing pointer is passed in `*ptr`, and will be updated if
+// necessary to point to the actual input buffer. Returns the new parsing
+// pointer, which will be advanced past the string data.
+//
+// REQUIRES: Aliasing must be available for this data region (test with
+// upb_EpsCopyInputStream_AliasingAvailable().
+UPB_INLINE const char* upb_EpsCopyInputStream_ReadStringAliased(
+ upb_EpsCopyInputStream* e, const char** ptr, size_t size) {
+ UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, *ptr, size));
+ const char* ret = *ptr + size;
+ *ptr = upb_EpsCopyInputStream_GetAliasedPtr(e, *ptr);
+ UPB_ASSUME(ret != NULL);
+ return ret;
+}
+
+// Skips `size` bytes of data from the input and returns a pointer past the end.
+// Returns NULL on end of stream or error.
+UPB_INLINE const char* upb_EpsCopyInputStream_Skip(upb_EpsCopyInputStream* e,
+ const char* ptr, int size) {
+ if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size)) return NULL;
+ return ptr + size;
+}
+
+// Copies `size` bytes of data from the input `ptr` into the buffer `to`, and
+// returns a pointer past the end. Returns NULL on end of stream or error.
+UPB_INLINE const char* upb_EpsCopyInputStream_Copy(upb_EpsCopyInputStream* e,
+ const char* ptr, void* to,
+ int size) {
+ if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size)) return NULL;
+ memcpy(to, ptr, size);
+ return ptr + size;
+}
+
+// Reads string data from the stream and advances the pointer accordingly.
+// If aliasing was enabled when the stream was initialized, then the returned
+// pointer will point into the input buffer if possible, otherwise new data
+// will be allocated from arena and copied into. We may be forced to copy even
+// if aliasing was enabled if the input data spans input buffers.
+//
+// Returns NULL if memory allocation failed, or we reached a premature EOF.
+UPB_INLINE const char* upb_EpsCopyInputStream_ReadString(
+ upb_EpsCopyInputStream* e, const char** ptr, size_t size,
+ upb_Arena* arena) {
+ if (upb_EpsCopyInputStream_AliasingAvailable(e, *ptr, size)) {
+ return upb_EpsCopyInputStream_ReadStringAliased(e, ptr, size);
+ } else {
+ // We need to allocate and copy.
+ if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, *ptr, size)) {
+ return NULL;
+ }
+ UPB_ASSERT(arena);
+ char* data = (char*)upb_Arena_Malloc(arena, size);
+ if (!data) return NULL;
+ const char* ret = upb_EpsCopyInputStream_Copy(e, *ptr, data, size);
+ *ptr = data;
+ return ret;
+ }
+}
+
+UPB_INLINE void _upb_EpsCopyInputStream_CheckLimit(upb_EpsCopyInputStream* e) {
+ UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit));
+}
+
+// Pushes a limit onto the stack of limits for the current stream. The limit
+// will extend for `size` bytes beyond the position in `ptr`. Future calls to
+// upb_EpsCopyInputStream_IsDone() will return `true` when the stream position
+// reaches this limit.
+//
+// Returns a delta that the caller must store and supply to PopLimit() below.
+UPB_INLINE int upb_EpsCopyInputStream_PushLimit(upb_EpsCopyInputStream* e,
+ const char* ptr, int size) {
+ int limit = size + (int)(ptr - e->end);
+ int delta = e->limit - limit;
+ _upb_EpsCopyInputStream_CheckLimit(e);
+ UPB_ASSERT(limit <= e->limit);
+ e->limit = limit;
+ e->limit_ptr = e->end + UPB_MIN(0, limit);
+ _upb_EpsCopyInputStream_CheckLimit(e);
+ return delta;
+}
+
+// Pops the last limit that was pushed on this stream. This may only be called
+// once IsDone() returns true. The user must pass the delta that was returned
+// from PushLimit().
+UPB_INLINE void upb_EpsCopyInputStream_PopLimit(upb_EpsCopyInputStream* e,
+ const char* ptr,
+ int saved_delta) {
+ UPB_ASSERT(ptr - e->end == e->limit);
+ _upb_EpsCopyInputStream_CheckLimit(e);
+ e->limit += saved_delta;
+ e->limit_ptr = e->end + UPB_MIN(0, e->limit);
+ _upb_EpsCopyInputStream_CheckLimit(e);
+}
+
+UPB_INLINE const char* _upb_EpsCopyInputStream_IsDoneFallbackInline(
+ upb_EpsCopyInputStream* e, const char* ptr, int overrun,
+ upb_EpsCopyInputStream_BufferFlipCallback* callback) {
+ if (overrun < e->limit) {
+ // Need to copy remaining data into patch buffer.
+ UPB_ASSERT(overrun < kUpb_EpsCopyInputStream_SlopBytes);
+ const char* old_end = ptr;
+ const char* new_start = &e->patch[0] + overrun;
+ memset(e->patch + kUpb_EpsCopyInputStream_SlopBytes, 0,
+ kUpb_EpsCopyInputStream_SlopBytes);
+ memcpy(e->patch, e->end, kUpb_EpsCopyInputStream_SlopBytes);
+ ptr = new_start;
+ e->end = &e->patch[kUpb_EpsCopyInputStream_SlopBytes];
+ e->limit -= kUpb_EpsCopyInputStream_SlopBytes;
+ e->limit_ptr = e->end + e->limit;
+ UPB_ASSERT(ptr < e->limit_ptr);
+ if (e->aliasing != kUpb_EpsCopyInputStream_NoAliasing) {
+ e->aliasing = (uintptr_t)old_end - (uintptr_t)new_start;
+ }
+ return callback(e, old_end, new_start);
+ } else {
+ UPB_ASSERT(overrun > e->limit);
+ e->error = true;
+ return callback(e, NULL, NULL);
+ }
+}
+
+typedef const char* upb_EpsCopyInputStream_ParseDelimitedFunc(
+ upb_EpsCopyInputStream* e, const char* ptr, void* ctx);
+
+// Tries to perform a fast-path handling of the given delimited message data.
+// If the sub-message beginning at `*ptr` and extending for `len` is short and
+// fits within this buffer, calls `func` with `ctx` as a parameter, where the
+// pushing and popping of limits is handled automatically and with lower cost
+// than the normal PushLimit()/PopLimit() sequence.
+static UPB_FORCEINLINE bool upb_EpsCopyInputStream_TryParseDelimitedFast(
+ upb_EpsCopyInputStream* e, const char** ptr, int len,
+ upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) {
+ if (!upb_EpsCopyInputStream_CheckSubMessageSizeAvailable(e, *ptr, len)) {
+ return false;
+ }
+
+ // Fast case: Sub-message is <128 bytes and fits in the current buffer.
+ // This means we can preserve limit/limit_ptr verbatim.
+ const char* saved_limit_ptr = e->limit_ptr;
+ int saved_limit = e->limit;
+ e->limit_ptr = *ptr + len;
+ e->limit = e->limit_ptr - e->end;
+ UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit));
+ *ptr = func(e, *ptr, ctx);
+ e->limit_ptr = saved_limit_ptr;
+ e->limit = saved_limit;
+ UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit));
+ return true;
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif // UPB_WIRE_EPS_COPY_INPUT_STREAM_H_
diff --git a/upb/upb/wire/eps_copy_input_stream_test.cc b/upb/upb/wire/eps_copy_input_stream_test.cc
new file mode 100644
index 0000000..688e076
--- /dev/null
+++ b/upb/upb/wire/eps_copy_input_stream_test.cc
@@ -0,0 +1,364 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/wire/eps_copy_input_stream.h"
+
+#include <string.h>
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "upb/mem/arena.hpp"
+// begin:google_only
+// #include "testing/fuzzing/fuzztest.h"
+// end:google_only
+
+namespace {
+
+TEST(EpsCopyInputStreamTest, ZeroSize) {
+ upb_EpsCopyInputStream stream;
+ const char* ptr = NULL;
+ upb_EpsCopyInputStream_Init(&stream, &ptr, 0, false);
+ EXPECT_TRUE(upb_EpsCopyInputStream_IsDoneWithCallback(&stream, &ptr, NULL));
+}
+
+// begin:google_only
+//
+// // We create a simple, trivial implementation of the stream that we can test
+// // our real implementation against.
+//
+// class FakeStream {
+// public:
+// FakeStream(const std::string& data) : data_(data), offset_(0) {
+// limits_.push_back(data.size());
+// }
+//
+// // If we reached one or more limits correctly, returns the number of limits
+// // ended. If we tried to read beyond the current limit, returns -1.
+// // Otherwise, for simple success, returns 0.
+// int ReadData(int n, std::string* data) {
+// if (n > BytesUntilLimit()) return -1;
+//
+// data->assign(data_.data() + offset_, n);
+// offset_ += n;
+//
+// int end_limit_count = 0;
+//
+// while (BytesUntilLimit() == 0) {
+// if (PopLimit()) {
+// end_limit_count++;
+// } else {
+// eof_ = true;
+// break;
+// }
+// }
+//
+// return end_limit_count;
+// }
+//
+// bool TryPushLimit(int limit) {
+// if (!CheckSize(limit)) return false;
+// limits_.push_back(offset_ + limit);
+// return true;
+// }
+//
+// bool IsEof() const { return eof_; }
+//
+// private:
+// int BytesUntilLimit() const { return limits_.back() - offset_; }
+// bool CheckSize(int size) const { return BytesUntilLimit() >= size; }
+//
+// // Return false on EOF.
+// bool PopLimit() {
+// limits_.pop_back();
+// return !limits_.empty();
+// }
+//
+// std::string data_;
+// // Limits, specified in absolute stream terms.
+// std::vector<int> limits_;
+// int offset_;
+// bool eof_ = false;
+// };
+//
+// char tmp_buf[kUpb_EpsCopyInputStream_SlopBytes];
+//
+// class EpsStream {
+// public:
+// EpsStream(const std::string& data, bool enable_aliasing)
+// : data_(data), enable_aliasing_(enable_aliasing) {
+// ptr_ = data_.data();
+// upb_EpsCopyInputStream_Init(&eps_, &ptr_, data_.size(), enable_aliasing);
+// }
+//
+// // Returns false at EOF or error.
+// int ReadData(int n, std::string* data) {
+// EXPECT_LE(n, kUpb_EpsCopyInputStream_SlopBytes);
+// if (enable_aliasing_) {
+// EXPECT_TRUE(upb_EpsCopyInputStream_AliasingAvailable(&eps_, ptr_, n));
+// }
+// // We want to verify that we can read kUpb_EpsCopyInputStream_SlopBytes
+// // safely, even if we haven't actually been requested to read that much.
+// // We copy to a global buffer so the copy can't be optimized away.
+// memcpy(&tmp_buf, ptr_, kUpb_EpsCopyInputStream_SlopBytes);
+// data->assign(tmp_buf, n);
+// ptr_ += n;
+// if (enable_aliasing_) {
+// EXPECT_TRUE(upb_EpsCopyInputStream_AliasingAvailable(&eps_, ptr_, 0));
+// }
+// return PopLimits();
+// }
+//
+// int ReadString(int n, std::string* data) {
+// if (!upb_EpsCopyInputStream_CheckSize(&eps_, ptr_, n)) return -1;
+// const char* str_data = ptr_;
+// if (enable_aliasing_) {
+// EXPECT_TRUE(upb_EpsCopyInputStream_AliasingAvailable(&eps_, ptr_, n));
+// }
+// ptr_ = upb_EpsCopyInputStream_ReadString(&eps_, &str_data, n, arena_.ptr());
+// if (!ptr_) return -1;
+// if (enable_aliasing_ && n) {
+// EXPECT_GE(reinterpret_cast<uintptr_t>(str_data),
+// reinterpret_cast<uintptr_t>(data_.data()));
+// EXPECT_LT(reinterpret_cast<uintptr_t>(str_data),
+// reinterpret_cast<uintptr_t>(data_.data() + data_.size()));
+// EXPECT_TRUE(upb_EpsCopyInputStream_AliasingAvailable(&eps_, ptr_, 0));
+// }
+// data->assign(str_data, n);
+// return PopLimits();
+// }
+//
+// bool TryPushLimit(int limit) {
+// if (!upb_EpsCopyInputStream_CheckSize(&eps_, ptr_, limit)) return false;
+// deltas_.push_back(upb_EpsCopyInputStream_PushLimit(&eps_, ptr_, limit));
+// return true;
+// }
+//
+// bool IsEof() const { return eof_; }
+//
+// private:
+// int PopLimits() {
+// int end_limit_count = 0;
+//
+// while (IsAtLimit()) {
+// if (error_) return -1;
+// if (PopLimit()) {
+// end_limit_count++;
+// } else {
+// eof_ = true; // EOF.
+// break;
+// }
+// }
+//
+// return error_ ? -1 : end_limit_count;
+// }
+//
+// bool IsAtLimit() {
+// return upb_EpsCopyInputStream_IsDoneWithCallback(
+// &eps_, &ptr_, &EpsStream::IsDoneFallback);
+// }
+//
+// // Return false on EOF.
+// bool PopLimit() {
+// if (deltas_.empty()) return false;
+// upb_EpsCopyInputStream_PopLimit(&eps_, ptr_, deltas_.back());
+// deltas_.pop_back();
+// return true;
+// }
+//
+// static const char* IsDoneFallback(upb_EpsCopyInputStream* e, const char* ptr,
+// int overrun) {
+// return _upb_EpsCopyInputStream_IsDoneFallbackInline(
+// e, ptr, overrun, &EpsStream::BufferFlipCallback);
+// }
+//
+// static const char* BufferFlipCallback(upb_EpsCopyInputStream* e,
+// const char* old_end,
+// const char* new_start) {
+// EpsStream* stream = reinterpret_cast<EpsStream*>(e);
+// if (!old_end) stream->error_ = true;
+// return new_start;
+// }
+//
+// upb_EpsCopyInputStream eps_;
+// std::string data_;
+// const char* ptr_;
+// std::vector<int> deltas_;
+// upb::Arena arena_;
+// bool error_ = false;
+// bool eof_ = false;
+// bool enable_aliasing_;
+// };
+//
+// // Reads N bytes from the given position.
+// struct ReadOp {
+// int bytes; // Must be <= kUpb_EpsCopyInputStream_SlopBytes.
+// };
+//
+// struct ReadStringOp {
+// int bytes;
+// };
+//
+// // Pushes a new limit of N bytes from the current position.
+// struct PushLimitOp {
+// int bytes;
+// };
+//
+// typedef std::variant<ReadOp, ReadStringOp, PushLimitOp> Op;
+//
+// struct EpsCopyTestScript {
+// int data_size;
+// bool enable_aliasing;
+// std::vector<Op> ops;
+// };
+//
+// auto ArbitraryEpsCopyTestScript() {
+// using ::fuzztest::Arbitrary;
+// using ::fuzztest::InRange;
+// using ::fuzztest::NonNegative;
+// using ::fuzztest::StructOf;
+// using ::fuzztest::VariantOf;
+// using ::fuzztest::VectorOf;
+//
+// int max_data_size = 512;
+//
+// return StructOf<EpsCopyTestScript>(
+// InRange(0, max_data_size), // data_size
+// Arbitrary<bool>(), // enable_aliasing
+// VectorOf(VariantOf(
+// // ReadOp
+// StructOf<ReadOp>(InRange(0, kUpb_EpsCopyInputStream_SlopBytes)),
+// // ReadStringOp
+// StructOf<ReadStringOp>(NonNegative<int>()),
+// // PushLimitOp
+// StructOf<PushLimitOp>(NonNegative<int>()))));
+// }
+//
+// // Run a test that creates both real stream and a fake stream, and validates
+// // that they have the same behavior.
+// void TestAgainstFakeStream(const EpsCopyTestScript& script) {
+// std::string data(script.data_size, 'x');
+// for (int i = 0; i < script.data_size; ++i) {
+// data[i] = static_cast<char>(i & 0xff);
+// }
+//
+// FakeStream fake_stream(data);
+// EpsStream eps_stream(data, script.enable_aliasing);
+//
+// for (const auto& op : script.ops) {
+// if (const ReadOp* read_op = std::get_if<ReadOp>(&op)) {
+// std::string data_fake;
+// std::string data_eps;
+// int fake_result = fake_stream.ReadData(read_op->bytes, &data_fake);
+// int eps_result = eps_stream.ReadData(read_op->bytes, &data_eps);
+// EXPECT_EQ(fake_result, eps_result);
+// if (fake_result == -1) break; // Error
+// EXPECT_EQ(data_fake, data_eps);
+// EXPECT_EQ(fake_stream.IsEof(), eps_stream.IsEof());
+// if (fake_stream.IsEof()) break;
+// } else if (const ReadStringOp* read_op = std::get_if<ReadStringOp>(&op)) {
+// std::string data_fake;
+// std::string data_eps;
+// int fake_result = fake_stream.ReadData(read_op->bytes, &data_fake);
+// int eps_result = eps_stream.ReadString(read_op->bytes, &data_eps);
+// EXPECT_EQ(fake_result, eps_result);
+// if (fake_result == -1) break; // Error
+// EXPECT_EQ(data_fake, data_eps);
+// EXPECT_EQ(fake_stream.IsEof(), eps_stream.IsEof());
+// if (fake_stream.IsEof()) break;
+// } else if (const PushLimitOp* push = std::get_if<PushLimitOp>(&op)) {
+// EXPECT_EQ(fake_stream.TryPushLimit(push->bytes),
+// eps_stream.TryPushLimit(push->bytes));
+// } else {
+// EXPECT_TRUE(false); // Unknown op.
+// }
+// }
+// }
+//
+// // Test with:
+// // $ blaze run --config=fuzztest third_party/upb:eps_copy_input_stream_test \
+// // -- --gunit_fuzz=
+// FUZZ_TEST(EpsCopyFuzzTest, TestAgainstFakeStream)
+// .WithDomains(ArbitraryEpsCopyTestScript());
+//
+// TEST(EpsCopyFuzzTest, TestAgainstFakeStreamRegression) {
+// TestAgainstFakeStream({299,
+// false,
+// {
+// PushLimitOp{2},
+// ReadOp{14},
+// }});
+// }
+//
+// TEST(EpsCopyFuzzTest, AliasingEnabledZeroSizeReadString) {
+// TestAgainstFakeStream({510, true, {ReadStringOp{0}}});
+// }
+//
+// TEST(EpsCopyFuzzTest, AliasingDisabledZeroSizeReadString) {
+// TestAgainstFakeStream({510, false, {ReadStringOp{0}}});
+// }
+//
+// TEST(EpsCopyFuzzTest, ReadStringZero) {
+// TestAgainstFakeStream({0, true, {ReadStringOp{0}}});
+// }
+//
+// TEST(EpsCopyFuzzTest, ReadZero) {
+// TestAgainstFakeStream({0, true, {ReadOp{0}}});
+// }
+//
+// TEST(EpsCopyFuzzTest, ReadZeroTwice) {
+// TestAgainstFakeStream({0, true, {ReadOp{0}, ReadOp{0}}});
+// }
+//
+// TEST(EpsCopyFuzzTest, ReadStringZeroThenRead) {
+// TestAgainstFakeStream({0, true, {ReadStringOp{0}, ReadOp{0}}});
+// }
+//
+// TEST(EpsCopyFuzzTest, ReadStringOverflowsBufferButNotLimit) {
+// TestAgainstFakeStream({351,
+// false,
+// {
+// ReadOp{7},
+// PushLimitOp{2147483647},
+// ReadStringOp{344},
+// }});
+// }
+//
+// TEST(EpsCopyFuzzTest, LastBufferAliasing) {
+// TestAgainstFakeStream({27, true, {ReadOp{12}, ReadStringOp{3}}});
+// }
+//
+// TEST(EpsCopyFuzzTest, FirstBufferAliasing) {
+// TestAgainstFakeStream({7, true, {ReadStringOp{3}}});
+// }
+//
+// end:google_only
+
+} // namespace
diff --git a/upb/upb/wire/internal/common.h b/upb/upb/wire/internal/common.h
new file mode 100644
index 0000000..bb9cf42
--- /dev/null
+++ b/upb/upb/wire/internal/common.h
@@ -0,0 +1,53 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_WIRE_INTERNAL_COMMON_H_
+#define UPB_WIRE_INTERNAL_COMMON_H_
+
+// Must be last.
+#include "upb/port/def.inc"
+
+// MessageSet wire format is:
+// message MessageSet {
+// repeated group Item = 1 {
+// required int32 type_id = 2;
+// required bytes message = 3;
+// }
+// }
+
+enum {
+ kUpb_MsgSet_Item = 1,
+ kUpb_MsgSet_TypeId = 2,
+ kUpb_MsgSet_Message = 3,
+};
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_WIRE_INTERNAL_COMMON_H_ */
diff --git a/upb/upb/wire/internal/decode.h b/upb/upb/wire/internal/decode.h
new file mode 100644
index 0000000..aa3d1b3
--- /dev/null
+++ b/upb/upb/wire/internal/decode.h
@@ -0,0 +1,166 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+/*
+ * Internal implementation details of the decoder that are shared between
+ * decode.c and decode_fast.c.
+ */
+
+#ifndef UPB_WIRE_INTERNAL_DECODE_H_
+#define UPB_WIRE_INTERNAL_DECODE_H_
+
+#include "upb/mem/internal/arena.h"
+#include "upb/message/internal/message.h"
+#include "upb/wire/decode.h"
+#include "upb/wire/eps_copy_input_stream.h"
+#include "utf8_range.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#define DECODE_NOGROUP (uint32_t) - 1
+
+typedef struct upb_Decoder {
+ upb_EpsCopyInputStream input;
+ const upb_ExtensionRegistry* extreg;
+ const char* unknown; // Start of unknown data, preserve at buffer flip
+ upb_Message* unknown_msg; // Pointer to preserve data to
+ int depth; // Tracks recursion depth to bound stack usage.
+ uint32_t end_group; // field number of END_GROUP tag, else DECODE_NOGROUP.
+ uint16_t options;
+ bool missing_required;
+ upb_Arena arena;
+ upb_DecodeStatus status;
+ jmp_buf err;
+
+#ifndef NDEBUG
+ const char* debug_tagstart;
+ const char* debug_valstart;
+#endif
+} upb_Decoder;
+
+/* Error function that will abort decoding with longjmp(). We can't declare this
+ * UPB_NORETURN, even though it is appropriate, because if we do then compilers
+ * will "helpfully" refuse to tailcall to it
+ * (see: https://stackoverflow.com/a/55657013), which will defeat a major goal
+ * of our optimizations. That is also why we must declare it in a separate file,
+ * otherwise the compiler will see that it calls longjmp() and deduce that it is
+ * noreturn. */
+const char* _upb_FastDecoder_ErrorJmp(upb_Decoder* d, int status);
+
+extern const uint8_t upb_utf8_offsets[];
+
+UPB_INLINE
+bool _upb_Decoder_VerifyUtf8Inline(const char* ptr, int len) {
+ const char* end = ptr + len;
+
+ // Check 8 bytes at a time for any non-ASCII char.
+ while (end - ptr >= 8) {
+ uint64_t data;
+ memcpy(&data, ptr, 8);
+ if (data & 0x8080808080808080) goto non_ascii;
+ ptr += 8;
+ }
+
+ // Check one byte at a time for non-ASCII.
+ while (ptr < end) {
+ if (*ptr & 0x80) goto non_ascii;
+ ptr++;
+ }
+
+ return true;
+
+non_ascii:
+ return utf8_range2((const unsigned char*)ptr, end - ptr) == 0;
+}
+
+const char* _upb_Decoder_CheckRequired(upb_Decoder* d, const char* ptr,
+ const upb_Message* msg,
+ const upb_MiniTable* l);
+
+/* x86-64 pointers always have the high 16 bits matching. So we can shift
+ * left 8 and right 8 without loss of information. */
+UPB_INLINE intptr_t decode_totable(const upb_MiniTable* tablep) {
+ return ((intptr_t)tablep << 8) | tablep->table_mask;
+}
+
+UPB_INLINE const upb_MiniTable* decode_totablep(intptr_t table) {
+ return (const upb_MiniTable*)(table >> 8);
+}
+
+const char* _upb_Decoder_IsDoneFallback(upb_EpsCopyInputStream* e,
+ const char* ptr, int overrun);
+
+UPB_INLINE bool _upb_Decoder_IsDone(upb_Decoder* d, const char** ptr) {
+ return upb_EpsCopyInputStream_IsDoneWithCallback(
+ &d->input, ptr, &_upb_Decoder_IsDoneFallback);
+}
+
+UPB_INLINE const char* _upb_Decoder_BufferFlipCallback(
+ upb_EpsCopyInputStream* e, const char* old_end, const char* new_start) {
+ upb_Decoder* d = (upb_Decoder*)e;
+ if (!old_end) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
+
+ if (d->unknown) {
+ if (!_upb_Message_AddUnknown(d->unknown_msg, d->unknown,
+ old_end - d->unknown, &d->arena)) {
+ _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
+ }
+ d->unknown = new_start;
+ }
+ return new_start;
+}
+
+#if UPB_FASTTABLE
+UPB_INLINE
+const char* _upb_FastDecoder_TagDispatch(upb_Decoder* d, const char* ptr,
+ upb_Message* msg, intptr_t table,
+ uint64_t hasbits, uint64_t tag) {
+ const upb_MiniTable* table_p = decode_totablep(table);
+ uint8_t mask = table;
+ uint64_t data;
+ size_t idx = tag & mask;
+ UPB_ASSUME((idx & 7) == 0);
+ idx >>= 3;
+ data = table_p->fasttable[idx].field_data ^ tag;
+ UPB_MUSTTAIL return table_p->fasttable[idx].field_parser(d, ptr, msg, table,
+ hasbits, data);
+}
+#endif
+
+UPB_INLINE uint32_t _upb_FastDecoder_LoadTag(const char* ptr) {
+ uint16_t tag;
+ memcpy(&tag, ptr, 2);
+ return tag;
+}
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_WIRE_INTERNAL_DECODE_H_ */
diff --git a/upb/upb/wire/internal/swap.h b/upb/upb/wire/internal/swap.h
new file mode 100644
index 0000000..9d9e780
--- /dev/null
+++ b/upb/upb/wire/internal/swap.h
@@ -0,0 +1,68 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_WIRE_INTERNAL_SWAP_H_
+#define UPB_WIRE_INTERNAL_SWAP_H_
+
+#include <stdint.h>
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+UPB_INLINE bool _upb_IsLittleEndian(void) {
+ int x = 1;
+ return *(char*)&x == 1;
+}
+
+UPB_INLINE uint32_t _upb_BigEndian_Swap32(uint32_t val) {
+ if (_upb_IsLittleEndian()) return val;
+
+ return ((val & 0xff) << 24) | ((val & 0xff00) << 8) |
+ ((val & 0xff0000) >> 8) | ((val & 0xff000000) >> 24);
+}
+
+UPB_INLINE uint64_t _upb_BigEndian_Swap64(uint64_t val) {
+ if (_upb_IsLittleEndian()) return val;
+
+ return ((uint64_t)_upb_BigEndian_Swap32((uint32_t)val) << 32) |
+ _upb_BigEndian_Swap32((uint32_t)(val >> 32));
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPB_WIRE_INTERNAL_SWAP_H_ */
diff --git a/upb/upb/wire/reader.c b/upb/upb/wire/reader.c
new file mode 100644
index 0000000..92a7b5b
--- /dev/null
+++ b/upb/upb/wire/reader.c
@@ -0,0 +1,70 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upb/wire/reader.h"
+
+#include "upb/wire/eps_copy_input_stream.h"
+#include "upb/wire/types.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+UPB_NOINLINE _upb_WireReader_ReadLongVarintRet
+_upb_WireReader_ReadLongVarint(const char* ptr, uint64_t val) {
+ _upb_WireReader_ReadLongVarintRet ret = {NULL, 0};
+ uint64_t byte;
+ int i;
+ for (i = 1; i < 10; i++) {
+ byte = (uint8_t)ptr[i];
+ val += (byte - 1) << (i * 7);
+ if (!(byte & 0x80)) {
+ ret.ptr = ptr + i + 1;
+ ret.val = val;
+ return ret;
+ }
+ }
+ return ret;
+}
+
+const char* _upb_WireReader_SkipGroup(const char* ptr, uint32_t tag,
+ int depth_limit,
+ upb_EpsCopyInputStream* stream) {
+ if (--depth_limit == 0) return NULL;
+ uint32_t end_group_tag = (tag & ~7ULL) | kUpb_WireType_EndGroup;
+ while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) {
+ uint32_t tag;
+ ptr = upb_WireReader_ReadTag(ptr, &tag);
+ if (!ptr) return NULL;
+ if (tag == end_group_tag) return ptr;
+ ptr = _upb_WireReader_SkipValue(ptr, tag, depth_limit, stream);
+ if (!ptr) return NULL;
+ }
+ return ptr;
+}
diff --git a/upb/upb/wire/reader.h b/upb/upb/wire/reader.h
new file mode 100644
index 0000000..e5663db
--- /dev/null
+++ b/upb/upb/wire/reader.h
@@ -0,0 +1,230 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_WIRE_READER_H_
+#define UPB_WIRE_READER_H_
+
+#include "upb/wire/eps_copy_input_stream.h"
+#include "upb/wire/internal/swap.h"
+#include "upb/wire/types.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// The upb_WireReader interface is suitable for general-purpose parsing of
+// protobuf binary wire format. It is designed to be used along with
+// upb_EpsCopyInputStream for buffering, and all parsing routines in this file
+// assume that at least kUpb_EpsCopyInputStream_SlopBytes worth of data is
+// available to read without any bounds checks.
+
+#define kUpb_WireReader_WireTypeMask 7
+#define kUpb_WireReader_WireTypeBits 3
+
+typedef struct {
+ const char* ptr;
+ uint64_t val;
+} _upb_WireReader_ReadLongVarintRet;
+
+_upb_WireReader_ReadLongVarintRet _upb_WireReader_ReadLongVarint(
+ const char* ptr, uint64_t val);
+
+static UPB_FORCEINLINE const char* _upb_WireReader_ReadVarint(const char* ptr,
+ uint64_t* val,
+ int maxlen,
+ uint64_t maxval) {
+ uint64_t byte = (uint8_t)*ptr;
+ if (UPB_LIKELY((byte & 0x80) == 0)) {
+ *val = (uint32_t)byte;
+ return ptr + 1;
+ }
+ const char* start = ptr;
+ _upb_WireReader_ReadLongVarintRet res =
+ _upb_WireReader_ReadLongVarint(ptr, byte);
+ if (!res.ptr || (maxlen < 10 && res.ptr - start > maxlen) ||
+ res.val > maxval) {
+ return NULL; // Malformed.
+ }
+ *val = res.val;
+ return res.ptr;
+}
+
+// Parses a tag into `tag`, and returns a pointer past the end of the tag, or
+// NULL if there was an error in the tag data.
+//
+// REQUIRES: there must be at least 10 bytes of data available at `ptr`.
+// Bounds checks must be performed before calling this function, preferably
+// by calling upb_EpsCopyInputStream_IsDone().
+static UPB_FORCEINLINE const char* upb_WireReader_ReadTag(const char* ptr,
+ uint32_t* tag) {
+ uint64_t val;
+ ptr = _upb_WireReader_ReadVarint(ptr, &val, 5, UINT32_MAX);
+ if (!ptr) return NULL;
+ *tag = val;
+ return ptr;
+}
+
+// Given a tag, returns the field number.
+UPB_INLINE uint32_t upb_WireReader_GetFieldNumber(uint32_t tag) {
+ return tag >> kUpb_WireReader_WireTypeBits;
+}
+
+// Given a tag, returns the wire type.
+UPB_INLINE uint8_t upb_WireReader_GetWireType(uint32_t tag) {
+ return tag & kUpb_WireReader_WireTypeMask;
+}
+
+UPB_INLINE const char* upb_WireReader_ReadVarint(const char* ptr,
+ uint64_t* val) {
+ return _upb_WireReader_ReadVarint(ptr, val, 10, UINT64_MAX);
+}
+
+// Skips data for a varint, returning a pointer past the end of the varint, or
+// NULL if there was an error in the varint data.
+//
+// REQUIRES: there must be at least 10 bytes of data available at `ptr`.
+// Bounds checks must be performed before calling this function, preferably
+// by calling upb_EpsCopyInputStream_IsDone().
+UPB_INLINE const char* upb_WireReader_SkipVarint(const char* ptr) {
+ uint64_t val;
+ return upb_WireReader_ReadVarint(ptr, &val);
+}
+
+// Reads a varint indicating the size of a delimited field into `size`, or
+// NULL if there was an error in the varint data.
+//
+// REQUIRES: there must be at least 10 bytes of data available at `ptr`.
+// Bounds checks must be performed before calling this function, preferably
+// by calling upb_EpsCopyInputStream_IsDone().
+UPB_INLINE const char* upb_WireReader_ReadSize(const char* ptr, int* size) {
+ uint64_t size64;
+ ptr = upb_WireReader_ReadVarint(ptr, &size64);
+ if (!ptr || size64 >= INT32_MAX) return NULL;
+ *size = size64;
+ return ptr;
+}
+
+// Reads a fixed32 field, performing byte swapping if necessary.
+//
+// REQUIRES: there must be at least 4 bytes of data available at `ptr`.
+// Bounds checks must be performed before calling this function, preferably
+// by calling upb_EpsCopyInputStream_IsDone().
+UPB_INLINE const char* upb_WireReader_ReadFixed32(const char* ptr, void* val) {
+ uint32_t uval;
+ memcpy(&uval, ptr, 4);
+ uval = _upb_BigEndian_Swap32(uval);
+ memcpy(val, &uval, 4);
+ return ptr + 4;
+}
+
+// Reads a fixed64 field, performing byte swapping if necessary.
+//
+// REQUIRES: there must be at least 4 bytes of data available at `ptr`.
+// Bounds checks must be performed before calling this function, preferably
+// by calling upb_EpsCopyInputStream_IsDone().
+UPB_INLINE const char* upb_WireReader_ReadFixed64(const char* ptr, void* val) {
+ uint64_t uval;
+ memcpy(&uval, ptr, 8);
+ uval = _upb_BigEndian_Swap64(uval);
+ memcpy(val, &uval, 8);
+ return ptr + 8;
+}
+
+const char* _upb_WireReader_SkipGroup(const char* ptr, uint32_t tag,
+ int depth_limit,
+ upb_EpsCopyInputStream* stream);
+
+// Skips data for a group, returning a pointer past the end of the group, or
+// NULL if there was an error parsing the group. The `tag` argument should be
+// the start group tag that begins the group. The `depth_limit` argument
+// indicates how many levels of recursion the group is allowed to have before
+// reporting a parse error (this limit exists to protect against stack
+// overflow).
+//
+// TODO: evaluate how the depth_limit should be specified. Do users need
+// control over this?
+UPB_INLINE const char* upb_WireReader_SkipGroup(
+ const char* ptr, uint32_t tag, upb_EpsCopyInputStream* stream) {
+ return _upb_WireReader_SkipGroup(ptr, tag, 100, stream);
+}
+
+UPB_INLINE const char* _upb_WireReader_SkipValue(
+ const char* ptr, uint32_t tag, int depth_limit,
+ upb_EpsCopyInputStream* stream) {
+ switch (upb_WireReader_GetWireType(tag)) {
+ case kUpb_WireType_Varint:
+ return upb_WireReader_SkipVarint(ptr);
+ case kUpb_WireType_32Bit:
+ return ptr + 4;
+ case kUpb_WireType_64Bit:
+ return ptr + 8;
+ case kUpb_WireType_Delimited: {
+ int size;
+ ptr = upb_WireReader_ReadSize(ptr, &size);
+ if (!ptr) return NULL;
+ ptr += size;
+ return ptr;
+ }
+ case kUpb_WireType_StartGroup:
+ return _upb_WireReader_SkipGroup(ptr, tag, depth_limit, stream);
+ case kUpb_WireType_EndGroup:
+ return NULL; // Should be handled before now.
+ default:
+ return NULL; // Unknown wire type.
+ }
+}
+
+// Skips data for a wire value of any type, returning a pointer past the end of
+// the data, or NULL if there was an error parsing the group. The `tag` argument
+// should be the tag that was just parsed. The `depth_limit` argument indicates
+// how many levels of recursion a group is allowed to have before reporting a
+// parse error (this limit exists to protect against stack overflow).
+//
+// REQUIRES: there must be at least 10 bytes of data available at `ptr`.
+// Bounds checks must be performed before calling this function, preferably
+// by calling upb_EpsCopyInputStream_IsDone().
+//
+// TODO: evaluate how the depth_limit should be specified. Do users need
+// control over this?
+UPB_INLINE const char* upb_WireReader_SkipValue(
+ const char* ptr, uint32_t tag, upb_EpsCopyInputStream* stream) {
+ return _upb_WireReader_SkipValue(ptr, tag, 100, stream);
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif // UPB_WIRE_READER_H_
diff --git a/upb/upb/wire/types.h b/upb/upb/wire/types.h
new file mode 100644
index 0000000..b66bcfe
--- /dev/null
+++ b/upb/upb/wire/types.h
@@ -0,0 +1,46 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_WIRE_TYPES_H_
+#define UPB_WIRE_TYPES_H_
+
+#define kUpb_WireFormat_DefaultDepthLimit 100
+
+// A list of types as they are encoded on the wire.
+typedef enum {
+ kUpb_WireType_Varint = 0,
+ kUpb_WireType_64Bit = 1,
+ kUpb_WireType_Delimited = 2,
+ kUpb_WireType_StartGroup = 3,
+ kUpb_WireType_EndGroup = 4,
+ kUpb_WireType_32Bit = 5
+} upb_WireType;
+
+#endif /* UPB_WIRE_TYPES_H_ */
diff --git a/upb/upbc/BUILD b/upb/upbc/BUILD
new file mode 100644
index 0000000..b3a4d00
--- /dev/null
+++ b/upb/upbc/BUILD
@@ -0,0 +1,313 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Google LLC nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+load(
+ "//bazel:build_defs.bzl",
+ "UPB_DEFAULT_COPTS",
+ "UPB_DEFAULT_CPPOPTS",
+)
+load(
+ "//bazel:upb_proto_library.bzl",
+ "upb_proto_library",
+ "upb_proto_reflection_library",
+)
+load(
+ "//upbc:bootstrap_compiler.bzl",
+ "bootstrap_cc_binary",
+ "bootstrap_cc_library",
+ "bootstrap_upb_proto_library",
+)
+
+# begin:google_only
+# package(default_applicable_licenses = ["//:license"])
+# end:google_only
+
+licenses(["notice"])
+
+proto_library(
+ name = "code_generator_request",
+ srcs = ["code_generator_request.proto"],
+ visibility = ["//:friends"],
+ deps = ["@com_google_protobuf//:compiler_plugin_proto"],
+)
+
+upb_proto_library(
+ name = "code_generator_request_upb_proto",
+ visibility = ["//:friends"],
+ deps = [":code_generator_request"],
+)
+
+upb_proto_reflection_library(
+ name = "code_generator_request_upb_proto_reflection",
+ visibility = ["//:friends"],
+ deps = [":code_generator_request"],
+)
+
+bootstrap_upb_proto_library(
+ name = "plugin_upb_proto",
+ base_dir = "",
+ # TODO(b/289127200): Export 'net/proto2/proto/descriptor.upb.h' and remove "-layering_check".
+ features = ["-layering_check"],
+ google3_src_files = [
+ "net/proto2/compiler/proto/profile.proto",
+ "third_party/protobuf/compiler/plugin.proto",
+ ],
+ google3_src_rules = [
+ "//net/proto2/proto:descriptor_proto_source",
+ "//net/proto2/compiler/proto:profile.proto",
+ "//third_party/protobuf/compiler:plugin_proto_source",
+ ],
+ oss_src_files = ["google/protobuf/compiler/plugin.proto"],
+ oss_src_rules = [
+ "@com_google_protobuf//:descriptor_proto_srcs",
+ "@com_google_protobuf//src/google/protobuf/compiler:plugin_proto_src",
+ ],
+ oss_strip_prefix = "third_party/protobuf/github/bootstrap/src",
+ proto_lib_deps = ["@com_google_protobuf//:compiler_plugin_proto"],
+ visibility = ["//:friends"],
+ deps = ["//:descriptor_upb_proto"],
+)
+
+upb_proto_reflection_library(
+ name = "plugin_upb_proto_reflection",
+ visibility = ["//:friends"],
+ deps = ["@com_google_protobuf//:compiler_plugin_proto"],
+)
+
+bootstrap_cc_library(
+ name = "common",
+ srcs = [
+ "common.cc",
+ ],
+ hdrs = [
+ "common.h",
+ ],
+ bootstrap_deps = [
+ "//:reflection",
+ ],
+ copts = UPB_DEFAULT_CPPOPTS,
+ visibility = ["//protos_generator:__pkg__"],
+ deps = [
+ "@com_google_absl//absl/strings",
+ ],
+)
+
+bootstrap_cc_library(
+ name = "file_layout",
+ srcs = [
+ "file_layout.cc",
+ ],
+ hdrs = [
+ "file_layout.h",
+ ],
+ bootstrap_deps = [
+ ":common",
+ "//:reflection",
+ "//:descriptor_upb_proto",
+ ],
+ copts = UPB_DEFAULT_CPPOPTS,
+ visibility = ["//protos_generator:__pkg__"],
+ deps = [
+ "//:base",
+ "//:mini_descriptor",
+ "//:mini_table",
+ "//:mini_table_internal",
+ "//:port",
+ "@com_google_absl//absl/container:flat_hash_map",
+ "@com_google_absl//absl/strings",
+ ],
+)
+
+cc_library(
+ name = "keywords",
+ srcs = [
+ "keywords.cc",
+ ],
+ hdrs = [
+ "keywords.h",
+ ],
+ copts = UPB_DEFAULT_CPPOPTS,
+ visibility = ["//protos_generator:__pkg__"],
+)
+
+bootstrap_cc_library(
+ name = "plugin",
+ hdrs = [
+ "plugin.h",
+ ],
+ bootstrap_deps = [
+ ":plugin_upb_proto",
+ "//:descriptor_upb_proto",
+ "//:reflection",
+ ],
+ copts = UPB_DEFAULT_CPPOPTS,
+ visibility = ["//protos_generator:__pkg__"],
+ deps = [
+ "//:port",
+ "@com_google_absl//absl/container:flat_hash_set",
+ "@com_google_absl//absl/log:absl_check",
+ "@com_google_absl//absl/log:absl_log",
+ "@com_google_absl//absl/strings",
+ ],
+)
+
+bootstrap_cc_library(
+ name = "names",
+ srcs = [
+ "names.cc",
+ ],
+ hdrs = [
+ "names.h",
+ ],
+ bootstrap_deps = [
+ "//:reflection",
+ ],
+ copts = UPB_DEFAULT_CPPOPTS,
+ visibility = ["//protos_generator:__pkg__"],
+ deps = [
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/container:flat_hash_map",
+ "@com_google_absl//absl/strings",
+ "@com_google_protobuf//:protobuf",
+ "@com_google_protobuf//src/google/protobuf/compiler:code_generator",
+ ],
+)
+
+cc_binary(
+ name = "libupbc.so",
+ srcs = ["upbc_so.c"],
+ copts = UPB_DEFAULT_COPTS + ["-DUPB_BUILD_API"],
+ linkshared = 1,
+ linkstatic = 1,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":upbdev",
+ "//:port",
+ ],
+)
+
+cc_library(
+ name = "upbdev",
+ srcs = [
+ "code_generator_request.c",
+ "code_generator_request.h",
+ "get_used_fields.c",
+ "upbdev.c",
+ ],
+ hdrs = [
+ "get_used_fields.h",
+ "upbdev.h",
+ ],
+ copts = UPB_DEFAULT_COPTS,
+ visibility = ["//visibility:private"],
+ deps = [
+ ":code_generator_request_upb_proto",
+ ":code_generator_request_upb_proto_reflection",
+ ":plugin_upb_proto",
+ ":plugin_upb_proto_reflection",
+ "//:base",
+ "//:descriptor_upb_proto",
+ "//:json",
+ "//:mem",
+ "//:mini_descriptor",
+ "//:mini_table",
+ "//:port",
+ "//:reflection",
+ "//:reflection_internal",
+ "//:wire",
+ ],
+)
+
+bootstrap_cc_binary(
+ name = "protoc-gen-upb",
+ srcs = ["protoc-gen-upb.cc"],
+ bootstrap_deps = [
+ ":common",
+ ":file_layout",
+ ":names",
+ ":plugin",
+ ":plugin_upb_proto",
+ "//:descriptor_upb_proto",
+ "//:reflection",
+ ],
+ copts = UPB_DEFAULT_CPPOPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ "//:base",
+ "//:mem",
+ "//:mini_table_internal",
+ "//:port",
+ "//:wire_types",
+ "@com_google_absl//absl/container:flat_hash_map",
+ "@com_google_absl//absl/container:flat_hash_set",
+ "@com_google_absl//absl/log:absl_check",
+ "@com_google_absl//absl/log:absl_log",
+ "@com_google_absl//absl/strings",
+ ],
+)
+
+cc_binary(
+ name = "protoc-gen-upbdefs",
+ srcs = [
+ "protoc-gen-upbdefs.cc",
+ ],
+ copts = UPB_DEFAULT_CPPOPTS,
+ # To work around the following link error from ABSL:
+ # /usr/bin/x86_64-linux-gnu-ld: bazel-out/k8-opt-exec-2B5CBBC6-ST-c1776f9924ec/bin/external/com_google_absl/absl/time/libtime.a(duration.o): undefined reference to symbol 'floor@@GLIBC_2.2.5'
+ # /usr/bin/x86_64-linux-gnu-ld: /opt/manylinux/2014/x86_64/lib64/libm.so.6: error adding symbols: DSO missing from command line
+ # clang-14: error: linker command failed with exit code 1 (use -v to see invocation)
+ linkopts = ["-lm"],
+ visibility = ["//visibility:public"],
+ deps = [
+ ":common",
+ ":file_layout",
+ ":plugin",
+ "//:descriptor_upb_proto",
+ "//:reflection",
+ "//upb/util:def_to_proto",
+ ],
+)
+
+cc_binary(
+ name = "protoc-gen-upbdev",
+ srcs = [
+ "protoc-gen-upbdev.cc",
+ "subprocess.cc",
+ "subprocess.h",
+ ],
+ copts = UPB_DEFAULT_CPPOPTS,
+ target_compatible_with = select({
+ "@platforms//os:windows": ["@platforms//:incompatible"],
+ "//conditions:default": [],
+ }),
+ visibility = ["//visibility:public"],
+ deps = [
+ ":plugin_upb_proto",
+ ":upbdev",
+ "//:port",
+ "@com_google_absl//absl/log:absl_log",
+ "@com_google_absl//absl/strings",
+ ],
+)
diff --git a/upb/upbc/bootstrap_compiler.bzl b/upb/upbc/bootstrap_compiler.bzl
new file mode 100644
index 0000000..e2bb7d7
--- /dev/null
+++ b/upb/upbc/bootstrap_compiler.bzl
@@ -0,0 +1,166 @@
+"""Macros that implement bootstrapping for the upb code generator."""
+
+load(
+ "//bazel:upb_proto_library.bzl",
+ "upb_proto_library",
+)
+load(
+ "//cmake:build_defs.bzl",
+ "staleness_test",
+)
+
+_stages = ["_stage0", "_stage1", ""]
+_protoc = "@com_google_protobuf//:protoc"
+_upbc_base = "//upbc:protoc-gen-upb"
+
+# begin:google_only
+# _is_google3 = True
+# _extra_proto_path = ""
+# end:google_only
+
+# begin:github_only
+_is_google3 = False
+_extra_proto_path = "-I$$(dirname $(location @com_google_protobuf//:descriptor_proto_srcs))/../.. "
+# end:github_only
+
+def _upbc(stage):
+ return _upbc_base + _stages[stage]
+
+def bootstrap_cc_library(name, visibility, deps, bootstrap_deps, **kwargs):
+ for stage in _stages:
+ stage_visibility = visibility if stage == "" else ["//upbc:__pkg__"]
+ native.cc_library(
+ name = name + stage,
+ deps = deps + [dep + stage for dep in bootstrap_deps],
+ visibility = stage_visibility,
+ **kwargs
+ )
+
+def bootstrap_cc_binary(name, deps, bootstrap_deps, **kwargs):
+ for stage in _stages:
+ native.cc_binary(
+ name = name + stage,
+ deps = deps + [dep + stage for dep in bootstrap_deps],
+ **kwargs
+ )
+
+def _generated_srcs_for_suffix(prefix, srcs, suffix):
+ return [prefix + "/" + src[:-len(".proto")] + suffix for src in srcs]
+
+def _generated_srcs(prefix, srcs):
+ return _generated_srcs_for_suffix(prefix, srcs, ".upb.h") + _generated_srcs_for_suffix(prefix, srcs, ".upb.c")
+
+def _stage0_proto_staleness_test(name, base_dir, src_files, src_rules, strip_prefix):
+ native.genrule(
+ name = name + "_generate_bootstrap",
+ srcs = src_rules,
+ outs = _generated_srcs("bootstrap_generated_sources/" + base_dir + "stage0", src_files),
+ tools = [_protoc, _upbc(0)],
+ cmd =
+ "$(location " + _protoc + ") " +
+ "-I$(GENDIR)/" + strip_prefix + " " + _extra_proto_path +
+ "--plugin=protoc-gen-upb=$(location " + _upbc(0) + ") " +
+ "--upb_out=bootstrap_upb:$(@D)/bootstrap_generated_sources/" + base_dir + "stage0 " +
+ " ".join(src_files),
+ )
+
+ staleness_test(
+ name = name + "_staleness_test",
+ outs = _generated_srcs(base_dir + "stage0", src_files),
+ generated_pattern = "bootstrap_generated_sources/%s",
+ target_files = native.glob([base_dir + "stage0/**"]),
+ # To avoid skew problems for descriptor.proto/pluging.proto between
+ # GitHub repos. It's not critical that the checked-in protos are up to
+ # date for every change, they just needs to be complete enough to have
+ # everything needed by the code generator itself.
+ tags = ["manual"],
+ )
+
+def bootstrap_upb_proto_library(
+ name,
+ base_dir,
+ google3_src_files,
+ google3_src_rules,
+ oss_src_files,
+ oss_src_rules,
+ oss_strip_prefix,
+ proto_lib_deps,
+ visibility,
+ deps = [],
+ **kwargs):
+ """A version of upb_proto_library() that is augmented to allow for bootstrapping the compiler.
+
+ Args:
+ name: Name of this rule. This name will resolve to a upb_proto_library().
+ base_dir: The directory that all generated files should be placed under.
+ google3_src_files: Google3 filenames of .proto files that should be built by this rule.
+ The names should be relative to the depot base.
+ google3_src_rules: Target names of the Blaze rules that will provide these filenames.
+ oss_src_files: OSS filenames of .proto files that should be built by this rule.
+ oss_src_rules: Target names of the Bazel rules that will provide these filenames.
+ oss_strip_prefix: Prefix that should be stripped from OSS file names.
+ proto_lib_deps: proto_library() rules that we will use to build the protos when we are
+ not bootstrapping.
+ visibility: Visibility list for the final upb_proto_library() rule. Bootstrapping rules
+ will always be hidden, and will not honor the visibility parameter passed here.
+ deps: other bootstrap_upb_proto_library() rules that this one depends on.
+ **kwargs: Other arguments that will be passed through to cc_library(), genrule(), and
+ upb_proto_library().
+ """
+ _stage0_proto_staleness_test(name, base_dir, oss_src_files, oss_src_rules, oss_strip_prefix)
+
+ # stage0 uses checked-in protos.
+ native.cc_library(
+ name = name + "_stage0",
+ srcs = _generated_srcs_for_suffix(base_dir + "stage0", oss_src_files, ".upb.c"),
+ hdrs = _generated_srcs_for_suffix(base_dir + "stage0", oss_src_files, ".upb.h"),
+ includes = [base_dir + "stage0"],
+ visibility = ["//upbc:__pkg__"],
+ # This macro signals to the runtime that it must use OSS APIs for descriptor.proto/plugin.proto.
+ defines = ["UPB_BOOTSTRAP_STAGE0"],
+ deps = [
+ "//:generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me",
+ "//:mini_table",
+ ] + [dep + "_stage0" for dep in deps],
+ **kwargs
+ )
+
+ src_files = google3_src_files if _is_google3 else oss_src_files
+ src_rules = google3_src_rules if _is_google3 else oss_src_rules
+
+ # Generate stage1 protos using stage0 compiler.
+ native.genrule(
+ name = "gen_" + name + "_stage1",
+ srcs = src_rules,
+ outs = _generated_srcs(base_dir + "stage1", src_files),
+ cmd = "$(location " + _protoc + ") " +
+ "--plugin=protoc-gen-upb=$(location " + _upbc(0) + ") " + _extra_proto_path +
+ "--upb_out=$(@D)/" + base_dir + "stage1 " +
+ " ".join(src_files),
+ visibility = ["//upbc:__pkg__"],
+ tools = [
+ _protoc,
+ _upbc(0),
+ ],
+ **kwargs
+ )
+
+ native.cc_library(
+ name = name + "_stage1",
+ srcs = _generated_srcs_for_suffix(base_dir + "stage1", src_files, ".upb.c"),
+ hdrs = _generated_srcs_for_suffix(base_dir + "stage1", src_files, ".upb.h"),
+ includes = [base_dir + "stage1"],
+ visibility = ["//upbc:__pkg__"],
+ deps = [
+ "//:generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me",
+ ] + [dep + "_stage1" for dep in deps],
+ **kwargs
+ )
+
+ # The final protos are generated via normal upb_proto_library().
+ upb_proto_library(
+ name = name,
+ deps = proto_lib_deps,
+ visibility = visibility,
+ **kwargs
+ )
diff --git a/upb/upbc/code_generator_request.c b/upb/upbc/code_generator_request.c
new file mode 100644
index 0000000..4ab5fcc
--- /dev/null
+++ b/upb/upbc/code_generator_request.c
@@ -0,0 +1,262 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upbc/code_generator_request.h"
+
+#include <inttypes.h>
+
+#include "google/protobuf/compiler/plugin.upb.h"
+#include "upb/mini_descriptor/decode.h"
+#include "upb/reflection/def.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+/******************************************************************************/
+
+// Kitchen sink storage for all of our state as we build the mini descriptors.
+
+typedef struct {
+ upb_Arena* arena;
+ upb_Status* status;
+ upb_DefPool* symtab;
+
+ upbc_CodeGeneratorRequest* out;
+
+ jmp_buf jmp;
+} upbc_State;
+
+static void upbc_State_Fini(upbc_State* s) {
+ if (s->symtab) upb_DefPool_Free(s->symtab);
+}
+
+UPB_NORETURN static void upbc_Error(upbc_State* s, const char* fn,
+ const char* msg) {
+ upb_Status_SetErrorFormat(s->status, "%s(): %s", fn, msg);
+ upbc_State_Fini(s);
+ UPB_LONGJMP(s->jmp, -1);
+}
+
+static void upbc_State_Init(upbc_State* s) {
+ s->symtab = upb_DefPool_New();
+ if (!s->symtab) upbc_Error(s, __func__, "could not allocate def pool");
+
+ s->out = upbc_CodeGeneratorRequest_new(s->arena);
+ if (!s->out) upbc_Error(s, __func__, "could not allocate request");
+}
+
+static upb_StringView upbc_State_StrDup(upbc_State* s, const char* str) {
+ upb_StringView from = upb_StringView_FromString(str);
+ char* to = upb_Arena_Malloc(s->arena, from.size);
+ if (!to) upbc_Error(s, __func__, "Out of memory");
+ memcpy(to, from.data, from.size);
+ return upb_StringView_FromDataAndSize(to, from.size);
+}
+
+static void upbc_State_AddMiniDescriptor(upbc_State* s, const char* name,
+ upb_StringView encoding) {
+ const upb_StringView key = upb_StringView_FromString(name);
+ upbc_CodeGeneratorRequest_UpbInfo* info =
+ upbc_CodeGeneratorRequest_UpbInfo_new(s->arena);
+ if (!info) upbc_Error(s, __func__, "Out of memory");
+ upbc_CodeGeneratorRequest_UpbInfo_set_mini_descriptor(info, encoding);
+ bool ok = upbc_CodeGeneratorRequest_upb_info_set(s->out, key, info, s->arena);
+ if (!ok) upbc_Error(s, __func__, "could not set mini descriptor in map");
+}
+
+/******************************************************************************/
+
+// Forward declaration.
+static void upbc_Scrape_Message(upbc_State*, const upb_MessageDef*);
+
+static void upbc_Scrape_Enum(upbc_State* s, const upb_EnumDef* e) {
+ upb_StringView desc;
+ bool ok = upb_EnumDef_MiniDescriptorEncode(e, s->arena, &desc);
+ if (!ok) upbc_Error(s, __func__, "could not encode enum");
+
+ upbc_State_AddMiniDescriptor(s, upb_EnumDef_FullName(e), desc);
+}
+
+static void upbc_Scrape_Extension(upbc_State* s, const upb_FieldDef* f) {
+ upb_StringView desc;
+ bool ok = upb_FieldDef_MiniDescriptorEncode(f, s->arena, &desc);
+ if (!ok) upbc_Error(s, __func__, "could not encode extension");
+
+ upbc_State_AddMiniDescriptor(s, upb_FieldDef_FullName(f), desc);
+}
+
+static void upbc_Scrape_FileEnums(upbc_State* s, const upb_FileDef* f) {
+ const size_t len = upb_FileDef_TopLevelEnumCount(f);
+
+ for (size_t i = 0; i < len; i++) {
+ upbc_Scrape_Enum(s, upb_FileDef_TopLevelEnum(f, i));
+ }
+}
+
+static void upbc_Scrape_FileExtensions(upbc_State* s, const upb_FileDef* f) {
+ const size_t len = upb_FileDef_TopLevelExtensionCount(f);
+
+ for (size_t i = 0; i < len; i++) {
+ upbc_Scrape_Extension(s, upb_FileDef_TopLevelExtension(f, i));
+ }
+}
+
+static void upbc_Scrape_FileMessages(upbc_State* s, const upb_FileDef* f) {
+ const size_t len = upb_FileDef_TopLevelMessageCount(f);
+
+ for (size_t i = 0; i < len; i++) {
+ upbc_Scrape_Message(s, upb_FileDef_TopLevelMessage(f, i));
+ }
+}
+
+static void upbc_Scrape_File(upbc_State* s, const upb_FileDef* f) {
+ upbc_Scrape_FileEnums(s, f);
+ upbc_Scrape_FileExtensions(s, f);
+ upbc_Scrape_FileMessages(s, f);
+}
+
+static void upbc_Scrape_Files(upbc_State* s) {
+ const google_protobuf_compiler_CodeGeneratorRequest* request =
+ upbc_CodeGeneratorRequest_request(s->out);
+
+ size_t len = 0;
+ const google_protobuf_FileDescriptorProto* const* files =
+ google_protobuf_compiler_CodeGeneratorRequest_proto_file(request, &len);
+
+ for (size_t i = 0; i < len; i++) {
+ const upb_FileDef* f = upb_DefPool_AddFile(s->symtab, files[i], s->status);
+ if (!f) upbc_Error(s, __func__, "could not add file to def pool");
+
+ upbc_Scrape_File(s, f);
+ }
+}
+
+static void upbc_Scrape_NestedEnums(upbc_State* s, const upb_MessageDef* m) {
+ const size_t len = upb_MessageDef_NestedEnumCount(m);
+
+ for (size_t i = 0; i < len; i++) {
+ upbc_Scrape_Enum(s, upb_MessageDef_NestedEnum(m, i));
+ }
+}
+
+static void upbc_Scrape_NestedExtensions(upbc_State* s,
+ const upb_MessageDef* m) {
+ const size_t len = upb_MessageDef_NestedExtensionCount(m);
+
+ for (size_t i = 0; i < len; i++) {
+ upbc_Scrape_Extension(s, upb_MessageDef_NestedExtension(m, i));
+ }
+}
+
+static void upbc_Scrape_NestedMessages(upbc_State* s, const upb_MessageDef* m) {
+ const size_t len = upb_MessageDef_NestedMessageCount(m);
+
+ for (size_t i = 0; i < len; i++) {
+ upbc_Scrape_Message(s, upb_MessageDef_NestedMessage(m, i));
+ }
+}
+
+static void upbc_Scrape_MessageSubs(upbc_State* s,
+ upbc_CodeGeneratorRequest_UpbInfo* info,
+ const upb_MessageDef* m) {
+ const upb_MiniTableField** fields =
+ malloc(upb_MessageDef_FieldCount(m) * sizeof(*fields));
+ const upb_MiniTable* mt = upb_MessageDef_MiniTable(m);
+ uint32_t counts = upb_MiniTable_GetSubList(mt, fields);
+ uint32_t msg_count = counts >> 16;
+ uint32_t enum_count = counts & 0xffff;
+
+ for (uint32_t i = 0; i < msg_count; i++) {
+ const upb_FieldDef* f =
+ upb_MessageDef_FindFieldByNumber(m, fields[i]->number);
+ if (!f) upbc_Error(s, __func__, "Missing f");
+ const upb_MessageDef* sub = upb_FieldDef_MessageSubDef(f);
+ if (!sub) upbc_Error(s, __func__, "Missing sub");
+ upb_StringView name = upbc_State_StrDup(s, upb_MessageDef_FullName(sub));
+ upbc_CodeGeneratorRequest_UpbInfo_add_sub_message(info, name, s->arena);
+ }
+
+ for (uint32_t i = 0; i < enum_count; i++) {
+ const upb_FieldDef* f =
+ upb_MessageDef_FindFieldByNumber(m, fields[msg_count + i]->number);
+ if (!f) upbc_Error(s, __func__, "Missing f (2)");
+ const upb_EnumDef* sub = upb_FieldDef_EnumSubDef(f);
+ if (!sub) upbc_Error(s, __func__, "Missing sub (2)");
+ upb_StringView name = upbc_State_StrDup(s, upb_EnumDef_FullName(sub));
+ upbc_CodeGeneratorRequest_UpbInfo_add_sub_enum(info, name, s->arena);
+ }
+
+ free(fields);
+}
+
+static void upbc_Scrape_Message(upbc_State* s, const upb_MessageDef* m) {
+ upb_StringView desc;
+ bool ok = upb_MessageDef_MiniDescriptorEncode(m, s->arena, &desc);
+ if (!ok) upbc_Error(s, __func__, "could not encode message");
+
+ upbc_CodeGeneratorRequest_UpbInfo* info =
+ upbc_CodeGeneratorRequest_UpbInfo_new(s->arena);
+ if (!info) upbc_Error(s, __func__, "Out of memory");
+ upbc_CodeGeneratorRequest_UpbInfo_set_mini_descriptor(info, desc);
+
+ upbc_Scrape_MessageSubs(s, info, m);
+
+ const upb_StringView key = upbc_State_StrDup(s, upb_MessageDef_FullName(m));
+ ok = upbc_CodeGeneratorRequest_upb_info_set(s->out, key, info, s->arena);
+ if (!ok) upbc_Error(s, __func__, "could not set mini descriptor in map");
+
+ upbc_Scrape_NestedEnums(s, m);
+ upbc_Scrape_NestedExtensions(s, m);
+ upbc_Scrape_NestedMessages(s, m);
+}
+
+static upbc_CodeGeneratorRequest* upbc_State_MakeCodeGeneratorRequest(
+ upbc_State* const s, google_protobuf_compiler_CodeGeneratorRequest* const request) {
+ if (UPB_SETJMP(s->jmp)) return NULL;
+ upbc_State_Init(s);
+
+ upbc_CodeGeneratorRequest_set_request(s->out, request);
+ upbc_Scrape_Files(s);
+ upbc_State_Fini(s);
+ return s->out;
+}
+
+upbc_CodeGeneratorRequest* upbc_MakeCodeGeneratorRequest(
+ google_protobuf_compiler_CodeGeneratorRequest* request, upb_Arena* arena,
+ upb_Status* status) {
+ upbc_State s = {
+ .arena = arena,
+ .status = status,
+ .symtab = NULL,
+ .out = NULL,
+ };
+
+ return upbc_State_MakeCodeGeneratorRequest(&s, request);
+}
diff --git a/upb/upbc/code_generator_request.h b/upb/upbc/code_generator_request.h
new file mode 100644
index 0000000..3218c08
--- /dev/null
+++ b/upb/upbc/code_generator_request.h
@@ -0,0 +1,55 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPBC_CODE_GENERATOR_REQUEST_H_
+#define UPBC_CODE_GENERATOR_REQUEST_H_
+
+#include "upb/mem/arena.h"
+#include "upb/reflection/def.h"
+#include "upbc/code_generator_request.upb.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+upbc_CodeGeneratorRequest* upbc_MakeCodeGeneratorRequest(
+ struct google_protobuf_compiler_CodeGeneratorRequest* request, upb_Arena* a,
+ upb_Status* s);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* UPBC_CODE_GENERATOR_REQUEST_H_ */
diff --git a/upb/upbc/code_generator_request.proto b/upb/upbc/code_generator_request.proto
new file mode 100644
index 0000000..600faee
--- /dev/null
+++ b/upb/upbc/code_generator_request.proto
@@ -0,0 +1,56 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto2";
+
+package upbc;
+
+import "google/protobuf/compiler/plugin.proto";
+
+message CodeGeneratorRequest {
+ message UpbInfo {
+ optional string mini_descriptor = 1;
+
+ // An ordered list of fully qualified sub-message names whose upb_MiniTable
+ // should be passed to upb_MiniTable_Link().
+ repeated string sub_message = 3;
+
+ // An ordered list of fully qualified sub-enum names whose upb_MiniTableEnum
+ // should be passed to upb_MiniTable_Link().
+ repeated string sub_enum = 4;
+ }
+
+ // The pb sent by protoc to its plugins.
+ optional google.protobuf.compiler.CodeGeneratorRequest request = 1;
+
+ // upb-specific info for the messages/enums/extensions in the request, keyed
+ // by the fully qualified names.
+ map<string, UpbInfo> upb_info = 2;
+}
diff --git a/upb/upbc/common.cc b/upb/upbc/common.cc
new file mode 100644
index 0000000..cfca0fc
--- /dev/null
+++ b/upb/upbc/common.cc
@@ -0,0 +1,86 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upbc/common.h"
+
+#include "absl/strings/str_replace.h"
+#include "upb/reflection/def.hpp"
+
+namespace upbc {
+
+std::string StripExtension(absl::string_view fname) {
+ size_t lastdot = fname.find_last_of('.');
+ if (lastdot == std::string::npos) {
+ return std::string(fname);
+ }
+ return std::string(fname.substr(0, lastdot));
+}
+
+std::string ToCIdent(absl::string_view str) {
+ return absl::StrReplaceAll(str, {{".", "_"}, {"/", "_"}, {"-", "_"}});
+}
+
+std::string ToPreproc(absl::string_view str) {
+ return absl::AsciiStrToUpper(ToCIdent(str));
+}
+
+void EmitFileWarning(absl::string_view name, Output& output) {
+ output(
+ "/* This file was generated by upbc (the upb compiler) from the input\n"
+ " * file:\n"
+ " *\n"
+ " * $0\n"
+ " *\n"
+ " * Do not edit -- your changes will be discarded when the file is\n"
+ " * regenerated. */\n\n",
+ name);
+}
+
+std::string MessageName(upb::MessageDefPtr descriptor) {
+ return ToCIdent(descriptor.full_name());
+}
+
+std::string FileLayoutName(upb::FileDefPtr file) {
+ return ToCIdent(file.name()) + "_upb_file_layout";
+}
+
+std::string HeaderFilename(upb::FileDefPtr file) {
+ return StripExtension(file.name()) + ".upb.h";
+}
+
+std::string MessageInit(absl::string_view full_name) {
+ return ToCIdent(full_name) + "_msg_init";
+}
+
+std::string EnumInit(upb::EnumDefPtr descriptor) {
+ return ToCIdent(descriptor.full_name()) + "_enum_init";
+}
+
+} // namespace upbc
diff --git a/upb/upbc/common.h b/upb/upbc/common.h
new file mode 100644
index 0000000..1c70088
--- /dev/null
+++ b/upb/upbc/common.h
@@ -0,0 +1,92 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPBC_COMMON_H
+#define UPBC_COMMON_H
+
+#include <vector>
+
+#include "absl/strings/str_replace.h"
+#include "absl/strings/substitute.h"
+#include "upb/reflection/def.hpp"
+
+namespace upbc {
+
+class Output {
+ public:
+ template <class... Arg>
+ void operator()(absl::string_view format, const Arg&... arg) {
+ Write(absl::Substitute(format, arg...));
+ }
+
+ absl::string_view output() const { return output_; }
+
+ private:
+ void Write(absl::string_view data) {
+ std::string stripped;
+ if (absl::StartsWith(data, "\n ")) {
+ size_t indent = data.substr(1).find_first_not_of(' ');
+ if (indent != absl::string_view::npos) {
+ // Remove indentation from all lines.
+ auto line_prefix = data.substr(0, indent + 1);
+ // The final line has an extra newline and is indented two less, eg.
+ // R"cc(
+ // UPB_INLINE $0 $1_$2(const $1 *msg) {
+ // return $1_has_$2(msg) ? *UPB_PTR_AT(msg, $3, $0) : $4;
+ // }
+ // )cc",
+ std::string last_line_prefix = std::string(line_prefix);
+ last_line_prefix.resize(last_line_prefix.size() - 2);
+ data.remove_prefix(line_prefix.size());
+ stripped = absl::StrReplaceAll(
+ data, {{line_prefix, "\n"}, {last_line_prefix, "\n"}});
+ data = stripped;
+ }
+ }
+ absl::StrAppend(&output_, data);
+ }
+
+ std::string output_;
+};
+
+std::string StripExtension(absl::string_view fname);
+std::string ToCIdent(absl::string_view str);
+std::string ToPreproc(absl::string_view str);
+void EmitFileWarning(absl::string_view name, Output& output);
+std::string MessageName(upb::MessageDefPtr descriptor);
+std::string FileLayoutName(upb::FileDefPtr file);
+std::string HeaderFilename(upb::FileDefPtr file);
+
+std::string MessageInit(absl::string_view full_name);
+std::string EnumInit(upb::EnumDefPtr descriptor);
+
+} // namespace upbc
+
+#endif // UPBC_COMMON_H
diff --git a/upb/upbc/file_layout.cc b/upb/upbc/file_layout.cc
new file mode 100644
index 0000000..fab6120
--- /dev/null
+++ b/upb/upbc/file_layout.cc
@@ -0,0 +1,144 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upbc/file_layout.h"
+
+#include <string>
+#include <unordered_set>
+
+#include "upb/mini_table/internal/extension.h"
+#include "upbc/common.h"
+
+namespace upbc {
+
+const char* kEnumsInit = "enums_layout";
+const char* kExtensionsInit = "extensions_layout";
+const char* kMessagesInit = "messages_layout";
+
+void AddEnums(upb::MessageDefPtr message, std::vector<upb::EnumDefPtr>* enums) {
+ enums->reserve(enums->size() + message.enum_type_count());
+ for (int i = 0; i < message.enum_type_count(); i++) {
+ enums->push_back(message.enum_type(i));
+ }
+ for (int i = 0; i < message.nested_message_count(); i++) {
+ AddEnums(message.nested_message(i), enums);
+ }
+}
+
+std::vector<upb::EnumDefPtr> SortedEnums(upb::FileDefPtr file) {
+ std::vector<upb::EnumDefPtr> enums;
+ enums.reserve(file.toplevel_enum_count());
+ for (int i = 0; i < file.toplevel_enum_count(); i++) {
+ enums.push_back(file.toplevel_enum(i));
+ }
+ for (int i = 0; i < file.toplevel_message_count(); i++) {
+ AddEnums(file.toplevel_message(i), &enums);
+ }
+ std::sort(enums.begin(), enums.end(),
+ [](upb::EnumDefPtr a, upb::EnumDefPtr b) {
+ return strcmp(a.full_name(), b.full_name()) < 0;
+ });
+ return enums;
+}
+
+std::vector<uint32_t> SortedUniqueEnumNumbers(upb::EnumDefPtr e) {
+ std::vector<uint32_t> values;
+ values.reserve(e.value_count());
+ for (int i = 0; i < e.value_count(); i++) {
+ values.push_back(static_cast<uint32_t>(e.value(i).number()));
+ }
+ std::sort(values.begin(), values.end());
+ auto last = std::unique(values.begin(), values.end());
+ values.erase(last, values.end());
+ return values;
+}
+
+void AddMessages(upb::MessageDefPtr message,
+ std::vector<upb::MessageDefPtr>* messages) {
+ messages->push_back(message);
+ for (int i = 0; i < message.nested_message_count(); i++) {
+ AddMessages(message.nested_message(i), messages);
+ }
+}
+
+// Ordering must match upb/def.c!
+//
+// The ordering is significant because each upb_MessageDef* will point at the
+// corresponding upb_MiniTable and we just iterate through the list without
+// any search or lookup.
+std::vector<upb::MessageDefPtr> SortedMessages(upb::FileDefPtr file) {
+ std::vector<upb::MessageDefPtr> messages;
+ for (int i = 0; i < file.toplevel_message_count(); i++) {
+ AddMessages(file.toplevel_message(i), &messages);
+ }
+ return messages;
+}
+
+void AddExtensionsFromMessage(upb::MessageDefPtr message,
+ std::vector<upb::FieldDefPtr>* exts) {
+ for (int i = 0; i < message.nested_extension_count(); i++) {
+ exts->push_back(message.nested_extension(i));
+ }
+ for (int i = 0; i < message.nested_message_count(); i++) {
+ AddExtensionsFromMessage(message.nested_message(i), exts);
+ }
+}
+
+// Ordering must match upb/def.c!
+//
+// The ordering is significant because each upb_FieldDef* will point at the
+// corresponding upb_MiniTableExtension and we just iterate through the list
+// without any search or lookup.
+std::vector<upb::FieldDefPtr> SortedExtensions(upb::FileDefPtr file) {
+ std::vector<upb::FieldDefPtr> ret;
+ ret.reserve(file.toplevel_extension_count());
+ for (int i = 0; i < file.toplevel_extension_count(); i++) {
+ ret.push_back(file.toplevel_extension(i));
+ }
+ for (int i = 0; i < file.toplevel_message_count(); i++) {
+ AddExtensionsFromMessage(file.toplevel_message(i), &ret);
+ }
+ return ret;
+}
+
+std::vector<upb::FieldDefPtr> FieldNumberOrder(upb::MessageDefPtr message) {
+ std::vector<upb::FieldDefPtr> fields;
+ fields.reserve(message.field_count());
+ for (int i = 0; i < message.field_count(); i++) {
+ fields.push_back(message.field(i));
+ }
+ std::sort(fields.begin(), fields.end(),
+ [](upb::FieldDefPtr a, upb::FieldDefPtr b) {
+ return a.number() < b.number();
+ });
+ return fields;
+}
+
+} // namespace upbc
diff --git a/upb/upbc/file_layout.h b/upb/upbc/file_layout.h
new file mode 100644
index 0000000..9df43aa
--- /dev/null
+++ b/upb/upbc/file_layout.h
@@ -0,0 +1,129 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPBC_FILE_LAYOUT_H
+#define UPBC_FILE_LAYOUT_H
+
+#include <string>
+
+// begin:google_only
+// #ifndef UPB_BOOTSTRAP_STAGE0
+// #include "net/proto2/proto/descriptor.upb.h"
+// #else
+// #include "google/protobuf/descriptor.upb.h"
+// #endif
+// end:google_only
+
+// begin:github_only
+#include "google/protobuf/descriptor.upb.h"
+// end:github_only
+
+#include "absl/container/flat_hash_map.h"
+#include "upb/base/status.hpp"
+#include "upb/mini_descriptor/decode.h"
+#include "upb/reflection/def.h"
+#include "upb/reflection/def.hpp"
+
+// Must be last
+#include "upb/port/def.inc"
+
+namespace upbc {
+
+std::vector<upb::EnumDefPtr> SortedEnums(upb::FileDefPtr file);
+
+// Ordering must match upb/def.c!
+//
+// The ordering is significant because each upb_MessageDef* will point at the
+// corresponding upb_MiniTable and we just iterate through the list without
+// any search or lookup.
+std::vector<upb::MessageDefPtr> SortedMessages(upb::FileDefPtr file);
+
+// Ordering must match upb/def.c!
+//
+// The ordering is significant because each upb_FieldDef* will point at the
+// corresponding upb_MiniTableExtension and we just iterate through the list
+// without any search or lookup.
+std::vector<upb::FieldDefPtr> SortedExtensions(upb::FileDefPtr file);
+
+std::vector<upb::FieldDefPtr> FieldNumberOrder(upb::MessageDefPtr message);
+
+// DefPoolPair is a pair of DefPools: one for 32-bit and one for 64-bit.
+class DefPoolPair {
+ public:
+ DefPoolPair() {
+ pool32_._SetPlatform(kUpb_MiniTablePlatform_32Bit);
+ pool64_._SetPlatform(kUpb_MiniTablePlatform_64Bit);
+ }
+
+ upb::FileDefPtr AddFile(const UPB_DESC(FileDescriptorProto) * file_proto,
+ upb::Status* status) {
+ upb::FileDefPtr file32 = pool32_.AddFile(file_proto, status);
+ upb::FileDefPtr file64 = pool64_.AddFile(file_proto, status);
+ if (!file32) return file32;
+ return file64;
+ }
+
+ const upb_MiniTable* GetMiniTable32(upb::MessageDefPtr m) const {
+ return pool32_.FindMessageByName(m.full_name()).mini_table();
+ }
+
+ const upb_MiniTable* GetMiniTable64(upb::MessageDefPtr m) const {
+ return pool64_.FindMessageByName(m.full_name()).mini_table();
+ }
+
+ const upb_MiniTableField* GetField32(upb::FieldDefPtr f) const {
+ return GetFieldFromPool(&pool32_, f);
+ }
+
+ const upb_MiniTableField* GetField64(upb::FieldDefPtr f) const {
+ return GetFieldFromPool(&pool64_, f);
+ }
+
+ private:
+ static const upb_MiniTableField* GetFieldFromPool(const upb::DefPool* pool,
+ upb::FieldDefPtr f) {
+ if (f.is_extension()) {
+ return pool->FindExtensionByName(f.full_name()).mini_table();
+ } else {
+ return pool->FindMessageByName(f.containing_type().full_name())
+ .FindFieldByNumber(f.number())
+ .mini_table();
+ }
+ }
+
+ upb::DefPool pool32_;
+ upb::DefPool pool64_;
+};
+
+} // namespace upbc
+
+#include "upb/port/undef.inc"
+
+#endif // UPBC_FILE_LAYOUT_H
diff --git a/upb/upbc/get_used_fields.c b/upb/upbc/get_used_fields.c
new file mode 100644
index 0000000..796e501
--- /dev/null
+++ b/upb/upbc/get_used_fields.c
@@ -0,0 +1,143 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upbc/get_used_fields.h"
+
+#include "google/protobuf/descriptor.upb.h"
+#include "google/protobuf/compiler/plugin.upb.h"
+#include "upb/reflection/def_pool.h"
+#include "upb/reflection/field_def.h"
+#include "upb/reflection/message.h"
+#include "upb/reflection/message_def.h"
+#include "upb/wire/decode.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#define upbdev_Err(...) \
+ { \
+ fprintf(stderr, __VA_ARGS__); \
+ exit(1); \
+ }
+
+typedef struct {
+ char* buf;
+ size_t size;
+ size_t capacity;
+ upb_Arena* arena;
+} upbdev_StringBuf;
+
+void upbdev_StringBuf_Add(upbdev_StringBuf* buf, const char* sym) {
+ size_t len = strlen(sym);
+ size_t need = buf->size + len + (buf->size != 0);
+ if (need > buf->capacity) {
+ size_t new_cap = UPB_MAX(buf->capacity, 32);
+ while (need > new_cap) new_cap *= 2;
+ buf->buf = upb_Arena_Realloc(buf->arena, buf->buf, buf->capacity, new_cap);
+ buf->capacity = new_cap;
+ }
+ if (buf->size != 0) {
+ buf->buf[buf->size++] = '\n'; // Separator
+ }
+ memcpy(buf->buf + buf->size, sym, len);
+ buf->size = need;
+}
+
+void upbdev_VisitMessage(upbdev_StringBuf* buf, const upb_Message* msg,
+ const upb_MessageDef* m) {
+ size_t iter = kUpb_Message_Begin;
+ const upb_FieldDef* f;
+ upb_MessageValue val;
+ while (upb_Message_Next(msg, m, NULL, &f, &val, &iter)) {
+ // This could be a duplicate, but we don't worry about it; we'll dedupe
+ // one level up.
+ upbdev_StringBuf_Add(buf, upb_FieldDef_FullName(f));
+
+ if (upb_FieldDef_CType(f) != kUpb_CType_Message) continue;
+ const upb_MessageDef* sub = upb_FieldDef_MessageSubDef(f);
+
+ if (upb_FieldDef_IsMap(f)) {
+ const upb_Map* map = val.map_val;
+ size_t iter = kUpb_Map_Begin;
+ upb_MessageValue map_key, map_val;
+ while (upb_Map_Next(map, &map_key, &map_val, &iter)) {
+ upbdev_VisitMessage(buf, map_val.msg_val, sub);
+ }
+ } else if (upb_FieldDef_IsRepeated(f)) {
+ const upb_Array* arr = val.array_val;
+ size_t n = upb_Array_Size(arr);
+ for (size_t i = 0; i < n; i++) {
+ upb_MessageValue val = upb_Array_Get(arr, i);
+ upbdev_VisitMessage(buf, val.msg_val, sub);
+ }
+ } else {
+ upbdev_VisitMessage(buf, val.msg_val, sub);
+ }
+ }
+}
+
+upb_StringView upbdev_GetUsedFields(const char* request, size_t request_size,
+ const char* payload, size_t payload_size,
+ const char* message_name,
+ upb_Arena* arena) {
+ upb_Arena* tmp_arena = upb_Arena_New();
+ google_protobuf_compiler_CodeGeneratorRequest* request_proto =
+ google_protobuf_compiler_CodeGeneratorRequest_parse(request, request_size,
+ tmp_arena);
+ if (!request_proto) upbdev_Err("Couldn't parse request proto\n");
+
+ size_t len;
+ const google_protobuf_FileDescriptorProto* const* files =
+ google_protobuf_compiler_CodeGeneratorRequest_proto_file(request_proto, &len);
+
+ upb_DefPool* pool = upb_DefPool_New();
+ for (size_t i = 0; i < len; i++) {
+ const upb_FileDef* f = upb_DefPool_AddFile(pool, files[i], NULL);
+ if (!f) upbdev_Err("could not add file to def pool\n");
+ }
+
+ const upb_MessageDef* m = upb_DefPool_FindMessageByName(pool, message_name);
+ if (!m) upbdev_Err("Couldn't find message name\n");
+
+ const upb_MiniTable* mt = upb_MessageDef_MiniTable(m);
+ upb_Message* msg = upb_Message_New(mt, tmp_arena);
+ upb_DecodeStatus st =
+ upb_Decode(payload, payload_size, msg, mt, NULL, 0, tmp_arena);
+ if (st != kUpb_DecodeStatus_Ok) upbdev_Err("Error parsing payload: %d\n", st);
+
+ upbdev_StringBuf buf = {
+ .buf = NULL,
+ .size = 0,
+ .capacity = 0,
+ .arena = arena,
+ };
+ upbdev_VisitMessage(&buf, msg, m);
+ return upb_StringView_FromDataAndSize(buf.buf, buf.size);
+}
diff --git a/upb/upbc/get_used_fields.h b/upb/upbc/get_used_fields.h
new file mode 100644
index 0000000..ccf3208
--- /dev/null
+++ b/upb/upbc/get_used_fields.h
@@ -0,0 +1,57 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPBC_GET_USED_FIELDS
+#define UPBC_GET_USED_FIELDS
+
+#include "upb/base/status.h"
+#include "upb/base/string_view.h"
+#include "upb/mem/arena.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Consume |buf|, deserialize it to a Code_Generator_Request proto, then
+// upbc_Code_Generator_Request, and return it as a JSON-encoded string.
+UPB_API upb_StringView upbdev_GetUsedFields(
+ const char* request, size_t request_size, const char* payload,
+ size_t payload_size, const char* message_name, upb_Arena* arena);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif // UPBC_GET_USED_FIELDS
diff --git a/upb/upbc/keywords.cc b/upb/upbc/keywords.cc
new file mode 100644
index 0000000..3f51705
--- /dev/null
+++ b/upb/upbc/keywords.cc
@@ -0,0 +1,152 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upbc/keywords.h"
+
+#include <string>
+#include <unordered_set>
+
+namespace upbc {
+
+static const char* const kKeywordList[] = {
+ //
+ "NULL",
+ "alignas",
+ "alignof",
+ "and",
+ "and_eq",
+ "asm",
+ "auto",
+ "bitand",
+ "bitor",
+ "bool",
+ "break",
+ "case",
+ "catch",
+ "char",
+ "class",
+ "compl",
+ "const",
+ "constexpr",
+ "const_cast",
+ "continue",
+ "decltype",
+ "default",
+ "delete",
+ "do",
+ "double",
+ "dynamic_cast",
+ "else",
+ "enum",
+ "explicit",
+ "export",
+ "extern",
+ "false",
+ "float",
+ "for",
+ "friend",
+ "goto",
+ "if",
+ "inline",
+ "int",
+ "long",
+ "mutable",
+ "namespace",
+ "new",
+ "noexcept",
+ "not",
+ "not_eq",
+ "nullptr",
+ "operator",
+ "or",
+ "or_eq",
+ "private",
+ "protected",
+ "public",
+ "register",
+ "reinterpret_cast",
+ "return",
+ "short",
+ "signed",
+ "sizeof",
+ "static",
+ "static_assert",
+ "static_cast",
+ "struct",
+ "switch",
+ "template",
+ "this",
+ "thread_local",
+ "throw",
+ "true",
+ "try",
+ "typedef",
+ "typeid",
+ "typename",
+ "union",
+ "unsigned",
+ "using",
+ "virtual",
+ "void",
+ "volatile",
+ "wchar_t",
+ "while",
+ "xor",
+ "xor_eq",
+ "char8_t",
+ "char16_t",
+ "char32_t",
+ "concept",
+ "consteval",
+ "constinit",
+ "co_await",
+ "co_return",
+ "co_yield",
+ "requires",
+};
+
+static std::unordered_set<std::string>* MakeKeywordsMap() {
+ auto* result = new std::unordered_set<std::string>();
+ for (const auto keyword : kKeywordList) {
+ result->emplace(keyword);
+ }
+ return result;
+}
+
+static std::unordered_set<std::string>& kKeywords = *MakeKeywordsMap();
+
+std::string ResolveKeywordConflict(const std::string& name) {
+ if (kKeywords.count(name) > 0) {
+ return name + "_";
+ }
+ return name;
+}
+
+} // namespace upbc
diff --git a/upb/upbc/keywords.h b/upb/upbc/keywords.h
new file mode 100644
index 0000000..6638f99
--- /dev/null
+++ b/upb/upbc/keywords.h
@@ -0,0 +1,43 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_PROTOS_GENERATOR_KEYWORDS_H
+#define UPB_PROTOS_GENERATOR_KEYWORDS_H
+
+#include <string>
+
+namespace upbc {
+
+// Resolves proto field name conflict with C++ reserved keywords.
+std::string ResolveKeywordConflict(const std::string& name);
+
+} // namespace upbc
+
+#endif // UPB_PROTOS_GENERATOR_KEYWORDS_H
diff --git a/upb/upbc/names.cc b/upb/upbc/names.cc
new file mode 100644
index 0000000..63521b8
--- /dev/null
+++ b/upb/upbc/names.cc
@@ -0,0 +1,132 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upbc/names.h"
+
+#include <string>
+
+#include "absl/strings/match.h"
+#include "absl/strings/string_view.h"
+#include "google/protobuf/descriptor.h"
+#include "upb/reflection/def.hpp"
+
+namespace upbc {
+
+namespace protobuf = ::google::protobuf;
+
+// Prefixes used by C code generator for field access.
+static constexpr absl::string_view kClearMethodPrefix = "clear_";
+static constexpr absl::string_view kSetMethodPrefix = "set_";
+static constexpr absl::string_view kHasMethodPrefix = "has_";
+static constexpr absl::string_view kDeleteMethodPrefix = "delete_";
+static constexpr absl::string_view kAddToRepeatedMethodPrefix = "add_";
+static constexpr absl::string_view kResizeArrayMethodPrefix = "resize_";
+
+ABSL_CONST_INIT const absl::string_view kRepeatedFieldArrayGetterPostfix =
+ "upb_array";
+ABSL_CONST_INIT const absl::string_view
+ kRepeatedFieldMutableArrayGetterPostfix = "mutable_upb_array";
+
+// List of generated accessor prefixes to check against.
+// Example:
+// optional repeated string phase = 236;
+// optional bool clear_phase = 237;
+static constexpr absl::string_view kAccessorPrefixes[] = {
+ kClearMethodPrefix, kDeleteMethodPrefix, kAddToRepeatedMethodPrefix,
+ kResizeArrayMethodPrefix, kSetMethodPrefix, kHasMethodPrefix};
+
+std::string ResolveFieldName(const protobuf::FieldDescriptor* field,
+ const NameToFieldDescriptorMap& field_names) {
+ absl::string_view field_name = field->name();
+ for (const auto prefix : kAccessorPrefixes) {
+ // If field name starts with a prefix such as clear_ and the proto
+ // contains a field name with trailing end, depending on type of field
+ // (repeated, map, message) we have a conflict to resolve.
+ if (absl::StartsWith(field_name, prefix)) {
+ auto match = field_names.find(field_name.substr(prefix.size()));
+ if (match != field_names.end()) {
+ const auto* candidate = match->second;
+ if (candidate->is_repeated() || candidate->is_map() ||
+ (candidate->cpp_type() ==
+ protobuf::FieldDescriptor::CPPTYPE_STRING &&
+ prefix == kClearMethodPrefix) ||
+ prefix == kSetMethodPrefix || prefix == kHasMethodPrefix) {
+ return absl::StrCat(field_name, "_");
+ }
+ }
+ }
+ }
+ return std::string(field_name);
+}
+
+// Returns field map by name to use for conflict checks.
+NameToFieldDescriptorMap CreateFieldNameMap(
+ const protobuf::Descriptor* message) {
+ NameToFieldDescriptorMap field_names;
+ for (int i = 0; i < message->field_count(); i++) {
+ const protobuf::FieldDescriptor* field = message->field(i);
+ field_names.emplace(field->name(), field);
+ }
+ return field_names;
+}
+
+NameToFieldDefMap CreateFieldNameMap(upb::MessageDefPtr message) {
+ NameToFieldDefMap field_names;
+ field_names.reserve(message.field_count());
+ for (const auto& field : message.fields()) {
+ field_names.emplace(field.name(), field);
+ }
+ return field_names;
+}
+
+std::string ResolveFieldName(upb::FieldDefPtr field,
+ const NameToFieldDefMap& field_names) {
+ absl::string_view field_name(field.name());
+ for (absl::string_view prefix : kAccessorPrefixes) {
+ // If field name starts with a prefix such as clear_ and the proto
+ // contains a field name with trailing end, depending on type of field
+ // (repeated, map, message) we have a conflict to resolve.
+ if (absl::StartsWith(field_name, prefix)) {
+ auto match = field_names.find(field_name.substr(prefix.size()));
+ if (match != field_names.end()) {
+ const auto candidate = match->second;
+ if (candidate.IsSequence() || candidate.IsMap() ||
+ (candidate.ctype() == kUpb_CType_String &&
+ prefix == kClearMethodPrefix) ||
+ prefix == kSetMethodPrefix || prefix == kHasMethodPrefix) {
+ return absl::StrCat(field_name, "_");
+ }
+ }
+ }
+ }
+ return std::string(field_name);
+}
+
+} // namespace upbc
diff --git a/upb/upbc/names.h b/upb/upbc/names.h
new file mode 100644
index 0000000..87b0a15
--- /dev/null
+++ b/upb/upbc/names.h
@@ -0,0 +1,73 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_PROTOS_GENERATOR_NAMES_H
+#define UPB_PROTOS_GENERATOR_NAMES_H
+
+#include <string>
+
+#include "absl/base/attributes.h"
+#include "absl/container/flat_hash_map.h"
+#include "absl/strings/string_view.h"
+#include "google/protobuf/descriptor.h"
+#include "upb/reflection/def.hpp"
+
+namespace upbc {
+
+using NameToFieldDescriptorMap =
+ absl::flat_hash_map<absl::string_view, const google::protobuf::FieldDescriptor*>;
+
+// Returns field name by resolving naming conflicts across
+// proto field names (such as clear_ prefixes).
+std::string ResolveFieldName(const google::protobuf::FieldDescriptor* field,
+ const NameToFieldDescriptorMap& field_names);
+
+// Returns field map by name to use for conflict checks.
+NameToFieldDescriptorMap CreateFieldNameMap(const google::protobuf::Descriptor* message);
+
+using NameToFieldDefMap =
+ absl::flat_hash_map<absl::string_view, upb::FieldDefPtr>;
+
+// Returns field name by resolving naming conflicts across
+// proto field names (such as clear_ prefixes).
+std::string ResolveFieldName(upb::FieldDefPtr field,
+ const NameToFieldDefMap& field_names);
+
+// Returns field map by name to use for conflict checks.
+NameToFieldDefMap CreateFieldNameMap(upb::MessageDefPtr message);
+
+// Private array getter name postfix for repeated fields.
+ABSL_CONST_INIT extern const absl::string_view kRepeatedFieldArrayGetterPostfix;
+ABSL_CONST_INIT extern const absl::string_view
+ kRepeatedFieldMutableArrayGetterPostfix;
+
+} // namespace upbc
+
+#endif // UPB_PROTOS_GENERATOR_NAMES_H
diff --git a/upb/upbc/plugin.h b/upb/upbc/plugin.h
new file mode 100644
index 0000000..465bda0
--- /dev/null
+++ b/upb/upbc/plugin.h
@@ -0,0 +1,212 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPB_UPBC_PLUGIN_H_
+#define UPB_UPBC_PLUGIN_H_
+
+#include <stdio.h>
+
+#include <string>
+#include <vector>
+#ifdef _WIN32
+#include <fcntl.h>
+#include <io.h>
+#endif
+
+// begin:google_only
+// #ifndef UPB_BOOTSTRAP_STAGE0
+// #include "net/proto2/proto/descriptor.upb.h"
+// #include "third_party/protobuf/compiler/plugin.upb.h"
+// #else
+// #include "google/protobuf/compiler/plugin.upb.h"
+// #include "google/protobuf/descriptor.upb.h"
+// #endif
+// end:google_only
+
+// begin:github_only
+#include "google/protobuf/compiler/plugin.upb.h"
+#include "google/protobuf/descriptor.upb.h"
+// end:github_only
+
+#include "absl/container/flat_hash_set.h"
+#include "absl/log/absl_log.h"
+#include "absl/strings/str_split.h"
+#include "absl/strings/string_view.h"
+#include "upb/reflection/def.hpp"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+namespace upbc {
+
+inline std::vector<std::pair<std::string, std::string>> ParseGeneratorParameter(
+ const absl::string_view text) {
+ std::vector<std::pair<std::string, std::string>> ret;
+ for (absl::string_view sp : absl::StrSplit(text, ',', absl::SkipEmpty())) {
+ std::string::size_type equals_pos = sp.find_first_of('=');
+ std::pair<std::string, std::string> value;
+ if (equals_pos == std::string::npos) {
+ value.first = std::string(sp);
+ } else {
+ value.first = std::string(sp.substr(0, equals_pos));
+ value.second = std::string(sp.substr(equals_pos + 1));
+ }
+ ret.push_back(std::move(value));
+ }
+ return ret;
+}
+
+class Plugin {
+ public:
+ Plugin() { ReadRequest(); }
+ ~Plugin() { WriteResponse(); }
+
+ absl::string_view parameter() const {
+ return ToStringView(
+ UPB_DESC(compiler_CodeGeneratorRequest_parameter)(request_));
+ }
+
+ template <class T>
+ void GenerateFilesRaw(T&& func) {
+ absl::flat_hash_set<absl::string_view> files_to_generate;
+ size_t size;
+ const upb_StringView* file_to_generate = UPB_DESC(
+ compiler_CodeGeneratorRequest_file_to_generate)(request_, &size);
+ for (size_t i = 0; i < size; i++) {
+ files_to_generate.insert(
+ {file_to_generate[i].data, file_to_generate[i].size});
+ }
+
+ const UPB_DESC(FileDescriptorProto)* const* files =
+ UPB_DESC(compiler_CodeGeneratorRequest_proto_file)(request_, &size);
+ for (size_t i = 0; i < size; i++) {
+ upb::Status status;
+ absl::string_view name =
+ ToStringView(UPB_DESC(FileDescriptorProto_name)(files[i]));
+ func(files[i], files_to_generate.contains(name));
+ }
+ }
+
+ template <class T>
+ void GenerateFiles(T&& func) {
+ GenerateFilesRaw(
+ [this, &func](const UPB_DESC(FileDescriptorProto) * file_proto,
+ bool generate) {
+ upb::Status status;
+ upb::FileDefPtr file = pool_.AddFile(file_proto, &status);
+ if (!file) {
+ absl::string_view name =
+ ToStringView(UPB_DESC(FileDescriptorProto_name)(file_proto));
+ ABSL_LOG(FATAL) << "Couldn't add file " << name
+ << " to DefPool: " << status.error_message();
+ }
+ if (generate) func(file);
+ });
+ }
+
+ void SetError(absl::string_view error) {
+ char* data =
+ static_cast<char*>(upb_Arena_Malloc(arena_.ptr(), error.size()));
+ memcpy(data, error.data(), error.size());
+ UPB_DESC(compiler_CodeGeneratorResponse_set_error)
+ (response_, upb_StringView_FromDataAndSize(data, error.size()));
+ }
+
+ void AddOutputFile(absl::string_view filename, absl::string_view content) {
+ UPB_DESC(compiler_CodeGeneratorResponse_File)* file = UPB_DESC(
+ compiler_CodeGeneratorResponse_add_file)(response_, arena_.ptr());
+ UPB_DESC(compiler_CodeGeneratorResponse_File_set_name)
+ (file, StringDup(filename));
+ UPB_DESC(compiler_CodeGeneratorResponse_File_set_content)
+ (file, StringDup(content));
+ }
+
+ private:
+ upb::Arena arena_;
+ upb::DefPool pool_;
+ UPB_DESC(compiler_CodeGeneratorRequest) * request_;
+ UPB_DESC(compiler_CodeGeneratorResponse) * response_;
+
+ static absl::string_view ToStringView(upb_StringView sv) {
+ return absl::string_view(sv.data, sv.size);
+ }
+
+ upb_StringView StringDup(absl::string_view s) {
+ char* data =
+ reinterpret_cast<char*>(upb_Arena_Malloc(arena_.ptr(), s.size()));
+ memcpy(data, s.data(), s.size());
+ return upb_StringView_FromDataAndSize(data, s.size());
+ }
+
+ std::string ReadAllStdinBinary() {
+ std::string data;
+#ifdef _WIN32
+ _setmode(_fileno(stdin), _O_BINARY);
+ _setmode(_fileno(stdout), _O_BINARY);
+#endif
+ char buf[4096];
+ while (size_t len = fread(buf, 1, sizeof(buf), stdin)) {
+ data.append(buf, len);
+ }
+ return data;
+ }
+
+ void ReadRequest() {
+ std::string data = ReadAllStdinBinary();
+ request_ = UPB_DESC(compiler_CodeGeneratorRequest_parse)(
+ data.data(), data.size(), arena_.ptr());
+ if (!request_) {
+ ABSL_LOG(FATAL) << "Failed to parse CodeGeneratorRequest";
+ }
+ response_ = UPB_DESC(compiler_CodeGeneratorResponse_new)(arena_.ptr());
+ UPB_DESC(compiler_CodeGeneratorResponse_set_supported_features)
+ (response_,
+ UPB_DESC(compiler_CodeGeneratorResponse_FEATURE_PROTO3_OPTIONAL));
+ }
+
+ void WriteResponse() {
+ size_t size;
+ char* serialized = UPB_DESC(compiler_CodeGeneratorResponse_serialize)(
+ response_, arena_.ptr(), &size);
+ if (!serialized) {
+ ABSL_LOG(FATAL) << "Failed to serialize CodeGeneratorResponse";
+ }
+
+ if (fwrite(serialized, 1, size, stdout) != size) {
+ ABSL_LOG(FATAL) << "Failed to write response to stdout";
+ }
+ }
+};
+
+} // namespace upbc
+
+#include "upb/port/undef.inc"
+
+#endif // UPB_UPBC_PLUGIN_H_
diff --git a/upb/upbc/protoc-gen-upb.cc b/upb/upbc/protoc-gen-upb.cc
new file mode 100644
index 0000000..d817c80
--- /dev/null
+++ b/upb/upbc/protoc-gen-upb.cc
@@ -0,0 +1,1724 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <limits>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
+#include "absl/log/absl_check.h"
+#include "absl/log/absl_log.h"
+#include "absl/strings/escaping.h"
+#include "absl/strings/string_view.h"
+#include "absl/strings/substitute.h"
+#include "upb/base/descriptor_constants.h"
+#include "upb/base/string_view.h"
+#include "upb/reflection/def.hpp"
+#include "upb/wire/types.h"
+#include "upbc/common.h"
+#include "upbc/file_layout.h"
+#include "upbc/names.h"
+#include "upbc/plugin.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+namespace upbc {
+namespace {
+
+struct Options {
+ bool bootstrap = false;
+};
+
+// Returns fields in order of "hotness", eg. how frequently they appear in
+// serialized payloads. Ideally this will use a profile. When we don't have
+// that, we assume that fields with smaller numbers are used more frequently.
+inline std::vector<upb::FieldDefPtr> FieldHotnessOrder(
+ upb::MessageDefPtr message) {
+ std::vector<upb::FieldDefPtr> fields;
+ size_t field_count = message.field_count();
+ fields.reserve(field_count);
+ for (size_t i = 0; i < field_count; i++) {
+ fields.push_back(message.field(i));
+ }
+ std::sort(fields.begin(), fields.end(),
+ [](upb::FieldDefPtr a, upb::FieldDefPtr b) {
+ return std::make_pair(!a.is_required(), a.number()) <
+ std::make_pair(!b.is_required(), b.number());
+ });
+ return fields;
+}
+
+std::string SourceFilename(upb::FileDefPtr file) {
+ return StripExtension(file.name()) + ".upb.c";
+}
+
+std::string MessageInitName(upb::MessageDefPtr descriptor) {
+ return absl::StrCat(MessageName(descriptor), "_msg_init");
+}
+
+std::string MessageMiniTableRef(upb::MessageDefPtr descriptor,
+ const Options& options) {
+ if (options.bootstrap) {
+ return absl::StrCat(MessageInitName(descriptor), "()");
+ } else {
+ return absl::StrCat("&", MessageInitName(descriptor));
+ }
+}
+
+std::string EnumInitName(upb::EnumDefPtr descriptor) {
+ return ToCIdent(descriptor.full_name()) + "_enum_init";
+}
+
+std::string EnumMiniTableRef(upb::EnumDefPtr descriptor,
+ const Options& options) {
+ if (options.bootstrap) {
+ return absl::StrCat(EnumInitName(descriptor), "()");
+ } else {
+ return absl::StrCat("&", EnumInitName(descriptor));
+ }
+}
+
+std::string ExtensionIdentBase(upb::FieldDefPtr ext) {
+ assert(ext.is_extension());
+ std::string ext_scope;
+ if (ext.extension_scope()) {
+ return MessageName(ext.extension_scope());
+ } else {
+ return ToCIdent(ext.file().package());
+ }
+}
+
+std::string ExtensionLayout(upb::FieldDefPtr ext) {
+ return absl::StrCat(ExtensionIdentBase(ext), "_", ext.name(), "_ext");
+}
+
+const char* kEnumsInit = "enums_layout";
+const char* kExtensionsInit = "extensions_layout";
+const char* kMessagesInit = "messages_layout";
+
+std::string EnumValueSymbol(upb::EnumValDefPtr value) {
+ return ToCIdent(value.full_name());
+}
+
+std::string CTypeInternal(upb::FieldDefPtr field, bool is_const) {
+ std::string maybe_const = is_const ? "const " : "";
+ switch (field.ctype()) {
+ case kUpb_CType_Message: {
+ std::string maybe_struct =
+ field.file() != field.message_type().file() ? "struct " : "";
+ return maybe_const + maybe_struct + MessageName(field.message_type()) +
+ "*";
+ }
+ case kUpb_CType_Bool:
+ return "bool";
+ case kUpb_CType_Float:
+ return "float";
+ case kUpb_CType_Int32:
+ case kUpb_CType_Enum:
+ return "int32_t";
+ case kUpb_CType_UInt32:
+ return "uint32_t";
+ case kUpb_CType_Double:
+ return "double";
+ case kUpb_CType_Int64:
+ return "int64_t";
+ case kUpb_CType_UInt64:
+ return "uint64_t";
+ case kUpb_CType_String:
+ case kUpb_CType_Bytes:
+ return "upb_StringView";
+ default:
+ abort();
+ }
+}
+
+std::string FloatToCLiteral(float value) {
+ if (value == std::numeric_limits<float>::infinity()) {
+ return "kUpb_FltInfinity";
+ } else if (value == -std::numeric_limits<float>::infinity()) {
+ return "-kUpb_FltInfinity";
+ } else if (std::isnan(value)) {
+ return "kUpb_NaN";
+ } else {
+ return absl::StrCat(value);
+ }
+}
+
+std::string DoubleToCLiteral(double value) {
+ if (value == std::numeric_limits<double>::infinity()) {
+ return "kUpb_Infinity";
+ } else if (value == -std::numeric_limits<double>::infinity()) {
+ return "-kUpb_Infinity";
+ } else if (std::isnan(value)) {
+ return "kUpb_NaN";
+ } else {
+ return absl::StrCat(value);
+ }
+}
+
+std::string FieldDefault(upb::FieldDefPtr field) {
+ switch (field.ctype()) {
+ case kUpb_CType_Message:
+ return "NULL";
+ case kUpb_CType_Bytes:
+ case kUpb_CType_String: {
+ upb_StringView str = field.default_value().str_val;
+ return absl::Substitute(
+ "upb_StringView_FromString(\"$0\")",
+ absl::CEscape(absl::string_view(str.data, str.size)));
+ }
+ case kUpb_CType_Int32:
+ return absl::Substitute("(int32_t)$0", field.default_value().int32_val);
+ case kUpb_CType_Int64:
+ if (field.default_value().int64_val == INT64_MIN) {
+ // Special-case to avoid:
+ // integer literal is too large to be represented in a signed integer
+ // type, interpreting as unsigned
+ // [-Werror,-Wimplicitly-unsigned-literal]
+ // int64_t default_val = (int64_t)-9223372036854775808ll;
+ //
+ // More info here: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
+ return "INT64_MIN";
+ } else {
+ return absl::Substitute("(int64_t)$0ll",
+ field.default_value().int64_val);
+ }
+ case kUpb_CType_UInt32:
+ return absl::Substitute("(uint32_t)$0u",
+ field.default_value().uint32_val);
+ case kUpb_CType_UInt64:
+ return absl::Substitute("(uint64_t)$0ull",
+ field.default_value().uint64_val);
+ case kUpb_CType_Float:
+ return FloatToCLiteral(field.default_value().float_val);
+ case kUpb_CType_Double:
+ return DoubleToCLiteral(field.default_value().double_val);
+ case kUpb_CType_Bool:
+ return field.default_value().bool_val ? "true" : "false";
+ case kUpb_CType_Enum:
+ // Use a number instead of a symbolic name so that we don't require
+ // this enum's header to be included.
+ return absl::StrCat(field.default_value().int32_val);
+ }
+ ABSL_ASSERT(false);
+ return "XXX";
+}
+
+std::string CType(upb::FieldDefPtr field) {
+ return CTypeInternal(field, false);
+}
+
+std::string CTypeConst(upb::FieldDefPtr field) {
+ return CTypeInternal(field, true);
+}
+
+std::string MapKeyCType(upb::FieldDefPtr map_field) {
+ return CType(map_field.message_type().map_key());
+}
+
+std::string MapValueCType(upb::FieldDefPtr map_field) {
+ return CType(map_field.message_type().map_value());
+}
+
+std::string MapKeySize(upb::FieldDefPtr map_field, absl::string_view expr) {
+ return map_field.message_type().map_key().ctype() == kUpb_CType_String
+ ? "0"
+ : absl::StrCat("sizeof(", expr, ")");
+}
+
+std::string MapValueSize(upb::FieldDefPtr map_field, absl::string_view expr) {
+ return map_field.message_type().map_value().ctype() == kUpb_CType_String
+ ? "0"
+ : absl::StrCat("sizeof(", expr, ")");
+}
+
+std::string FieldInitializer(const DefPoolPair& pools, upb::FieldDefPtr field,
+ const Options& options);
+
+void DumpEnumValues(upb::EnumDefPtr desc, Output& output) {
+ std::vector<upb::EnumValDefPtr> values;
+ values.reserve(desc.value_count());
+ for (int i = 0; i < desc.value_count(); i++) {
+ values.push_back(desc.value(i));
+ }
+ std::sort(values.begin(), values.end(),
+ [](upb::EnumValDefPtr a, upb::EnumValDefPtr b) {
+ return a.number() < b.number();
+ });
+
+ for (size_t i = 0; i < values.size(); i++) {
+ auto value = values[i];
+ output(" $0 = $1", EnumValueSymbol(value), value.number());
+ if (i != values.size() - 1) {
+ output(",");
+ }
+ output("\n");
+ }
+}
+
+std::string GetFieldRep(const DefPoolPair& pools, upb::FieldDefPtr field);
+
+void GenerateExtensionInHeader(const DefPoolPair& pools, upb::FieldDefPtr ext,
+ Output& output) {
+ output(
+ R"cc(
+ UPB_INLINE bool $0_has_$1(const struct $2* msg) {
+ return _upb_Message_HasExtensionField(msg, &$3);
+ }
+ )cc",
+ ExtensionIdentBase(ext), ext.name(), MessageName(ext.containing_type()),
+ ExtensionLayout(ext));
+
+ output(
+ R"cc(
+ UPB_INLINE void $0_clear_$1(struct $2* msg) {
+ _upb_Message_ClearExtensionField(msg, &$3);
+ }
+ )cc",
+ ExtensionIdentBase(ext), ext.name(), MessageName(ext.containing_type()),
+ ExtensionLayout(ext));
+
+ if (ext.IsSequence()) {
+ // TODO(b/259861668): We need generated accessors for repeated extensions.
+ } else {
+ output(
+ R"cc(
+ UPB_INLINE $0 $1_$2(const struct $3* msg) {
+ const upb_MiniTableExtension* ext = &$4;
+ UPB_ASSUME(!upb_IsRepeatedOrMap(&ext->field));
+ UPB_ASSUME(_upb_MiniTableField_GetRep(&ext->field) == $5);
+ $0 default_val = $6;
+ $0 ret;
+ _upb_Message_GetExtensionField(msg, ext, &default_val, &ret);
+ return ret;
+ }
+ )cc",
+ CTypeConst(ext), ExtensionIdentBase(ext), ext.name(),
+ MessageName(ext.containing_type()), ExtensionLayout(ext),
+ GetFieldRep(pools, ext), FieldDefault(ext));
+ output(
+ R"cc(
+ UPB_INLINE void $1_set_$2(struct $3* msg, $0 val, upb_Arena* arena) {
+ const upb_MiniTableExtension* ext = &$4;
+ UPB_ASSUME(!upb_IsRepeatedOrMap(&ext->field));
+ UPB_ASSUME(_upb_MiniTableField_GetRep(&ext->field) == $5);
+ bool ok = _upb_Message_SetExtensionField(msg, ext, &val, arena);
+ UPB_ASSERT(ok);
+ }
+ )cc",
+ CTypeConst(ext), ExtensionIdentBase(ext), ext.name(),
+ MessageName(ext.containing_type()), ExtensionLayout(ext),
+ GetFieldRep(pools, ext));
+ }
+}
+
+void GenerateMessageFunctionsInHeader(upb::MessageDefPtr message,
+ const Options& options, Output& output) {
+ // TODO(b/235839510): The generated code here does not check the return values
+ // from upb_Encode(). How can we even fix this without breaking other things?
+ output(
+ R"cc(
+ UPB_INLINE $0* $0_new(upb_Arena* arena) {
+ return ($0*)_upb_Message_New($1, arena);
+ }
+ UPB_INLINE $0* $0_parse(const char* buf, size_t size, upb_Arena* arena) {
+ $0* ret = $0_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, $1, NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+ }
+ UPB_INLINE $0* $0_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ $0* ret = $0_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, $1, extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+ }
+ UPB_INLINE char* $0_serialize(const $0* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, $1, 0, arena, &ptr, len);
+ return ptr;
+ }
+ UPB_INLINE char* $0_serialize_ex(const $0* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, $1, options, arena, &ptr, len);
+ return ptr;
+ }
+ )cc",
+ MessageName(message), MessageMiniTableRef(message, options));
+}
+
+void GenerateOneofInHeader(upb::OneofDefPtr oneof, const DefPoolPair& pools,
+ absl::string_view msg_name, const Options& options,
+ Output& output) {
+ std::string fullname = ToCIdent(oneof.full_name());
+ output("typedef enum {\n");
+ for (int j = 0; j < oneof.field_count(); j++) {
+ upb::FieldDefPtr field = oneof.field(j);
+ output(" $0_$1 = $2,\n", fullname, field.name(), field.number());
+ }
+ output(
+ " $0_NOT_SET = 0\n"
+ "} $0_oneofcases;\n",
+ fullname);
+ output(
+ R"cc(
+ UPB_INLINE $0_oneofcases $1_$2_case(const $1* msg) {
+ const upb_MiniTableField field = $3;
+ return ($0_oneofcases)upb_Message_WhichOneofFieldNumber(msg, &field);
+ }
+ )cc",
+ fullname, msg_name, oneof.name(),
+ FieldInitializer(pools, oneof.field(0), options));
+}
+
+void GenerateHazzer(upb::FieldDefPtr field, const DefPoolPair& pools,
+ absl::string_view msg_name,
+ const NameToFieldDefMap& field_names,
+ const Options& options, Output& output) {
+ std::string resolved_name = ResolveFieldName(field, field_names);
+ if (field.has_presence()) {
+ output(
+ R"cc(
+ UPB_INLINE bool $0_has_$1(const $0* msg) {
+ const upb_MiniTableField field = $2;
+ return _upb_Message_HasNonExtensionField(msg, &field);
+ }
+ )cc",
+ msg_name, resolved_name, FieldInitializer(pools, field, options));
+ } else if (field.IsMap()) {
+ // Do nothing.
+ } else if (field.IsSequence()) {
+ // TODO(b/259616267): remove.
+ output(
+ R"cc(
+ UPB_INLINE bool $0_has_$1(const $0* msg) {
+ size_t size;
+ $0_$1(msg, &size);
+ return size != 0;
+ }
+ )cc",
+ msg_name, resolved_name);
+ }
+}
+
+void GenerateClear(upb::FieldDefPtr field, const DefPoolPair& pools,
+ absl::string_view msg_name,
+ const NameToFieldDefMap& field_names, const Options& options,
+ Output& output) {
+ if (field == field.containing_type().map_key() ||
+ field == field.containing_type().map_value()) {
+ // Cannot be cleared.
+ return;
+ }
+ std::string resolved_name = ResolveFieldName(field, field_names);
+ output(
+ R"cc(
+ UPB_INLINE void $0_clear_$1($0* msg) {
+ const upb_MiniTableField field = $2;
+ _upb_Message_ClearNonExtensionField(msg, &field);
+ }
+ )cc",
+ msg_name, resolved_name, FieldInitializer(pools, field, options));
+}
+
+void GenerateMapGetters(upb::FieldDefPtr field, const DefPoolPair& pools,
+ absl::string_view msg_name,
+ const NameToFieldDefMap& field_names,
+ const Options& options, Output& output) {
+ std::string resolved_name = ResolveFieldName(field, field_names);
+ output(
+ R"cc(
+ UPB_INLINE size_t $0_$1_size(const $0* msg) {
+ const upb_MiniTableField field = $2;
+ const upb_Map* map = upb_Message_GetMap(msg, &field);
+ return map ? _upb_Map_Size(map) : 0;
+ }
+ )cc",
+ msg_name, resolved_name, FieldInitializer(pools, field, options));
+ output(
+ R"cc(
+ UPB_INLINE bool $0_$1_get(const $0* msg, $2 key, $3* val) {
+ const upb_MiniTableField field = $4;
+ const upb_Map* map = upb_Message_GetMap(msg, &field);
+ if (!map) return false;
+ return _upb_Map_Get(map, &key, $5, val, $6);
+ }
+ )cc",
+ msg_name, resolved_name, MapKeyCType(field), MapValueCType(field),
+ FieldInitializer(pools, field, options), MapKeySize(field, "key"),
+ MapValueSize(field, "*val"));
+ output(
+ R"cc(
+ UPB_INLINE $0 $1_$2_next(const $1* msg, size_t* iter) {
+ const upb_MiniTableField field = $3;
+ const upb_Map* map = upb_Message_GetMap(msg, &field);
+ if (!map) return NULL;
+ return ($0)_upb_map_next(map, iter);
+ }
+ )cc",
+ CTypeConst(field), msg_name, resolved_name,
+ FieldInitializer(pools, field, options));
+}
+
+void GenerateMapEntryGetters(upb::FieldDefPtr field, absl::string_view msg_name,
+ Output& output) {
+ output(
+ R"cc(
+ UPB_INLINE $0 $1_$2(const $1* msg) {
+ $3 ret;
+ _upb_msg_map_$2(msg, &ret, $4);
+ return ret;
+ }
+ )cc",
+ CTypeConst(field), msg_name, field.name(), CType(field),
+ field.ctype() == kUpb_CType_String ? "0" : "sizeof(ret)");
+}
+
+void GenerateRepeatedGetters(upb::FieldDefPtr field, const DefPoolPair& pools,
+ absl::string_view msg_name,
+ const NameToFieldDefMap& field_names,
+ const Options& options, Output& output) {
+ // Generate getter returning first item and size.
+ //
+ // Example:
+ // UPB_INLINE const struct Bar* const* name(const Foo* msg, size_t* size)
+ output(
+ R"cc(
+ UPB_INLINE $0 const* $1_$2(const $1* msg, size_t* size) {
+ const upb_MiniTableField field = $3;
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return ($0 const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+ }
+ )cc",
+ CTypeConst(field), // $0
+ msg_name, // $1
+ ResolveFieldName(field, field_names), // $2
+ FieldInitializer(pools, field, options) // #3
+ );
+ // Generate private getter returning array or NULL for immutable and upb_Array
+ // for mutable.
+ //
+ // Example:
+ // UPB_INLINE const upb_Array* _name_upbarray(size_t* size)
+ // UPB_INLINE upb_Array* _name_mutable_upbarray(size_t* size)
+ output(
+ R"cc(
+ UPB_INLINE const upb_Array* _$1_$2_$4(const $1* msg, size_t* size) {
+ const upb_MiniTableField field = $3;
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (size) {
+ *size = arr ? arr->size : 0;
+ }
+ return arr;
+ }
+ UPB_INLINE upb_Array* _$1_$2_$5(const $1* msg, size_t* size, upb_Arena* arena) {
+ const upb_MiniTableField field = $3;
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(
+ (upb_Message*)msg, &field, arena);
+ if (size) {
+ *size = arr ? arr->size : 0;
+ }
+ return arr;
+ }
+ )cc",
+ CTypeConst(field), // $0
+ msg_name, // $1
+ ResolveFieldName(field, field_names), // $2
+ FieldInitializer(pools, field, options), // $3
+ kRepeatedFieldArrayGetterPostfix, // $4
+ kRepeatedFieldMutableArrayGetterPostfix // $5
+ );
+}
+
+void GenerateScalarGetters(upb::FieldDefPtr field, const DefPoolPair& pools,
+ absl::string_view msg_name,
+ const NameToFieldDefMap& field_names,
+ const Options& Options, Output& output) {
+ std::string field_name = ResolveFieldName(field, field_names);
+ output(
+ R"cc(
+ UPB_INLINE $0 $1_$2(const $1* msg) {
+ $0 default_val = $3;
+ $0 ret;
+ const upb_MiniTableField field = $4;
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+ }
+ )cc",
+ CTypeConst(field), msg_name, field_name, FieldDefault(field),
+ FieldInitializer(pools, field, Options));
+}
+
+void GenerateGetters(upb::FieldDefPtr field, const DefPoolPair& pools,
+ absl::string_view msg_name,
+ const NameToFieldDefMap& field_names,
+ const Options& options, Output& output) {
+ if (field.IsMap()) {
+ GenerateMapGetters(field, pools, msg_name, field_names, options, output);
+ } else if (UPB_DESC(MessageOptions_map_entry)(
+ field.containing_type().options())) {
+ GenerateMapEntryGetters(field, msg_name, output);
+ } else if (field.IsSequence()) {
+ GenerateRepeatedGetters(field, pools, msg_name, field_names, options,
+ output);
+ } else {
+ GenerateScalarGetters(field, pools, msg_name, field_names, options, output);
+ }
+}
+
+void GenerateMapSetters(upb::FieldDefPtr field, const DefPoolPair& pools,
+ absl::string_view msg_name,
+ const NameToFieldDefMap& field_names,
+ const Options& options, Output& output) {
+ std::string resolved_name = ResolveFieldName(field, field_names);
+ output(
+ R"cc(
+ UPB_INLINE void $0_$1_clear($0* msg) {
+ const upb_MiniTableField field = $2;
+ upb_Map* map = (upb_Map*)upb_Message_GetMap(msg, &field);
+ if (!map) return;
+ _upb_Map_Clear(map);
+ }
+ )cc",
+ msg_name, resolved_name, FieldInitializer(pools, field, options));
+ output(
+ R"cc(
+ UPB_INLINE bool $0_$1_set($0* msg, $2 key, $3 val, upb_Arena* a) {
+ const upb_MiniTableField field = $4;
+ upb_Map* map = _upb_Message_GetOrCreateMutableMap(msg, &field, $5, $6, a);
+ return _upb_Map_Insert(map, &key, $5, &val, $6, a) !=
+ kUpb_MapInsertStatus_OutOfMemory;
+ }
+ )cc",
+ msg_name, resolved_name, MapKeyCType(field), MapValueCType(field),
+ FieldInitializer(pools, field, options), MapKeySize(field, "key"),
+ MapValueSize(field, "val"));
+ output(
+ R"cc(
+ UPB_INLINE bool $0_$1_delete($0* msg, $2 key) {
+ const upb_MiniTableField field = $3;
+ upb_Map* map = (upb_Map*)upb_Message_GetMap(msg, &field);
+ if (!map) return false;
+ return _upb_Map_Delete(map, &key, $4, NULL);
+ }
+ )cc",
+ msg_name, resolved_name, MapKeyCType(field),
+ FieldInitializer(pools, field, options), MapKeySize(field, "key"));
+ output(
+ R"cc(
+ UPB_INLINE $0 $1_$2_nextmutable($1* msg, size_t* iter) {
+ const upb_MiniTableField field = $3;
+ upb_Map* map = (upb_Map*)upb_Message_GetMap(msg, &field);
+ if (!map) return NULL;
+ return ($0)_upb_map_next(map, iter);
+ }
+ )cc",
+ CType(field), msg_name, resolved_name,
+ FieldInitializer(pools, field, options));
+}
+
+void GenerateRepeatedSetters(upb::FieldDefPtr field, const DefPoolPair& pools,
+ absl::string_view msg_name,
+ const NameToFieldDefMap& field_names,
+ const Options& options, Output& output) {
+ std::string resolved_name = ResolveFieldName(field, field_names);
+ output(
+ R"cc(
+ UPB_INLINE $0* $1_mutable_$2($1* msg, size_t* size) {
+ upb_MiniTableField field = $3;
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return ($0*)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+ }
+ )cc",
+ CType(field), msg_name, resolved_name,
+ FieldInitializer(pools, field, options));
+ output(
+ R"cc(
+ UPB_INLINE $0* $1_resize_$2($1* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = $3;
+ return ($0*)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+ }
+ )cc",
+ CType(field), msg_name, resolved_name,
+ FieldInitializer(pools, field, options));
+ if (field.ctype() == kUpb_CType_Message) {
+ output(
+ R"cc(
+ UPB_INLINE struct $0* $1_add_$2($1* msg, upb_Arena* arena) {
+ upb_MiniTableField field = $4;
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct $0* sub = (struct $0*)_upb_Message_New($3, arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+ }
+ )cc",
+ MessageName(field.message_type()), msg_name, resolved_name,
+ MessageMiniTableRef(field.message_type(), options),
+ FieldInitializer(pools, field, options));
+ } else {
+ output(
+ R"cc(
+ UPB_INLINE bool $1_add_$2($1* msg, $0 val, upb_Arena* arena) {
+ upb_MiniTableField field = $3;
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return false;
+ }
+ _upb_Array_Set(arr, arr->size - 1, &val, sizeof(val));
+ return true;
+ }
+ )cc",
+ CType(field), msg_name, resolved_name,
+ FieldInitializer(pools, field, options));
+ }
+}
+
+void GenerateNonRepeatedSetters(upb::FieldDefPtr field,
+ const DefPoolPair& pools,
+ absl::string_view msg_name,
+ const NameToFieldDefMap& field_names,
+ const Options& options, Output& output) {
+ if (field == field.containing_type().map_key()) {
+ // Key cannot be mutated.
+ return;
+ }
+
+ std::string field_name = ResolveFieldName(field, field_names);
+
+ if (field == field.containing_type().map_value()) {
+ output(R"cc(
+ UPB_INLINE void $0_set_$1($0 *msg, $2 value) {
+ _upb_msg_map_set_value(msg, &value, $3);
+ }
+ )cc",
+ msg_name, field_name, CType(field),
+ field.ctype() == kUpb_CType_String ? "0"
+ : "sizeof(" + CType(field) + ")");
+ } else {
+ output(R"cc(
+ UPB_INLINE void $0_set_$1($0 *msg, $2 value) {
+ const upb_MiniTableField field = $3;
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+ }
+ )cc",
+ msg_name, field_name, CType(field),
+ FieldInitializer(pools, field, options));
+ }
+
+ // Message fields also have a Msg_mutable_foo() accessor that will create
+ // the sub-message if it doesn't already exist.
+ if (field.ctype() == kUpb_CType_Message &&
+ !UPB_DESC(MessageOptions_map_entry)(field.containing_type().options())) {
+ output(
+ R"cc(
+ UPB_INLINE struct $0* $1_mutable_$2($1* msg, upb_Arena* arena) {
+ struct $0* sub = (struct $0*)$1_$2(msg);
+ if (sub == NULL) {
+ sub = (struct $0*)_upb_Message_New($3, arena);
+ if (sub) $1_set_$2(msg, sub);
+ }
+ return sub;
+ }
+ )cc",
+ MessageName(field.message_type()), msg_name, field_name,
+ MessageMiniTableRef(field.message_type(), options));
+ }
+}
+
+void GenerateSetters(upb::FieldDefPtr field, const DefPoolPair& pools,
+ absl::string_view msg_name,
+ const NameToFieldDefMap& field_names,
+ const Options& options, Output& output) {
+ if (field.IsMap()) {
+ GenerateMapSetters(field, pools, msg_name, field_names, options, output);
+ } else if (field.IsSequence()) {
+ GenerateRepeatedSetters(field, pools, msg_name, field_names, options,
+ output);
+ } else {
+ GenerateNonRepeatedSetters(field, pools, msg_name, field_names, options,
+ output);
+ }
+}
+
+void GenerateMessageInHeader(upb::MessageDefPtr message,
+ const DefPoolPair& pools, const Options& options,
+ Output& output) {
+ output("/* $0 */\n\n", message.full_name());
+ std::string msg_name = ToCIdent(message.full_name());
+ if (!UPB_DESC(MessageOptions_map_entry)(message.options())) {
+ GenerateMessageFunctionsInHeader(message, options, output);
+ }
+
+ for (int i = 0; i < message.real_oneof_count(); i++) {
+ GenerateOneofInHeader(message.oneof(i), pools, msg_name, options, output);
+ }
+
+ auto field_names = CreateFieldNameMap(message);
+ for (auto field : FieldNumberOrder(message)) {
+ GenerateClear(field, pools, msg_name, field_names, options, output);
+ GenerateGetters(field, pools, msg_name, field_names, options, output);
+ GenerateHazzer(field, pools, msg_name, field_names, options, output);
+ }
+
+ output("\n");
+
+ for (auto field : FieldNumberOrder(message)) {
+ GenerateSetters(field, pools, msg_name, field_names, options, output);
+ }
+
+ output("\n");
+}
+
+void ForwardDeclareMiniTableInit(upb::MessageDefPtr message,
+ const Options& options, Output& output) {
+ if (options.bootstrap) {
+ output("extern const upb_MiniTable* $0();\n", MessageInitName(message));
+ } else {
+ output("extern const upb_MiniTable $0;\n", MessageInitName(message));
+ }
+}
+
+void WriteHeader(const DefPoolPair& pools, upb::FileDefPtr file,
+ const Options& options, Output& output) {
+ EmitFileWarning(file.name(), output);
+ output(
+ "#ifndef $0_UPB_H_\n"
+ "#define $0_UPB_H_\n\n"
+ "#include \"upb/generated_code_support.h\"\n",
+ ToPreproc(file.name()));
+
+ for (int i = 0; i < file.public_dependency_count(); i++) {
+ if (i == 0) {
+ output("/* Public Imports. */\n");
+ }
+ output("#include \"$0\"\n", HeaderFilename(file.public_dependency(i)));
+ if (i == file.public_dependency_count() - 1) {
+ output("\n");
+ }
+ }
+
+ output(
+ "// Must be last. \n"
+ "#include \"upb/port/def.inc\"\n"
+ "\n"
+ "#ifdef __cplusplus\n"
+ "extern \"C\" {\n"
+ "#endif\n"
+ "\n");
+
+ const std::vector<upb::MessageDefPtr> this_file_messages =
+ SortedMessages(file);
+ const std::vector<upb::FieldDefPtr> this_file_exts = SortedExtensions(file);
+
+ // Forward-declare types defined in this file.
+ for (auto message : this_file_messages) {
+ output("typedef struct $0 $0;\n", ToCIdent(message.full_name()));
+ }
+ for (auto message : this_file_messages) {
+ ForwardDeclareMiniTableInit(message, options, output);
+ }
+ for (auto ext : this_file_exts) {
+ output("extern const upb_MiniTableExtension $0;\n", ExtensionLayout(ext));
+ }
+
+ // Forward-declare types not in this file, but used as submessages.
+ // Order by full name for consistent ordering.
+ std::map<std::string, upb::MessageDefPtr> forward_messages;
+
+ for (auto message : this_file_messages) {
+ for (int i = 0; i < message.field_count(); i++) {
+ upb::FieldDefPtr field = message.field(i);
+ if (field.ctype() == kUpb_CType_Message &&
+ field.file() != field.message_type().file()) {
+ forward_messages[field.message_type().full_name()] =
+ field.message_type();
+ }
+ }
+ }
+ for (auto ext : this_file_exts) {
+ if (ext.file() != ext.containing_type().file()) {
+ forward_messages[ext.containing_type().full_name()] =
+ ext.containing_type();
+ }
+ }
+ for (const auto& pair : forward_messages) {
+ output("struct $0;\n", MessageName(pair.second));
+ }
+ for (const auto& pair : forward_messages) {
+ ForwardDeclareMiniTableInit(pair.second, options, output);
+ }
+
+ if (!this_file_messages.empty()) {
+ output("\n");
+ }
+
+ std::vector<upb::EnumDefPtr> this_file_enums = SortedEnums(file);
+
+ for (auto enumdesc : this_file_enums) {
+ output("typedef enum {\n");
+ DumpEnumValues(enumdesc, output);
+ output("} $0;\n\n", ToCIdent(enumdesc.full_name()));
+ }
+
+ output("\n");
+
+ if (file.syntax() == kUpb_Syntax_Proto2) {
+ for (const auto enumdesc : this_file_enums) {
+ if (options.bootstrap) {
+ output("extern const upb_MiniTableEnum* $0();\n", EnumInit(enumdesc));
+ } else {
+ output("extern const upb_MiniTableEnum $0;\n", EnumInit(enumdesc));
+ }
+ }
+ }
+
+ output("\n");
+ for (auto message : this_file_messages) {
+ GenerateMessageInHeader(message, pools, options, output);
+ }
+
+ for (auto ext : this_file_exts) {
+ GenerateExtensionInHeader(pools, ext, output);
+ }
+
+ output("extern const upb_MiniTableFile $0;\n\n", FileLayoutName(file));
+
+ if (absl::string_view(file.name()) == "google/protobuf/descriptor.proto" ||
+ absl::string_view(file.name()) == "net/proto2/proto/descriptor.proto") {
+ // This is gratuitously inefficient with how many times it rebuilds
+ // MessageLayout objects for the same message. But we only do this for one
+ // proto (descriptor.proto) so we don't worry about it.
+ upb::MessageDefPtr max32_message;
+ upb::MessageDefPtr max64_message;
+ size_t max32 = 0;
+ size_t max64 = 0;
+ for (const auto message : this_file_messages) {
+ if (absl::EndsWith(message.name(), "Options")) {
+ size_t size32 = pools.GetMiniTable32(message)->size;
+ size_t size64 = pools.GetMiniTable64(message)->size;
+ if (size32 > max32) {
+ max32 = size32;
+ max32_message = message;
+ }
+ if (size64 > max64) {
+ max64 = size64;
+ max64_message = message;
+ }
+ }
+ }
+
+ output("/* Max size 32 is $0 */\n", max32_message.full_name());
+ output("/* Max size 64 is $0 */\n", max64_message.full_name());
+ output("#define _UPB_MAXOPT_SIZE UPB_SIZE($0, $1)\n\n", max32, max64);
+ }
+
+ output(
+ "#ifdef __cplusplus\n"
+ "} /* extern \"C\" */\n"
+ "#endif\n"
+ "\n"
+ "#include \"upb/port/undef.inc\"\n"
+ "\n"
+ "#endif /* $0_UPB_H_ */\n",
+ ToPreproc(file.name()));
+}
+
+typedef std::pair<std::string, uint64_t> TableEntry;
+
+uint32_t GetWireTypeForField(upb::FieldDefPtr field) {
+ if (field.packed()) return kUpb_WireType_Delimited;
+ switch (field.type()) {
+ case kUpb_FieldType_Double:
+ case kUpb_FieldType_Fixed64:
+ case kUpb_FieldType_SFixed64:
+ return kUpb_WireType_64Bit;
+ case kUpb_FieldType_Float:
+ case kUpb_FieldType_Fixed32:
+ case kUpb_FieldType_SFixed32:
+ return kUpb_WireType_32Bit;
+ case kUpb_FieldType_Int64:
+ case kUpb_FieldType_UInt64:
+ case kUpb_FieldType_Int32:
+ case kUpb_FieldType_Bool:
+ case kUpb_FieldType_UInt32:
+ case kUpb_FieldType_Enum:
+ case kUpb_FieldType_SInt32:
+ case kUpb_FieldType_SInt64:
+ return kUpb_WireType_Varint;
+ case kUpb_FieldType_Group:
+ return kUpb_WireType_StartGroup;
+ case kUpb_FieldType_Message:
+ case kUpb_FieldType_String:
+ case kUpb_FieldType_Bytes:
+ return kUpb_WireType_Delimited;
+ }
+ UPB_UNREACHABLE();
+}
+
+uint32_t MakeTag(uint32_t field_number, uint32_t wire_type) {
+ return field_number << 3 | wire_type;
+}
+
+size_t WriteVarint32ToArray(uint64_t val, char* buf) {
+ size_t i = 0;
+ do {
+ uint8_t byte = val & 0x7fU;
+ val >>= 7;
+ if (val) byte |= 0x80U;
+ buf[i++] = byte;
+ } while (val);
+ return i;
+}
+
+uint64_t GetEncodedTag(upb::FieldDefPtr field) {
+ uint32_t wire_type = GetWireTypeForField(field);
+ uint32_t unencoded_tag = MakeTag(field.number(), wire_type);
+ char tag_bytes[10] = {0};
+ WriteVarint32ToArray(unencoded_tag, tag_bytes);
+ uint64_t encoded_tag = 0;
+ memcpy(&encoded_tag, tag_bytes, sizeof(encoded_tag));
+ // TODO: byte-swap for big endian.
+ return encoded_tag;
+}
+
+int GetTableSlot(upb::FieldDefPtr field) {
+ uint64_t tag = GetEncodedTag(field);
+ if (tag > 0x7fff) {
+ // Tag must fit within a two-byte varint.
+ return -1;
+ }
+ return (tag & 0xf8) >> 3;
+}
+
+bool TryFillTableEntry(const DefPoolPair& pools, upb::FieldDefPtr field,
+ TableEntry& ent) {
+ const upb_MiniTable* mt = pools.GetMiniTable64(field.containing_type());
+ const upb_MiniTableField* mt_f =
+ upb_MiniTable_FindFieldByNumber(mt, field.number());
+ std::string type = "";
+ std::string cardinality = "";
+ switch (upb_MiniTableField_Type(mt_f)) {
+ case kUpb_FieldType_Bool:
+ type = "b1";
+ break;
+ case kUpb_FieldType_Enum:
+ if (upb_MiniTableField_IsClosedEnum(mt_f)) {
+ // We don't have the means to test proto2 enum fields for valid values.
+ return false;
+ }
+ [[fallthrough]];
+ case kUpb_FieldType_Int32:
+ case kUpb_FieldType_UInt32:
+ type = "v4";
+ break;
+ case kUpb_FieldType_Int64:
+ case kUpb_FieldType_UInt64:
+ type = "v8";
+ break;
+ case kUpb_FieldType_Fixed32:
+ case kUpb_FieldType_SFixed32:
+ case kUpb_FieldType_Float:
+ type = "f4";
+ break;
+ case kUpb_FieldType_Fixed64:
+ case kUpb_FieldType_SFixed64:
+ case kUpb_FieldType_Double:
+ type = "f8";
+ break;
+ case kUpb_FieldType_SInt32:
+ type = "z4";
+ break;
+ case kUpb_FieldType_SInt64:
+ type = "z8";
+ break;
+ case kUpb_FieldType_String:
+ type = "s";
+ break;
+ case kUpb_FieldType_Bytes:
+ type = "b";
+ break;
+ case kUpb_FieldType_Message:
+ type = "m";
+ break;
+ default:
+ return false; // Not supported yet.
+ }
+
+ switch (upb_FieldMode_Get(mt_f)) {
+ case kUpb_FieldMode_Map:
+ return false; // Not supported yet (ever?).
+ case kUpb_FieldMode_Array:
+ if (mt_f->mode & kUpb_LabelFlags_IsPacked) {
+ cardinality = "p";
+ } else {
+ cardinality = "r";
+ }
+ break;
+ case kUpb_FieldMode_Scalar:
+ if (mt_f->presence < 0) {
+ cardinality = "o";
+ } else {
+ cardinality = "s";
+ }
+ break;
+ }
+
+ uint64_t expected_tag = GetEncodedTag(field);
+
+ // Data is:
+ //
+ // 48 32 16 0
+ // |--------|--------|--------|--------|--------|--------|--------|--------|
+ // | offset (16) |case offset (16) |presence| submsg | exp. tag (16) |
+ // |--------|--------|--------|--------|--------|--------|--------|--------|
+ //
+ // - |presence| is either hasbit index or field number for oneofs.
+
+ uint64_t data = static_cast<uint64_t>(mt_f->offset) << 48 | expected_tag;
+
+ if (field.IsSequence()) {
+ // No hasbit/oneof-related fields.
+ }
+ if (field.real_containing_oneof()) {
+ uint64_t case_offset = ~mt_f->presence;
+ if (case_offset > 0xffff || field.number() > 0xff) return false;
+ data |= field.number() << 24;
+ data |= case_offset << 32;
+ } else {
+ uint64_t hasbit_index = 63; // No hasbit (set a high, unused bit).
+ if (mt_f->presence) {
+ hasbit_index = mt_f->presence;
+ if (hasbit_index > 31) return false;
+ }
+ data |= hasbit_index << 24;
+ }
+
+ if (field.ctype() == kUpb_CType_Message) {
+ uint64_t idx = mt_f->UPB_PRIVATE(submsg_index);
+ if (idx > 255) return false;
+ data |= idx << 16;
+
+ std::string size_ceil = "max";
+ size_t size = SIZE_MAX;
+ if (field.message_type().file() == field.file()) {
+ // We can only be guaranteed the size of the sub-message if it is in the
+ // same file as us. We could relax this to increase the speed of
+ // cross-file sub-message parsing if we are comfortable requiring that
+ // users compile all messages at the same time.
+ const upb_MiniTable* sub_mt = pools.GetMiniTable64(field.message_type());
+ size = sub_mt->size + 8;
+ }
+ std::vector<size_t> breaks = {64, 128, 192, 256};
+ for (auto brk : breaks) {
+ if (size <= brk) {
+ size_ceil = std::to_string(brk);
+ break;
+ }
+ }
+ ent.first = absl::Substitute("upb_p$0$1_$2bt_max$3b", cardinality, type,
+ expected_tag > 0xff ? "2" : "1", size_ceil);
+
+ } else {
+ ent.first = absl::Substitute("upb_p$0$1_$2bt", cardinality, type,
+ expected_tag > 0xff ? "2" : "1");
+ }
+ ent.second = data;
+ return true;
+}
+
+std::vector<TableEntry> FastDecodeTable(upb::MessageDefPtr message,
+ const DefPoolPair& pools) {
+ std::vector<TableEntry> table;
+ for (const auto field : FieldHotnessOrder(message)) {
+ TableEntry ent;
+ int slot = GetTableSlot(field);
+ // std::cerr << "table slot: " << field->number() << ": " << slot << "\n";
+ if (slot < 0) {
+ // Tag can't fit in the table.
+ continue;
+ }
+ if (!TryFillTableEntry(pools, field, ent)) {
+ // Unsupported field type or offset, hasbit index, etc. doesn't fit.
+ continue;
+ }
+ while ((size_t)slot >= table.size()) {
+ size_t size = std::max(static_cast<size_t>(1), table.size() * 2);
+ table.resize(size, TableEntry{"_upb_FastDecoder_DecodeGeneric", 0});
+ }
+ if (table[slot].first != "_upb_FastDecoder_DecodeGeneric") {
+ // A hotter field already filled this slot.
+ continue;
+ }
+ table[slot] = ent;
+ }
+ return table;
+}
+
+std::string ArchDependentSize(int64_t size32, int64_t size64) {
+ if (size32 == size64) return absl::StrCat(size32);
+ return absl::Substitute("UPB_SIZE($0, $1)", size32, size64);
+}
+
+std::string GetFieldRep(const upb_MiniTableField* field32,
+ const upb_MiniTableField* field64) {
+ switch (_upb_MiniTableField_GetRep(field32)) {
+ case kUpb_FieldRep_1Byte:
+ return "kUpb_FieldRep_1Byte";
+ break;
+ case kUpb_FieldRep_4Byte: {
+ if (_upb_MiniTableField_GetRep(field64) == kUpb_FieldRep_4Byte) {
+ return "kUpb_FieldRep_4Byte";
+ } else {
+ assert(_upb_MiniTableField_GetRep(field64) == kUpb_FieldRep_8Byte);
+ return "UPB_SIZE(kUpb_FieldRep_4Byte, kUpb_FieldRep_8Byte)";
+ }
+ break;
+ }
+ case kUpb_FieldRep_StringView:
+ return "kUpb_FieldRep_StringView";
+ break;
+ case kUpb_FieldRep_8Byte:
+ return "kUpb_FieldRep_8Byte";
+ break;
+ }
+ UPB_UNREACHABLE();
+}
+
+std::string GetFieldRep(const DefPoolPair& pools, upb::FieldDefPtr field) {
+ return GetFieldRep(pools.GetField32(field), pools.GetField64(field));
+}
+
+// Returns the field mode as a string initializer.
+//
+// We could just emit this as a number (and we may yet go in that direction) but
+// for now emitting symbolic constants gives this better readability and
+// debuggability.
+std::string GetModeInit(const upb_MiniTableField* field32,
+ const upb_MiniTableField* field64) {
+ std::string ret;
+ uint8_t mode32 = field32->mode;
+ switch (mode32 & kUpb_FieldMode_Mask) {
+ case kUpb_FieldMode_Map:
+ ret = "(int)kUpb_FieldMode_Map";
+ break;
+ case kUpb_FieldMode_Array:
+ ret = "(int)kUpb_FieldMode_Array";
+ break;
+ case kUpb_FieldMode_Scalar:
+ ret = "(int)kUpb_FieldMode_Scalar";
+ break;
+ default:
+ break;
+ }
+
+ if (mode32 & kUpb_LabelFlags_IsPacked) {
+ absl::StrAppend(&ret, " | (int)kUpb_LabelFlags_IsPacked");
+ }
+
+ if (mode32 & kUpb_LabelFlags_IsExtension) {
+ absl::StrAppend(&ret, " | (int)kUpb_LabelFlags_IsExtension");
+ }
+
+ if (mode32 & kUpb_LabelFlags_IsAlternate) {
+ absl::StrAppend(&ret, " | (int)kUpb_LabelFlags_IsAlternate");
+ }
+
+ absl::StrAppend(&ret, " | ((int)", GetFieldRep(field32, field64),
+ " << kUpb_FieldRep_Shift)");
+ return ret;
+}
+
+std::string FieldInitializer(upb::FieldDefPtr field,
+ const upb_MiniTableField* field64,
+ const upb_MiniTableField* field32,
+ const Options& options) {
+ if (options.bootstrap) {
+ ABSL_CHECK(!field.is_extension());
+ return absl::Substitute(
+ "*upb_MiniTable_FindFieldByNumber($0, $1)",
+ MessageMiniTableRef(field.containing_type(), options), field.number());
+ } else {
+ return absl::Substitute(
+ "{$0, $1, $2, $3, $4, $5}", field64->number,
+ ArchDependentSize(field32->offset, field64->offset),
+ ArchDependentSize(field32->presence, field64->presence),
+ field64->UPB_PRIVATE(submsg_index) == kUpb_NoSub
+ ? "kUpb_NoSub"
+ : absl::StrCat(field64->UPB_PRIVATE(submsg_index)).c_str(),
+ field64->UPB_PRIVATE(descriptortype), GetModeInit(field32, field64));
+ }
+}
+
+std::string FieldInitializer(const DefPoolPair& pools, upb::FieldDefPtr field,
+ const Options& options) {
+ return FieldInitializer(field, pools.GetField64(field),
+ pools.GetField32(field), options);
+}
+
+// Writes a single field into a .upb.c source file.
+void WriteMessageField(upb::FieldDefPtr field,
+ const upb_MiniTableField* field64,
+ const upb_MiniTableField* field32,
+ const Options& options, Output& output) {
+ output(" $0,\n", FieldInitializer(field, field64, field32, options));
+}
+
+std::string GetSub(upb::FieldDefPtr field) {
+ if (auto message_def = field.message_type()) {
+ return absl::Substitute("{.submsg = &$0}", MessageInitName(message_def));
+ }
+
+ if (auto enum_def = field.enum_subdef()) {
+ if (enum_def.is_closed()) {
+ return absl::Substitute("{.subenum = &$0}", EnumInit(enum_def));
+ }
+ }
+
+ return std::string("{.submsg = NULL}");
+}
+
+// Writes a single message into a .upb.c source file.
+void WriteMessage(upb::MessageDefPtr message, const DefPoolPair& pools,
+ const Options& options, Output& output) {
+ std::string msg_name = ToCIdent(message.full_name());
+ std::string fields_array_ref = "NULL";
+ std::string submsgs_array_ref = "NULL";
+ std::string subenums_array_ref = "NULL";
+ const upb_MiniTable* mt_32 = pools.GetMiniTable32(message);
+ const upb_MiniTable* mt_64 = pools.GetMiniTable64(message);
+ std::map<int, std::string> subs;
+
+ for (int i = 0; i < mt_64->field_count; i++) {
+ const upb_MiniTableField* f = &mt_64->fields[i];
+ uint32_t index = f->UPB_PRIVATE(submsg_index);
+ if (index != kUpb_NoSub) {
+ auto pair =
+ subs.emplace(index, GetSub(message.FindFieldByNumber(f->number)));
+ ABSL_CHECK(pair.second);
+ }
+ }
+
+ if (!subs.empty()) {
+ std::string submsgs_array_name = msg_name + "_submsgs";
+ submsgs_array_ref = "&" + submsgs_array_name + "[0]";
+ output("static const upb_MiniTableSub $0[$1] = {\n", submsgs_array_name,
+ subs.size());
+
+ int i = 0;
+ for (const auto& pair : subs) {
+ ABSL_CHECK(pair.first == i++);
+ output(" $0,\n", pair.second);
+ }
+
+ output("};\n\n");
+ }
+
+ if (mt_64->field_count > 0) {
+ std::string fields_array_name = msg_name + "__fields";
+ fields_array_ref = "&" + fields_array_name + "[0]";
+ output("static const upb_MiniTableField $0[$1] = {\n", fields_array_name,
+ mt_64->field_count);
+ for (int i = 0; i < mt_64->field_count; i++) {
+ WriteMessageField(message.FindFieldByNumber(mt_64->fields[i].number),
+ &mt_64->fields[i], &mt_32->fields[i], options, output);
+ }
+ output("};\n\n");
+ }
+
+ std::vector<TableEntry> table;
+ uint8_t table_mask = -1;
+
+ table = FastDecodeTable(message, pools);
+
+ if (table.size() > 1) {
+ assert((table.size() & (table.size() - 1)) == 0);
+ table_mask = (table.size() - 1) << 3;
+ }
+
+ std::string msgext = "kUpb_ExtMode_NonExtendable";
+
+ if (message.extension_range_count()) {
+ if (UPB_DESC(MessageOptions_message_set_wire_format)(message.options())) {
+ msgext = "kUpb_ExtMode_IsMessageSet";
+ } else {
+ msgext = "kUpb_ExtMode_Extendable";
+ }
+ }
+
+ output("const upb_MiniTable $0 = {\n", MessageInitName(message));
+ output(" $0,\n", submsgs_array_ref);
+ output(" $0,\n", fields_array_ref);
+ output(" $0, $1, $2, $3, UPB_FASTTABLE_MASK($4), $5,\n",
+ ArchDependentSize(mt_32->size, mt_64->size), mt_64->field_count,
+ msgext, mt_64->dense_below, table_mask, mt_64->required_count);
+ if (!table.empty()) {
+ output(" UPB_FASTTABLE_INIT({\n");
+ for (const auto& ent : table) {
+ output(" {0x$1, &$0},\n", ent.first,
+ absl::StrCat(absl::Hex(ent.second, absl::kZeroPad16)));
+ }
+ output(" })\n");
+ }
+ output("};\n\n");
+}
+
+void WriteEnum(upb::EnumDefPtr e, Output& output) {
+ std::string values_init = "{\n";
+ const upb_MiniTableEnum* mt = e.mini_table();
+ uint32_t value_count = (mt->mask_limit / 32) + mt->value_count;
+ for (uint32_t i = 0; i < value_count; i++) {
+ absl::StrAppend(&values_init, " 0x", absl::Hex(mt->data[i]),
+ ",\n");
+ }
+ values_init += " }";
+
+ output(
+ R"cc(
+ const upb_MiniTableEnum $0 = {
+ $1,
+ $2,
+ $3,
+ };
+ )cc",
+ EnumInit(e), mt->mask_limit, mt->value_count, values_init);
+ output("\n");
+}
+
+int WriteEnums(const DefPoolPair& pools, upb::FileDefPtr file, Output& output) {
+ if (file.syntax() != kUpb_Syntax_Proto2) return 0;
+
+ std::vector<upb::EnumDefPtr> this_file_enums = SortedEnums(file);
+
+ for (const auto e : this_file_enums) {
+ WriteEnum(e, output);
+ }
+
+ if (!this_file_enums.empty()) {
+ output("static const upb_MiniTableEnum *$0[$1] = {\n", kEnumsInit,
+ this_file_enums.size());
+ for (const auto e : this_file_enums) {
+ output(" &$0,\n", EnumInit(e));
+ }
+ output("};\n");
+ output("\n");
+ }
+
+ return this_file_enums.size();
+}
+
+int WriteMessages(const DefPoolPair& pools, upb::FileDefPtr file,
+ const Options& options, Output& output) {
+ std::vector<upb::MessageDefPtr> file_messages = SortedMessages(file);
+
+ if (file_messages.empty()) return 0;
+
+ for (auto message : file_messages) {
+ WriteMessage(message, pools, options, output);
+ }
+
+ output("static const upb_MiniTable *$0[$1] = {\n", kMessagesInit,
+ file_messages.size());
+ for (auto message : file_messages) {
+ output(" &$0,\n", MessageInitName(message));
+ }
+ output("};\n");
+ output("\n");
+ return file_messages.size();
+}
+
+void WriteExtension(upb::FieldDefPtr ext, const DefPoolPair& pools,
+ const Options& options, Output& output) {
+ output("$0,\n", FieldInitializer(pools, ext, options));
+ output(" &$0,\n", MessageInitName(ext.containing_type()));
+ output(" $0,\n", GetSub(ext));
+}
+
+int WriteExtensions(const DefPoolPair& pools, upb::FileDefPtr file,
+ const Options& options, Output& output) {
+ auto exts = SortedExtensions(file);
+
+ if (exts.empty()) return 0;
+
+ // Order by full name for consistent ordering.
+ std::map<std::string, upb::MessageDefPtr> forward_messages;
+
+ for (auto ext : exts) {
+ forward_messages[ext.containing_type().full_name()] = ext.containing_type();
+ if (ext.message_type()) {
+ forward_messages[ext.message_type().full_name()] = ext.message_type();
+ }
+ }
+
+ for (const auto& decl : forward_messages) {
+ ForwardDeclareMiniTableInit(decl.second, options, output);
+ }
+
+ for (auto ext : exts) {
+ output("const upb_MiniTableExtension $0 = {\n ", ExtensionLayout(ext));
+ WriteExtension(ext, pools, options, output);
+ output("\n};\n");
+ }
+
+ output(
+ "\n"
+ "static const upb_MiniTableExtension *$0[$1] = {\n",
+ kExtensionsInit, exts.size());
+
+ for (auto ext : exts) {
+ output(" &$0,\n", ExtensionLayout(ext));
+ }
+
+ output(
+ "};\n"
+ "\n");
+ return exts.size();
+}
+
+void WriteMiniTableSource(const DefPoolPair& pools, upb::FileDefPtr file,
+ const Options& options, Output& output) {
+ EmitFileWarning(file.name(), output);
+
+ output(
+ "#include <stddef.h>\n"
+ "#include \"upb/generated_code_support.h\"\n"
+ "#include \"$0\"\n",
+ HeaderFilename(file));
+
+ for (int i = 0; i < file.dependency_count(); i++) {
+ output("#include \"$0\"\n", HeaderFilename(file.dependency(i)));
+ }
+
+ output(
+ "\n"
+ "// Must be last.\n"
+ "#include \"upb/port/def.inc\"\n"
+ "\n");
+
+ int msg_count = WriteMessages(pools, file, options, output);
+ int ext_count = WriteExtensions(pools, file, options, output);
+ int enum_count = WriteEnums(pools, file, output);
+
+ output("const upb_MiniTableFile $0 = {\n", FileLayoutName(file));
+ output(" $0,\n", msg_count ? kMessagesInit : "NULL");
+ output(" $0,\n", enum_count ? kEnumsInit : "NULL");
+ output(" $0,\n", ext_count ? kExtensionsInit : "NULL");
+ output(" $0,\n", msg_count);
+ output(" $0,\n", enum_count);
+ output(" $0,\n", ext_count);
+ output("};\n\n");
+
+ output("#include \"upb/port/undef.inc\"\n");
+ output("\n");
+}
+
+void WriteMessageMiniDescriptorInitializer(upb::MessageDefPtr msg,
+ const Options& options,
+ Output& output) {
+ Output resolve_calls;
+ for (int i = 0; i < msg.field_count(); i++) {
+ upb::FieldDefPtr field = msg.field(i);
+ if (!field.message_type() && !field.enum_subdef()) continue;
+ if (field.message_type()) {
+ resolve_calls(
+ "upb_MiniTable_SetSubMessage(mini_table, "
+ "(upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, "
+ "$0), $1);\n ",
+ field.number(), MessageMiniTableRef(field.message_type(), options));
+ } else if (field.enum_subdef() && field.enum_subdef().is_closed()) {
+ resolve_calls(
+ "upb_MiniTable_SetSubEnum(mini_table, "
+ "(upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, "
+ "$0), $1);\n ",
+ field.number(), EnumMiniTableRef(field.enum_subdef(), options));
+ }
+ }
+
+ output(
+ R"cc(
+ const upb_MiniTable* $0() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$1";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ $2return mini_table;
+ }
+ )cc",
+ MessageInitName(msg), msg.MiniDescriptorEncode(), resolve_calls.output());
+ output("\n");
+}
+
+void WriteEnumMiniDescriptorInitializer(upb::EnumDefPtr enum_def,
+ const Options& options,
+ Output& output) {
+ output(
+ R"cc(
+ const upb_MiniTableEnum* $0() {
+ static const upb_MiniTableEnum* mini_table = NULL;
+ static const char* mini_descriptor = "$1";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTableEnum_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ return mini_table;
+ }
+ )cc",
+ EnumInitName(enum_def), enum_def.MiniDescriptorEncode());
+ output("\n");
+}
+
+void WriteMiniDescriptorSource(const DefPoolPair& pools, upb::FileDefPtr file,
+ const Options& options, Output& output) {
+ output(
+ "#include <stddef.h>\n"
+ "#include \"upb/generated_code_support.h\"\n"
+ "#include \"$0\"\n\n",
+ HeaderFilename(file));
+
+ for (int i = 0; i < file.dependency_count(); i++) {
+ output("#include \"$0\"\n", HeaderFilename(file.dependency(i)));
+ }
+
+ output(
+ R"cc(
+ static upb_Arena* upb_BootstrapArena() {
+ static upb_Arena* arena = NULL;
+ if (!arena) arena = upb_Arena_New();
+ return arena;
+ }
+ )cc");
+
+ output("\n");
+
+ for (const auto msg : SortedMessages(file)) {
+ WriteMessageMiniDescriptorInitializer(msg, options, output);
+ }
+
+ for (const auto msg : SortedEnums(file)) {
+ WriteEnumMiniDescriptorInitializer(msg, options, output);
+ }
+}
+
+void WriteSource(const DefPoolPair& pools, upb::FileDefPtr file,
+ const Options& options, Output& output) {
+ if (options.bootstrap) {
+ WriteMiniDescriptorSource(pools, file, options, output);
+ } else {
+ WriteMiniTableSource(pools, file, options, output);
+ }
+}
+
+void GenerateFile(const DefPoolPair& pools, upb::FileDefPtr file,
+ const Options& options, Plugin* plugin) {
+ Output h_output;
+ WriteHeader(pools, file, options, h_output);
+ plugin->AddOutputFile(HeaderFilename(file), h_output.output());
+
+ Output c_output;
+ WriteSource(pools, file, options, c_output);
+ plugin->AddOutputFile(SourceFilename(file), c_output.output());
+}
+
+bool ParseOptions(Plugin* plugin, Options* options) {
+ for (const auto& pair : ParseGeneratorParameter(plugin->parameter())) {
+ if (pair.first == "bootstrap_upb") {
+ options->bootstrap = true;
+ } else {
+ plugin->SetError(absl::Substitute("Unknown parameter: $0", pair.first));
+ return false;
+ }
+ }
+
+ return true;
+}
+
+absl::string_view ToStringView(upb_StringView str) {
+ return absl::string_view(str.data, str.size);
+}
+
+} // namespace
+
+} // namespace upbc
+
+int main(int argc, char** argv) {
+ upbc::DefPoolPair pools;
+ upbc::Plugin plugin;
+ upbc::Options options;
+ if (!ParseOptions(&plugin, &options)) return 0;
+ plugin.GenerateFilesRaw([&](const UPB_DESC(FileDescriptorProto) * file_proto,
+ bool generate) {
+ upb::Status status;
+ upb::FileDefPtr file = pools.AddFile(file_proto, &status);
+ if (!file) {
+ absl::string_view name =
+ upbc::ToStringView(UPB_DESC(FileDescriptorProto_name)(file_proto));
+ ABSL_LOG(FATAL) << "Couldn't add file " << name
+ << " to DefPool: " << status.error_message();
+ }
+ if (generate) GenerateFile(pools, file, options, &plugin);
+ });
+ return 0;
+}
diff --git a/upb/upbc/protoc-gen-upbdefs.cc b/upb/upbc/protoc-gen-upbdefs.cc
new file mode 100644
index 0000000..b345530
--- /dev/null
+++ b/upb/upbc/protoc-gen-upbdefs.cc
@@ -0,0 +1,175 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <memory>
+
+#include "google/protobuf/descriptor.upb.h"
+#include "upb/reflection/def.hpp"
+#include "upb/util/def_to_proto.h"
+#include "upbc/common.h"
+#include "upbc/file_layout.h"
+#include "upbc/plugin.h"
+
+namespace upbc {
+namespace {
+
+std::string DefInitSymbol(upb::FileDefPtr file) {
+ return ToCIdent(file.name()) + "_upbdefinit";
+}
+
+static std::string DefHeaderFilename(upb::FileDefPtr file) {
+ return StripExtension(file.name()) + ".upbdefs.h";
+}
+
+static std::string DefSourceFilename(upb::FileDefPtr file) {
+ return StripExtension(file.name()) + ".upbdefs.c";
+}
+
+void GenerateMessageDefAccessor(upb::MessageDefPtr d, Output& output) {
+ output("UPB_INLINE const upb_MessageDef *$0_getmsgdef(upb_DefPool *s) {\n",
+ ToCIdent(d.full_name()));
+ output(" _upb_DefPool_LoadDefInit(s, &$0);\n", DefInitSymbol(d.file()));
+ output(" return upb_DefPool_FindMessageByName(s, \"$0\");\n", d.full_name());
+ output("}\n");
+ output("\n");
+}
+
+void WriteDefHeader(upb::FileDefPtr file, Output& output) {
+ EmitFileWarning(file.name(), output);
+
+ output(
+ "#ifndef $0_UPBDEFS_H_\n"
+ "#define $0_UPBDEFS_H_\n\n"
+ "#include \"upb/reflection/def.h\"\n"
+ "#include \"upb/reflection/internal/def_pool.h\"\n"
+ "#include \"upb/port/def.inc\"\n"
+ "#ifdef __cplusplus\n"
+ "extern \"C\" {\n"
+ "#endif\n\n",
+ ToPreproc(file.name()));
+
+ output("#include \"upb/reflection/def.h\"\n");
+ output("\n");
+ output("#include \"upb/port/def.inc\"\n");
+ output("\n");
+
+ output("extern _upb_DefPool_Init $0;\n", DefInitSymbol(file));
+ output("\n");
+
+ for (auto msg : SortedMessages(file)) {
+ GenerateMessageDefAccessor(msg, output);
+ }
+
+ output(
+ "#ifdef __cplusplus\n"
+ "} /* extern \"C\" */\n"
+ "#endif\n"
+ "\n"
+ "#include \"upb/port/undef.inc\"\n"
+ "\n"
+ "#endif /* $0_UPBDEFS_H_ */\n",
+ ToPreproc(file.name()));
+}
+
+void WriteDefSource(upb::FileDefPtr file, Output& output) {
+ EmitFileWarning(file.name(), output);
+
+ output("#include \"upb/reflection/def.h\"\n");
+ output("#include \"$0\"\n", DefHeaderFilename(file));
+ output("#include \"$0\"\n", HeaderFilename(file));
+ output("\n");
+
+ for (int i = 0; i < file.dependency_count(); i++) {
+ output("extern _upb_DefPool_Init $0;\n", DefInitSymbol(file.dependency(i)));
+ }
+
+ upb::Arena arena;
+ google_protobuf_FileDescriptorProto* file_proto =
+ upb_FileDef_ToProto(file.ptr(), arena.ptr());
+ size_t serialized_size;
+ const char* serialized = google_protobuf_FileDescriptorProto_serialize(
+ file_proto, arena.ptr(), &serialized_size);
+ absl::string_view file_data(serialized, serialized_size);
+
+ output("static const char descriptor[$0] = {", serialized_size);
+
+ // C90 only guarantees that strings can be up to 509 characters, and some
+ // implementations have limits here (for example, MSVC only allows 64k:
+ // https://docs.microsoft.com/en-us/cpp/error-messages/compiler-errors-1/fatal-error-c1091.
+ // So we always emit an array instead of a string.
+ for (size_t i = 0; i < serialized_size;) {
+ for (size_t j = 0; j < 25 && i < serialized_size; ++i, ++j) {
+ output("'$0', ", absl::CEscape(file_data.substr(i, 1)));
+ }
+ output("\n");
+ }
+ output("};\n\n");
+
+ output("static _upb_DefPool_Init *deps[$0] = {\n",
+ file.dependency_count() + 1);
+ for (int i = 0; i < file.dependency_count(); i++) {
+ output(" &$0,\n", DefInitSymbol(file.dependency(i)));
+ }
+ output(" NULL\n");
+ output("};\n");
+ output("\n");
+
+ output("_upb_DefPool_Init $0 = {\n", DefInitSymbol(file));
+ output(" deps,\n");
+ output(" &$0,\n", FileLayoutName(file));
+ output(" \"$0\",\n", file.name());
+ output(" UPB_STRINGVIEW_INIT(descriptor, $0)\n", file_data.size());
+ output("};\n");
+}
+
+void GenerateFile(upb::FileDefPtr file, Plugin* plugin) {
+ Output h_def_output;
+ WriteDefHeader(file, h_def_output);
+ plugin->AddOutputFile(DefHeaderFilename(file), h_def_output.output());
+
+ Output c_def_output;
+ WriteDefSource(file, c_def_output);
+ plugin->AddOutputFile(DefSourceFilename(file), c_def_output.output());
+}
+
+} // namespace
+} // namespace upbc
+
+int main(int argc, char** argv) {
+ upbc::Plugin plugin;
+ if (!plugin.parameter().empty()) {
+ plugin.SetError(
+ absl::StrCat("Expected no parameters, got: ", plugin.parameter()));
+ return 0;
+ }
+ plugin.GenerateFiles(
+ [&](upb::FileDefPtr file) { upbc::GenerateFile(file, &plugin); });
+ return 0;
+}
diff --git a/upb/upbc/protoc-gen-upbdev.cc b/upb/upbc/protoc-gen-upbdev.cc
new file mode 100644
index 0000000..3093169
--- /dev/null
+++ b/upb/upbc/protoc-gen-upbdev.cc
@@ -0,0 +1,91 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <iostream>
+#include <string>
+
+#include "google/protobuf/compiler/plugin.upb.h"
+#include "upbc/subprocess.h"
+#include "upbc/upbdev.h"
+
+static constexpr char kDefaultPlugin[] = "protoc_dart_plugin";
+
+int main() {
+ upb_Arena* a = upb_Arena_New();
+ upb_Status status;
+ upb_Status_Clear(&status);
+
+ // Read (binary) stdin into a string.
+ const std::string input = {std::istreambuf_iterator<char>(std::cin),
+ std::istreambuf_iterator<char>()};
+
+ // Parse the request.
+ auto inner_request = google_protobuf_compiler_CodeGeneratorRequest_parse(
+ input.c_str(), input.size(), a);
+
+ // Check the request for a plugin name.
+ std::string plugin = kDefaultPlugin;
+ if (google_protobuf_compiler_CodeGeneratorRequest_has_parameter(inner_request)) {
+ auto param = google_protobuf_compiler_CodeGeneratorRequest_parameter(inner_request);
+ plugin = std::string(param.data, param.size);
+ }
+
+ // Wrap the request inside a upbc_CodeGeneratorRequest and JSON-encode it.
+ const upb_StringView sv =
+ upbdev_ProcessInput(input.data(), input.size(), a, &status);
+ if (!upb_Status_IsOk(&status)) {
+ std::cerr << status.msg << std::endl;
+ return -1;
+ }
+
+ // Launch the subprocess.
+ upbc::Subprocess subprocess;
+ subprocess.Start(plugin, upbc::Subprocess::SEARCH_PATH);
+
+ // Exchange JSON strings with the subprocess.
+ const std::string json_request = std::string(sv.data, sv.size);
+ std::string json_response, error;
+ const bool ok = subprocess.Communicate(json_request, &json_response, &error);
+ if (!ok) {
+ // Dump the JSON request to stderr if we can't launch the next plugin.
+ std::cerr << json_request << std::endl;
+ return -1;
+ }
+
+ // Decode, serialize, and write the JSON response.
+ upbdev_ProcessOutput(json_response.data(), json_response.size(), a, &status);
+ if (!upb_Status_IsOk(&status)) {
+ std::cerr << status.msg << std::endl;
+ return -1;
+ }
+
+ upb_Arena_Free(a);
+ return 0;
+}
diff --git a/upb/upbc/stage0/google/protobuf/compiler/plugin.upb.c b/upb/upbc/stage0/google/protobuf/compiler/plugin.upb.c
new file mode 100644
index 0000000..1590f35
--- /dev/null
+++ b/upb/upbc/stage0/google/protobuf/compiler/plugin.upb.c
@@ -0,0 +1,64 @@
+#include <stddef.h>
+#include "upb/generated_code_support.h"
+#include "google/protobuf/compiler/plugin.upb.h"
+
+#include "google/protobuf/descriptor.upb.h"
+static upb_Arena* upb_BootstrapArena() {
+ static upb_Arena* arena = NULL;
+ if (!arena) arena = upb_Arena_New();
+ return arena;
+}
+
+const upb_MiniTable* google_protobuf_compiler_Version_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$(((1";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_compiler_CodeGeneratorRequest_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$E13kG";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 15), google_protobuf_FileDescriptorProto_msg_init());
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 3), google_protobuf_compiler_Version_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_compiler_CodeGeneratorResponse_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$1,lG";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 15), google_protobuf_compiler_CodeGeneratorResponse_File_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTable* google_protobuf_compiler_CodeGeneratorResponse_File_msg_init() {
+ static upb_MiniTable* mini_table = NULL;
+ static const char* mini_descriptor = "$11l13";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTable_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ upb_MiniTable_SetSubMessage(mini_table, (upb_MiniTableField*)upb_MiniTable_FindFieldByNumber(mini_table, 16), google_protobuf_GeneratedCodeInfo_msg_init());
+ return mini_table;
+}
+
+const upb_MiniTableEnum* google_protobuf_compiler_CodeGeneratorResponse_Feature_enum_init() {
+ static const upb_MiniTableEnum* mini_table = NULL;
+ static const char* mini_descriptor = "!$";
+ if (mini_table) return mini_table;
+ mini_table =
+ upb_MiniTableEnum_Build(mini_descriptor, strlen(mini_descriptor),
+ upb_BootstrapArena(), NULL);
+ return mini_table;
+}
diff --git a/upb/upbc/stage0/google/protobuf/compiler/plugin.upb.h b/upb/upbc/stage0/google/protobuf/compiler/plugin.upb.h
new file mode 100644
index 0000000..be35a42
--- /dev/null
+++ b/upb/upbc/stage0/google/protobuf/compiler/plugin.upb.h
@@ -0,0 +1,578 @@
+/* This file was generated by upbc (the upb compiler) from the input
+ * file:
+ *
+ * google/protobuf/compiler/plugin.proto
+ *
+ * Do not edit -- your changes will be discarded when the file is
+ * regenerated. */
+
+#ifndef GOOGLE_PROTOBUF_COMPILER_PLUGIN_PROTO_UPB_H_
+#define GOOGLE_PROTOBUF_COMPILER_PLUGIN_PROTO_UPB_H_
+
+#include "upb/generated_code_support.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct google_protobuf_compiler_Version google_protobuf_compiler_Version;
+typedef struct google_protobuf_compiler_CodeGeneratorRequest google_protobuf_compiler_CodeGeneratorRequest;
+typedef struct google_protobuf_compiler_CodeGeneratorResponse google_protobuf_compiler_CodeGeneratorResponse;
+typedef struct google_protobuf_compiler_CodeGeneratorResponse_File google_protobuf_compiler_CodeGeneratorResponse_File;
+extern const upb_MiniTable* google_protobuf_compiler_Version_msg_init();
+extern const upb_MiniTable* google_protobuf_compiler_CodeGeneratorRequest_msg_init();
+extern const upb_MiniTable* google_protobuf_compiler_CodeGeneratorResponse_msg_init();
+extern const upb_MiniTable* google_protobuf_compiler_CodeGeneratorResponse_File_msg_init();
+struct google_protobuf_FileDescriptorProto;
+struct google_protobuf_GeneratedCodeInfo;
+extern const upb_MiniTable* google_protobuf_FileDescriptorProto_msg_init();
+extern const upb_MiniTable* google_protobuf_GeneratedCodeInfo_msg_init();
+
+typedef enum {
+ google_protobuf_compiler_CodeGeneratorResponse_FEATURE_NONE = 0,
+ google_protobuf_compiler_CodeGeneratorResponse_FEATURE_PROTO3_OPTIONAL = 1
+} google_protobuf_compiler_CodeGeneratorResponse_Feature;
+
+
+extern const upb_MiniTableEnum* google_protobuf_compiler_CodeGeneratorResponse_Feature_enum_init();
+
+/* google.protobuf.compiler.Version */
+
+UPB_INLINE google_protobuf_compiler_Version* google_protobuf_compiler_Version_new(upb_Arena* arena) {
+ return (google_protobuf_compiler_Version*)_upb_Message_New(google_protobuf_compiler_Version_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_compiler_Version* google_protobuf_compiler_Version_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_compiler_Version* ret = google_protobuf_compiler_Version_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_compiler_Version_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_compiler_Version* google_protobuf_compiler_Version_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_compiler_Version* ret = google_protobuf_compiler_Version_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_compiler_Version_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_compiler_Version_serialize(const google_protobuf_compiler_Version* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_compiler_Version_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_compiler_Version_serialize_ex(const google_protobuf_compiler_Version* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_compiler_Version_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_compiler_Version_clear_major(google_protobuf_compiler_Version* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_Version_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t google_protobuf_compiler_Version_major(const google_protobuf_compiler_Version* msg) {
+ int32_t default_val = (int32_t)0;
+ int32_t ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_Version_msg_init(), 1);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_compiler_Version_has_major(const google_protobuf_compiler_Version* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_Version_msg_init(), 1);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_compiler_Version_clear_minor(google_protobuf_compiler_Version* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_Version_msg_init(), 2);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t google_protobuf_compiler_Version_minor(const google_protobuf_compiler_Version* msg) {
+ int32_t default_val = (int32_t)0;
+ int32_t ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_Version_msg_init(), 2);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_compiler_Version_has_minor(const google_protobuf_compiler_Version* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_Version_msg_init(), 2);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_compiler_Version_clear_patch(google_protobuf_compiler_Version* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_Version_msg_init(), 3);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE int32_t google_protobuf_compiler_Version_patch(const google_protobuf_compiler_Version* msg) {
+ int32_t default_val = (int32_t)0;
+ int32_t ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_Version_msg_init(), 3);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_compiler_Version_has_patch(const google_protobuf_compiler_Version* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_Version_msg_init(), 3);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_compiler_Version_clear_suffix(google_protobuf_compiler_Version* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_Version_msg_init(), 4);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_compiler_Version_suffix(const google_protobuf_compiler_Version* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_Version_msg_init(), 4);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_compiler_Version_has_suffix(const google_protobuf_compiler_Version* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_Version_msg_init(), 4);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+
+UPB_INLINE void google_protobuf_compiler_Version_set_major(google_protobuf_compiler_Version *msg, int32_t value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_Version_msg_init(), 1);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_compiler_Version_set_minor(google_protobuf_compiler_Version *msg, int32_t value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_Version_msg_init(), 2);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_compiler_Version_set_patch(google_protobuf_compiler_Version *msg, int32_t value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_Version_msg_init(), 3);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_compiler_Version_set_suffix(google_protobuf_compiler_Version *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_Version_msg_init(), 4);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+
+/* google.protobuf.compiler.CodeGeneratorRequest */
+
+UPB_INLINE google_protobuf_compiler_CodeGeneratorRequest* google_protobuf_compiler_CodeGeneratorRequest_new(upb_Arena* arena) {
+ return (google_protobuf_compiler_CodeGeneratorRequest*)_upb_Message_New(google_protobuf_compiler_CodeGeneratorRequest_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_compiler_CodeGeneratorRequest* google_protobuf_compiler_CodeGeneratorRequest_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_compiler_CodeGeneratorRequest* ret = google_protobuf_compiler_CodeGeneratorRequest_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_compiler_CodeGeneratorRequest_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_compiler_CodeGeneratorRequest* google_protobuf_compiler_CodeGeneratorRequest_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_compiler_CodeGeneratorRequest* ret = google_protobuf_compiler_CodeGeneratorRequest_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_compiler_CodeGeneratorRequest_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_compiler_CodeGeneratorRequest_serialize(const google_protobuf_compiler_CodeGeneratorRequest* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_compiler_CodeGeneratorRequest_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_compiler_CodeGeneratorRequest_serialize_ex(const google_protobuf_compiler_CodeGeneratorRequest* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_compiler_CodeGeneratorRequest_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_compiler_CodeGeneratorRequest_clear_file_to_generate(google_protobuf_compiler_CodeGeneratorRequest* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorRequest_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView const* google_protobuf_compiler_CodeGeneratorRequest_file_to_generate(const google_protobuf_compiler_CodeGeneratorRequest* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorRequest_msg_init(), 1);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (upb_StringView const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_compiler_CodeGeneratorRequest_has_file_to_generate(const google_protobuf_compiler_CodeGeneratorRequest* msg) {
+ size_t size;
+ google_protobuf_compiler_CodeGeneratorRequest_file_to_generate(msg, &size);
+ return size != 0;
+}
+UPB_INLINE void google_protobuf_compiler_CodeGeneratorRequest_clear_parameter(google_protobuf_compiler_CodeGeneratorRequest* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorRequest_msg_init(), 2);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_compiler_CodeGeneratorRequest_parameter(const google_protobuf_compiler_CodeGeneratorRequest* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorRequest_msg_init(), 2);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_compiler_CodeGeneratorRequest_has_parameter(const google_protobuf_compiler_CodeGeneratorRequest* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorRequest_msg_init(), 2);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_compiler_CodeGeneratorRequest_clear_compiler_version(google_protobuf_compiler_CodeGeneratorRequest* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorRequest_msg_init(), 3);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_compiler_Version* google_protobuf_compiler_CodeGeneratorRequest_compiler_version(const google_protobuf_compiler_CodeGeneratorRequest* msg) {
+ const google_protobuf_compiler_Version* default_val = NULL;
+ const google_protobuf_compiler_Version* ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorRequest_msg_init(), 3);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_compiler_CodeGeneratorRequest_has_compiler_version(const google_protobuf_compiler_CodeGeneratorRequest* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorRequest_msg_init(), 3);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_compiler_CodeGeneratorRequest_clear_proto_file(google_protobuf_compiler_CodeGeneratorRequest* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorRequest_msg_init(), 15);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const struct google_protobuf_FileDescriptorProto* const* google_protobuf_compiler_CodeGeneratorRequest_proto_file(const google_protobuf_compiler_CodeGeneratorRequest* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorRequest_msg_init(), 15);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const struct google_protobuf_FileDescriptorProto* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_compiler_CodeGeneratorRequest_has_proto_file(const google_protobuf_compiler_CodeGeneratorRequest* msg) {
+ size_t size;
+ google_protobuf_compiler_CodeGeneratorRequest_proto_file(msg, &size);
+ return size != 0;
+}
+
+UPB_INLINE upb_StringView* google_protobuf_compiler_CodeGeneratorRequest_mutable_file_to_generate(google_protobuf_compiler_CodeGeneratorRequest* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorRequest_msg_init(), 1);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (upb_StringView*)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE upb_StringView* google_protobuf_compiler_CodeGeneratorRequest_resize_file_to_generate(google_protobuf_compiler_CodeGeneratorRequest* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorRequest_msg_init(), 1);
+ return (upb_StringView*)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE bool google_protobuf_compiler_CodeGeneratorRequest_add_file_to_generate(google_protobuf_compiler_CodeGeneratorRequest* msg, upb_StringView val, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorRequest_msg_init(), 1);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return false;
+ }
+ _upb_Array_Set(arr, arr->size - 1, &val, sizeof(val));
+ return true;
+}
+UPB_INLINE void google_protobuf_compiler_CodeGeneratorRequest_set_parameter(google_protobuf_compiler_CodeGeneratorRequest *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorRequest_msg_init(), 2);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_compiler_CodeGeneratorRequest_set_compiler_version(google_protobuf_compiler_CodeGeneratorRequest *msg, google_protobuf_compiler_Version* value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorRequest_msg_init(), 3);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE struct google_protobuf_compiler_Version* google_protobuf_compiler_CodeGeneratorRequest_mutable_compiler_version(google_protobuf_compiler_CodeGeneratorRequest* msg, upb_Arena* arena) {
+ struct google_protobuf_compiler_Version* sub = (struct google_protobuf_compiler_Version*)google_protobuf_compiler_CodeGeneratorRequest_compiler_version(msg);
+ if (sub == NULL) {
+ sub = (struct google_protobuf_compiler_Version*)_upb_Message_New(google_protobuf_compiler_Version_msg_init(), arena);
+ if (sub) google_protobuf_compiler_CodeGeneratorRequest_set_compiler_version(msg, sub);
+ }
+ return sub;
+}
+UPB_INLINE struct google_protobuf_FileDescriptorProto** google_protobuf_compiler_CodeGeneratorRequest_mutable_proto_file(google_protobuf_compiler_CodeGeneratorRequest* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorRequest_msg_init(), 15);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (struct google_protobuf_FileDescriptorProto**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE struct google_protobuf_FileDescriptorProto** google_protobuf_compiler_CodeGeneratorRequest_resize_proto_file(google_protobuf_compiler_CodeGeneratorRequest* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorRequest_msg_init(), 15);
+ return (struct google_protobuf_FileDescriptorProto**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_FileDescriptorProto* google_protobuf_compiler_CodeGeneratorRequest_add_proto_file(google_protobuf_compiler_CodeGeneratorRequest* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorRequest_msg_init(), 15);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_FileDescriptorProto* sub = (struct google_protobuf_FileDescriptorProto*)_upb_Message_New(google_protobuf_FileDescriptorProto_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+
+/* google.protobuf.compiler.CodeGeneratorResponse */
+
+UPB_INLINE google_protobuf_compiler_CodeGeneratorResponse* google_protobuf_compiler_CodeGeneratorResponse_new(upb_Arena* arena) {
+ return (google_protobuf_compiler_CodeGeneratorResponse*)_upb_Message_New(google_protobuf_compiler_CodeGeneratorResponse_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_compiler_CodeGeneratorResponse* google_protobuf_compiler_CodeGeneratorResponse_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_compiler_CodeGeneratorResponse* ret = google_protobuf_compiler_CodeGeneratorResponse_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_compiler_CodeGeneratorResponse_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_compiler_CodeGeneratorResponse* google_protobuf_compiler_CodeGeneratorResponse_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_compiler_CodeGeneratorResponse* ret = google_protobuf_compiler_CodeGeneratorResponse_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_compiler_CodeGeneratorResponse_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_compiler_CodeGeneratorResponse_serialize(const google_protobuf_compiler_CodeGeneratorResponse* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_compiler_CodeGeneratorResponse_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_compiler_CodeGeneratorResponse_serialize_ex(const google_protobuf_compiler_CodeGeneratorResponse* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_compiler_CodeGeneratorResponse_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_compiler_CodeGeneratorResponse_clear_error(google_protobuf_compiler_CodeGeneratorResponse* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_compiler_CodeGeneratorResponse_error(const google_protobuf_compiler_CodeGeneratorResponse* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_msg_init(), 1);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_compiler_CodeGeneratorResponse_has_error(const google_protobuf_compiler_CodeGeneratorResponse* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_msg_init(), 1);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_compiler_CodeGeneratorResponse_clear_supported_features(google_protobuf_compiler_CodeGeneratorResponse* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_msg_init(), 2);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE uint64_t google_protobuf_compiler_CodeGeneratorResponse_supported_features(const google_protobuf_compiler_CodeGeneratorResponse* msg) {
+ uint64_t default_val = (uint64_t)0ull;
+ uint64_t ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_msg_init(), 2);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_compiler_CodeGeneratorResponse_has_supported_features(const google_protobuf_compiler_CodeGeneratorResponse* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_msg_init(), 2);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_compiler_CodeGeneratorResponse_clear_file(google_protobuf_compiler_CodeGeneratorResponse* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_msg_init(), 15);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const google_protobuf_compiler_CodeGeneratorResponse_File* const* google_protobuf_compiler_CodeGeneratorResponse_file(const google_protobuf_compiler_CodeGeneratorResponse* msg, size_t* size) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_msg_init(), 15);
+ const upb_Array* arr = upb_Message_GetArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (const google_protobuf_compiler_CodeGeneratorResponse_File* const*)_upb_array_constptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE bool google_protobuf_compiler_CodeGeneratorResponse_has_file(const google_protobuf_compiler_CodeGeneratorResponse* msg) {
+ size_t size;
+ google_protobuf_compiler_CodeGeneratorResponse_file(msg, &size);
+ return size != 0;
+}
+
+UPB_INLINE void google_protobuf_compiler_CodeGeneratorResponse_set_error(google_protobuf_compiler_CodeGeneratorResponse *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_msg_init(), 1);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_compiler_CodeGeneratorResponse_set_supported_features(google_protobuf_compiler_CodeGeneratorResponse *msg, uint64_t value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_msg_init(), 2);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE google_protobuf_compiler_CodeGeneratorResponse_File** google_protobuf_compiler_CodeGeneratorResponse_mutable_file(google_protobuf_compiler_CodeGeneratorResponse* msg, size_t* size) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_msg_init(), 15);
+ upb_Array* arr = upb_Message_GetMutableArray(msg, &field);
+ if (arr) {
+ if (size) *size = arr->size;
+ return (google_protobuf_compiler_CodeGeneratorResponse_File**)_upb_array_ptr(arr);
+ } else {
+ if (size) *size = 0;
+ return NULL;
+ }
+}
+UPB_INLINE google_protobuf_compiler_CodeGeneratorResponse_File** google_protobuf_compiler_CodeGeneratorResponse_resize_file(google_protobuf_compiler_CodeGeneratorResponse* msg, size_t size, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_msg_init(), 15);
+ return (google_protobuf_compiler_CodeGeneratorResponse_File**)upb_Message_ResizeArrayUninitialized(msg, &field, size, arena);
+}
+UPB_INLINE struct google_protobuf_compiler_CodeGeneratorResponse_File* google_protobuf_compiler_CodeGeneratorResponse_add_file(google_protobuf_compiler_CodeGeneratorResponse* msg, upb_Arena* arena) {
+ upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_msg_init(), 15);
+ upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena);
+ if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) {
+ return NULL;
+ }
+ struct google_protobuf_compiler_CodeGeneratorResponse_File* sub = (struct google_protobuf_compiler_CodeGeneratorResponse_File*)_upb_Message_New(google_protobuf_compiler_CodeGeneratorResponse_File_msg_init(), arena);
+ if (!arr || !sub) return NULL;
+ _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub));
+ return sub;
+}
+
+/* google.protobuf.compiler.CodeGeneratorResponse.File */
+
+UPB_INLINE google_protobuf_compiler_CodeGeneratorResponse_File* google_protobuf_compiler_CodeGeneratorResponse_File_new(upb_Arena* arena) {
+ return (google_protobuf_compiler_CodeGeneratorResponse_File*)_upb_Message_New(google_protobuf_compiler_CodeGeneratorResponse_File_msg_init(), arena);
+}
+UPB_INLINE google_protobuf_compiler_CodeGeneratorResponse_File* google_protobuf_compiler_CodeGeneratorResponse_File_parse(const char* buf, size_t size, upb_Arena* arena) {
+ google_protobuf_compiler_CodeGeneratorResponse_File* ret = google_protobuf_compiler_CodeGeneratorResponse_File_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_compiler_CodeGeneratorResponse_File_msg_init(), NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE google_protobuf_compiler_CodeGeneratorResponse_File* google_protobuf_compiler_CodeGeneratorResponse_File_parse_ex(const char* buf, size_t size,
+ const upb_ExtensionRegistry* extreg,
+ int options, upb_Arena* arena) {
+ google_protobuf_compiler_CodeGeneratorResponse_File* ret = google_protobuf_compiler_CodeGeneratorResponse_File_new(arena);
+ if (!ret) return NULL;
+ if (upb_Decode(buf, size, ret, google_protobuf_compiler_CodeGeneratorResponse_File_msg_init(), extreg, options, arena) !=
+ kUpb_DecodeStatus_Ok) {
+ return NULL;
+ }
+ return ret;
+}
+UPB_INLINE char* google_protobuf_compiler_CodeGeneratorResponse_File_serialize(const google_protobuf_compiler_CodeGeneratorResponse_File* msg, upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_compiler_CodeGeneratorResponse_File_msg_init(), 0, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE char* google_protobuf_compiler_CodeGeneratorResponse_File_serialize_ex(const google_protobuf_compiler_CodeGeneratorResponse_File* msg, int options,
+ upb_Arena* arena, size_t* len) {
+ char* ptr;
+ (void)upb_Encode(msg, google_protobuf_compiler_CodeGeneratorResponse_File_msg_init(), options, arena, &ptr, len);
+ return ptr;
+}
+UPB_INLINE void google_protobuf_compiler_CodeGeneratorResponse_File_clear_name(google_protobuf_compiler_CodeGeneratorResponse_File* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_File_msg_init(), 1);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_compiler_CodeGeneratorResponse_File_name(const google_protobuf_compiler_CodeGeneratorResponse_File* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_File_msg_init(), 1);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_compiler_CodeGeneratorResponse_File_has_name(const google_protobuf_compiler_CodeGeneratorResponse_File* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_File_msg_init(), 1);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_compiler_CodeGeneratorResponse_File_clear_insertion_point(google_protobuf_compiler_CodeGeneratorResponse_File* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_File_msg_init(), 2);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_compiler_CodeGeneratorResponse_File_insertion_point(const google_protobuf_compiler_CodeGeneratorResponse_File* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_File_msg_init(), 2);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_compiler_CodeGeneratorResponse_File_has_insertion_point(const google_protobuf_compiler_CodeGeneratorResponse_File* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_File_msg_init(), 2);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_compiler_CodeGeneratorResponse_File_clear_content(google_protobuf_compiler_CodeGeneratorResponse_File* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_File_msg_init(), 15);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE upb_StringView google_protobuf_compiler_CodeGeneratorResponse_File_content(const google_protobuf_compiler_CodeGeneratorResponse_File* msg) {
+ upb_StringView default_val = upb_StringView_FromString("");
+ upb_StringView ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_File_msg_init(), 15);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_compiler_CodeGeneratorResponse_File_has_content(const google_protobuf_compiler_CodeGeneratorResponse_File* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_File_msg_init(), 15);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+UPB_INLINE void google_protobuf_compiler_CodeGeneratorResponse_File_clear_generated_code_info(google_protobuf_compiler_CodeGeneratorResponse_File* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_File_msg_init(), 16);
+ _upb_Message_ClearNonExtensionField(msg, &field);
+}
+UPB_INLINE const struct google_protobuf_GeneratedCodeInfo* google_protobuf_compiler_CodeGeneratorResponse_File_generated_code_info(const google_protobuf_compiler_CodeGeneratorResponse_File* msg) {
+ const struct google_protobuf_GeneratedCodeInfo* default_val = NULL;
+ const struct google_protobuf_GeneratedCodeInfo* ret;
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_File_msg_init(), 16);
+ _upb_Message_GetNonExtensionField(msg, &field, &default_val, &ret);
+ return ret;
+}
+UPB_INLINE bool google_protobuf_compiler_CodeGeneratorResponse_File_has_generated_code_info(const google_protobuf_compiler_CodeGeneratorResponse_File* msg) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_File_msg_init(), 16);
+ return _upb_Message_HasNonExtensionField(msg, &field);
+}
+
+UPB_INLINE void google_protobuf_compiler_CodeGeneratorResponse_File_set_name(google_protobuf_compiler_CodeGeneratorResponse_File *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_File_msg_init(), 1);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_compiler_CodeGeneratorResponse_File_set_insertion_point(google_protobuf_compiler_CodeGeneratorResponse_File *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_File_msg_init(), 2);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_compiler_CodeGeneratorResponse_File_set_content(google_protobuf_compiler_CodeGeneratorResponse_File *msg, upb_StringView value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_File_msg_init(), 15);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE void google_protobuf_compiler_CodeGeneratorResponse_File_set_generated_code_info(google_protobuf_compiler_CodeGeneratorResponse_File *msg, struct google_protobuf_GeneratedCodeInfo* value) {
+ const upb_MiniTableField field = *upb_MiniTable_FindFieldByNumber(google_protobuf_compiler_CodeGeneratorResponse_File_msg_init(), 16);
+ _upb_Message_SetNonExtensionField(msg, &field, &value);
+}
+UPB_INLINE struct google_protobuf_GeneratedCodeInfo* google_protobuf_compiler_CodeGeneratorResponse_File_mutable_generated_code_info(google_protobuf_compiler_CodeGeneratorResponse_File* msg, upb_Arena* arena) {
+ struct google_protobuf_GeneratedCodeInfo* sub = (struct google_protobuf_GeneratedCodeInfo*)google_protobuf_compiler_CodeGeneratorResponse_File_generated_code_info(msg);
+ if (sub == NULL) {
+ sub = (struct google_protobuf_GeneratedCodeInfo*)_upb_Message_New(google_protobuf_GeneratedCodeInfo_msg_init(), arena);
+ if (sub) google_protobuf_compiler_CodeGeneratorResponse_File_set_generated_code_info(msg, sub);
+ }
+ return sub;
+}
+
+extern const upb_MiniTableFile google_protobuf_compiler_plugin_proto_upb_file_layout;
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif /* GOOGLE_PROTOBUF_COMPILER_PLUGIN_PROTO_UPB_H_ */
diff --git a/upb/upbc/subprocess.cc b/upb/upbc/subprocess.cc
new file mode 100644
index 0000000..e0c2604
--- /dev/null
+++ b/upb/upbc/subprocess.cc
@@ -0,0 +1,465 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Shamelessly copied from the protobuf compiler's subprocess.cc
+// except this version passes strings instead of Messages.
+
+#include "upbc/subprocess.h"
+
+#include <algorithm>
+#include <cstring>
+#include <iostream>
+
+#ifndef _MSVC_LANG
+#include <errno.h>
+#include <signal.h>
+#include <sys/select.h>
+#include <sys/wait.h>
+#endif
+
+#include "absl/log/absl_log.h"
+#include "absl/strings/substitute.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+namespace upbc {
+
+namespace {
+char* portable_strdup(const char* s) {
+ char* ns = (char*)malloc(strlen(s) + 1);
+ if (ns != nullptr) {
+ strcpy(ns, s);
+ }
+ return ns;
+}
+} // namespace
+
+#ifdef _WIN32
+
+static void CloseHandleOrDie(HANDLE handle) {
+ if (!CloseHandle(handle)) {
+ ABSL_LOG(FATAL) << "CloseHandle: "
+ << Subprocess::Win32ErrorMessage(GetLastError());
+ }
+}
+
+Subprocess::Subprocess()
+ : process_start_error_(ERROR_SUCCESS),
+ child_handle_(nullptr),
+ child_stdin_(nullptr),
+ child_stdout_(nullptr) {}
+
+Subprocess::~Subprocess() {
+ if (child_stdin_ != nullptr) {
+ CloseHandleOrDie(child_stdin_);
+ }
+ if (child_stdout_ != nullptr) {
+ CloseHandleOrDie(child_stdout_);
+ }
+}
+
+void Subprocess::Start(const std::string& program, SearchMode search_mode) {
+ // Create the pipes.
+ HANDLE stdin_pipe_read;
+ HANDLE stdin_pipe_write;
+ HANDLE stdout_pipe_read;
+ HANDLE stdout_pipe_write;
+
+ if (!CreatePipe(&stdin_pipe_read, &stdin_pipe_write, nullptr, 0)) {
+ ABSL_LOG(FATAL) << "CreatePipe: " << Win32ErrorMessage(GetLastError());
+ }
+ if (!CreatePipe(&stdout_pipe_read, &stdout_pipe_write, nullptr, 0)) {
+ ABSL_LOG(FATAL) << "CreatePipe: " << Win32ErrorMessage(GetLastError());
+ }
+
+ // Make child side of the pipes inheritable.
+ if (!SetHandleInformation(stdin_pipe_read, HANDLE_FLAG_INHERIT,
+ HANDLE_FLAG_INHERIT)) {
+ ABSL_LOG(FATAL) << "SetHandleInformation: "
+ << Win32ErrorMessage(GetLastError());
+ }
+ if (!SetHandleInformation(stdout_pipe_write, HANDLE_FLAG_INHERIT,
+ HANDLE_FLAG_INHERIT)) {
+ ABSL_LOG(FATAL) << "SetHandleInformation: "
+ << Win32ErrorMessage(GetLastError());
+ }
+
+ // Setup STARTUPINFO to redirect handles.
+ STARTUPINFOA startup_info;
+ ZeroMemory(&startup_info, sizeof(startup_info));
+ startup_info.cb = sizeof(startup_info);
+ startup_info.dwFlags = STARTF_USESTDHANDLES;
+ startup_info.hStdInput = stdin_pipe_read;
+ startup_info.hStdOutput = stdout_pipe_write;
+ startup_info.hStdError = GetStdHandle(STD_ERROR_HANDLE);
+
+ if (startup_info.hStdError == INVALID_HANDLE_VALUE) {
+ ABSL_LOG(FATAL) << "GetStdHandle: " << Win32ErrorMessage(GetLastError());
+ }
+
+ // Invoking cmd.exe allows for '.bat' files from the path as well as '.exe'.
+ // Using a malloc'ed string because CreateProcess() can mutate its second
+ // parameter.
+ char* command_line =
+ portable_strdup(("cmd.exe /c \"" + program + "\"").c_str());
+
+ // Create the process.
+ PROCESS_INFORMATION process_info;
+
+ if (CreateProcessA((search_mode == SEARCH_PATH) ? nullptr : program.c_str(),
+ (search_mode == SEARCH_PATH) ? command_line : nullptr,
+ nullptr, // process security attributes
+ nullptr, // thread security attributes
+ TRUE, // inherit handles?
+ 0, // obscure creation flags
+ nullptr, // environment (inherit from parent)
+ nullptr, // current directory (inherit from parent)
+ &startup_info, &process_info)) {
+ child_handle_ = process_info.hProcess;
+ CloseHandleOrDie(process_info.hThread);
+ child_stdin_ = stdin_pipe_write;
+ child_stdout_ = stdout_pipe_read;
+ } else {
+ process_start_error_ = GetLastError();
+ CloseHandleOrDie(stdin_pipe_write);
+ CloseHandleOrDie(stdout_pipe_read);
+ }
+
+ CloseHandleOrDie(stdin_pipe_read);
+ CloseHandleOrDie(stdout_pipe_write);
+ free(command_line);
+}
+
+bool Subprocess::Communicate(const std::string& input_data,
+ std::string* output_data, std::string* error) {
+ if (process_start_error_ != ERROR_SUCCESS) {
+ *error = Win32ErrorMessage(process_start_error_);
+ return false;
+ }
+
+ GOOGLE_CHECK(child_handle_ != nullptr) << "Must call Start() first.";
+
+ int input_pos = 0;
+
+ while (child_stdout_ != nullptr) {
+ HANDLE handles[2];
+ int handle_count = 0;
+
+ if (child_stdin_ != nullptr) {
+ handles[handle_count++] = child_stdin_;
+ }
+ if (child_stdout_ != nullptr) {
+ handles[handle_count++] = child_stdout_;
+ }
+
+ DWORD wait_result =
+ WaitForMultipleObjects(handle_count, handles, FALSE, INFINITE);
+
+ HANDLE signaled_handle = nullptr;
+ if (wait_result >= WAIT_OBJECT_0 &&
+ wait_result < WAIT_OBJECT_0 + handle_count) {
+ signaled_handle = handles[wait_result - WAIT_OBJECT_0];
+ } else if (wait_result == WAIT_FAILED) {
+ ABSL_LOG(FATAL) << "WaitForMultipleObjects: "
+ << Win32ErrorMessage(GetLastError());
+ } else {
+ ABSL_LOG(FATAL) << "WaitForMultipleObjects: Unexpected return code: "
+ << wait_result;
+ }
+
+ if (signaled_handle == child_stdin_) {
+ DWORD n;
+ if (!WriteFile(child_stdin_, input_data.data() + input_pos,
+ input_data.size() - input_pos, &n, nullptr)) {
+ // Child closed pipe. Presumably it will report an error later.
+ // Pretend we're done for now.
+ input_pos = input_data.size();
+ } else {
+ input_pos += n;
+ }
+
+ if (input_pos == input_data.size()) {
+ // We're done writing. Close.
+ CloseHandleOrDie(child_stdin_);
+ child_stdin_ = nullptr;
+ }
+ } else if (signaled_handle == child_stdout_) {
+ char buffer[4096];
+ DWORD n;
+
+ if (!ReadFile(child_stdout_, buffer, sizeof(buffer), &n, nullptr)) {
+ // We're done reading. Close.
+ CloseHandleOrDie(child_stdout_);
+ child_stdout_ = nullptr;
+ } else {
+ output_data->append(buffer, n);
+ }
+ }
+ }
+
+ if (child_stdin_ != nullptr) {
+ // Child did not finish reading input before it closed the output.
+ // Presumably it exited with an error.
+ CloseHandleOrDie(child_stdin_);
+ child_stdin_ = nullptr;
+ }
+
+ DWORD wait_result = WaitForSingleObject(child_handle_, INFINITE);
+
+ if (wait_result == WAIT_FAILED) {
+ ABSL_LOG(FATAL) << "WaitForSingleObject: "
+ << Win32ErrorMessage(GetLastError());
+ } else if (wait_result != WAIT_OBJECT_0) {
+ ABSL_LOG(FATAL) << "WaitForSingleObject: Unexpected return code: "
+ << wait_result;
+ }
+
+ DWORD exit_code;
+ if (!GetExitCodeProcess(child_handle_, &exit_code)) {
+ ABSL_LOG(FATAL) << "GetExitCodeProcess: "
+ << Win32ErrorMessage(GetLastError());
+ }
+
+ CloseHandleOrDie(child_handle_);
+ child_handle_ = nullptr;
+
+ if (exit_code != 0) {
+ *error = absl::Substitute("Plugin failed with status code $0.", exit_code);
+ return false;
+ }
+
+ return true;
+}
+
+std::string Subprocess::Win32ErrorMessage(DWORD error_code) {
+ char* message;
+
+ // WTF?
+ FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
+ FORMAT_MESSAGE_IGNORE_INSERTS,
+ nullptr, error_code,
+ MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US),
+ (LPSTR)&message, // NOT A BUG!
+ 0, nullptr);
+
+ std::string result = message;
+ LocalFree(message);
+ return result;
+}
+
+// ===================================================================
+
+#else // _WIN32
+
+Subprocess::Subprocess()
+ : child_pid_(-1), child_stdin_(-1), child_stdout_(-1) {}
+
+Subprocess::~Subprocess() {
+ if (child_stdin_ != -1) {
+ close(child_stdin_);
+ }
+ if (child_stdout_ != -1) {
+ close(child_stdout_);
+ }
+}
+
+void Subprocess::Start(const std::string& program, SearchMode search_mode) {
+ // Note that we assume that there are no other threads, thus we don't have to
+ // do crazy stuff like using socket pairs or avoiding libc locks.
+
+ // [0] is read end, [1] is write end.
+ int stdin_pipe[2];
+ int stdout_pipe[2];
+
+ int p0 = pipe(stdin_pipe);
+ int p1 = pipe(stdout_pipe);
+ UPB_ASSERT(p0 != -1);
+ UPB_ASSERT(p1 != -1);
+
+ char* argv[2] = {portable_strdup(program.c_str()), nullptr};
+
+ child_pid_ = fork();
+ if (child_pid_ == -1) {
+ std::cerr << "fork: " << strerror(errno);
+ } else if (child_pid_ == 0) {
+ // We are the child.
+ dup2(stdin_pipe[0], STDIN_FILENO);
+ dup2(stdout_pipe[1], STDOUT_FILENO);
+
+ close(stdin_pipe[0]);
+ close(stdin_pipe[1]);
+ close(stdout_pipe[0]);
+ close(stdout_pipe[1]);
+
+ switch (search_mode) {
+ case SEARCH_PATH:
+ execvp(argv[0], argv);
+ break;
+ case EXACT_NAME:
+ execv(argv[0], argv);
+ break;
+ }
+
+ // Write directly to STDERR_FILENO to avoid stdio code paths that may do
+ // stuff that is unsafe here.
+ int ignored;
+ ignored = write(STDERR_FILENO, argv[0], strlen(argv[0]));
+ const char* message =
+ ": program not found or is not executable\n"
+ "Please specify a program using absolute path or make sure "
+ "the program is available in your PATH system variable\n";
+ ignored = write(STDERR_FILENO, message, strlen(message));
+ (void)ignored;
+
+ // Must use _exit() rather than exit() to avoid flushing output buffers
+ // that will also be flushed by the parent.
+ _exit(1);
+ } else {
+ free(argv[0]);
+
+ close(stdin_pipe[0]);
+ close(stdout_pipe[1]);
+
+ child_stdin_ = stdin_pipe[1];
+ child_stdout_ = stdout_pipe[0];
+ }
+}
+
+bool Subprocess::Communicate(const std::string& input_data,
+ std::string* output_data, std::string* error) {
+ if (child_stdin_ == -1) {
+ std::cerr << "Must call Start() first." << std::endl;
+ UPB_ASSERT(child_stdin_ != -1);
+ }
+
+ // The "sighandler_t" typedef is GNU-specific, so define our own.
+ typedef void SignalHandler(int);
+
+ // Make sure SIGPIPE is disabled so that if the child dies it doesn't kill us.
+ SignalHandler* old_pipe_handler = signal(SIGPIPE, SIG_IGN);
+
+ int input_pos = 0;
+ int max_fd = std::max(child_stdin_, child_stdout_);
+
+ while (child_stdout_ != -1) {
+ fd_set read_fds;
+ fd_set write_fds;
+ FD_ZERO(&read_fds);
+ FD_ZERO(&write_fds);
+ if (child_stdout_ != -1) {
+ FD_SET(child_stdout_, &read_fds);
+ }
+ if (child_stdin_ != -1) {
+ FD_SET(child_stdin_, &write_fds);
+ }
+
+ if (select(max_fd + 1, &read_fds, &write_fds, nullptr, nullptr) < 0) {
+ if (errno == EINTR) {
+ // Interrupted by signal. Try again.
+ continue;
+ } else {
+ std::cerr << "select: " << strerror(errno) << std::endl;
+ UPB_ASSERT(0);
+ }
+ }
+
+ if (child_stdin_ != -1 && FD_ISSET(child_stdin_, &write_fds)) {
+ int n = write(child_stdin_, input_data.data() + input_pos,
+ input_data.size() - input_pos);
+ if (n < 0) {
+ // Child closed pipe. Presumably it will report an error later.
+ // Pretend we're done for now.
+ input_pos = input_data.size();
+ } else {
+ input_pos += n;
+ }
+
+ if (input_pos == (int)input_data.size()) {
+ // We're done writing. Close.
+ close(child_stdin_);
+ child_stdin_ = -1;
+ }
+ }
+
+ if (child_stdout_ != -1 && FD_ISSET(child_stdout_, &read_fds)) {
+ char buffer[4096];
+ int n = read(child_stdout_, buffer, sizeof(buffer));
+
+ if (n > 0) {
+ output_data->append(buffer, (size_t)n);
+ } else {
+ // We're done reading. Close.
+ close(child_stdout_);
+ child_stdout_ = -1;
+ }
+ }
+ }
+
+ if (child_stdin_ != -1) {
+ // Child did not finish reading input before it closed the output.
+ // Presumably it exited with an error.
+ close(child_stdin_);
+ child_stdin_ = -1;
+ }
+
+ int status;
+ while (waitpid(child_pid_, &status, 0) == -1) {
+ if (errno != EINTR) {
+ std::cerr << "waitpid: " << strerror(errno) << std::endl;
+ UPB_ASSERT(0);
+ }
+ }
+
+ // Restore SIGPIPE handling.
+ signal(SIGPIPE, old_pipe_handler);
+
+ if (WIFEXITED(status)) {
+ if (WEXITSTATUS(status) != 0) {
+ int error_code = WEXITSTATUS(status);
+ *error =
+ absl::Substitute("Plugin failed with status code $0.", error_code);
+ return false;
+ }
+ } else if (WIFSIGNALED(status)) {
+ int signal = WTERMSIG(status);
+ *error = absl::Substitute("Plugin killed by signal $0.", signal);
+ return false;
+ } else {
+ *error = "Neither WEXITSTATUS nor WTERMSIG is true?";
+ return false;
+ }
+
+ return true;
+}
+
+#endif // !_WIN32
+
+} // namespace upbc
diff --git a/upb/upbc/subprocess.h b/upb/upbc/subprocess.h
new file mode 100644
index 0000000..b30b5bf
--- /dev/null
+++ b/upb/upbc/subprocess.h
@@ -0,0 +1,102 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Shamelessly copied from the protobuf compiler's subprocess.h
+// except this version passes strings instead of Messages.
+
+#ifndef THIRD_PARTY_UPB_UPBC_H_
+#define THIRD_PARTY_UPB_UPBC_H_
+
+#ifdef _WIN32
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN // right...
+#endif
+#include <windows.h>
+#else // _WIN32
+#include <sys/types.h>
+#include <unistd.h>
+#endif // !_WIN32
+#include <string>
+
+namespace upbc {
+
+// Utility class for launching sub-processes.
+class Subprocess {
+ public:
+ Subprocess();
+ ~Subprocess();
+
+ enum SearchMode {
+ SEARCH_PATH, // Use PATH environment variable.
+ EXACT_NAME // Program is an exact file name; don't use the PATH.
+ };
+
+ // Start the subprocess. Currently we don't provide a way to specify
+ // arguments as protoc plugins don't have any.
+ void Start(const std::string& program, SearchMode search_mode);
+
+ // Pipe the input message to the subprocess's stdin, then close the pipe.
+ // Meanwhile, read from the subprocess's stdout and copy into *output.
+ // All this is done carefully to avoid deadlocks.
+ // Returns true if successful. On any sort of error, returns false and sets
+ // *error to a description of the problem.
+ bool Communicate(const std::string& input_data, std::string* output_data,
+ std::string* error);
+
+#ifdef _WIN32
+ // Given an error code, returns a human-readable error message. This is
+ // defined here so that CommandLineInterface can share it.
+ static std::string Win32ErrorMessage(DWORD error_code);
+#endif
+
+ private:
+#ifdef _WIN32
+ DWORD process_start_error_;
+ HANDLE child_handle_;
+
+ // The file handles for our end of the child's pipes. We close each and
+ // set it to NULL when no longer needed.
+ HANDLE child_stdin_;
+ HANDLE child_stdout_;
+
+#else // _WIN32
+ pid_t child_pid_;
+
+ // The file descriptors for our end of the child's pipes. We close each and
+ // set it to -1 when no longer needed.
+ int child_stdin_;
+ int child_stdout_;
+
+#endif // !_WIN32
+};
+
+} // namespace upbc
+
+#endif // THIRD_PARTY_UPB_UPBC_H_
diff --git a/upb/upbc/upbc_so.c b/upb/upbc/upbc_so.c
new file mode 100644
index 0000000..1cfc04b
--- /dev/null
+++ b/upb/upbc/upbc_so.c
@@ -0,0 +1,34 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// These headers form a spanning tree for the upbc defs needed by FFI layers.
+
+#include "upbc/get_used_fields.h"
+#include "upbc/upbdev.h"
diff --git a/upb/upbc/upbdev.c b/upb/upbc/upbdev.c
new file mode 100644
index 0000000..ffccd07
--- /dev/null
+++ b/upb/upbc/upbdev.c
@@ -0,0 +1,136 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "upbc/upbdev.h"
+
+#ifdef _WIN32
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+#include <windows.h>
+#else // _WIN32
+#include <unistd.h>
+#endif // !_WIN32
+
+#include "google/protobuf/compiler/plugin.upb.h"
+#include "google/protobuf/compiler/plugin.upbdefs.h"
+#include "upb/base/status.h"
+#include "upb/json/decode.h"
+#include "upb/json/encode.h"
+#include "upb/mem/arena.h"
+#include "upbc/code_generator_request.h"
+#include "upbc/code_generator_request.upb.h"
+#include "upbc/code_generator_request.upbdefs.h"
+
+static google_protobuf_compiler_CodeGeneratorResponse* upbc_JsonDecode(
+ const char* data, size_t size, upb_Arena* arena, upb_Status* status) {
+ google_protobuf_compiler_CodeGeneratorResponse* response =
+ google_protobuf_compiler_CodeGeneratorResponse_new(arena);
+
+ upb_DefPool* s = upb_DefPool_New();
+ const upb_MessageDef* m = google_protobuf_compiler_CodeGeneratorResponse_getmsgdef(s);
+
+ (void)upb_JsonDecode(data, size, response, m, s, 0, arena, status);
+ if (!upb_Status_IsOk(status)) return NULL;
+
+ upb_DefPool_Free(s);
+
+ return response;
+}
+
+static upb_StringView upbc_JsonEncode(const upbc_CodeGeneratorRequest* request,
+ upb_Arena* arena, upb_Status* status) {
+ upb_StringView out = {.data = NULL, .size = 0};
+
+ upb_DefPool* s = upb_DefPool_New();
+ const upb_MessageDef* m = upbc_CodeGeneratorRequest_getmsgdef(s);
+ const int options = upb_JsonEncode_FormatEnumsAsIntegers;
+
+ out.size = upb_JsonEncode(request, m, s, options, NULL, 0, status);
+ if (!upb_Status_IsOk(status)) goto done;
+
+ char* data = (char*)upb_Arena_Malloc(arena, out.size + 1);
+
+ (void)upb_JsonEncode(request, m, s, options, data, out.size + 1, status);
+ if (!upb_Status_IsOk(status)) goto done;
+
+ out.data = (const char*)data;
+
+done:
+ upb_DefPool_Free(s);
+ return out;
+}
+
+upb_StringView upbdev_ProcessInput(const char* buf, size_t size,
+ upb_Arena* arena, upb_Status* status) {
+ upb_StringView out = {.data = NULL, .size = 0};
+
+ google_protobuf_compiler_CodeGeneratorRequest* inner_request =
+ google_protobuf_compiler_CodeGeneratorRequest_parse(buf, size, arena);
+
+ const upbc_CodeGeneratorRequest* outer_request =
+ upbc_MakeCodeGeneratorRequest(inner_request, arena, status);
+ if (!upb_Status_IsOk(status)) return out;
+
+ return upbc_JsonEncode(outer_request, arena, status);
+}
+
+upb_StringView upbdev_ProcessOutput(const char* buf, size_t size,
+ upb_Arena* arena, upb_Status* status) {
+ upb_StringView out = {.data = NULL, .size = 0};
+
+ const google_protobuf_compiler_CodeGeneratorResponse* response =
+ upbc_JsonDecode(buf, size, arena, status);
+ if (!upb_Status_IsOk(status)) return out;
+
+ out.data = google_protobuf_compiler_CodeGeneratorResponse_serialize(response, arena,
+ &out.size);
+ return out;
+}
+
+void upbdev_ProcessStdout(const char* buf, size_t size, upb_Arena* arena,
+ upb_Status* status) {
+ const upb_StringView sv = upbdev_ProcessOutput(buf, size, arena, status);
+ if (!upb_Status_IsOk(status)) return;
+
+ const char* ptr = sv.data;
+ size_t len = sv.size;
+ while (len) {
+ int n = write(1, ptr, len);
+ if (n > 0) {
+ ptr += n;
+ len -= n;
+ }
+ }
+}
+
+upb_Arena* upbdev_Arena_New() { return upb_Arena_New(); }
+
+void upbdev_Status_Clear(upb_Status* status) { upb_Status_Clear(status); }
diff --git a/upb/upbc/upbdev.h b/upb/upbc/upbdev.h
new file mode 100644
index 0000000..047a1be
--- /dev/null
+++ b/upb/upbc/upbdev.h
@@ -0,0 +1,71 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google LLC nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UPBC_UPBDEV_H_
+#define UPBC_UPBDEV_H_
+
+#include "upb/base/status.h"
+#include "upb/base/string_view.h"
+#include "upb/mem/arena.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Consume |buf|, deserialize it to a Code_Generator_Request proto, construct a
+// upbc_Code_Generator_Request, and return it as a JSON-encoded string.
+UPB_API upb_StringView upbdev_ProcessInput(const char* buf, size_t size,
+ upb_Arena* arena,
+ upb_Status* status);
+
+// Decode |buf| from JSON, serialize to wire format, and return it.
+UPB_API upb_StringView upbdev_ProcessOutput(const char* buf, size_t size,
+ upb_Arena* arena,
+ upb_Status* status);
+
+// Decode |buf| from JSON, serialize to wire format, and write it to stdout.
+UPB_API void upbdev_ProcessStdout(const char* buf, size_t size,
+ upb_Arena* arena, upb_Status* status);
+
+// The following wrappers allow the protoc plugins to call the above functions
+// without pulling in the entire pb_runtime library.
+UPB_API upb_Arena* upbdev_Arena_New(void);
+UPB_API void upbdev_Status_Clear(upb_Status* status);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port/undef.inc"
+
+#endif // UPBC_UPBDEV_H_