Merge pull request #499 from haberman/python-pkg

Added a Bazel rule to build a binary wheel for Python
diff --git a/WORKSPACE b/WORKSPACE
index fd90b56..ca1036e 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -47,9 +47,9 @@
 
 http_archive(
     name = "rules_fuzzing",
-    sha256 = "e1b54211f7cee604194db080a8765220d3ef5db2a873fded429ce13e74d93a6b",
-    strip_prefix = "rules_fuzzing-4bafba51ffd9d418d236adb61de36fda1a90e764",
-    urls = ["https://github.com/bazelbuild/rules_fuzzing/archive/4bafba51ffd9d418d236adb61de36fda1a90e764.zip"],
+    sha256 = "23bb074064c6f488d12044934ab1b0631e8e6898d5cf2f6bde087adb01111573",
+    strip_prefix = "rules_fuzzing-0.3.1",
+    urls = ["https://github.com/bazelbuild/rules_fuzzing/archive/v0.3.1.zip"],
 )
 
 load("@rules_fuzzing//fuzzing:repositories.bzl", "rules_fuzzing_dependencies")
diff --git a/bazel/BUILD b/bazel/BUILD
index b21941a..1564219 100644
--- a/bazel/BUILD
+++ b/bazel/BUILD
@@ -23,8 +23,29 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+load("@bazel_skylib//:bzl_library.bzl", "bzl_library")
+
 py_binary(
     name = "amalgamate",
     srcs = ["amalgamate.py"],
     visibility = ["//:__pkg__"],
 )
+
+# py_proto_library() is private rule, only intended for internal use by upb.
+# Hopefully py_proto_library() will eventually be availble in rules_proto or
+# another upstream package.
+bzl_library(
+    name = "py_proto_library_bzl",
+    srcs = ["py_proto_library.bzl"],
+)
+
+bzl_library(
+    name = "upb_proto_library_bzl",
+    srcs = ["upb_proto_library.bzl"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "@bazel_skylib//lib:paths",
+        "@bazel_tools//tools/cpp:toolchain_utils.bzl",
+        "@rules_proto//proto:defs",
+    ],
+)
diff --git a/bazel/py_proto_library.bzl b/bazel/py_proto_library.bzl
new file mode 100644
index 0000000..9f99112
--- /dev/null
+++ b/bazel/py_proto_library.bzl
@@ -0,0 +1,134 @@
+# Copyright (c) 2009-2021, Google LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of Google LLC nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""An implementation of py_proto_library().
+
+We have to implement this ourselves because there is currently no reasonable
+py_proto_library() rule available for Bazel.
+
+Our py_proto_library() is similar to how a real py_proto_library() should work.
+But it hasn't been deeply tested or reviewed, and upb should not be in the
+business of vending py_proto_library(), so we keep it private to upb.
+"""
+
+load("@bazel_skylib//lib:paths.bzl", "paths")
+load("@rules_proto//proto:defs.bzl", "ProtoInfo")  # copybara:strip_for_google3
+
+# Generic support code #########################################################
+
+def _get_real_short_path(file):
+    # For some reason, files from other archives have short paths that look like:
+    #   ../com_google_protobuf/google/protobuf/descriptor.proto
+    short_path = file.short_path
+    if short_path.startswith("../"):
+        second_slash = short_path.index("/", 3)
+        short_path = short_path[second_slash + 1:]
+
+    # Sometimes it has another few prefixes like:
+    #   _virtual_imports/any_proto/google/protobuf/any.proto
+    #   benchmarks/_virtual_imports/100_msgs_proto/benchmarks/100_msgs.proto
+    # We want just google/protobuf/any.proto.
+    virtual_imports = "_virtual_imports/"
+    if virtual_imports in short_path:
+        short_path = short_path.split(virtual_imports)[1].split("/", 1)[1]
+    return short_path
+
+def _get_real_root(file):
+    real_short_path = _get_real_short_path(file)
+    return file.path[:-len(real_short_path) - 1]
+
+def _generate_output_file(ctx, src, extension):
+    real_short_path = _get_real_short_path(src)
+    real_short_path = paths.relativize(real_short_path, ctx.label.package)
+    output_filename = paths.replace_extension(real_short_path, extension)
+    ret = ctx.actions.declare_file(output_filename)
+    return ret
+
+# py_proto_library() ###########################################################
+
+def _py_proto_library_rule_impl(ctx):
+    # A real py_proto_library() should enforce this constraint.
+    # We don't bother for now, since it saves us some effort not to.
+    #
+    # if len(ctx.attr.deps) != 1:
+    #     fail("only one deps dependency allowed.")
+
+    files = []
+    for dep in ctx.attr.deps:
+        files += dep[PyInfo].transitive_sources.to_list()
+    return [
+        DefaultInfo(files = depset(direct = files)),
+    ]
+
+def _py_proto_library_aspect_impl(target, ctx):
+    proto_info = target[ProtoInfo]
+    proto_sources = proto_info.direct_sources
+    srcs = [_generate_output_file(ctx, name, "_pb2.py") for name in proto_sources]
+    transitive_sets = proto_info.transitive_descriptor_sets.to_list()
+    ctx.actions.run(
+        inputs = depset(
+            direct = [proto_info.direct_descriptor_set],
+            transitive = [proto_info.transitive_descriptor_sets],
+        ),
+        outputs = srcs,
+        executable = ctx.executable._protoc,
+        arguments = [
+                        "--python_out=" + _get_real_root(srcs[0]),
+                        "--descriptor_set_in=" + ctx.configuration.host_path_separator.join([f.path for f in transitive_sets]),
+                    ] +
+                    [_get_real_short_path(file) for file in proto_sources],
+        progress_message = "Generating Python protos for :" + ctx.label.name,
+    )
+    outs_depset = depset(srcs)
+    return [
+        PyInfo(transitive_sources = outs_depset)
+    ]
+
+_py_proto_library_aspect = aspect(
+    attrs = {
+        "_protoc": attr.label(
+            executable = True,
+            cfg = "exec",
+            default = "@com_google_protobuf//:protoc",
+        ),
+    },
+    implementation = _py_proto_library_aspect_impl,
+    provides = [
+        PyInfo,
+    ],
+    attr_aspects = ["deps"],
+)
+
+py_proto_library = rule(
+    output_to_genfiles = True,
+    implementation = _py_proto_library_rule_impl,
+    attrs = {
+        "deps": attr.label_list(
+            aspects = [_py_proto_library_aspect],
+            allow_rules = ["proto_library"],
+            providers = [ProtoInfo],
+        ),
+    },
+)
diff --git a/bazel/workspace_deps.bzl b/bazel/workspace_deps.bzl
index f825cf9..7976446 100644
--- a/bazel/workspace_deps.bzl
+++ b/bazel/workspace_deps.bzl
@@ -29,11 +29,14 @@
         ]
     )
 
+    rules_python_version = "740825b7f74930c62f44af95c9a4c1bd428d2c53" # Latest @ 2021-06-23
+
     maybe(
         http_archive,
         name = "rules_python",
-        url = "https://github.com/bazelbuild/rules_python/releases/download/0.1.0/rules_python-0.1.0.tar.gz",
-        sha256 = "b6d46438523a3ec0f3cead544190ee13223a52f6a6765a29eae7b7cc24cc83a0",
+        strip_prefix = "rules_python-{}".format(rules_python_version),
+        url = "https://github.com/bazelbuild/rules_python/archive/{}.zip".format(rules_python_version),
+        sha256 = "09a3c4791c61b62c2cbc5b2cbea4ccc32487b38c7a2cc8f87a794d7a659cc742",
     )
 
     maybe(
diff --git a/python/BUILD b/python/BUILD
index ca0a1bb..158b79e 100644
--- a/python/BUILD
+++ b/python/BUILD
@@ -27,6 +27,14 @@
     "//bazel:build_defs.bzl",
     "UPB_DEFAULT_COPTS",
 )
+load(
+    "//bazel:py_proto_library.bzl",
+    "py_proto_library",
+)
+load(
+    "@rules_python//python:packaging.bzl",
+    "py_wheel",
+)
 
 cc_binary(
     name = "message",
@@ -108,12 +116,17 @@
     visibility = ["//python:__subpackages__"],
 )
 
-py_library(
-    name = "message_ext",
-    data = [
+filegroup(
+    name = "extension_files",
+    srcs = [
         "google/protobuf/pyext/_message" + EXT_SUFFIX,
         "google/protobuf/internal/_api_implementation" + EXT_SUFFIX,
     ],
+)
+
+py_library(
+    name = "message_ext",
+    data = [":extension_files"],
     imports = ["."],
     visibility = ["//python:__subpackages__"],
 )
@@ -132,3 +145,42 @@
     imports = ["."],
     legacy_create_init = False,
 )
+
+py_proto_library(
+    name = "well_known_proto_pb2",
+    deps = [
+        "@com_google_protobuf//:any_proto",
+        "@com_google_protobuf//:api_proto",
+        "@com_google_protobuf//:compiler_plugin_proto",
+        "@com_google_protobuf//:descriptor_proto",
+        "@com_google_protobuf//:duration_proto",
+        "@com_google_protobuf//:empty_proto",
+        "@com_google_protobuf//:field_mask_proto",
+        "@com_google_protobuf//:source_context_proto",
+        "@com_google_protobuf//:struct_proto",
+        "@com_google_protobuf//:timestamp_proto",
+        "@com_google_protobuf//:type_proto",
+        "@com_google_protobuf//:wrappers_proto",
+    ],
+)
+
+py_wheel(
+    name = "binary_wheel",
+    abi = "abi3",
+    distribution = "protobuf",
+    python_tag = "cp36",
+    # TODO(https://github.com/protocolbuffers/upb/issues/502): we need to make
+    # this a select() that is calculated from the platform we are actually
+    # building on.
+    platform = "manylinux2014_x86_64",
+    version = "4.20.0",
+    strip_path_prefixes = ["python/"],
+    deps = [
+        ":extension_files",
+        ":well_known_proto_pb2",
+        # TODO(https://github.com/protocolbuffers/upb/issues/503): currently
+        # this includes the unit tests.  We should filter these out so we are
+        # only distributing true source files.
+        "@com_google_protobuf//:python_srcs",
+    ],
+)