rust/bazel/encode_raw_string_as_crate_name.bzl - third_party/protobuf - Git at Google

 # Protocol Buffers - Google's data interchange format
 # Copyright 2026 Google LLC.  All rights reserved.
 #
 # Use of this source code is governed by a BSD-style
 # license that can be found in the LICENSE file or at
 # https://developers.google.com/open-source/licenses/bsd

 """Implements encode_raw_string_as_crate_name.

 Based on an implementation in rules_rust:
 * https://github.com/bazelbuild/rules_rust/blob/cdaf15f5796e3e934b074526272823284bbaed01/rust/private/utils.bzl#L643
 """

 # This is a list of pairs, where the first element of the pair is a character
 # that is allowed in Bazel package or target names but not in crate names; and
 # the second element is an encoding of that char suitable for use in a crate
 # name.
 _encodings = (
     (":", "x"),
     ("!", "excl"),
     ("%", "prc"),
     ("@", "ao"),
     ("^", "caret"),
     ("`", "bt"),
     (" ", "sp"),
     ("\"", "dq"),
     ("#", "octo"),
     ("$", "dllr"),
     ("&", "amp"),
     ("'", "sq"),
     ("(", "lp"),
     (")", "rp"),
     ("*", "astr"),
     ("-", "d"),
     ("+", "pl"),
     (",", "cm"),
     (";", "sm"),
     ("<", "la"),
     ("=", "eq"),
     (">", "ra"),
     ("?", "qm"),
     ("[", "lbk"),
     ("]", "rbk"),
     ("{", "lbe"),
     ("|", "pp"),
     ("}", "rbe"),
     ("~", "td"),
     ("/", "y"),
     (".", "pd"),
 )

 # For each of the above encodings, we generate two substitution rules: one that
 # ensures any occurrences of the encodings themselves in the package/target
 # aren't clobbered by this translation, and one that does the encoding itself.
 # We also include a rule that protects the clobbering-protection rules from
 # getting clobbered.
 _substitutions = [("_z", "_zz_")] + [
     subst
     for (pattern, replacement) in _encodings
     for subst in (
         ("_{}_".format(replacement), "_z{}_".format(replacement)),
         (pattern, "_{}_".format(replacement)),
     )
 ]

 # Expose the substitutions for testing only.
 substitutions_for_testing = _substitutions

 def _replace_all(string, substitutions):
     """Replaces occurrences of the given patterns in `string`.

     There are a few reasons this looks complicated:
     * The substitutions are performed with some priority, i.e. patterns that are
       listed first in `substitutions` are higher priority than patterns that are
       listed later.
     * We also take pains to avoid doing replacements that overlap with each
       other, since overlaps invalidate pattern matches.
     * To avoid hairy offset invalidation, we apply the substitutions
       right-to-left.
     * To avoid the "_quote" -> "_quotequote_" rule introducing new pattern
       matches later in the string during decoding, we take the leftmost
       replacement, in cases of overlap.  (Note that no rule can induce new
       pattern matches *earlier* in the string.) (E.g. "_quotedot_" encodes to
       "_quotequote_dot_". Note that "_quotequote_" and "_dot_" both occur in
       this string, and overlap.).

     Args:
         string (string): the string in which the replacements should be performed.
         substitutions: the list of patterns and replacements to apply.

     Returns:
         A string with the appropriate substitutions performed.
     """

     # Find the highest-priority pattern matches for each string index, going
     # left-to-right and skipping indices that are already involved in a
     # pattern match.
     plan = {}
     matched_indices_set = {}
     for pattern_start in range(len(string)):
         if pattern_start in matched_indices_set:
             continue
         for (pattern, replacement) in substitutions:
             if not string.startswith(pattern, pattern_start):
                 continue
             length = len(pattern)
             plan[pattern_start] = (length, replacement)
             matched_indices_set.update([(pattern_start + i, True) for i in range(length)])
             break

     # Execute the replacement plan, working from right to left.
     for pattern_start in sorted(plan.keys(), reverse = True):
         length, replacement = plan[pattern_start]
         after_pattern = pattern_start + length
         string = string[:pattern_start] + replacement + string[after_pattern:]

     return string

 def encode_raw_string_as_crate_name(str):
     """Encodes a string using the above encoding format.

     Args:
         str (string): The string to be encoded.

     Returns:
         An encoded version of the input string.
     """
     return _replace_all(str, _substitutions)

 def decode_crate_name_as_raw_string_for_testing(crate_name):
     """Decodes a crate_name that was encoded by encode_raw_string_as_crate_name.

     This is used to check that the encoding is bijective; it is expected to only
     be used in tests.

     Args:
         crate_name (string): The name of the crate.

     Returns:
         A string representing the Bazel label (package and target).
     """
     return _replace_all(crate_name, [(t[1], t[0]) for t in _substitutions])
	# Protocol Buffers - Google's data interchange format
	# Copyright 2026 Google LLC. All rights reserved.
	#
	# Use of this source code is governed by a BSD-style
	# license that can be found in the LICENSE file or at
	# https://developers.google.com/open-source/licenses/bsd

	"""Implements encode_raw_string_as_crate_name.

	Based on an implementation in rules_rust:
	* https://github.com/bazelbuild/rules_rust/blob/cdaf15f5796e3e934b074526272823284bbaed01/rust/private/utils.bzl#L643
	"""

	# This is a list of pairs, where the first element of the pair is a character
	# that is allowed in Bazel package or target names but not in crate names; and
	# the second element is an encoding of that char suitable for use in a crate
	# name.
	_encodings = (
	(":", "x"),
	("!", "excl"),
	("%", "prc"),
	("@", "ao"),
	("^", "caret"),
	("`", "bt"),
	(" ", "sp"),
	("\"", "dq"),
	("#", "octo"),
	("$", "dllr"),
	("&", "amp"),
	("'", "sq"),
	("(", "lp"),
	(")", "rp"),
	("*", "astr"),
	("-", "d"),
	("+", "pl"),
	(",", "cm"),
	(";", "sm"),
	("<", "la"),
	("=", "eq"),
	(">", "ra"),
	("?", "qm"),
	("[", "lbk"),
	("]", "rbk"),
	("{", "lbe"),
	("\|", "pp"),
	("}", "rbe"),
	("~", "td"),
	("/", "y"),
	(".", "pd"),
	)

	# For each of the above encodings, we generate two substitution rules: one that
	# ensures any occurrences of the encodings themselves in the package/target
	# aren't clobbered by this translation, and one that does the encoding itself.
	# We also include a rule that protects the clobbering-protection rules from
	# getting clobbered.
	_substitutions = [("_z", "_zz_")] + [
	subst
	for (pattern, replacement) in _encodings
	for subst in (
	("_{}_".format(replacement), "_z{}_".format(replacement)),
	(pattern, "_{}_".format(replacement)),
	)
	]

	# Expose the substitutions for testing only.
	substitutions_for_testing = _substitutions

	def _replace_all(string, substitutions):
	"""Replaces occurrences of the given patterns in `string`.

	There are a few reasons this looks complicated:
	* The substitutions are performed with some priority, i.e. patterns that are
	listed first in `substitutions` are higher priority than patterns that are
	listed later.
	* We also take pains to avoid doing replacements that overlap with each
	other, since overlaps invalidate pattern matches.
	* To avoid hairy offset invalidation, we apply the substitutions
	right-to-left.
	* To avoid the "_quote" -> "_quotequote_" rule introducing new pattern
	matches later in the string during decoding, we take the leftmost
	replacement, in cases of overlap. (Note that no rule can induce new
	pattern matches earlier in the string.) (E.g. "_quotedot_" encodes to
	"_quotequote_dot_". Note that "_quotequote_" and "_dot_" both occur in
	this string, and overlap.).

	Args:
	string (string): the string in which the replacements should be performed.
	substitutions: the list of patterns and replacements to apply.

	Returns:
	A string with the appropriate substitutions performed.
	"""

	# Find the highest-priority pattern matches for each string index, going
	# left-to-right and skipping indices that are already involved in a
	# pattern match.
	plan = {}
	matched_indices_set = {}
	for pattern_start in range(len(string)):
	if pattern_start in matched_indices_set:
	continue
	for (pattern, replacement) in substitutions:
	if not string.startswith(pattern, pattern_start):
	continue
	length = len(pattern)
	plan[pattern_start] = (length, replacement)
	matched_indices_set.update([(pattern_start + i, True) for i in range(length)])
	break

	# Execute the replacement plan, working from right to left.
	for pattern_start in sorted(plan.keys(), reverse = True):
	length, replacement = plan[pattern_start]
	after_pattern = pattern_start + length
	string = string[:pattern_start] + replacement + string[after_pattern:]

	return string

	def encode_raw_string_as_crate_name(str):
	"""Encodes a string using the above encoding format.

	Args:
	str (string): The string to be encoded.

	Returns:
	An encoded version of the input string.
	"""
	return _replace_all(str, _substitutions)

	def decode_crate_name_as_raw_string_for_testing(crate_name):
	"""Decodes a crate_name that was encoded by encode_raw_string_as_crate_name.

	This is used to check that the encoding is bijective; it is expected to only
	be used in tests.

	Args:
	crate_name (string): The name of the crate.

	Returns:
	A string representing the Bazel label (package and target).
	"""
	return _replace_all(crate_name, [(t[1], t[0]) for t in _substitutions])