Avoid interface calls in hot loop

Before, every charAt would emit (on android):
```
0x00002104 adrp x17, #+0x1000 (addr 0x3000)
0x00002108 ldr w17, [x17, #20]
0x0000210c ldr x0, [x0, #128]
0x00002110 ldr x0, [x0, #328]
0x00002114 ldr lr, [x0, #24]
0x00002118 blr lr <-- Call into String.charAt(int)
```
Now, it emits the inlined implementation of charAt (branch is for possibly compressed strings):
```
0x000020b4 ldur w16, [x4, #-8]
0x000020b8 tbnz w16, #0, #+0xc (addr 0x20c4)
0x000020bc ldrb w4, [x4, x0]
0x000020c0 b #+0x8 (addr 0x20c8)
0x000020c4 ldrh w4, [x4, x0, lsl #1]
```

PiperOrigin-RevId: 591147406

commit: b10d3f93b689a0b39fbc861d8d367491b4442a4f [log] [tgz]
author: Protobuf Team Bot <protobuf-github-bot@google.com> Thu Dec 14 22:16:21 2023 -0800
committer: Copybara-Service <copybara-worker@google.com> Thu Dec 14 22:20:28 2023 -0800
tree: edb25eb18838ba84de87480ea1255013b18c35d2
parent: 220415ddfb59d16c9309e5bae37bed3be0943a25 [diff]
diff --git a/java/core/src/main/java/com/google/protobuf/Utf8.java b/java/core/src/main/java/com/google/protobuf/Utf8.java
index f71820e..2eace98 100644
--- a/java/core/src/main/java/com/google/protobuf/Utf8.java
+++ b/java/core/src/main/java/com/google/protobuf/Utf8.java

@@ -214,24 +214,24 @@
    * @throws IllegalArgumentException if {@code sequence} contains ill-formed UTF-16 (unpaired
    *     surrogates)
    */
-  static int encodedLength(CharSequence sequence) {
+  static int encodedLength(String string) {
     // Warning to maintainers: this implementation is highly optimized.
-    int utf16Length = sequence.length();
+    int utf16Length = string.length();
     int utf8Length = utf16Length;
     int i = 0;
 
     // This loop optimizes for pure ASCII.
-    while (i < utf16Length && sequence.charAt(i) < 0x80) {
+    while (i < utf16Length && string.charAt(i) < 0x80) {
       i++;
     }
 
     // This loop optimizes for chars less than 0x800.
     for (; i < utf16Length; i++) {
-      char c = sequence.charAt(i);
+      char c = string.charAt(i);
       if (c < 0x800) {
         utf8Length += ((0x7f - c) >>> 31); // branch free!
       } else {
-        utf8Length += encodedLengthGeneral(sequence, i);
+        utf8Length += encodedLengthGeneral(string, i);
         break;
       }
     }
@@ -244,11 +244,11 @@
     return utf8Length;
   }
 
-  private static int encodedLengthGeneral(CharSequence sequence, int start) {
-    int utf16Length = sequence.length();
+  private static int encodedLengthGeneral(String string, int start) {
+    int utf16Length = string.length();
     int utf8Length = 0;
     for (int i = start; i < utf16Length; i++) {
-      char c = sequence.charAt(i);
+      char c = string.charAt(i);
       if (c < 0x800) {
         utf8Length += (0x7f - c) >>> 31; // branch free!
       } else {
@@ -256,7 +256,7 @@
         // jdk7+: if (Character.isSurrogate(c)) {
         if (Character.MIN_SURROGATE <= c && c <= Character.MAX_SURROGATE) {
           // Check that we have a well-formed surrogate pair.
-          int cp = Character.codePointAt(sequence, i);
+          int cp = Character.codePointAt(string, i);
           if (cp < MIN_SUPPLEMENTARY_CODE_POINT) {
             throw new UnpairedSurrogateException(i, utf16Length);
           }
@@ -267,7 +267,7 @@
     return utf8Length;
   }
 
-  static int encode(CharSequence in, byte[] out, int offset, int length) {
+  static int encode(String in, byte[] out, int offset, int length) {
     return processor.encodeUtf8(in, out, offset, length);
   }
   // End Guava UTF-8 methods.
@@ -326,9 +326,9 @@
    *
    * @param in the source string to be encoded
    * @param out the target buffer to receive the encoded string.
-   * @see Utf8#encode(CharSequence, byte[], int, int)
+   * @see Utf8#encode(String, byte[], int, int)
    */
-  static void encodeUtf8(CharSequence in, ByteBuffer out) {
+  static void encodeUtf8(String in, ByteBuffer out) {
     processor.encodeUtf8(in, out);
   }
 
@@ -724,7 +724,7 @@
      *     {@code bytes.length - offset}
      * @return the new offset, equivalent to {@code offset + Utf8.encodedLength(sequence)}
      */
-    abstract int encodeUtf8(CharSequence in, byte[] out, int offset, int length);
+    abstract int encodeUtf8(String in, byte[] out, int offset, int length);
 
     /**
      * Encodes an input character sequence ({@code in}) to UTF-8 in the target buffer ({@code out}).
@@ -743,7 +743,7 @@
      * @throws ArrayIndexOutOfBoundsException if {@code in} encoded in UTF-8 is longer than {@code
      *     out.remaining()}
      */
-    final void encodeUtf8(CharSequence in, ByteBuffer out) {
+    final void encodeUtf8(String in, ByteBuffer out) {
       if (out.hasArray()) {
         final int offset = out.arrayOffset();
         int endIndex = Utf8.encode(in, out.array(), offset + out.position(), out.remaining());
@@ -756,13 +756,13 @@
     }
 
     /** Encodes the input character sequence to a direct {@link ByteBuffer} instance. */
-    abstract void encodeUtf8Direct(CharSequence in, ByteBuffer out);
+    abstract void encodeUtf8Direct(String in, ByteBuffer out);
 
     /**
      * Encodes the input character sequence to a {@link ByteBuffer} instance using the {@link
      * ByteBuffer} API, rather than potentially faster approaches.
      */
-    final void encodeUtf8Default(CharSequence in, ByteBuffer out) {
+    final void encodeUtf8Default(String in, ByteBuffer out) {
       final int inLength = in.length();
       int outIx = out.position();
       int inIx = 0;
@@ -1013,7 +1013,7 @@
     }
 
     @Override
-    int encodeUtf8(CharSequence in, byte[] out, int offset, int length) {
+    int encodeUtf8(String in, byte[] out, int offset, int length) {
       int utf16Length = in.length();
       int j = offset;
       int i = 0;
@@ -1065,7 +1065,7 @@
     }
 
     @Override
-    void encodeUtf8Direct(CharSequence in, ByteBuffer out) {
+    void encodeUtf8Direct(String in, ByteBuffer out) {
       // For safe processing, we have to use the ByteBuffer API.
       encodeUtf8Default(in, out);
     }
@@ -1442,7 +1442,7 @@
     }
 
     @Override
-    int encodeUtf8(final CharSequence in, final byte[] out, final int offset, final int length) {
+    int encodeUtf8(final String in, final byte[] out, final int offset, final int length) {
       long outIx = offset;
       final long outLimit = outIx + length;
       final int inLimit = in.length();
@@ -1503,7 +1503,7 @@
     }
 
     @Override
-    void encodeUtf8Direct(CharSequence in, ByteBuffer out) {
+    void encodeUtf8Direct(String in, ByteBuffer out) {
       final long address = addressOffset(out);
       long outIx = address + out.position();
       final long outLimit = address + out.limit();

diff --git a/java/core/src/test/java/com/google/protobuf/Utf8Test.java b/java/core/src/test/java/com/google/protobuf/Utf8Test.java
index 986702d..2a53e82 100644
--- a/java/core/src/test/java/com/google/protobuf/Utf8Test.java
+++ b/java/core/src/test/java/com/google/protobuf/Utf8Test.java

@@ -194,7 +194,7 @@
 
   private static byte[] encodeToByteArray(String message, int length, Utf8.Processor processor) {
     byte[] output = new byte[length];
-    processor.encodeUtf8(message, output, 0, output.length);
+    int unused = processor.encodeUtf8(message, output, 0, output.length);
     return output;
   }
commit	b10d3f93b689a0b39fbc861d8d367491b4442a4f	[log] [tgz]
author	Protobuf Team Bot <protobuf-github-bot@google.com>	Thu Dec 14 22:16:21 2023 -0800
committer	Copybara-Service <copybara-worker@google.com>	Thu Dec 14 22:20:28 2023 -0800
tree	edb25eb18838ba84de87480ea1255013b18c35d2
parent	220415ddfb59d16c9309e5bae37bed3be0943a25 [diff]