Avoid interface calls in hot loop
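The encoding helpers now take String instead of CharSequence, so charAt is no longer an interface call.
Before, every charAt call would emit the following (on Android):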
```
0x00002104 adrp x17, #+0x1000 (addr 0x3000)
0x00002108 ldr w17, [x17, #20]
0x0000210c ldr x0, [x0, #128]
0x00002110 ldr x0, [x0, #328]
0x00002114 ldr lr, [x0, #24]
0x00002118 blr lr <-- Call into String.charAt(int)
```
Now, it emits the inlined implementation of charAt (the branch handles possibly compressed strings):
```
0x000020b4 ldur w16, [x4, #-8]
0x000020b8 tbnz w16, #0, #+0xc (addr 0x20c4)
0x000020bc ldrb w4, [x4, x0]
0x000020c0 b #+0x8 (addr 0x20c8)
0x000020c4 ldrh w4, [x4, x0, lsl #1]
```
PiperOrigin-RevId: 591147406
diff --git a/java/core/src/main/java/com/google/protobuf/Utf8.java b/java/core/src/main/java/com/google/protobuf/Utf8.java
index f71820e..2eace98 100644
--- a/java/core/src/main/java/com/google/protobuf/Utf8.java
+++ b/java/core/src/main/java/com/google/protobuf/Utf8.java
@@ -214,24 +214,24 @@
* @throws IllegalArgumentException if {@code sequence} contains ill-formed UTF-16 (unpaired
* surrogates)
*/
- static int encodedLength(CharSequence sequence) {
+ static int encodedLength(String string) {
// Warning to maintainers: this implementation is highly optimized.
- int utf16Length = sequence.length();
+ int utf16Length = string.length();
int utf8Length = utf16Length;
int i = 0;
// This loop optimizes for pure ASCII.
- while (i < utf16Length && sequence.charAt(i) < 0x80) {
+ while (i < utf16Length && string.charAt(i) < 0x80) {
i++;
}
// This loop optimizes for chars less than 0x800.
for (; i < utf16Length; i++) {
- char c = sequence.charAt(i);
+ char c = string.charAt(i);
if (c < 0x800) {
utf8Length += ((0x7f - c) >>> 31); // branch free!
} else {
- utf8Length += encodedLengthGeneral(sequence, i);
+ utf8Length += encodedLengthGeneral(string, i);
break;
}
}
@@ -244,11 +244,11 @@
return utf8Length;
}
- private static int encodedLengthGeneral(CharSequence sequence, int start) {
- int utf16Length = sequence.length();
+ private static int encodedLengthGeneral(String string, int start) {
+ int utf16Length = string.length();
int utf8Length = 0;
for (int i = start; i < utf16Length; i++) {
- char c = sequence.charAt(i);
+ char c = string.charAt(i);
if (c < 0x800) {
utf8Length += (0x7f - c) >>> 31; // branch free!
} else {
@@ -256,7 +256,7 @@
// jdk7+: if (Character.isSurrogate(c)) {
if (Character.MIN_SURROGATE <= c && c <= Character.MAX_SURROGATE) {
// Check that we have a well-formed surrogate pair.
- int cp = Character.codePointAt(sequence, i);
+ int cp = Character.codePointAt(string, i);
if (cp < MIN_SUPPLEMENTARY_CODE_POINT) {
throw new UnpairedSurrogateException(i, utf16Length);
}
@@ -267,7 +267,7 @@
return utf8Length;
}
- static int encode(CharSequence in, byte[] out, int offset, int length) {
+ static int encode(String in, byte[] out, int offset, int length) {
return processor.encodeUtf8(in, out, offset, length);
}
// End Guava UTF-8 methods.
@@ -326,9 +326,9 @@
*
* @param in the source string to be encoded
* @param out the target buffer to receive the encoded string.
- * @see Utf8#encode(CharSequence, byte[], int, int)
+ * @see Utf8#encode(String, byte[], int, int)
*/
- static void encodeUtf8(CharSequence in, ByteBuffer out) {
+ static void encodeUtf8(String in, ByteBuffer out) {
processor.encodeUtf8(in, out);
}
@@ -724,7 +724,7 @@
* {@code bytes.length - offset}
* @return the new offset, equivalent to {@code offset + Utf8.encodedLength(sequence)}
*/
- abstract int encodeUtf8(CharSequence in, byte[] out, int offset, int length);
+ abstract int encodeUtf8(String in, byte[] out, int offset, int length);
/**
* Encodes an input character sequence ({@code in}) to UTF-8 in the target buffer ({@code out}).
@@ -743,7 +743,7 @@
* @throws ArrayIndexOutOfBoundsException if {@code in} encoded in UTF-8 is longer than {@code
* out.remaining()}
*/
- final void encodeUtf8(CharSequence in, ByteBuffer out) {
+ final void encodeUtf8(String in, ByteBuffer out) {
if (out.hasArray()) {
final int offset = out.arrayOffset();
int endIndex = Utf8.encode(in, out.array(), offset + out.position(), out.remaining());
@@ -756,13 +756,13 @@
}
/** Encodes the input character sequence to a direct {@link ByteBuffer} instance. */
- abstract void encodeUtf8Direct(CharSequence in, ByteBuffer out);
+ abstract void encodeUtf8Direct(String in, ByteBuffer out);
/**
* Encodes the input character sequence to a {@link ByteBuffer} instance using the {@link
* ByteBuffer} API, rather than potentially faster approaches.
*/
- final void encodeUtf8Default(CharSequence in, ByteBuffer out) {
+ final void encodeUtf8Default(String in, ByteBuffer out) {
final int inLength = in.length();
int outIx = out.position();
int inIx = 0;
@@ -1013,7 +1013,7 @@
}
@Override
- int encodeUtf8(CharSequence in, byte[] out, int offset, int length) {
+ int encodeUtf8(String in, byte[] out, int offset, int length) {
int utf16Length = in.length();
int j = offset;
int i = 0;
@@ -1065,7 +1065,7 @@
}
@Override
- void encodeUtf8Direct(CharSequence in, ByteBuffer out) {
+ void encodeUtf8Direct(String in, ByteBuffer out) {
// For safe processing, we have to use the ByteBuffer API.
encodeUtf8Default(in, out);
}
@@ -1442,7 +1442,7 @@
}
@Override
- int encodeUtf8(final CharSequence in, final byte[] out, final int offset, final int length) {
+ int encodeUtf8(final String in, final byte[] out, final int offset, final int length) {
long outIx = offset;
final long outLimit = outIx + length;
final int inLimit = in.length();
@@ -1503,7 +1503,7 @@
}
@Override
- void encodeUtf8Direct(CharSequence in, ByteBuffer out) {
+ void encodeUtf8Direct(String in, ByteBuffer out) {
final long address = addressOffset(out);
long outIx = address + out.position();
final long outLimit = address + out.limit();
diff --git a/java/core/src/test/java/com/google/protobuf/Utf8Test.java b/java/core/src/test/java/com/google/protobuf/Utf8Test.java
index 986702d..2a53e82 100644
--- a/java/core/src/test/java/com/google/protobuf/Utf8Test.java
+++ b/java/core/src/test/java/com/google/protobuf/Utf8Test.java
@@ -194,7 +194,7 @@
private static byte[] encodeToByteArray(String message, int length, Utf8.Processor processor) {
byte[] output = new byte[length];
- processor.encodeUtf8(message, output, 0, output.length);
+ int unused = processor.encodeUtf8(message, output, 0, output.length);
return output;
}