Properly byte-swap packed fixed-width (32-bit and 64-bit) fields when decoding on big-endian hosts.
diff --git a/benchmarks/compare.py b/benchmarks/compare.py
index 6ce9e76..55ed699 100755
--- a/benchmarks/compare.py
+++ b/benchmarks/compare.py
@@ -85,7 +85,7 @@
   Run("cp -f bazel-bin/tests/conformance_upb {}.bin".format(outbase))
 
 
-baseline = "master"
+baseline = "main"
 bench_cpu = True
 fasttable = False
 
diff --git a/upb/decode.c b/upb/decode.c
index 822694f..abc3ecb 100644
--- a/upb/decode.c
+++ b/upb/decode.c
@@ -449,8 +449,31 @@
   arr->len += count;
   // Note: if/when the decoder supports multi-buffer input, we will need to
   // handle buffer seams here.
-  memcpy(mem, ptr, val->size);
-  return ptr + val->size;
+  if (_upb_isle()) {
+    memcpy(mem, ptr, val->size);
+    ptr += val->size;
+  } else {
+    const char *end = ptr + val->size;
+    char *dst = mem;
+    while (ptr < end) {
+      if (lg2 == 2) {
+        uint32_t val;
+        memcpy(&val, ptr, sizeof(val));
+        val = _upb_be_swap32(val);
+        memcpy(dst, &val, sizeof(val));
+      } else {
+        UPB_ASSERT(lg2 == 3);
+        uint64_t val;
+        memcpy(&val, ptr, sizeof(val));
+        val = _upb_be_swap64(val);
+        memcpy(dst, &val, sizeof(val));
+      }
+      ptr += 1 << lg2;
+      dst += 1 << lg2;
+    }
+  }
+
+  return ptr;
 }
 
 UPB_FORCEINLINE