Docs: minor fix to the benchmark code

Just realized that the benchmark code was technically incorrect:
- 32-bit writes would misalign the subsequent 64-bit ones
- the storage itself was not guaranteed to be aligned

TBR: lalitm
Change-Id: Id39646a0a68c20367450672c9201f09b4c3ffe66
diff --git a/docs/design-docs/protozero.md b/docs/design-docs/protozero.md
index d77da48..fa87fe2 100644
--- a/docs/design-docs/protozero.md
+++ b/docs/design-docs/protozero.md
@@ -397,7 +397,7 @@
   void Append(T x) {
     // The memcpy will be elided by the compiler, which will emit just a
     // 64-bit aligned mov instruction.
-    memcpy(reinterpret_cast<T*>(ptr_), &x, sizeof(x));
+    memcpy(reinterpret_cast<void*>(ptr_), &x, sizeof(x));
     ptr_ += sizeof(x);
   }
 
@@ -407,7 +407,7 @@
   void set_field_uint64(uint64_t x) { Append(x); }
   void set_field_string(const char* str) { ptr_ = strcpy(ptr_, str); }
 
-  char storage_[sizeof(g_fake_input_simple)];
+  alignas(uint64_t) char storage_[sizeof(g_fake_input_simple) + 8];
   char* ptr_ = &storage_[0];
 };
 ```