Add discard unknown API in ruby. (#3990)

* Add discard unknown API in ruby.

* Add test for oneof message field.

* Add TestUnknown to represent unknown field data clearly.

* Only serialize the message with unknown fields itself in test.

* Move discard_unknown from Message to Google::Protobuf
diff --git a/ruby/ext/google/protobuf_c/encode_decode.c b/ruby/ext/google/protobuf_c/encode_decode.c
index d1b6e89..12080d0 100644
--- a/ruby/ext/google/protobuf_c/encode_decode.c
+++ b/ruby/ext/google/protobuf_c/encode_decode.c
@@ -1305,3 +1305,91 @@
   }
 }
 
+// Recursively clears unknown-field data from msg_rb and its submessages.
+static void discard_unknown(VALUE msg_rb, const Descriptor* desc) {
+  MessageHeader* msg;
+  upb_msg_field_iter it;
+  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
+  // Drop the unknown-field data stored directly on this message.
+  stringsink* unknown = msg->unknown_fields;
+  if (unknown != NULL) {
+    stringsink_uninit(unknown);  // NOTE(review): is the sink itself freed?
+    msg->unknown_fields = NULL;
+  }
+
+  for (upb_msg_field_begin(&it, desc->msgdef);
+       !upb_msg_field_done(&it);
+       upb_msg_field_next(&it)) {
+    upb_fielddef *f = upb_msg_iter_field(&it);
+    uint32_t offset =
+        desc->layout->fields[upb_fielddef_index(f)].offset +
+        sizeof(MessageHeader);
+
+    if (upb_fielddef_containingoneof(f)) {
+      uint32_t oneof_case_offset =
+          desc->layout->fields[upb_fielddef_index(f)].case_offset +
+          sizeof(MessageHeader);
+      // For a oneof, check that this field is actually present -- skip all the
+      // below if not.
+      if (DEREF(msg, oneof_case_offset, uint32_t) !=
+          upb_fielddef_number(f)) {
+        continue;
+      }
+      // Otherwise, fall through to the appropriate singular-field handler
+      // below.
+    }
+    // Skip fields that are not submessage-typed: nothing to recurse into.
+    if (!upb_fielddef_issubmsg(f)) {
+      continue;
+    }
+
+    if (is_map_field(f)) {
+      if (!upb_fielddef_issubmsg(map_field_value(f))) continue;
+      VALUE map = DEREF(msg, offset, VALUE);
+      if (map == Qnil) continue;
+      Map_iter map_it;
+      for (Map_begin(map, &map_it); !Map_done(&map_it); Map_next(&map_it)) {
+        VALUE submsg = Map_iter_value(&map_it);
+        VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned);
+        const Descriptor* subdesc = ruby_to_Descriptor(descriptor);
+        discard_unknown(submsg, subdesc);
+      }
+    } else if (upb_fielddef_isseq(f)) {
+      VALUE ary = DEREF(msg, offset, VALUE);
+      if (ary == Qnil) continue;
+      int size = NUM2INT(RepeatedField_length(ary));
+      for (int i = 0; i < size; i++) {
+        void* memory = RepeatedField_index_native(ary, i);
+        VALUE submsg = *((VALUE *)memory);
+        VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned);
+        const Descriptor* subdesc = ruby_to_Descriptor(descriptor);
+        discard_unknown(submsg, subdesc);
+      }
+    } else {
+      VALUE submsg = DEREF(msg, offset, VALUE);
+      if (submsg == Qnil) continue;
+      VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned);
+      const Descriptor* subdesc = ruby_to_Descriptor(descriptor);
+      discard_unknown(submsg, subdesc);
+    }
+  }
+}
+
+/*
+ * call-seq:
+ *     Google::Protobuf.discard_unknown(msg)
+ *
+ * Discard unknown fields in the given message object and recursively discard
+ * unknown fields in submessages. Raises ArgumentError for a RepeatedField/Map.
+ */
+VALUE Google_Protobuf_discard_unknown(VALUE self, VALUE msg_rb) {
+  VALUE klass = CLASS_OF(msg_rb);
+  VALUE descriptor;
+  // Validate first, so containers get ArgumentError before any descriptor use.
+  if (klass == cRepeatedField || klass == cMap) {
+    rb_raise(rb_eArgError, "Expected proto msg for discard unknown.");
+  }
+  descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
+  discard_unknown(msg_rb, ruby_to_Descriptor(descriptor));
+  return Qnil;
+}
diff --git a/ruby/ext/google/protobuf_c/protobuf.c b/ruby/ext/google/protobuf_c/protobuf.c
index c7750c4..db69642 100644
--- a/ruby/ext/google/protobuf_c/protobuf.c
+++ b/ruby/ext/google/protobuf_c/protobuf.c
@@ -103,6 +103,8 @@
   cError = rb_const_get(protobuf, rb_intern("Error"));
   cParseError = rb_const_get(protobuf, rb_intern("ParseError"));
 
+  rb_define_singleton_method(protobuf, "discard_unknown",
+                             Google_Protobuf_discard_unknown, 1);
   rb_define_singleton_method(protobuf, "deep_copy",
                              Google_Protobuf_deep_copy, 1);
 
diff --git a/ruby/ext/google/protobuf_c/protobuf.h b/ruby/ext/google/protobuf_c/protobuf.h
index 1291ac5..5266aa8 100644
--- a/ruby/ext/google/protobuf_c/protobuf.h
+++ b/ruby/ext/google/protobuf_c/protobuf.h
@@ -515,6 +515,7 @@
 VALUE Message_decode_json(VALUE klass, VALUE data);
 VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass);
 
+VALUE Google_Protobuf_discard_unknown(VALUE self, VALUE msg_rb);
 VALUE Google_Protobuf_deep_copy(VALUE self, VALUE obj);
 
 VALUE build_module_from_enumdesc(EnumDescriptor* enumdef);
diff --git a/ruby/tests/encode_decode_test.rb b/ruby/tests/encode_decode_test.rb
new file mode 100644
index 0000000..09581ab
--- /dev/null
+++ b/ruby/tests/encode_decode_test.rb
@@ -0,0 +1,63 @@
+#!/usr/bin/ruby
+
+# generated_code_pb.rb is in the same directory as this test.
+$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__)))
+
+require 'generated_code_pb'
+require 'test/unit'
+
+def hex2bin(s)
+  s.scan(/../).map { |x| x.hex.chr }.join
+end
+
+class EncodeDecodeTest < Test::Unit::TestCase
+  def test_discard_unknown
+    # Test discard unknown in message.
+    unknown_msg = A::B::C::TestUnknown.new(:unknown_field => 1)
+    from = A::B::C::TestUnknown.encode(unknown_msg)
+    m = A::B::C::TestMessage.decode(from)
+    Google::Protobuf.discard_unknown(m)
+    to = A::B::C::TestMessage.encode(m)
+    assert_equal '', to
+
+    # Test discard unknown for singular message field.
+    unknown_msg = A::B::C::TestUnknown.new(
+	    :optional_unknown =>
+	    A::B::C::TestUnknown.new(:unknown_field => 1))
+    from = A::B::C::TestUnknown.encode(unknown_msg)
+    m = A::B::C::TestMessage.decode(from)
+    Google::Protobuf.discard_unknown(m)
+    to = A::B::C::TestMessage.encode(m.optional_msg)
+    assert_equal '', to
+
+    # Test discard unknown for repeated message field.
+    unknown_msg = A::B::C::TestUnknown.new(
+	    :repeated_unknown =>
+	    [A::B::C::TestUnknown.new(:unknown_field => 1)])
+    from = A::B::C::TestUnknown.encode(unknown_msg)
+    m = A::B::C::TestMessage.decode(from)
+    Google::Protobuf.discard_unknown(m)
+    to = A::B::C::TestMessage.encode(m.repeated_msg[0])
+    assert_equal '', to
+
+    # Test discard unknown for map value message field.
+    unknown_msg = A::B::C::TestUnknown.new(
+	    :map_unknown =>
+	    {"" => A::B::C::TestUnknown.new(:unknown_field => 1)})
+    from = A::B::C::TestUnknown.encode(unknown_msg)
+    m = A::B::C::TestMessage.decode(from)
+    Google::Protobuf.discard_unknown(m)
+    to = A::B::C::TestMessage.encode(m.map_string_msg[''])
+    assert_equal '', to
+
+    # Test discard unknown for oneof message field.
+    unknown_msg = A::B::C::TestUnknown.new(
+	    :oneof_unknown =>
+	    A::B::C::TestUnknown.new(:unknown_field => 1))
+    from = A::B::C::TestUnknown.encode(unknown_msg)
+    m = A::B::C::TestMessage.decode(from)
+    Google::Protobuf.discard_unknown(m)
+    to = A::B::C::TestMessage.encode(m.oneof_msg)
+    assert_equal '', to
+  end
+end
diff --git a/ruby/tests/generated_code.proto b/ruby/tests/generated_code.proto
index 62fd83e..3b934bd 100644
--- a/ruby/tests/generated_code.proto
+++ b/ruby/tests/generated_code.proto
@@ -57,6 +57,9 @@
   }
 
   NestedMessage nested_message = 80;
+
+  // Field 89 is deliberately left undefined so it parses as an unknown field.
+  // int32 non_exist = 89;
 }
 
 enum TestEnum {
@@ -65,3 +68,13 @@
   B = 2;
   C = 3;
 }
+
+message TestUnknown {  // Used by the discard_unknown test as the encoder side.
+  TestUnknown optional_unknown = 11;  // test reads it back as optional_msg
+  repeated TestUnknown repeated_unknown = 31;  // read back as repeated_msg
+  oneof my_oneof {
+    TestUnknown oneof_unknown = 51;  // read back as oneof_msg
+  }
+  map<string, TestUnknown> map_unknown = 67;  // read back as map_string_msg
+  int32 unknown_field = 89;  // same number as the commented-out "non_exist"
+}