| #!/usr/bin/ruby |
| |
| require 'google/protobuf' |
| require 'utf8_pb' |
| require 'test/unit' |
| |
# Hook for Ruby's Warning module that can temporarily collect warning
# messages in memory instead of passing them on to the default handler.
module CaptureWarnings
  # Messages collected by the capture currently in progress, or nil when no
  # capture is active. Shared by +warn+ and +capture+.
  @@warnings = nil

  module_function

  # Stores +message+ while a capture is active; otherwise hands the call,
  # with its original arguments, to the next +warn+ implementation.
  def warn(message, category: nil, **kwargs)
    return super unless @@warnings

    @@warnings << message
  end

  # Runs the given block with capturing switched on and returns the array of
  # messages collected while it ran. Capturing is switched off again
  # afterwards, even when the block raises.
  def capture
    @@warnings = []
    begin
      yield
      @@warnings
    ensure
      @@warnings = nil
    end
  end
end

# Install the hook on Warning so that emitted warnings pass through
# CaptureWarnings#warn first.
Warning.extend(CaptureWarnings)
| |
# Shared test cases that store a problematic string into each kind of string
# field of Utf8TestProtos::TestUtf8.
#
# A class that includes this module must provide:
#   bad_utf8_string - returns the string to store
#   assert_bad_utf8 - runs the given block and checks the outcome
module Utf8Test
  # Assigning to a singular string field.
  def test_scalar
    message = Utf8TestProtos::TestUtf8.new
    assert_bad_utf8 do
      message.optional_string = bad_utf8_string
    end
  end

  # Appending to a repeated string field.
  def test_repeated
    message = Utf8TestProtos::TestUtf8.new
    assert_bad_utf8 do
      message.repeated_string << bad_utf8_string
    end
  end

  # Using the string as a map key.
  def test_map_key
    message = Utf8TestProtos::TestUtf8.new
    assert_bad_utf8 do
      message.map_string_string[bad_utf8_string] = "abc"
    end
  end

  # Using the string as a map value.
  def test_map_value
    message = Utf8TestProtos::TestUtf8.new
    assert_bad_utf8 do
      message.map_string_string["abc"] = bad_utf8_string
    end
  end
end
| |
| # Tests the case of string objects that are marked UTF-8, but contain invalid |
| # UTF-8. |
| # |
| # For now these only warn, but in the next major version they will throw an |
| # exception. |
# Runs the shared Utf8Test cases with a string that is marked UTF-8 but does
# not hold valid UTF-8, expecting every store to emit exactly one warning.
class MarkedUtf8Test < Test::Unit::TestCase
  include Utf8Test

  # Runs the block while warnings are captured, then checks that exactly one
  # warning was produced and that it is the invalid-UTF-8 message.
  def assert_bad_utf8(&block)
    captured = CaptureWarnings.capture(&block)
    assert_equal(1, captured.length)
    assert_match(/String is invalid UTF-8. This will be an error in a future version./, captured[0])
  end

  # Returns a string whose encoding check fails from the start.
  def bad_utf8_string
    invalid = "\x80"
    assert_false(invalid.valid_encoding?)
    invalid
  end
end
| |
| # This test doesn't work in JRuby because JRuby appears to have a bug where |
| # the "valid" bit on a string's data is not invalidated properly when the |
| # string is modified: https://github.com/jruby/jruby/issues/8316 |
unless defined?(JRUBY_VERSION)
  # Covers strings that are marked UTF-8 and start out valid, but are then
  # modified so that they no longer hold valid UTF-8. This may put the string
  # into a state of "unknown" validity.
  #
  # For now these only warn, but in the next major version they will throw an
  # exception.
  class MarkedModifiedUtf8Test < Test::Unit::TestCase
    include Utf8Test

    # Runs the block while warnings are captured, then checks that exactly one
    # warning was produced and that it is the invalid-UTF-8 message.
    def assert_bad_utf8(&block)
      captured = CaptureWarnings.capture(&block)
      assert_equal(1, captured.length)
      assert_match(/String is invalid UTF-8. This will be an error in a future version./, captured[0])
    end

    # Returns a string that was valid when first checked and was made invalid
    # afterwards.
    def bad_utf8_string
      mutated = " "
      # The validity check has to happen before the modification below.
      assert_true(mutated.valid_encoding?)
      mutated[0] = "\x80"
      mutated
    end
  end
end
| |
| # Tests the case of string objects that are marked with a non-UTF-8 encoding, |
| # but contain invalid UTF-8. |
| # |
| # This case will raise Encoding::UndefinedConversionError. |
# Runs the shared Utf8Test cases with a binary-encoded string and expects each
# store to raise Encoding::UndefinedConversionError.
class MarkedNonUtf8Test < Test::Unit::TestCase
  include Utf8Test

  # Passes only when the given block raises
  # Encoding::UndefinedConversionError.
  def assert_bad_utf8
    assert_raises(Encoding::UndefinedConversionError) do
      yield
    end
  end

  # Returns the byte 0x80 tagged as ASCII-8BIT, after confirming that the
  # string reports itself as validly encoded.
  def bad_utf8_string
    binary = "\x80".force_encoding(Encoding::ASCII_8BIT)
    assert_true(binary.valid_encoding?)
    binary
  end
end
| |
| # Tests the case of string objects that are marked with a non-UTF-8 encoding, |
| # but are invalid even in their source encoding. |
| # |
| # This case will raise Encoding::InvalidByteSequenceError |
# Runs the shared Utf8Test cases with a string that is invalid even in its own
# (non-UTF-8) source encoding and expects each store to raise
# Encoding::InvalidByteSequenceError.
#
# This class needs a name of its own: declaring it as MarkedNonUtf8Test would
# reopen the test case of that name defined earlier in this file and replace
# its assert_bad_utf8 and bad_utf8_string, so the
# Encoding::UndefinedConversionError scenario would never run.
class MarkedInvalidNonUtf8Test < Test::Unit::TestCase
  include Utf8Test

  # Passes only when the given block raises
  # Encoding::InvalidByteSequenceError.
  def assert_bad_utf8(&block)
    assert_raises(Encoding::InvalidByteSequenceError, &block)
  end

  # Returns the byte 0x80 tagged as ASCII, after confirming that the string
  # reports itself as invalidly encoded.
  def bad_utf8_string
    str = "\x80".force_encoding(Encoding::ASCII)
    assert_false str.valid_encoding?
    str
  end
end