@@ -86,15 +86,18 @@ def testPrintExotic(self, message_module):
8686 message .repeated_string .append ('\000 \001 \a \b \f \n \r \t \v \\ \' "' )
8787 message .repeated_string .append (u'\u00fc \ua71f ' )
8888 self .CompareToGoldenText (
89- self .RemoveRedundantZeros (text_format .MessageToString (message )),
89+ self .RemoveRedundantZeros (
90+ text_format .MessageToString (message , as_utf8 = True )
91+ ),
9092 'repeated_int64: -9223372036854775808\n '
9193 'repeated_uint64: 18446744073709551615\n '
9294 'repeated_double: 123.456\n '
9395 'repeated_double: 1.23e+22\n '
9496 'repeated_double: 1.23e-18\n '
9597 'repeated_string:'
9698 ' "\\ 000\\ 001\\ 007\\ 010\\ 014\\ n\\ r\\ t\\ 013\\ \\ \\ \' \\ ""\n '
97- 'repeated_string: "\\ 303\\ 274\\ 352\\ 234\\ 237"\n ' )
99+ 'repeated_string: "üꜟ"\n ' ,
100+ )
98101
99102 def testPrintFloatPrecision (self , message_module ):
100103 message = message_module .TestAllTypes ()
@@ -204,8 +207,8 @@ class UnicodeSub(str):
204207 message = message_module .TestAllTypes ()
205208 message .repeated_string .append (UnicodeSub (u'\u00fc \ua71f ' ))
206209 self .CompareToGoldenText (
207- text_format .MessageToString (message ),
208- 'repeated_string: "\\ 303 \\ 274 \\ 352 \\ 234 \\ 237 "\n ' )
210+ text_format .MessageToString (message , as_utf8 = True ),
211+ 'repeated_string: "üꜟ "\n ' )
209212
210213 def testPrintNestedMessageAsOneLine (self , message_module ):
211214 message = message_module .TestAllTypes ()
@@ -282,15 +285,15 @@ def testPrintExoticAsOneLine(self, message_module):
282285 message .repeated_string .append (u'\u00fc \ua71f ' )
283286 self .CompareToGoldenText (
284287 self .RemoveRedundantZeros (text_format .MessageToString (
285- message , as_one_line = True )),
288+ message , as_one_line = True , as_utf8 = True )),
286289 'repeated_int64: -9223372036854775808'
287290 ' repeated_uint64: 18446744073709551615'
288291 ' repeated_double: 123.456'
289292 ' repeated_double: 1.23e+22'
290293 ' repeated_double: 1.23e-18'
291294 ' repeated_string: '
292295 '"\\ 000\\ 001\\ 007\\ 010\\ 014\\ n\\ r\\ t\\ 013\\ \\ \\ \' \\ ""'
293- ' repeated_string: "\\ 303 \\ 274 \\ 352 \\ 234 \\ 237 "' )
296+ ' repeated_string: "üꜟ "' )
294297
295298 def testRoundTripExoticAsOneLine (self , message_module ):
296299 message = message_module .TestAllTypes ()
@@ -616,8 +619,8 @@ def testMessageToBytes(self, message_module):
616619 def testRawUtf8RoundTrip (self , message_module ):
617620 message = message_module .TestAllTypes ()
618621 message .repeated_string .append (u'\u00fc \t \ua71f ' )
619- utf8_text = text_format .MessageToBytes (message , as_utf8 = True )
620- golden_bytes = b'repeated_string: "\xc3 \xbc \\ t \xea \x9c \x9f "\n '
622+ utf8_text = text_format .MessageToBytes (message , as_utf8 = False )
623+ golden_bytes = b'repeated_string: "\\ 303 \\ 274 \\ t \\ 352 \\ 234 \\ 237 "\n '
621624 self .CompareToGoldenText (utf8_text , golden_bytes )
622625 parsed_message = message_module .TestAllTypes ()
623626 text_format .Parse (utf8_text , parsed_message )
@@ -626,10 +629,41 @@ def testRawUtf8RoundTrip(self, message_module):
626629 (message , parsed_message , message .repeated_string [0 ],
627630 parsed_message .repeated_string [0 ]))
628631
def testRawUtf8RoundTripAsUtf8(self, message_module):
  """Round-trips a non-ASCII string through as_utf8=True text output.

  Appends a string containing non-ASCII characters and a tab to
  repeated_string, prints the message with MessageToString(as_utf8=True),
  parses that text into a fresh message, and asserts that the parsed
  message compares equal to the original.
  """
  original = message_module.TestAllTypes()
  original.repeated_string.append(u'\u00fc \t \ua71f ')
  reparsed = message_module.TestAllTypes()
  text_format.Parse(
      text_format.MessageToString(original, as_utf8=True), reparsed)
  # Build the failure text up front so a mismatch shows both the whole
  # messages and the individual string values.
  failure_detail = '\n %s != %s (%s != %s)' % (
      original,
      reparsed,
      original.repeated_string[0],
      reparsed.repeated_string[0],
  )
  self.assertEqual(original, reparsed, failure_detail)
642+
# This case can only be exercised under proto2: the proto3 parser rejects
# invalid UTF-8, so there should be no way of creating a string field that
# contains invalid UTF-8.
#
# It also cannot be exercised in pure-Python, which validates all string
# fields for UTF-8 even when the spec says it shouldn't.
@unittest.skipIf(api_implementation.Type() == 'python',
                 'Python can\'t create invalid UTF-8 strings')
def testInvalidUtf8RoundTrip(self, message_module):
  """Checks the text output for a string field holding an invalid UTF-8 byte.

  A OneBytes message whose data contains the byte 0xff is serialized and
  parsed back as a OneString message. The test then asserts that
  MessageToString(as_utf8=True) renders that byte as the octal escape
  \\377 while the surrounding ASCII characters are printed as-is.

  The test body only runs when message_module is unittest_pb2; for any
  other module it returns immediately.
  """
  if message_module is not unittest_pb2:
    return
  one_bytes = unittest_pb2.OneBytes()
  # No whitespace may follow the \xff escape: the golden text below expects
  # "123" to come directly after the escaped byte.
  one_bytes.data = b'ABC\xff123'
  one_string = unittest_pb2.OneString()
  one_string.ParseFromString(one_bytes.SerializeToString())
  self.assertIn(
      'data: "ABC\\377123"',
      text_format.MessageToString(one_string, as_utf8=True),
  )
662+
629663 def testEscapedUtf8ASCIIRoundTrip (self , message_module ):
630664 message = message_module .TestAllTypes ()
631665 message .repeated_string .append (u'\u00fc \t \ua71f ' )
632- ascii_text = text_format .MessageToBytes (message ) # as_utf8=False default
666+ ascii_text = text_format .MessageToBytes (message , as_utf8 = False )
633667 golden_bytes = b'repeated_string: "\\ 303\\ 274\\ t\\ 352\\ 234\\ 237"\n '
634668 self .CompareToGoldenText (ascii_text , golden_bytes )
635669 parsed_message = message_module .TestAllTypes ()
0 commit comments