3636#include < type_traits>
3737#include < utility>
3838
39+ #include " absl/base/optimization.h"
3940#include " google/protobuf/generated_message_tctable_decl.h"
4041#include " google/protobuf/generated_message_tctable_impl.h"
4142#include " google/protobuf/inlined_string_field.h"
4243#include " google/protobuf/io/zero_copy_stream_impl_lite.h"
4344#include " google/protobuf/map.h"
4445#include " google/protobuf/message_lite.h"
4546#include " google/protobuf/parse_context.h"
47+ #include " google/protobuf/repeated_field.h"
4648#include " google/protobuf/varint_shuffle.h"
4749#include " google/protobuf/wire_format_lite.h"
4850#include " utf8_validity.h"
@@ -406,6 +408,45 @@ inline PROTOBUF_ALWAYS_INLINE void InvertPacked(TcFieldData& data) {
406408 data.data ^= Wt ^ WireFormatLite::WIRETYPE_LENGTH_DELIMITED;
407409}
408410
// Size, in bytes, of the stack buffer that each ScopedFieldAccumulator uses to
// stage elements before handing them to the backing repeated field.
constexpr uint32_t kAccumulatorBytesOnStack = 256;

// Stages parsed elements of a repeated field in a fixed-size stack buffer and
// appends them to the backing container in batches through
// `ContainerType::MergeFromArray(const ElementType*, size)`, so that the
// container is not grown once per element on the parsing path.
//
// Staged elements are flushed to the container when the buffer is full and
// when the accumulator is destroyed. Until then they are not visible through
// the container, and the container must outlive the accumulator.
//
// `ContainerType` may be a reference type (callers pass `decltype(field)` for
// a reference variable); reference collapsing makes `field_` an lvalue
// reference either way.
//
// A larger buffer (e.g. 2KiB) is actually harmful due to:
//  - increased stack overflow risk
//  - extra cache misses on accessing local variables
//  - a smaller relative win compared with the cost of growing a large buffer
template <typename ElementType, typename ContainerType>
class ScopedFieldAccumulator {
 public:
  // Binds the accumulator to `field`. Nothing is written to `field` until the
  // first flush.
  constexpr explicit ScopedFieldAccumulator(ContainerType& field)
      : field_(field) {}

  // Not copyable or movable: a copy would carry the same staged elements and
  // flush them a second time from its own destructor, duplicating entries in
  // the backing container.
  ScopedFieldAccumulator(const ScopedFieldAccumulator&) = delete;
  ScopedFieldAccumulator& operator=(const ScopedFieldAccumulator&) = delete;

  // Flushes whatever is still staged. The non-empty case is marked as the
  // expected one.
  ~ScopedFieldAccumulator() {
    if (ABSL_PREDICT_TRUE(current_size_ > 0)) {
      field_.MergeFromArray(buffer_, current_size_);
    }
  }

  // Stages `v`. If the buffer is already full, its contents are first appended
  // to the backing container and the buffer is reset, so element order is
  // preserved.
  void Add(ElementType v) {
    if (ABSL_PREDICT_FALSE(current_size_ == kSize)) {
      field_.MergeFromArray(buffer_, kSize);
      current_size_ = 0;
    }
    buffer_[current_size_++] = v;
  }

 private:
  // Number of elements that fit in the stack budget.
  static constexpr uint32_t kSize =
      kAccumulatorBytesOnStack / sizeof(ElementType);
  static_assert(kSize > 0, "Size cannot be zero");

  uint32_t current_size_ = 0;  // Number of valid entries in `buffer_`.
  // Deliberately left uninitialized; only the first `current_size_` entries
  // are ever read.
  ElementType buffer_[kSize];
  ContainerType& field_;
};
449+
409450} // namespace
410451
411452// ////////////////////////////////////////////////////////////////////////////
@@ -632,14 +673,17 @@ PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedFixed(
632673 }
633674 auto & field = RefAt<RepeatedField<LayoutType>>(msg, data.offset ());
634675 const auto tag = UnalignedLoad<TagType>(ptr);
635- do {
636- field.Add (UnalignedLoad<LayoutType>(ptr + sizeof (TagType)));
637- ptr += sizeof (TagType) + sizeof (LayoutType);
638- if (PROTOBUF_PREDICT_FALSE (!ctx->DataAvailable (ptr))) {
639- PROTOBUF_MUSTTAIL return ToParseLoop (PROTOBUF_TC_PARAM_NO_DATA_PASS);
640- }
641- } while (UnalignedLoad<TagType>(ptr) == tag);
676+ {
677+ ScopedFieldAccumulator<LayoutType, decltype (field)> accumulator (field);
678+ do {
679+ accumulator.Add (UnalignedLoad<LayoutType>(ptr + sizeof (TagType)));
680+ ptr += sizeof (TagType) + sizeof (LayoutType);
681+ if (PROTOBUF_PREDICT_FALSE (!ctx->DataAvailable (ptr))) goto parse_loop;
682+ } while (UnalignedLoad<TagType>(ptr) == tag);
683+ }
642684 PROTOBUF_MUSTTAIL return ToTagDispatch (PROTOBUF_TC_PARAM_NO_DATA_PASS);
685+ parse_loop:
686+ PROTOBUF_MUSTTAIL return ToParseLoop (PROTOBUF_TC_PARAM_NO_DATA_PASS);
643687}
644688
645689PROTOBUF_NOINLINE const char * TcParser::FastF32R1 (PROTOBUF_TC_PARAM_DECL) {
@@ -971,19 +1015,22 @@ PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedVarint(
9711015 }
9721016 auto & field = RefAt<RepeatedField<FieldType>>(msg, data.offset ());
9731017 const auto expected_tag = UnalignedLoad<TagType>(ptr);
974- do {
975- ptr += sizeof (TagType);
976- FieldType tmp;
977- ptr = ParseVarint (ptr, &tmp);
978- if (ptr == nullptr ) {
979- PROTOBUF_MUSTTAIL return Error (PROTOBUF_TC_PARAM_NO_DATA_PASS);
980- }
981- field.Add (ZigZagDecodeHelper<FieldType, zigzag>(tmp));
982- if (PROTOBUF_PREDICT_FALSE (!ctx->DataAvailable (ptr))) {
983- PROTOBUF_MUSTTAIL return ToParseLoop (PROTOBUF_TC_PARAM_NO_DATA_PASS);
984- }
985- } while (UnalignedLoad<TagType>(ptr) == expected_tag);
1018+ {
1019+ ScopedFieldAccumulator<FieldType, decltype (field)> accumulator (field);
1020+ do {
1021+ ptr += sizeof (TagType);
1022+ FieldType tmp;
1023+ ptr = ParseVarint (ptr, &tmp);
1024+ if (ptr == nullptr ) goto error;
1025+ accumulator.Add (ZigZagDecodeHelper<FieldType, zigzag>(tmp));
1026+ if (PROTOBUF_PREDICT_FALSE (!ctx->DataAvailable (ptr))) goto parse_loop;
1027+ } while (UnalignedLoad<TagType>(ptr) == expected_tag);
1028+ }
9861029 PROTOBUF_MUSTTAIL return ToTagDispatch (PROTOBUF_TC_PARAM_NO_DATA_PASS);
1030+ parse_loop:
1031+ PROTOBUF_MUSTTAIL return ToParseLoop (PROTOBUF_TC_PARAM_NO_DATA_PASS);
1032+ error:
1033+ PROTOBUF_MUSTTAIL return Error (PROTOBUF_TC_PARAM_NO_DATA_PASS);
9871034}
9881035
9891036PROTOBUF_NOINLINE const char * TcParser::FastV8R1 (PROTOBUF_TC_PARAM_DECL) {
@@ -1044,7 +1091,8 @@ const char* TcParser::PackedVarint(PROTOBUF_TC_PARAM_DECL) {
10441091 // pending hasbits now:
10451092 SyncHasbits (msg, hasbits, table);
10461093 auto * field = &RefAt<RepeatedField<FieldType>>(msg, data.offset ());
1047- return ctx->ReadPackedVarint (ptr, [field](uint64_t varint) {
1094+ ScopedFieldAccumulator<FieldType, decltype (*field)> accumulator (*field);
1095+ return ctx->ReadPackedVarint (ptr, [&](uint64_t varint) {
10481096 FieldType val;
10491097 if (zigzag) {
10501098 if (sizeof (FieldType) == 8 ) {
@@ -1055,7 +1103,7 @@ const char* TcParser::PackedVarint(PROTOBUF_TC_PARAM_DECL) {
10551103 } else {
10561104 val = varint;
10571105 }
1058- field-> Add (val);
1106+ accumulator. Add (val);
10591107 });
10601108}
10611109
@@ -1190,28 +1238,33 @@ const char* TcParser::RepeatedEnum(PROTOBUF_TC_PARAM_DECL) {
11901238 auto & field = RefAt<RepeatedField<int32_t >>(msg, data.offset ());
11911239 const auto expected_tag = UnalignedLoad<TagType>(ptr);
11921240 const TcParseTableBase::FieldAux aux = *table->field_aux (data.aux_idx ());
1193- do {
1194- const char * ptr2 = ptr; // save for unknown enum case
1195- ptr += sizeof (TagType);
1196- uint64_t tmp;
1197- ptr = ParseVarint (ptr, &tmp);
1198- if (ptr == nullptr ) {
1199- PROTOBUF_MUSTTAIL return Error (PROTOBUF_TC_PARAM_NO_DATA_PASS);
1200- }
1201- if (PROTOBUF_PREDICT_FALSE (
1202- !EnumIsValidAux (static_cast <int32_t >(tmp), xform_val, aux))) {
1203- // We can avoid duplicate work in MiniParse by directly calling
1204- // table->fallback.
1205- ptr = ptr2;
1206- PROTOBUF_MUSTTAIL return FastUnknownEnumFallback (PROTOBUF_TC_PARAM_PASS);
1207- }
1208- field.Add (static_cast <int32_t >(tmp));
1209- if (PROTOBUF_PREDICT_FALSE (!ctx->DataAvailable (ptr))) {
1210- PROTOBUF_MUSTTAIL return ToParseLoop (PROTOBUF_TC_PARAM_NO_DATA_PASS);
1211- }
1212- } while (UnalignedLoad<TagType>(ptr) == expected_tag);
1241+ {
1242+ ScopedFieldAccumulator<int32_t , decltype (field)> accumulator (field);
1243+ do {
1244+ const char * ptr2 = ptr; // save for unknown enum case
1245+ ptr += sizeof (TagType);
1246+ uint64_t tmp;
1247+ ptr = ParseVarint (ptr, &tmp);
1248+ if (ptr == nullptr ) goto error;
1249+ if (PROTOBUF_PREDICT_FALSE (
1250+ !EnumIsValidAux (static_cast <int32_t >(tmp), xform_val, aux))) {
1251+ // We can avoid duplicate work in MiniParse by directly calling
1252+ // table->fallback.
1253+ ptr = ptr2;
1254+ goto unknown_enum_fallback;
1255+ }
1256+ accumulator.Add (static_cast <int32_t >(tmp));
1257+ if (PROTOBUF_PREDICT_FALSE (!ctx->DataAvailable (ptr))) goto parse_loop;
1258+ } while (UnalignedLoad<TagType>(ptr) == expected_tag);
1259+ }
12131260
12141261 PROTOBUF_MUSTTAIL return ToTagDispatch (PROTOBUF_TC_PARAM_NO_DATA_PASS);
1262+ parse_loop:
1263+ PROTOBUF_MUSTTAIL return ToParseLoop (PROTOBUF_TC_PARAM_NO_DATA_PASS);
1264+ error:
1265+ PROTOBUF_MUSTTAIL return Error (PROTOBUF_TC_PARAM_NO_DATA_PASS);
1266+ unknown_enum_fallback:
1267+ PROTOBUF_MUSTTAIL return FastUnknownEnumFallback (PROTOBUF_TC_PARAM_PASS);
12151268}
12161269
12171270const TcParser::UnknownFieldOps& TcParser::GetUnknownFieldOps (
@@ -1345,19 +1398,22 @@ const char* TcParser::RepeatedEnumSmallRange(PROTOBUF_TC_PARAM_DECL) {
13451398 auto & field = RefAt<RepeatedField<int32_t >>(msg, data.offset ());
13461399 auto expected_tag = UnalignedLoad<TagType>(ptr);
13471400 const uint8_t max = data.aux_idx ();
1348- do {
1349- uint8_t v = ptr[sizeof (TagType)];
1350- if (PROTOBUF_PREDICT_FALSE (min > v || v > max)) {
1351- PROTOBUF_MUSTTAIL return MiniParse (PROTOBUF_TC_PARAM_NO_DATA_PASS);
1352- }
1353- field.Add (static_cast <int32_t >(v));
1354- ptr += sizeof (TagType) + 1 ;
1355- if (PROTOBUF_PREDICT_FALSE (!ctx->DataAvailable (ptr))) {
1356- PROTOBUF_MUSTTAIL return ToParseLoop (PROTOBUF_TC_PARAM_NO_DATA_PASS);
1357- }
1358- } while (UnalignedLoad<TagType>(ptr) == expected_tag);
1401+ {
1402+ ScopedFieldAccumulator<int32_t , decltype (field)> accumulator (field);
1403+ do {
1404+ uint8_t v = ptr[sizeof (TagType)];
1405+ if (PROTOBUF_PREDICT_FALSE (min > v || v > max)) goto mini_parse;
1406+ accumulator.Add (static_cast <int32_t >(v));
1407+ ptr += sizeof (TagType) + 1 ;
1408+ if (PROTOBUF_PREDICT_FALSE (!ctx->DataAvailable (ptr))) goto parse_loop;
1409+ } while (UnalignedLoad<TagType>(ptr) == expected_tag);
1410+ }
13591411
13601412 PROTOBUF_MUSTTAIL return ToTagDispatch (PROTOBUF_TC_PARAM_NO_DATA_PASS);
1413+ parse_loop:
1414+ PROTOBUF_MUSTTAIL return ToParseLoop (PROTOBUF_TC_PARAM_NO_DATA_PASS);
1415+ mini_parse:
1416+ PROTOBUF_MUSTTAIL return MiniParse (PROTOBUF_TC_PARAM_NO_DATA_PASS);
13611417}
13621418
13631419PROTOBUF_NOINLINE const char * TcParser::FastEr0R1 (PROTOBUF_TC_PARAM_DECL) {
@@ -1846,9 +1902,10 @@ PROTOBUF_NOINLINE const char* TcParser::MpRepeatedFixed(
18461902 constexpr auto size = sizeof (uint64_t );
18471903 const char * ptr2 = ptr;
18481904 uint32_t next_tag;
1905+ ScopedFieldAccumulator<uint64_t , decltype (field)> accumulator (field);
18491906 do {
18501907 ptr = ptr2;
1851- *field .Add () = UnalignedLoad<uint64_t >(ptr);
1908+ accumulator .Add (UnalignedLoad<uint64_t >(ptr) );
18521909 ptr += size;
18531910 if (!ctx->DataAvailable (ptr)) break ;
18541911 ptr2 = ReadTag (ptr, &next_tag);
@@ -1862,9 +1919,10 @@ PROTOBUF_NOINLINE const char* TcParser::MpRepeatedFixed(
18621919 constexpr auto size = sizeof (uint32_t );
18631920 const char * ptr2 = ptr;
18641921 uint32_t next_tag;
1922+ ScopedFieldAccumulator<uint32_t , decltype (field)> accumulator (field);
18651923 do {
18661924 ptr = ptr2;
1867- *field .Add () = UnalignedLoad<uint32_t >(ptr);
1925+ accumulator .Add (UnalignedLoad<uint32_t >(ptr) );
18681926 ptr += size;
18691927 if (!ctx->DataAvailable (ptr)) break ;
18701928 ptr2 = ReadTag (ptr, &next_tag);
@@ -1993,66 +2051,60 @@ PROTOBUF_NOINLINE const char* TcParser::MpRepeatedVarint(
19932051 auto & field = RefAt<RepeatedField<uint64_t >>(msg, entry.offset );
19942052 const char * ptr2 = ptr;
19952053 uint32_t next_tag;
2054+ ScopedFieldAccumulator<uint64_t , decltype (field)> accumulator (field);
19962055 do {
19972056 uint64_t tmp;
19982057 ptr = ParseVarint (ptr2, &tmp);
1999- if (ptr == nullptr ) {
2000- PROTOBUF_MUSTTAIL return Error (PROTOBUF_TC_PARAM_NO_DATA_PASS);
2001- }
2002- field.Add (is_zigzag ? WireFormatLite::ZigZagDecode64 (tmp) : tmp);
2058+ if (ptr == nullptr ) goto error;
2059+ accumulator.Add (is_zigzag ? WireFormatLite::ZigZagDecode64 (tmp) : tmp);
20032060 if (!ctx->DataAvailable (ptr)) break ;
20042061 ptr2 = ReadTag (ptr, &next_tag);
2005- if (ptr2 == nullptr ) {
2006- PROTOBUF_MUSTTAIL return Error (PROTOBUF_TC_PARAM_NO_DATA_PASS);
2007- }
2062+ if (ptr2 == nullptr ) goto error;
20082063 } while (next_tag == decoded_tag);
20092064 } else if (rep == field_layout::kRep32Bits ) {
20102065 auto & field = RefAt<RepeatedField<uint32_t >>(msg, entry.offset );
20112066 const char * ptr2 = ptr;
20122067 uint32_t next_tag;
2068+ ScopedFieldAccumulator<uint32_t , decltype (field)> accumulator (field);
20132069 do {
20142070 uint64_t tmp;
20152071 ptr = ParseVarint (ptr2, &tmp);
2016- if (ptr == nullptr ) {
2017- PROTOBUF_MUSTTAIL return Error (PROTOBUF_TC_PARAM_NO_DATA_PASS);
2018- }
2072+ if (ptr == nullptr ) goto error;
20192073 if (is_validated_enum) {
20202074 if (!EnumIsValidAux (tmp, xform_val, *table->field_aux (&entry))) {
20212075 ptr = ptr2;
2022- PROTOBUF_MUSTTAIL return MpUnknownEnumFallback (
2023- PROTOBUF_TC_PARAM_PASS);
2076+ goto unknown_enum_fallback;
20242077 }
20252078 } else if (is_zigzag) {
20262079 tmp = WireFormatLite::ZigZagDecode32 (tmp);
20272080 }
2028- field .Add (tmp);
2081+ accumulator .Add (tmp);
20292082 if (!ctx->DataAvailable (ptr)) break ;
20302083 ptr2 = ReadTag (ptr, &next_tag);
2031- if (ptr2 == nullptr ) {
2032- PROTOBUF_MUSTTAIL return Error (PROTOBUF_TC_PARAM_NO_DATA_PASS);
2033- }
2084+ if (ptr2 == nullptr ) goto error;
20342085 } while (next_tag == decoded_tag);
20352086 } else {
20362087 ABSL_DCHECK_EQ (rep, static_cast <uint16_t >(field_layout::kRep8Bits ));
20372088 auto & field = RefAt<RepeatedField<bool >>(msg, entry.offset );
20382089 const char * ptr2 = ptr;
20392090 uint32_t next_tag;
2091+ ScopedFieldAccumulator<bool , decltype (field)> accumulator (field);
20402092 do {
20412093 uint64_t tmp;
20422094 ptr = ParseVarint (ptr2, &tmp);
2043- if (ptr == nullptr ) {
2044- PROTOBUF_MUSTTAIL return Error (PROTOBUF_TC_PARAM_NO_DATA_PASS);
2045- }
2046- field.Add (static_cast <bool >(tmp));
2095+ if (ptr == nullptr ) goto error;
2096+ accumulator.Add (static_cast <bool >(tmp));
20472097 if (!ctx->DataAvailable (ptr)) break ;
20482098 ptr2 = ReadTag (ptr, &next_tag);
2049- if (ptr2 == nullptr ) {
2050- PROTOBUF_MUSTTAIL return Error (PROTOBUF_TC_PARAM_NO_DATA_PASS);
2051- }
2099+ if (ptr2 == nullptr ) goto error;
20522100 } while (next_tag == decoded_tag);
20532101 }
20542102
20552103 PROTOBUF_MUSTTAIL return ToTagDispatch (PROTOBUF_TC_PARAM_NO_DATA_PASS);
2104+ error:
2105+ PROTOBUF_MUSTTAIL return Error (PROTOBUF_TC_PARAM_NO_DATA_PASS);
2106+ unknown_enum_fallback:
2107+ PROTOBUF_MUSTTAIL return MpUnknownEnumFallback (PROTOBUF_TC_PARAM_PASS);
20562108}
20572109
20582110PROTOBUF_NOINLINE const char * TcParser::MpPackedVarint (PROTOBUF_TC_PARAM_DECL) {
@@ -2074,33 +2126,41 @@ PROTOBUF_NOINLINE const char* TcParser::MpPackedVarint(PROTOBUF_TC_PARAM_DECL) {
20742126
20752127 uint16_t rep = type_card & field_layout::kRepMask ;
20762128 if (rep == field_layout::kRep64Bits ) {
2077- auto * field = &RefAt<RepeatedField<uint64_t >>(msg, entry.offset );
2078- return ctx->ReadPackedVarint (ptr, [field, is_zigzag](uint64_t value) {
2079- field->Add (is_zigzag ? WireFormatLite::ZigZagDecode64 (value) : value);
2080- });
2129+ auto & field = RefAt<RepeatedField<uint64_t >>(msg, entry.offset );
2130+ ScopedFieldAccumulator<uint64_t , decltype (field)> accumulator (field);
2131+ return ctx->ReadPackedVarint (
2132+ ptr, [&accumulator, is_zigzag](uint64_t value) {
2133+ accumulator.Add (is_zigzag ? WireFormatLite::ZigZagDecode64 (value)
2134+ : value);
2135+ });
20812136 } else if (rep == field_layout::kRep32Bits ) {
2082- auto * field = & RefAt<RepeatedField<uint32_t >>(msg, entry.offset );
2137+ auto & field = RefAt<RepeatedField<uint32_t >>(msg, entry.offset );
20832138 if (is_validated_enum) {
20842139 const TcParseTableBase::FieldAux aux = *table->field_aux (entry.aux_idx );
2085- return ctx->ReadPackedVarint (ptr, [=](int32_t value) {
2140+ ScopedFieldAccumulator<uint32_t , decltype (field)> accumulator (field);
2141+ return ctx->ReadPackedVarint (ptr, [=, &accumulator](int32_t value) {
20862142 if (!EnumIsValidAux (value, xform_val, aux)) {
20872143 AddUnknownEnum (msg, table, data.tag (), value);
20882144 } else {
2089- field-> Add (value);
2145+ accumulator. Add (value);
20902146 }
20912147 });
20922148 } else {
2093- return ctx->ReadPackedVarint (ptr, [field, is_zigzag](uint64_t value) {
2094- field->Add (is_zigzag ? WireFormatLite::ZigZagDecode32 (
2095- static_cast <uint32_t >(value))
2096- : value);
2097- });
2149+ ScopedFieldAccumulator<uint32_t , decltype (field)> accumulator (field);
2150+ return ctx->ReadPackedVarint (
2151+ ptr, [&accumulator, is_zigzag](uint64_t value) {
2152+ accumulator.Add (is_zigzag ? WireFormatLite::ZigZagDecode32 (
2153+ static_cast <uint32_t >(value))
2154+ : value);
2155+ });
20982156 }
20992157 } else {
21002158 ABSL_DCHECK_EQ (rep, static_cast <uint16_t >(field_layout::kRep8Bits ));
2101- auto * field = &RefAt<RepeatedField<bool >>(msg, entry.offset );
2102- return ctx->ReadPackedVarint (
2103- ptr, [field](uint64_t value) { field->Add (value); });
2159+ auto & field = RefAt<RepeatedField<bool >>(msg, entry.offset );
2160+ ScopedFieldAccumulator<bool , decltype (field)> accumulator (field);
2161+ return ctx->ReadPackedVarint (ptr, [&](uint64_t value) {
2162+ accumulator.Add (static_cast <bool >(value));
2163+ });
21042164 }
21052165
21062166 PROTOBUF_MUSTTAIL return Error (PROTOBUF_TC_PARAM_NO_DATA_PASS);
0 commit comments