Skip to content

Commit 1f95830

Browse files
mkruskal-googlecopybara-github
authored andcommitted
Editions: Migrate string_field_validation to a C++ feature
Every language has very different handling of utf8 validation. Any with proto2/proto3 differences will receive language-specific features for edition zero to better model these subtle differences. PiperOrigin-RevId: 557258923
1 parent bf0624f commit 1f95830

21 files changed

+251
-242
lines changed

src/google/protobuf/compiler/command_line_interface_unittest.cc

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1560,7 +1560,6 @@ TEST_F(CommandLineInterfaceTest, Plugin_RuntimeFeatures) {
15601560
EqualsProto(R"pb(field_presence: IMPLICIT
15611561
enum_type: OPEN
15621562
repeated_field_encoding: PACKED
1563-
string_field_validation: MANDATORY
15641563
message_encoding: LENGTH_PREFIXED
15651564
json_format: ALLOW
15661565
raw_features { field_presence: IMPLICIT }
@@ -1577,7 +1576,6 @@ TEST_F(CommandLineInterfaceTest, Plugin_RuntimeFeatures) {
15771576
EqualsProto(R"pb(field_presence: IMPLICIT
15781577
enum_type: OPEN
15791578
repeated_field_encoding: PACKED
1580-
string_field_validation: MANDATORY
15811579
message_encoding: LENGTH_PREFIXED
15821580
json_format: ALLOW
15831581
raw_features { field_presence: IMPLICIT }

src/google/protobuf/compiler/cpp/generator.cc

Lines changed: 42 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,17 @@ absl::flat_hash_map<absl::string_view, std::string> CommonVars(
101101
};
102102
}
103103

104+
static bool IsStringMapType(const FieldDescriptor& field) {
105+
if (!field.is_map()) return false;
106+
for (int i = 0; i < field.message_type()->field_count(); ++i) {
107+
if (field.message_type()->field(i)->type() ==
108+
FieldDescriptor::TYPE_STRING) {
109+
return true;
110+
}
111+
}
112+
return false;
113+
}
114+
104115
} // namespace
105116

106117
bool CppGenerator::Generate(const FileDescriptor* file,
@@ -356,19 +367,44 @@ absl::Status CppGenerator::ValidateFeatures(const FileDescriptor* file) const {
356367
google::protobuf::internal::VisitDescriptors(*file, [&](const FieldDescriptor& field) {
357368
const FeatureSet& source_features = GetSourceFeatures(field);
358369
const FeatureSet& raw_features = GetSourceRawFeatures(field);
359-
if (raw_features.GetExtension(::pb::cpp).has_legacy_closed_enum() &&
360-
field.cpp_type() != FieldDescriptor::CPPTYPE_ENUM) {
361-
status = absl::FailedPreconditionError(absl::StrCat(
362-
"Field ", field.full_name(),
363-
" specifies the legacy_closed_enum feature but has non-enum type."));
364-
}
365370
if (field.enum_type() != nullptr &&
366371
source_features.GetExtension(::pb::cpp).legacy_closed_enum() &&
367372
source_features.field_presence() == FeatureSet::IMPLICIT) {
368373
status = absl::FailedPreconditionError(
369374
absl::StrCat("Field ", field.full_name(),
370375
" has a closed enum type with implicit presence."));
371376
}
377+
if (source_features.GetExtension(::pb::cpp).utf8_validation() ==
378+
pb::CppFeatures::UTF8_VALIDATION_UNKNOWN) {
379+
status = absl::FailedPreconditionError(absl::StrCat(
380+
"Field ", field.full_name(),
381+
" has an unknown value for the utf8_validation feature. ",
382+
source_features.DebugString(), "\nRawFeatures: ", raw_features));
383+
}
384+
385+
if (field.containing_type() == nullptr ||
386+
!field.containing_type()->options().map_entry()) {
387+
// Skip validation of explicit features on generated map fields. These
388+
// will be blindly propagated from the original map field, and may violate
389+
// a lot of these conditions. Note: we do still validate the
390+
// user-specified map field.
391+
if (raw_features.GetExtension(::pb::cpp).has_legacy_closed_enum() &&
392+
field.cpp_type() != FieldDescriptor::CPPTYPE_ENUM) {
393+
status = absl::FailedPreconditionError(
394+
absl::StrCat("Field ", field.full_name(),
395+
" specifies the legacy_closed_enum feature but has "
396+
"non-enum type."));
397+
}
398+
if (field.type() != FieldDescriptor::TYPE_STRING &&
399+
!IsStringMapType(field) &&
400+
raw_features.GetExtension(::pb::cpp).has_utf8_validation()) {
401+
status = absl::FailedPreconditionError(
402+
absl::StrCat("Field ", field.full_name(),
403+
" specifies the utf8_validation feature but is not of "
404+
"string type."));
405+
}
406+
}
407+
372408
#ifdef PROTOBUF_FUTURE_REMOVE_WRONG_CTYPE
373409
if (field.options().has_ctype()) {
374410
if (field.cpp_type() != FieldDescriptor::CPPTYPE_STRING) {

src/google/protobuf/compiler/cpp/generator_unittest.cc

Lines changed: 85 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,6 @@ TEST_F(CppGeneratorTest, BasicError) {
8787
"foo.proto:4:7: Expected \"required\", \"optional\", or \"repeated\"");
8888
}
8989

90-
9190
TEST_F(CppGeneratorTest, LegacyClosedEnumOnNonEnumField) {
9291
CreateTempFile("foo.proto",
9392
R"schema(
@@ -150,8 +149,7 @@ TEST_F(CppGeneratorTest, LegacyClosedEnumInherited) {
150149
}
151150

152151
TEST_F(CppGeneratorTest, LegacyClosedEnumImplicit) {
153-
CreateTempFile("foo.proto",
154-
R"schema(
152+
CreateTempFile("foo.proto", R"schema(
155153
edition = "2023";
156154
import "google/protobuf/cpp_features.proto";
157155
option features.(pb.cpp).legacy_closed_enum = true;
@@ -162,7 +160,8 @@ TEST_F(CppGeneratorTest, LegacyClosedEnumImplicit) {
162160
message Foo {
163161
TestEnum bar = 1 [features.field_presence = IMPLICIT];
164162
int32 baz = 2;
165-
})schema");
163+
}
164+
)schema");
166165

167166
RunProtoc(
168167
"protocol_compiler --proto_path=$tmpdir --cpp_out=$tmpdir "
@@ -172,6 +171,88 @@ TEST_F(CppGeneratorTest, LegacyClosedEnumImplicit) {
172171
"Field Foo.bar has a closed enum type with implicit presence.");
173172
}
174173

174+
TEST_F(CppGeneratorTest, Utf8ValidationMap) {
175+
CreateTempFile("foo.proto", R"schema(
176+
edition = "2023";
177+
import "google/protobuf/cpp_features.proto";
178+
179+
message Foo {
180+
map<string, string> map_field = 1 [
181+
features.(pb.cpp).utf8_validation = NONE
182+
];
183+
map<int32, string> map_field_value = 2 [
184+
features.(pb.cpp).utf8_validation = VERIFY_PARSE
185+
];
186+
map<string, int32> map_field_key = 3 [
187+
features.(pb.cpp).utf8_validation = VERIFY_DLOG
188+
];
189+
}
190+
)schema");
191+
192+
RunProtoc(
193+
"protocol_compiler --proto_path=$tmpdir --cpp_out=$tmpdir "
194+
"--experimental_editions foo.proto");
195+
196+
ExpectNoErrors();
197+
}
198+
199+
TEST_F(CppGeneratorTest, Utf8ValidationNonString) {
200+
CreateTempFile("foo.proto", R"schema(
201+
edition = "2023";
202+
import "google/protobuf/cpp_features.proto";
203+
204+
message Foo {
205+
int64 bar = 1 [
206+
features.(pb.cpp).utf8_validation = NONE
207+
];
208+
}
209+
)schema");
210+
211+
RunProtoc(
212+
"protocol_compiler --proto_path=$tmpdir --cpp_out=$tmpdir "
213+
"--experimental_editions foo.proto");
214+
215+
ExpectErrorSubstring("Field Foo.bar specifies the utf8_validation feature");
216+
}
217+
218+
TEST_F(CppGeneratorTest, Utf8ValidationNonStringMap) {
219+
CreateTempFile("foo.proto", R"schema(
220+
edition = "2023";
221+
import "google/protobuf/cpp_features.proto";
222+
223+
message Foo {
224+
map<int64, int64> bar = 1 [
225+
features.(pb.cpp).utf8_validation = VERIFY_PARSE
226+
];
227+
}
228+
)schema");
229+
230+
RunProtoc(
231+
"protocol_compiler --proto_path=$tmpdir --cpp_out=$tmpdir "
232+
"--experimental_editions foo.proto");
233+
234+
ExpectErrorSubstring("Field Foo.bar specifies the utf8_validation feature");
235+
}
236+
237+
TEST_F(CppGeneratorTest, Utf8ValidationUnknownValue) {
238+
CreateTempFile("foo.proto", R"schema(
239+
edition = "2023";
240+
import "google/protobuf/cpp_features.proto";
241+
242+
message Foo {
243+
string bar = 1 [
244+
features.(pb.cpp).utf8_validation = UTF8_VALIDATION_UNKNOWN
245+
];
246+
}
247+
)schema");
248+
249+
RunProtoc(
250+
"protocol_compiler --proto_path=$tmpdir --cpp_out=$tmpdir "
251+
"--experimental_editions foo.proto");
252+
253+
ExpectErrorSubstring(
254+
"Field Foo.bar has an unknown value for the utf8_validation feature.");
255+
}
175256
#ifdef PROTOBUF_FUTURE_REMOVE_WRONG_CTYPE
176257
TEST_F(CppGeneratorTest, CtypeOnNoneStringFieldTest) {
177258
CreateTempFile("foo.proto",

src/google/protobuf/compiler/parser_unittest.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4243,7 +4243,7 @@ TEST_F(ParseEditionsTest, InvalidMerge) {
42434243
string foo = 1 [
42444244
default = "hello",
42454245
features.field_presence = FIELD_PRESENCE_UNKNOWN,
4246-
features.string_field_validation = STRING_FIELD_VALIDATION_UNKNOWN
4246+
features.enum_type = ENUM_TYPE_UNKNOWN
42474247
];
42484248
})schema",
42494249
"5:17: Feature field google.protobuf.FeatureSet.field_presence must resolve to a "

src/google/protobuf/descriptor.cc

Lines changed: 7 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1117,10 +1117,11 @@ FeatureSet* CreateProto2DefaultFeatures() {
11171117
features->set_field_presence(FeatureSet::EXPLICIT);
11181118
features->set_enum_type(FeatureSet::CLOSED);
11191119
features->set_repeated_field_encoding(FeatureSet::EXPANDED);
1120-
features->set_string_field_validation(FeatureSet::HINT);
11211120
features->set_message_encoding(FeatureSet::LENGTH_PREFIXED);
11221121
features->set_json_format(FeatureSet::LEGACY_BEST_EFFORT);
11231122
features->MutableExtension(pb::cpp)->set_legacy_closed_enum(true);
1123+
features->MutableExtension(pb::cpp)->set_utf8_validation(
1124+
pb::CppFeatures::VERIFY_DLOG);
11241125

11251126
return features;
11261127
}
@@ -1145,9 +1146,11 @@ const FeatureSet& GetProto3Features() {
11451146
features->set_field_presence(FeatureSet::IMPLICIT);
11461147
features->set_enum_type(FeatureSet::OPEN);
11471148
features->set_repeated_field_encoding(FeatureSet::PACKED);
1148-
features->set_string_field_validation(FeatureSet::MANDATORY);
11491149
features->set_message_encoding(FeatureSet::LENGTH_PREFIXED);
11501150
features->set_json_format(FeatureSet::ALLOW);
1151+
features->MutableExtension(pb::cpp)->set_legacy_closed_enum(false);
1152+
features->MutableExtension(pb::cpp)->set_utf8_validation(
1153+
pb::CppFeatures::VERIFY_PARSE);
11511154
return features;
11521155
}();
11531156
return *kProto3Features;
@@ -3817,7 +3820,8 @@ bool FieldDescriptor::is_packed() const {
38173820

38183821
static bool IsStrictUtf8(const FieldDescriptor* field) {
38193822
return internal::InternalFeatureHelper::GetFeatures(*field)
3820-
.string_field_validation() == FeatureSet::MANDATORY;
3823+
.GetExtension(pb::cpp)
3824+
.utf8_validation() == pb::CppFeatures::VERIFY_PARSE;
38213825
}
38223826

38233827
bool FieldDescriptor::requires_utf8_validation() const {
@@ -7838,17 +7842,6 @@ void DescriptorBuilder::ValidateOptions(const FieldDescriptor* field,
78387842

78397843
}
78407844

7841-
static bool IsStringMapType(const FieldDescriptor* field) {
7842-
if (!field->is_map()) return false;
7843-
for (int i = 0; i < field->message_type()->field_count(); ++i) {
7844-
if (field->message_type()->field(i)->type() ==
7845-
FieldDescriptor::TYPE_STRING) {
7846-
return true;
7847-
}
7848-
}
7849-
return false;
7850-
}
7851-
78527845
void DescriptorBuilder::ValidateFieldFeatures(
78537846
const FieldDescriptor* field, const FieldDescriptorProto& proto) {
78547847
// Rely on our legacy validation for proto2/proto3 files.
@@ -7895,12 +7888,6 @@ void DescriptorBuilder::ValidateFieldFeatures(
78957888
field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
78967889
"Only singular scalar fields can specify required field presence.");
78977890
}
7898-
if (field->type() != FieldDescriptor::TYPE_STRING &&
7899-
!IsStringMapType(field) &&
7900-
field->proto_features_->has_string_field_validation()) {
7901-
AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
7902-
"Only string fields can specify `string_field_validation`.");
7903-
}
79047891
if (!field->is_repeated() &&
79057892
field->proto_features_->has_repeated_field_encoding()) {
79067893
AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,

0 commit comments

Comments
 (0)