Skip to content

Commit 139ea4d

Browse files
Tweak "group to property name" mapping for C#.

Under editions, fields that use a delimited encoding have field names that are independent of their type names, so we want to use the specified field name.
This change keeps the existing naming for properties generated from proto2 files, while improving the experience under editions. It introduces a C# incompatibility when upgrading a proto from proto2 to editions, but we anticipate this being a relatively rare problem.

PiperOrigin-RevId: 610783407
1 parent 0ad1bfc commit 139ea4d

File tree

2 files changed

+105
-73
lines changed

2 files changed

+105
-73
lines changed

src/google/protobuf/compiler/csharp/csharp_generator.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@ class PROTOC_EXPORT Generator : public CodeGenerator {
3535
GeneratorContext* generator_context,
3636
std::string* error) const override;
3737
uint64_t GetSupportedFeatures() const override;
38+
using CodeGenerator::GetEdition;
3839
};
3940

4041
} // namespace csharp

src/google/protobuf/compiler/csharp/csharp_helpers.cc

Lines changed: 104 additions & 73 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
#include "absl/strings/string_view.h"
2525
#include "google/protobuf/compiler/csharp/csharp_enum_field.h"
2626
#include "google/protobuf/compiler/csharp/csharp_field_base.h"
27+
#include "google/protobuf/compiler/csharp/csharp_generator.h"
2728
#include "google/protobuf/compiler/csharp/csharp_map_field.h"
2829
#include "google/protobuf/compiler/csharp/csharp_message_field.h"
2930
#include "google/protobuf/compiler/csharp/csharp_options.h"
@@ -87,11 +88,11 @@ CSharpType GetCSharpType(FieldDescriptor::Type type) {
8788
// types are added.
8889
}
8990
ABSL_LOG(FATAL) << "Can't get here.";
90-
return (CSharpType) -1;
91+
return (CSharpType)-1;
9192
}
9293

93-
// Convert a string which is expected to be SHOUTY_CASE (but may not be *precisely* shouty)
94-
// into a PascalCase string. Precise rules implemented:
94+
// Convert a string which is expected to be SHOUTY_CASE (but may not be
95+
// *precisely* shouty) into a PascalCase string. Precise rules implemented:
9596

9697
// Previous input character Current character Case
9798
// Any Non-alphanumeric Skipped
@@ -124,12 +125,11 @@ std::string ShoutyToPascalCase(absl::string_view input) {
124125
return result;
125126
}
126127

127-
// Attempt to remove a prefix from a value, ignoring casing and skipping underscores.
128-
// (foo, foo_bar) => bar - underscore after prefix is skipped
129-
// (FOO, foo_bar) => bar - casing is ignored
130-
// (foo_bar, foobarbaz) => baz - underscore in prefix is ignored
131-
// (foobar, foo_barbaz) => baz - underscore in value is ignored
132-
// (foo, bar) => bar - prefix isn't matched; return original value
128+
// Attempt to remove a prefix from a value, ignoring casing and skipping
129+
// underscores. Examples: (foo, foo_bar) => bar - underscore after prefix is skipped; (FOO,
130+
// foo_bar) => bar - casing is ignored; (foo_bar, foobarbaz) => baz - underscore
131+
// in prefix is ignored; (foobar, foo_barbaz) => baz - underscore in value is
132+
// ignored; (foo, bar) => bar - prefix isn't matched, so the original value is returned.
133133
std::string TryRemovePrefix(absl::string_view prefix, absl::string_view value) {
134134
// First normalize to a lower-case no-underscores prefix to match against
135135
std::string prefix_to_match = "";
@@ -142,13 +142,14 @@ std::string TryRemovePrefix(absl::string_view prefix, absl::string_view value) {
142142
// This keeps track of how much of value we've consumed
143143
size_t prefix_index, value_index;
144144
for (prefix_index = 0, value_index = 0;
145-
prefix_index < prefix_to_match.size() && value_index < value.size();
146-
value_index++) {
145+
prefix_index < prefix_to_match.size() && value_index < value.size();
146+
value_index++) {
147147
// Skip over underscores in the value
148148
if (value[value_index] == '_') {
149149
continue;
150150
}
151-
if (absl::ascii_tolower(value[value_index]) != prefix_to_match[prefix_index++]) {
151+
if (absl::ascii_tolower(value[value_index]) !=
152+
prefix_to_match[prefix_index++]) {
152153
// Failed to match the prefix - bail out early.
153154
return std::string(value);
154155
}
@@ -164,7 +165,8 @@ std::string TryRemovePrefix(absl::string_view prefix, absl::string_view value) {
164165
value_index++;
165166
}
166167

167-
// If there's nothing left (e.g. it was a prefix with only underscores afterwards), don't strip.
168+
// If there's nothing left (e.g. it was a prefix with only underscores
169+
// afterwards), don't strip.
168170
if (value_index == value.size()) {
169171
return std::string(value);
170172
}
@@ -181,8 +183,8 @@ std::string GetEnumValueName(absl::string_view enum_name,
181183
absl::string_view enum_value_name) {
182184
std::string stripped = TryRemovePrefix(enum_name, enum_value_name);
183185
std::string result = ShoutyToPascalCase(stripped);
184-
// Just in case we have an enum name of FOO and a value of FOO_2... make sure the returned
185-
// string is a valid identifier.
186+
// Just in case we have an enum name of FOO and a value of FOO_2... make sure
187+
// the returned string is a valid identifier.
186188
if (absl::ascii_isdigit(result[0])) {
187189
return absl::StrCat("_", result);
188190
}
@@ -237,11 +239,17 @@ std::string GetFullExtensionName(const FieldDescriptor* descriptor) {
237239
GetPropertyName(descriptor));
238240
}
239241

240-
// Groups are hacky: The name of the field is just the lower-cased name
241-
// of the group type. In C#, though, we would like to retain the original
242-
// capitalization of the type name.
242+
// Groups in proto2 are hacky: The name of the field is just the lower-cased
243+
// name of the group type. In C#, though, we would like to retain the original
244+
// capitalization of the type name. Fields with an encoding of "delimited" in
245+
// editions are like groups, but have a real name, so we use that. This means
246+
// upgrading a proto from proto2 to editions *can* be a breaking change for C#,
247+
// but it's unlikely to cause significant issues (as C# has primarily been used
248+
// with proto3, and even with proto2 groups, only some group names will cause
249+
// compatibility issues).
243250
std::string GetFieldName(const FieldDescriptor* descriptor) {
244-
if (descriptor->type() == FieldDescriptor::TYPE_GROUP) {
251+
if (descriptor->type() == FieldDescriptor::TYPE_GROUP &&
252+
Generator::GetEdition(*descriptor->file()) == Edition::EDITION_PROTO2) {
245253
return descriptor->message_type()->name();
246254
} else {
247255
return descriptor->name();
@@ -254,37 +262,32 @@ std::string GetFieldConstantName(const FieldDescriptor* field) {
254262

255263
std::string GetPropertyName(const FieldDescriptor* descriptor) {
256264
// Names of members declared or overridden in the message.
257-
static const auto& reserved_member_names = *new absl::flat_hash_set<absl::string_view>({
258-
"Types",
259-
"Descriptor",
260-
"Equals",
261-
"ToString",
262-
"GetHashCode",
263-
"WriteTo",
264-
"Clone",
265-
"CalculateSize",
266-
"MergeFrom",
267-
"OnConstruction",
268-
"Parser"
269-
});
265+
static const auto& reserved_member_names =
266+
*new absl::flat_hash_set<absl::string_view>(
267+
{"Types", "Descriptor", "Equals", "ToString", "GetHashCode",
268+
"WriteTo", "Clone", "CalculateSize", "MergeFrom", "OnConstruction",
269+
"Parser"});
270270

271271
// TODO: consider introducing csharp_property_name field option
272272
std::string property_name = UnderscoresToPascalCase(GetFieldName(descriptor));
273273
// Avoid either our own type name or reserved names.
274-
// There are various ways of ending up with naming collisions, but we try to avoid obvious
275-
// ones. In particular, we avoid the names of all the members we generate.
276-
// Note that we *don't* add an underscore for MemberwiseClone or GetType. Those generate
277-
// warnings, but not errors; changing the name now could be a breaking change.
278-
if (property_name == descriptor->containing_type()->name()
279-
|| reserved_member_names.find(property_name) != reserved_member_names.end()) {
274+
// There are various ways of ending up with naming collisions, but we try to
275+
// avoid obvious ones. In particular, we avoid the names of all the members we
276+
// generate. Note that we *don't* add an underscore for MemberwiseClone or
277+
// GetType. Those generate warnings, but not errors; changing the name now
278+
// could be a breaking change.
279+
if (property_name == descriptor->containing_type()->name() ||
280+
reserved_member_names.find(property_name) !=
281+
reserved_member_names.end()) {
280282
absl::StrAppend(&property_name, "_");
281283
}
282284
return property_name;
283285
}
284286

285287
std::string GetOneofCaseName(const FieldDescriptor* descriptor) {
286-
// The name in a oneof case enum is the same as for the property, but as we always have a "None"
287-
// value as well, we need to reserve that by appending an underscore.
288+
// The name in a oneof case enum is the same as for the property, but as we
289+
// always have a "None" value as well, we need to reserve that by appending an
290+
// underscore.
288291
std::string property_name = GetPropertyName(descriptor);
289292
return property_name == "None" ? "None_" : property_name;
290293
}
@@ -294,29 +297,47 @@ std::string GetOneofCaseName(const FieldDescriptor* descriptor) {
294297
// returns -1.
295298
int GetFixedSize(FieldDescriptor::Type type) {
296299
switch (type) {
297-
case FieldDescriptor::TYPE_INT32 : return -1;
298-
case FieldDescriptor::TYPE_INT64 : return -1;
299-
case FieldDescriptor::TYPE_UINT32 : return -1;
300-
case FieldDescriptor::TYPE_UINT64 : return -1;
301-
case FieldDescriptor::TYPE_SINT32 : return -1;
302-
case FieldDescriptor::TYPE_SINT64 : return -1;
303-
case FieldDescriptor::TYPE_FIXED32 : return internal::WireFormatLite::kFixed32Size;
304-
case FieldDescriptor::TYPE_FIXED64 : return internal::WireFormatLite::kFixed64Size;
305-
case FieldDescriptor::TYPE_SFIXED32: return internal::WireFormatLite::kSFixed32Size;
306-
case FieldDescriptor::TYPE_SFIXED64: return internal::WireFormatLite::kSFixed64Size;
307-
case FieldDescriptor::TYPE_FLOAT : return internal::WireFormatLite::kFloatSize;
308-
case FieldDescriptor::TYPE_DOUBLE : return internal::WireFormatLite::kDoubleSize;
309-
310-
case FieldDescriptor::TYPE_BOOL : return internal::WireFormatLite::kBoolSize;
311-
case FieldDescriptor::TYPE_ENUM : return -1;
312-
313-
case FieldDescriptor::TYPE_STRING : return -1;
314-
case FieldDescriptor::TYPE_BYTES : return -1;
315-
case FieldDescriptor::TYPE_GROUP : return -1;
316-
case FieldDescriptor::TYPE_MESSAGE : return -1;
317-
318-
// No default because we want the compiler to complain if any new
319-
// types are added.
300+
case FieldDescriptor::TYPE_INT32:
301+
return -1;
302+
case FieldDescriptor::TYPE_INT64:
303+
return -1;
304+
case FieldDescriptor::TYPE_UINT32:
305+
return -1;
306+
case FieldDescriptor::TYPE_UINT64:
307+
return -1;
308+
case FieldDescriptor::TYPE_SINT32:
309+
return -1;
310+
case FieldDescriptor::TYPE_SINT64:
311+
return -1;
312+
case FieldDescriptor::TYPE_FIXED32:
313+
return internal::WireFormatLite::kFixed32Size;
314+
case FieldDescriptor::TYPE_FIXED64:
315+
return internal::WireFormatLite::kFixed64Size;
316+
case FieldDescriptor::TYPE_SFIXED32:
317+
return internal::WireFormatLite::kSFixed32Size;
318+
case FieldDescriptor::TYPE_SFIXED64:
319+
return internal::WireFormatLite::kSFixed64Size;
320+
case FieldDescriptor::TYPE_FLOAT:
321+
return internal::WireFormatLite::kFloatSize;
322+
case FieldDescriptor::TYPE_DOUBLE:
323+
return internal::WireFormatLite::kDoubleSize;
324+
325+
case FieldDescriptor::TYPE_BOOL:
326+
return internal::WireFormatLite::kBoolSize;
327+
case FieldDescriptor::TYPE_ENUM:
328+
return -1;
329+
330+
case FieldDescriptor::TYPE_STRING:
331+
return -1;
332+
case FieldDescriptor::TYPE_BYTES:
333+
return -1;
334+
case FieldDescriptor::TYPE_GROUP:
335+
return -1;
336+
case FieldDescriptor::TYPE_MESSAGE:
337+
return -1;
338+
339+
// No default because we want the compiler to complain if any new
340+
// types are added.
320341
}
321342
ABSL_LOG(FATAL) << "Can't get here.";
322343
return -1;
@@ -373,41 +394,51 @@ FieldGeneratorBase* CreateFieldGenerator(const FieldDescriptor* descriptor,
373394
if (descriptor->is_map()) {
374395
return new MapFieldGenerator(descriptor, presenceIndex, options);
375396
} else {
376-
return new RepeatedMessageFieldGenerator(descriptor, presenceIndex, options);
397+
return new RepeatedMessageFieldGenerator(descriptor, presenceIndex,
398+
options);
377399
}
378400
} else {
379401
if (IsWrapperType(descriptor)) {
380402
if (descriptor->real_containing_oneof()) {
381-
return new WrapperOneofFieldGenerator(descriptor, presenceIndex, options);
403+
return new WrapperOneofFieldGenerator(descriptor, presenceIndex,
404+
options);
382405
} else {
383-
return new WrapperFieldGenerator(descriptor, presenceIndex, options);
406+
return new WrapperFieldGenerator(descriptor, presenceIndex,
407+
options);
384408
}
385409
} else {
386410
if (descriptor->real_containing_oneof()) {
387-
return new MessageOneofFieldGenerator(descriptor, presenceIndex, options);
411+
return new MessageOneofFieldGenerator(descriptor, presenceIndex,
412+
options);
388413
} else {
389-
return new MessageFieldGenerator(descriptor, presenceIndex, options);
414+
return new MessageFieldGenerator(descriptor, presenceIndex,
415+
options);
390416
}
391417
}
392418
}
393419
case FieldDescriptor::TYPE_ENUM:
394420
if (descriptor->is_repeated()) {
395-
return new RepeatedEnumFieldGenerator(descriptor, presenceIndex, options);
421+
return new RepeatedEnumFieldGenerator(descriptor, presenceIndex,
422+
options);
396423
} else {
397424
if (descriptor->real_containing_oneof()) {
398-
return new EnumOneofFieldGenerator(descriptor, presenceIndex, options);
425+
return new EnumOneofFieldGenerator(descriptor, presenceIndex,
426+
options);
399427
} else {
400428
return new EnumFieldGenerator(descriptor, presenceIndex, options);
401429
}
402430
}
403431
default:
404432
if (descriptor->is_repeated()) {
405-
return new RepeatedPrimitiveFieldGenerator(descriptor, presenceIndex, options);
433+
return new RepeatedPrimitiveFieldGenerator(descriptor, presenceIndex,
434+
options);
406435
} else {
407436
if (descriptor->real_containing_oneof()) {
408-
return new PrimitiveOneofFieldGenerator(descriptor, presenceIndex, options);
437+
return new PrimitiveOneofFieldGenerator(descriptor, presenceIndex,
438+
options);
409439
} else {
410-
return new PrimitiveFieldGenerator(descriptor, presenceIndex, options);
440+
return new PrimitiveFieldGenerator(descriptor, presenceIndex,
441+
options);
411442
}
412443
}
413444
}

0 commit comments

Comments
 (0)