Skip to content

Commit 95e0dc6

Browse files
committed
Provide suggestions for invalid shape ID targets
When an invalid shape ID target is encountered, a "Did you mean X?" list of shape IDs are suggested if any shape IDs in a model have a Levenshtein edit distance of less than 2 characters. The shape IDs with the lowest distance from the invalid shape ID are suggested. Closes #464. This change also updates the error message for "shape has a/an `X` relationship..." so that is uses the appropriate indefinte articale based on if the relation type starts with a vowel.
1 parent 1bf0cea commit 95e0dc6

File tree

5 files changed

+285
-9
lines changed

5 files changed

+285
-9
lines changed

smithy-model/src/main/java/software/amazon/smithy/model/validation/validators/TargetValidator.java

Lines changed: 52 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@
1717

1818
import static java.lang.String.format;
1919

20+
import java.util.Collection;
2021
import java.util.List;
2122
import java.util.Locale;
2223
import java.util.Optional;
2324
import java.util.Set;
25+
import java.util.TreeSet;
2426
import java.util.stream.Collectors;
2527
import java.util.stream.Stream;
2628
import software.amazon.smithy.model.Model;
@@ -38,12 +40,14 @@
3840
import software.amazon.smithy.utils.FunctionalUtils;
3941
import software.amazon.smithy.utils.OptionalUtils;
4042
import software.amazon.smithy.utils.SetUtils;
43+
import software.amazon.smithy.utils.StringUtils;
4144

4245
/**
4346
* Validates that neighbors target resolvable shapes of the correct type.
4447
*/
4548
public final class TargetValidator extends AbstractValidator {
4649

50+
private static final int MAX_EDIT_DISTANCE_FOR_SUGGESTIONS = 2;
4751
private static final Set<ShapeType> INVALID_MEMBER_TARGETS = SetUtils.of(
4852
ShapeType.SERVICE, ShapeType.RESOURCE, ShapeType.OPERATION, ShapeType.MEMBER);
4953

@@ -61,7 +65,7 @@ private Stream<ValidationEvent> validateShape(Model model, Shape shape, List<Rel
6165
return OptionalUtils.stream(
6266
validateTarget(model, shape, relationship.getNeighborShape().get(), relationship));
6367
} else {
64-
return Stream.of(unresolvedTarget(shape, relationship));
68+
return Stream.of(unresolvedTarget(model, shape, relationship));
6569
}
6670
});
6771
}
@@ -148,17 +152,60 @@ private Optional<ValidationEvent> validateIdentifier(Shape shape, Shape target)
148152
}
149153
}
150154

151-
private ValidationEvent unresolvedTarget(Shape shape, Relationship rel) {
155+
private ValidationEvent unresolvedTarget(Model model, Shape shape, Relationship rel) {
156+
// Detect if there are shape IDs within 2 characters edit distance from the
157+
// invalid shape ID target. An edit distance > 2 seems to give far more false
158+
// positives than are useful.
159+
Collection<String> suggestions = computeTargetSuggestions(model, rel.getNeighborShapeId());
160+
String suggestionText = !suggestions.isEmpty()
161+
? ". Did you mean " + String.join(", ", suggestions) + "?"
162+
: "";
163+
152164
if (rel.getRelationshipType() == RelationshipType.MEMBER_TARGET) {
165+
// Don't show the relationship type for invalid member targets.
153166
return error(shape, String.format(
154-
"member shape targets an unresolved shape `%s`", rel.getNeighborShapeId()));
167+
"member shape targets an unresolved shape `%s`%s", rel.getNeighborShapeId(), suggestionText));
155168
} else {
169+
// Use "a" or "an" depending on if the relationship starts with a vowel.
170+
String indefiniteArticle = isUppercaseVowel(rel.getRelationshipType().toString().charAt(0))
171+
? "an"
172+
: "a";
156173
return error(shape, String.format(
157-
"%s shape has a `%s` relationship to an unresolved shape `%s`",
174+
"%s shape has %s `%s` relationship to an unresolved shape `%s`%s",
158175
shape.getType(),
176+
indefiniteArticle,
159177
rel.getRelationshipType().toString().toLowerCase(Locale.US),
160-
rel.getNeighborShapeId()));
178+
rel.getNeighborShapeId(),
179+
suggestionText));
180+
}
181+
}
182+
183+
private Collection<String> computeTargetSuggestions(Model model, ShapeId target) {
184+
String targetString = target.toString();
185+
int floor = Integer.MAX_VALUE;
186+
// Sort the result to create deterministic error messages.
187+
Set<String> candidates = new TreeSet<>();
188+
189+
for (Shape shape : model.toSet()) {
190+
String idString = shape.getId().toString();
191+
int distance = StringUtils.levenshteinDistance(targetString, idString, MAX_EDIT_DISTANCE_FOR_SUGGESTIONS);
192+
if (distance == floor) {
193+
// Add to the list of candidates that have the same distance.
194+
candidates.add(idString);
195+
} else if (distance > -1 && distance < floor) {
196+
// Found a new ID that has a lower distance. Set the new floor,
197+
// clear out the previous candidates, and add this ID.
198+
floor = distance;
199+
candidates.clear();
200+
candidates.add(idString);
201+
}
161202
}
203+
204+
return candidates;
205+
}
206+
207+
private static boolean isUppercaseVowel(char c) {
208+
return c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U';
162209
}
163210

164211
private ValidationEvent badType(Shape shape, Shape target, RelationshipType rel, ShapeType valid) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
[ERROR] ns.foo#InvalidList1$member: member shape targets an unresolved shape `ns.foo#d`. Did you mean ns.foo#a, ns.foo#b, ns.foo#c? | Target
2+
[ERROR] ns.foo#InvalidList2$member: member shape targets an unresolved shape `ns.foo#dd`. Did you mean ns.foo#a, ns.foo#ab, ns.foo#b, ns.foo#c? | Target
3+
[ERROR] ns.foo#InvalidList3$member: member shape targets an unresolved shape `ns.foo#acb`. Did you mean ns.foo#ab? | Target
4+
[ERROR] ns.foo#Operation: operation shape has an `input` relationship to an unresolved shape `ns.foo#abcd`. Did you mean ns.foo#abc? | Target
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
{
2+
"smithy": "1.0",
3+
"metadata": {
4+
"suppressions": [
5+
{"id": "UnreferencedShape", "namespace": "ns.foo"}
6+
]
7+
},
8+
"shapes": {
9+
"ns.foo#a": {
10+
"type": "string"
11+
},
12+
"ns.foo#b": {
13+
"type": "string"
14+
},
15+
"ns.foo#c": {
16+
"type": "string"
17+
},
18+
"ns.foo#ab": {
19+
"type": "string"
20+
},
21+
"ns.foo#abc": {
22+
"type": "string"
23+
},
24+
"ns.foo#InvalidList1": {
25+
"type": "list",
26+
"member": {
27+
"target": "ns.foo#d"
28+
}
29+
},
30+
"ns.foo#InvalidList2": {
31+
"type": "list",
32+
"member": {
33+
"target": "ns.foo#dd"
34+
}
35+
},
36+
"ns.foo#InvalidList3": {
37+
"type": "list",
38+
"member": {
39+
"target": "ns.foo#acb"
40+
}
41+
},
42+
"ns.foo#Operation": {
43+
"type": "operation",
44+
"input": {
45+
"target": "ns.foo#abcd"
46+
}
47+
}
48+
}
49+
}

smithy-model/src/test/resources/software/amazon/smithy/model/errorfiles/validators/target-validator.errors

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
[ERROR] ns.foo#InalidOperationInputOutputErrorBadTypes: operation shape `error` relationships must target a structure shape, but found (integer: `ns.foo#Integer`) | Target
22
[ERROR] ns.foo#InalidOperationInputOutputErrorBadTypes: operation shape `input` relationships must target a structure shape, but found (integer: `ns.foo#Integer`) | Target
33
[ERROR] ns.foo#InalidOperationInputOutputErrorBadTypes: operation shape `output` relationships must target a structure shape, but found (integer: `ns.foo#Integer`) | Target
4-
[ERROR] ns.foo#InalidOperationInputOutputErrorNotFound: operation shape has a `error` relationship to an unresolved shape `ns.foo#NotFound` | Target
5-
[ERROR] ns.foo#InalidOperationInputOutputErrorNotFound: operation shape has a `input` relationship to an unresolved shape `ns.foo#NotFound` | Target
6-
[ERROR] ns.foo#InalidOperationInputOutputErrorNotFound: operation shape has a `output` relationship to an unresolved shape `ns.foo#NotFound` | Target
4+
[ERROR] ns.foo#InalidOperationInputOutputErrorNotFound: operation shape has an `error` relationship to an unresolved shape `ns.foo#NotFound` | Target
5+
[ERROR] ns.foo#InalidOperationInputOutputErrorNotFound: operation shape has an `input` relationship to an unresolved shape `ns.foo#NotFound` | Target
6+
[ERROR] ns.foo#InalidOperationInputOutputErrorNotFound: operation shape has an `output` relationship to an unresolved shape `ns.foo#NotFound` | Target
77
[ERROR] ns.foo#InvalidListMemberMember$member: Members cannot target member shapes, but found (member: `ns.foo#ValidInput$integer`) | Target
88
[ERROR] ns.foo#InvalidListMemberReference$member: member shape targets an unresolved shape `ns.foo#NotFound` | Target
99
[ERROR] ns.foo#InvalidListMemberResource$member: Members cannot target resource shapes, but found (resource: `ns.foo#MyResource`) | Target
@@ -14,7 +14,7 @@
1414
[ERROR] ns.foo#InvalidOperationBadErrorTraits: Operation output targets an invalid structure `ns.foo#ValidError` that is marked with the `error` trait. | Target
1515
[ERROR] ns.foo#InvalidResourceBindingReference: resource shape has a `resource` relationship to an unresolved shape `ns.foo#NotFound` | Target
1616
[ERROR] ns.foo#InvalidResourceBindingType: resource shape `resource` relationships must target a resource shape, but found (integer: `ns.foo#Integer`) | Target
17-
[ERROR] ns.foo#InvalidResourceIdentifierReference: resource shape has a `identifier` relationship to an unresolved shape `ns.foo#NotFound` | Target
17+
[ERROR] ns.foo#InvalidResourceIdentifierReference: resource shape has an `identifier` relationship to an unresolved shape `ns.foo#NotFound` | Target
1818
[ERROR] ns.foo#InvalidResourceIdentifierType: resource shape `identifier` relationships must target a string shape, but found (integer: `ns.foo#Integer`) | Target
1919
[ERROR] ns.foo#InvalidResourceLifecycle: Resource create lifecycle operation must target an operation, but found (integer: `ns.foo#Integer`) | Target
2020
[ERROR] ns.foo#InvalidResourceLifecycle: Resource delete lifecycle operation must target an operation, but found (integer: `ns.foo#Integer`) | Target

smithy-utils/src/main/java/software/amazon/smithy/utils/StringUtils.java

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
package software.amazon.smithy.utils;
1717

18+
import java.util.Arrays;
1819
import java.util.Locale;
1920
import java.util.regex.Matcher;
2021
import java.util.regex.Pattern;
@@ -1429,4 +1430,179 @@ public static String escapeJavaString(Object object, String indent) {
14291430
result.append('"');
14301431
return result.toString();
14311432
}
1433+
1434+
/**
1435+
* Find the Levenshtein distance between two CharSequences if it's less than or
1436+
* equal to a given threshold.
1437+
*
1438+
* <p>This code is a copy of the {@code limitedCompare} method from
1439+
* Apache commons-text LevenshteinDistance.java.
1440+
*
1441+
* <p>
1442+
* This implementation follows from Algorithms on Strings, Trees and
1443+
* Sequences by Dan Gusfield and Chas Emerick's implementation of the
1444+
* Levenshtein distance algorithm from <a
1445+
* href="http://www.merriampark.com/ld.htm"
1446+
* >http://www.merriampark.com/ld.htm</a>
1447+
* </p>
1448+
*
1449+
* <pre>
1450+
* limitedCompare(null, *, *) = IllegalArgumentException
1451+
* limitedCompare(*, null, *) = IllegalArgumentException
1452+
* limitedCompare(*, *, -1) = IllegalArgumentException
1453+
* limitedCompare("","", 0) = 0
1454+
* limitedCompare("aaapppp", "", 8) = 7
1455+
* limitedCompare("aaapppp", "", 7) = 7
1456+
* limitedCompare("aaapppp", "", 6)) = -1
1457+
* limitedCompare("elephant", "hippo", 7) = 7
1458+
* limitedCompare("elephant", "hippo", 6) = -1
1459+
* limitedCompare("hippo", "elephant", 7) = 7
1460+
* limitedCompare("hippo", "elephant", 6) = -1
1461+
* </pre>
1462+
*
1463+
* @param left the first CharSequence, must not be null
1464+
* @param right the second CharSequence, must not be null
1465+
* @param threshold the target threshold, must not be negative
1466+
* @return result distance, or -1
1467+
* @see <a href="https://github.com/apache/commons-text/blob/master/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java">LevenshteinDistance.java</a>
1468+
*/
1469+
public static int levenshteinDistance(CharSequence left, CharSequence right, final int threshold) {
1470+
if (left == null || right == null) {
1471+
throw new IllegalArgumentException("CharSequences must not be null");
1472+
}
1473+
if (threshold < 0) {
1474+
throw new IllegalArgumentException("Threshold must not be negative");
1475+
}
1476+
1477+
/*
1478+
* This implementation only computes the distance if it's less than or
1479+
* equal to the threshold value, returning -1 if it's greater. The
1480+
* advantage is performance: unbounded distance is O(nm), but a bound of
1481+
* k allows us to reduce it to O(km) time by only computing a diagonal
1482+
* stripe of width 2k + 1 of the cost table. It is also possible to use
1483+
* this to compute the unbounded Levenshtein distance by starting the
1484+
* threshold at 1 and doubling each time until the distance is found;
1485+
* this is O(dm), where d is the distance.
1486+
*
1487+
* One subtlety comes from needing to ignore entries on the border of
1488+
* our stripe eg. p[] = |#|#|#|* d[] = *|#|#|#| We must ignore the entry
1489+
* to the left of the leftmost member We must ignore the entry above the
1490+
* rightmost member
1491+
*
1492+
* Another subtlety comes from our stripe running off the matrix if the
1493+
* strings aren't of the same size. Since string s is always swapped to
1494+
* be the shorter of the two, the stripe will always run off to the
1495+
* upper right instead of the lower left of the matrix.
1496+
*
1497+
* As a concrete example, suppose s is of length 5, t is of length 7,
1498+
* and our threshold is 1. In this case we're going to walk a stripe of
1499+
* length 3. The matrix would look like so:
1500+
*
1501+
* <pre>
1502+
* 1 2 3 4 5
1503+
* 1 |#|#| | | |
1504+
* 2 |#|#|#| | |
1505+
* 3 | |#|#|#| |
1506+
* 4 | | |#|#|#|
1507+
* 5 | | | |#|#|
1508+
* 6 | | | | |#|
1509+
* 7 | | | | | |
1510+
* </pre>
1511+
*
1512+
* Note how the stripe leads off the table as there is no possible way
1513+
* to turn a string of length 5 into one of length 7 in edit distance of
1514+
* 1.
1515+
*
1516+
* Additionally, this implementation decreases memory usage by using two
1517+
* single-dimensional arrays and swapping them back and forth instead of
1518+
* allocating an entire n by m matrix. This requires a few minor
1519+
* changes, such as immediately returning when it's detected that the
1520+
* stripe has run off the matrix and initially filling the arrays with
1521+
* large values so that entries we don't compute are ignored.
1522+
*
1523+
* See Algorithms on Strings, Trees and Sequences by Dan Gusfield for
1524+
* some discussion.
1525+
*/
1526+
1527+
int n = left.length(); // length of left
1528+
int m = right.length(); // length of right
1529+
1530+
// if one string is empty, the edit distance is necessarily the length
1531+
// of the other
1532+
if (n == 0) {
1533+
return m <= threshold ? m : -1;
1534+
} else if (m == 0) {
1535+
return n <= threshold ? n : -1;
1536+
}
1537+
1538+
if (n > m) {
1539+
// swap the two strings to consume less memory
1540+
final CharSequence tmp = left;
1541+
left = right;
1542+
right = tmp;
1543+
n = m;
1544+
m = right.length();
1545+
}
1546+
1547+
// the edit distance cannot be less than the length difference
1548+
if (m - n > threshold) {
1549+
return -1;
1550+
}
1551+
1552+
int[] p = new int[n + 1]; // 'previous' cost array, horizontally
1553+
int[] d = new int[n + 1]; // cost array, horizontally
1554+
int[] tempD; // placeholder to assist in swapping p and d
1555+
1556+
// fill in starting table values
1557+
final int boundary = Math.min(n, threshold) + 1;
1558+
for (int i = 0; i < boundary; i++) {
1559+
p[i] = i;
1560+
}
1561+
// these fills ensure that the value above the rightmost entry of our
1562+
// stripe will be ignored in following loop iterations
1563+
Arrays.fill(p, boundary, p.length, Integer.MAX_VALUE);
1564+
Arrays.fill(d, Integer.MAX_VALUE);
1565+
1566+
// iterates through t
1567+
for (int j = 1; j <= m; j++) {
1568+
final char rightJ = right.charAt(j - 1); // jth character of right
1569+
d[0] = j;
1570+
1571+
// compute stripe indices, constrain to array size
1572+
final int min = Math.max(1, j - threshold);
1573+
final int max = j > Integer.MAX_VALUE - threshold ? n : Math.min(
1574+
n, j + threshold);
1575+
1576+
// ignore entry left of leftmost
1577+
if (min > 1) {
1578+
d[min - 1] = Integer.MAX_VALUE;
1579+
}
1580+
1581+
// iterates through [min, max] in s
1582+
for (int i = min; i <= max; i++) {
1583+
if (left.charAt(i - 1) == rightJ) {
1584+
// diagonally left and up
1585+
d[i] = p[i - 1];
1586+
} else {
1587+
// 1 + minimum of cell to the left, to the top, diagonally
1588+
// left and up
1589+
d[i] = 1 + Math.min(Math.min(d[i - 1], p[i]), p[i - 1]);
1590+
}
1591+
}
1592+
1593+
// copy current distance counts to 'previous row' distance counts
1594+
tempD = p;
1595+
p = d;
1596+
d = tempD;
1597+
}
1598+
1599+
// if p[n] is greater than the threshold, there's no guarantee on it
1600+
// being the correct
1601+
// distance
1602+
if (p[n] <= threshold) {
1603+
return p[n];
1604+
}
1605+
1606+
return -1;
1607+
}
14321608
}

0 commit comments

Comments
 (0)