Skip to content

Commit 41572f4

Browse files
committed
Require domains of emails to have dot by default (multiple parts)
Can be disabled with an option, but it feels like the better default.
1 parent 3d50956 commit 41572f4

File tree

4 files changed

+70
-31
lines changed

4 files changed

+70
-31
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,8 @@ Not supported:
9292
* Quoted local parts, e.g. `"this is sparta"@example.com`
9393
* Address literals, e.g. `foo@[127.0.0.1]`
9494

95-
Note that the domain part can be a single top-level domain (e.g.
96-
`foo@com`). If this is not wanted, filter the resulting links.
95+
Note that the domain must have at least one dot (e.g. `foo@com` isn't
96+
matched), unless the `emailDomainMustHaveDot` option is disabled.
9797

9898
Also see [test cases](src/test/java/org/nibor/autolink/AutolinkEmailTest.java).
9999

src/main/java/org/nibor/autolink/LinkExtractor.java

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,12 @@
1313
*/
1414
public class LinkExtractor {
1515

16-
private static Scanner URL_SCANNER = new UrlScanner();
17-
private static Scanner EMAIL_SCANNER = new EmailScanner();
16+
private final Scanner urlScanner;
17+
private final Scanner emailScanner;
1818

19-
private final Set<LinkType> linkTypes;
20-
21-
private LinkExtractor(Set<LinkType> linkTypes) {
22-
this.linkTypes = linkTypes;
19+
private LinkExtractor(UrlScanner urlScanner, EmailScanner emailScanner) {
20+
this.urlScanner = urlScanner;
21+
this.emailScanner = emailScanner;
2322
}
2423

2524
public static Builder builder() {
@@ -44,14 +43,9 @@ public Iterator<LinkSpan> iterator() {
4443
private Scanner trigger(char c) {
4544
switch (c) {
4645
case ':':
47-
if (linkTypes.contains(LinkType.URL)) {
48-
return URL_SCANNER;
49-
}
50-
break;
46+
return urlScanner;
5147
case '@':
52-
if (linkTypes.contains(LinkType.EMAIL)) {
53-
return EMAIL_SCANNER;
54-
}
48+
return emailScanner;
5549
}
5650
return null;
5751
}
@@ -62,6 +56,7 @@ private Scanner trigger(char c) {
6256
public static class Builder {
6357

6458
private Set<LinkType> linkTypes = EnumSet.allOf(LinkType.class);
59+
private boolean emailDomainMustHaveDot = true;
6560

6661
private Builder() {
6762
}
@@ -75,11 +70,23 @@ public Builder linkTypes(Set<LinkType> linkTypes) {
7570
return this;
7671
}
7772

73+
/**
74+
* @param emailDomainMustHaveDot true if the domain in an email address is required to have more than one part,
75+
* false if it can also just have a single part (e.g. foo@com); true by default
76+
* @return this builder
77+
*/
78+
public Builder emailDomainMustHaveDot(boolean emailDomainMustHaveDot) {
79+
this.emailDomainMustHaveDot = emailDomainMustHaveDot;
80+
return this;
81+
}
82+
7883
/**
7984
* @return the configured link extractor
8085
*/
8186
public LinkExtractor build() {
82-
return new LinkExtractor(linkTypes);
87+
UrlScanner urlScanner = linkTypes.contains(LinkType.URL) ? new UrlScanner() : null;
88+
EmailScanner emailScanner = linkTypes.contains(LinkType.EMAIL) ? new EmailScanner(emailDomainMustHaveDot) : null;
89+
return new LinkExtractor(urlScanner, emailScanner);
8390
}
8491
}
8592

src/main/java/org/nibor/autolink/internal/EmailScanner.java

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,12 @@
1010
*/
1111
public class EmailScanner implements Scanner {
1212

13+
private final boolean domainMustHaveDot;
14+
15+
public EmailScanner(boolean domainMustHaveDot) {
16+
this.domainMustHaveDot = domainMustHaveDot;
17+
}
18+
1319
@Override
1420
public LinkSpan scan(CharSequence input, int triggerIndex, int rewindIndex) {
1521
int beforeAt = triggerIndex - 1;
@@ -50,15 +56,16 @@ private int findFirst(CharSequence input, int beginIndex, int rewindIndex) {
5056

5157
// See "Domain" in RFC 5321, plus extension of "sub-domain" in RFC 6531
5258
private int findLast(CharSequence input, int beginIndex) {
53-
boolean firstSubDomain = true;
59+
boolean firstInSubDomain = true;
5460
boolean canEndSubDomain = false;
61+
int firstDot = -1;
5562
int last = -1;
5663
for (int i = beginIndex; i < input.length(); i++) {
5764
char c = input.charAt(i);
58-
if (firstSubDomain) {
65+
if (firstInSubDomain) {
5966
if (subDomainAllowed(c)) {
6067
last = i;
61-
firstSubDomain = false;
68+
firstInSubDomain = false;
6269
canEndSubDomain = true;
6370
} else {
6471
break;
@@ -68,7 +75,10 @@ private int findLast(CharSequence input, int beginIndex) {
6875
if (!canEndSubDomain) {
6976
break;
7077
}
71-
firstSubDomain = true;
78+
firstInSubDomain = true;
79+
if (firstDot == -1) {
80+
firstDot = i;
81+
}
7282
} else if (c == '-') {
7383
canEndSubDomain = false;
7484
} else if (subDomainAllowed(c)) {
@@ -79,7 +89,11 @@ private int findLast(CharSequence input, int beginIndex) {
7989
}
8090
}
8191
}
82-
return last;
92+
if (domainMustHaveDot && (firstDot == -1 || firstDot > last)) {
93+
return -1;
94+
} else {
95+
return last;
96+
}
8397
}
8498

8599
// See "Atom" in RFC 5321, "atext" in RFC 5322

src/test/java/org/nibor/autolink/AutolinkEmailTest.java

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,22 @@
1212
@RunWith(Parameterized.class)
1313
public class AutolinkEmailTest extends AutolinkTestCase {
1414

15-
@Parameters(name = "{1}")
15+
@Parameters(name = "{2}")
1616
public static Iterable<Object[]> data() {
1717
return Arrays.asList(new Object[][]{
18-
{LinkExtractor.builder().linkTypes(EnumSet.of(LinkType.EMAIL)).build(), "email"},
19-
{LinkExtractor.builder().build(), "all"}
18+
{LinkExtractor.builder().linkTypes(EnumSet.of(LinkType.EMAIL)).build(), true, "email"},
19+
{LinkExtractor.builder().build(), true, "all"},
20+
{LinkExtractor.builder().emailDomainMustHaveDot(false).build(), false, "all, single part domain"}
2021
});
2122
}
2223

2324
@Parameter(0)
2425
public LinkExtractor linkExtractor;
2526

2627
@Parameter(1)
28+
public boolean domainMustHaveDot;
29+
30+
@Parameter(2)
2731
public String description;
2832

2933
@Test
@@ -38,7 +42,6 @@ public void notLinked() {
3842

3943
@Test
4044
public void simple() {
41-
assertLinked("a@b", "|a@b|");
4245
assertLinked("[email protected]", "|[email protected]|");
4346
assertLinked("[email protected]", "|[email protected]|");
4447
}
@@ -51,10 +54,10 @@ public void allowedText() {
5154

5255
@Test
5356
public void spaceSeparation() {
54-
assertLinked("foo a@b", "foo |a@b|");
55-
assertLinked("a@b foo", "|a@b| foo");
56-
assertLinked("\na@b", "\n|a@b|");
57-
assertLinked("a@b\n", "|a@b|\n");
57+
assertLinked("foo a@b.com", "foo |a@b.com|");
58+
assertLinked("a@b.com foo", "|a@b.com| foo");
59+
assertLinked("\na@b.com", "\n|a@b.com|");
60+
assertLinked("a@b.com\n", "|a@b.com|\n");
5861
}
5962

6063
@Test
@@ -75,15 +78,30 @@ public void dots() {
7578
assertLinked("[email protected]", ".|[email protected]|");
7679
assertLinked("[email protected]", "a..|[email protected]|");
7780
assertLinked("[email protected].", "|[email protected]|.");
78-
assertLinked("a@b..com", "|a@b|..com");
81+
}
82+
83+
@Test
84+
public void domainWithoutDot() {
85+
if (domainMustHaveDot) {
86+
assertNotLinked("a@b");
87+
assertNotLinked("a@b.");
88+
assertLinked("[email protected].", "|[email protected]|.");
89+
} else {
90+
assertLinked("a@b", "|a@b|");
91+
assertLinked("a@b.", "|a@b|.");
92+
}
7993
}
8094

8195
@Test
8296
public void dashes() {
8397
assertLinked("[email protected]", "|[email protected]|-");
8498
assertLinked("[email protected]", "|[email protected]|");
85-
assertLinked("a@b-.", "|a@b|-.");
8699
assertNotLinked("[email protected]");
100+
if (domainMustHaveDot) {
101+
assertNotLinked("a@b-.");
102+
} else {
103+
assertLinked("a@b-.", "|a@b|-.");
104+
}
87105
}
88106

89107
@Test

0 commit comments

Comments
 (0)