Skip to content

Commit 54fc294

Browse files
Merge pull request #71 from apex-dev-tools/case-insensitive-lexer
Enable `caseInsensitive` lexer option
2 parents 2464c34 + 7301d8d commit 54fc294

File tree

13 files changed

+79
-49
lines changed

13 files changed

+79
-49
lines changed

CHANGELOG.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66

77
- **(BREAKING)** Updated to ANTLR runtime `4.13.2`, using the ANTLR tool to generate both target languages.
88

9+
- Enabled `caseInsensitive` option for lexers (added in ANTLR 4.10).
10+
- As a result, `CaseInsensitiveInputStream` is deprecated and should no longer be required.
11+
912
- Added `ApexParserFactory` class to create parsers, token streams, and lexers.
1013
- Primarily for TS to avoid directly creating `antlr4` class instances.
1114
- In Java, it still requires passing a `CharStream` or `CommonTokenStream` to create parsers.
@@ -39,7 +42,7 @@
3942

4043
- **(BREAKING)** Updated output to `ES2020` and increased min node version to 16.
4144

42-
- `CaseInsensitiveInputStream` type now extends `CharStream` and can be constructed from `string`.
45+
- `CaseInsensitiveInputStream` (deprecated) type now extends `CharStream` and can be constructed from `string`.
4346
- Constructor passing in `CharStream` retained to match Java version.
4447

4548
- Removed `node-dir` dependency - replaced with node fs api.

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
Parser for Salesforce Apex (including Triggers & inline SOQL/SOSL). This is based on an [ANTLR4](https://www.antlr.org/) grammar, see [`antlr/BaseApexParser.g4`](./antlr/BaseApexParser.g4). Currently packaged for Java and JavaScript/TypeScript targets.
44

5-
With the ANTLR4 generated types, a `CaseInsensitiveInputStream` is included (and required) for the lexer. Type aliases and abstractions like `ApexParserFactory` and `ApexErrorListener` are also available for quick start. There are minimal examples in the test classes.
5+
The packages include ANTLR4 generated types plus optional extras for convenience. The TypeScript package exports type aliases for ANTLR types, while both packages have abstractions like `ApexParserFactory` and `ApexErrorListener`. There are minimal examples below and in the test classes.
66

77
## Installation
88

antlr/BaseApexLexer.g4

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -299,16 +299,16 @@ IntegerLiteral
299299
;
300300

301301
LongLiteral
302-
: Digit Digit* [lL]
302+
: Digit Digit* [l]
303303
;
304304

305305
NumberLiteral
306-
: Digit* '.' Digit Digit* [dD]?
306+
: Digit* '.' Digit Digit* [d]?
307307
;
308308

309309
fragment
310310
HexCharacter
311-
: [0-9a-fA-F]
311+
: [0-9a-f]
312312
;
313313

314314
fragment
@@ -424,7 +424,7 @@ Identifier
424424
// so we can give better error messages
425425
fragment
426426
JavaLetter
427-
: [a-zA-Z$_] // these are the "java letters" below 0xFF
427+
: [a-z$_] // these are the "java letters" below 0xFF
428428
| // covers all characters above 0xFF which are not a surrogate
429429
~[\u0000-\u00FF\uD800-\uDBFF]
430430
| // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
@@ -433,7 +433,7 @@ JavaLetter
433433

434434
fragment
435435
JavaLetterOrDigit
436-
: [a-zA-Z0-9$_] // these are the "java letters or digits" below 0xFF
436+
: [a-z0-9$_] // these are the "java letters or digits" below 0xFF
437437
| // covers all characters above 0xFF which are not a surrogate
438438
~[\u0000-\u00FF\uD800-\uDBFF]
439439
| // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF

jvm/antlr/ApexLexer.g4

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
lexer grammar ApexLexer;
2+
options { caseInsensitive = true; }
23

34
@lexer::members {
4-
public void clearCache() {_interp.clearDFA();}
5+
public void clearCache() { _interp.clearDFA(); }
56
}
67

78
import BaseApexLexer;

jvm/src/main/java/io/github/apexdevtools/apexparser/ApexParserFactory.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,10 @@ public static CommonTokenStream createTokenStream(CharStream stream) {
5151
}
5252

5353
public static ApexLexer createLexer(CharStream stream) {
54-
return new ApexLexer(new CaseInsensitiveInputStream(stream));
54+
ApexLexer lexer = new ApexLexer(stream);
55+
56+
// always remove default console listener
57+
lexer.removeErrorListeners();
58+
return lexer;
5559
}
5660
}

jvm/src/main/java/io/github/apexdevtools/apexparser/CaseInsensitiveInputStream.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,12 @@ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
3737

3838
/**
3939
* ANTLR4 stream handler that allows use of case insensitive handling.
40+
*
41+
* @deprecated since 5.0.0. ApexLexer is now generated with the ANTLR 4.10
42+
* `caseInsensitive` option enabled.
4043
*/
41-
@SuppressWarnings({ "unused" })
44+
@SuppressWarnings("all")
45+
@Deprecated
4246
public class CaseInsensitiveInputStream implements CharStream {
4347

4448
private final CharStream src;

jvm/src/test/java/io/github/apexdevtools/apexparser/ApexLexerTest.java

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,7 @@ public class ApexLexerTest {
2525
@Test
2626
void testLexerGeneratesTokens() {
2727
Map.Entry<ApexLexer, SyntaxErrorCounter> lexerAndCounter = createLexer(
28-
"public class Hello {}",
29-
false
28+
"public class Hello {}"
3029
);
3130
CommonTokenStream tokens = new CommonTokenStream(lexerAndCounter.getKey());
3231
assertEquals(6, tokens.getNumberOfOnChannelTokens());
@@ -36,8 +35,7 @@ void testLexerGeneratesTokens() {
3635
@Test
3736
void testCaseInsensitivityLowerCase() {
3837
Map.Entry<ApexLexer, SyntaxErrorCounter> lexerAndCounter = createLexer(
39-
"public",
40-
true
38+
"public"
4139
);
4240
CommonTokenStream tokens = new CommonTokenStream(lexerAndCounter.getKey());
4341
assertEquals(2, tokens.getNumberOfOnChannelTokens());
@@ -47,8 +45,7 @@ void testCaseInsensitivityLowerCase() {
4745
@Test
4846
void testCaseInsensitivityUpperCase() {
4947
Map.Entry<ApexLexer, SyntaxErrorCounter> lexerAndCounter = createLexer(
50-
"PUBLIC",
51-
true
48+
"PUBLIC"
5249
);
5350
CommonTokenStream tokens = new CommonTokenStream(lexerAndCounter.getKey());
5451
assertEquals(2, tokens.getNumberOfOnChannelTokens());
@@ -58,19 +55,33 @@ void testCaseInsensitivityUpperCase() {
5855
@Test
5956
void testCaseInsensitivityMixedCase() {
6057
Map.Entry<ApexLexer, SyntaxErrorCounter> lexerAndCounter = createLexer(
61-
"PuBliC",
62-
true
58+
"PuBliC"
6359
);
6460
CommonTokenStream tokens = new CommonTokenStream(lexerAndCounter.getKey());
6561
assertEquals(2, tokens.getNumberOfOnChannelTokens());
6662
assertEquals(0, lexerAndCounter.getValue().getNumErrors());
6763
}
6864

65+
@Test
66+
@SuppressWarnings("deprecation")
67+
void testCaseInsensitivityDeprecated() {
68+
// intentionally testing the deprecated type for backward compatibility
69+
ApexLexer lexer = new ApexLexer(
70+
new CaseInsensitiveInputStream(CharStreams.fromString("PuBliC"))
71+
);
72+
lexer.removeErrorListeners();
73+
SyntaxErrorCounter errorCounter = new SyntaxErrorCounter();
74+
lexer.addErrorListener(errorCounter);
75+
76+
CommonTokenStream tokens = new CommonTokenStream(lexer);
77+
assertEquals(2, tokens.getNumberOfOnChannelTokens());
78+
assertEquals(0, errorCounter.getNumErrors());
79+
}
80+
6981
@Test
7082
void testLexerUnicodeEscapes() {
7183
Map.Entry<ApexLexer, SyntaxErrorCounter> lexerAndCounter = createLexer(
72-
"'Fran\\u00E7ois'",
73-
false
84+
"'Fran\\u00E7ois'"
7485
);
7586
CommonTokenStream tokens = new CommonTokenStream(lexerAndCounter.getKey());
7687
assertEquals(2, tokens.getNumberOfOnChannelTokens());

jvm/src/test/java/io/github/apexdevtools/apexparser/SyntaxErrorCounter.java

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,11 @@ public int getNumErrors() {
3838
}
3939

4040
public static Map.Entry<ApexLexer, SyntaxErrorCounter> createLexer(
41-
String input,
42-
Boolean caseInsensitive
41+
String input
4342
) {
44-
CharStream stream = CharStreams.fromString(input);
45-
ApexLexer lexer = new ApexLexer(
46-
caseInsensitive ? new CaseInsensitiveInputStream(stream) : stream
43+
ApexLexer lexer = ApexParserFactory.createLexer(
44+
CharStreams.fromString(input)
4745
);
48-
49-
lexer.removeErrorListeners();
5046
SyntaxErrorCounter errorCounter = new SyntaxErrorCounter();
5147
lexer.addErrorListener(errorCounter);
5248

npm/antlr/ApexLexer.g4

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
lexer grammar ApexLexer;
2+
options { caseInsensitive = true; }
23

34
import BaseApexLexer;

npm/src/ApexParserFactory.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
*/
2828

2929
import {
30+
CharStreams,
3031
CommonTokenStream,
3132
ErrorNode,
3233
ParserRuleContext,
@@ -41,7 +42,6 @@ import {
4142
import ApexParserListener from "./antlr/ApexParserListener";
4243
import ApexParserVisitor from "./antlr/ApexParserVisitor";
4344
import ApexLexer from "./antlr/ApexLexer";
44-
import { CaseInsensitiveInputStream } from "./CaseInsensitiveInputStream";
4545
import ApexParser from "./antlr/ApexParser";
4646
import { ThrowingErrorListener } from "./ApexErrorListener";
4747

@@ -85,7 +85,11 @@ export class ApexParserFactory {
8585
}
8686

8787
static createLexer(source: string): ApexLexer {
88-
return new ApexLexer(new CaseInsensitiveInputStream(source));
88+
const lexer = new ApexLexer(CharStreams.fromString(source));
89+
90+
// always remove default console listener
91+
lexer.removeErrorListeners();
92+
return lexer;
8993
}
9094
}
9195

0 commit comments

Comments
 (0)