Merge pull request #71 from apex-dev-tools/case-insensitive-lexer

pwrightcertinia · web-flow · commit 54fc29450613 · 2025-05-22T11:50:40.000+01:00
Enable `caseInsensitive` lexer option
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,9 @@
 
 - **(BREAKING)** Updated to ANTLR runtime `4.13.2`, using the ANTLR tool to generate both target languages.
 
+- Enabled `caseInsensitive` option for lexers (added in ANTLR 4.10).
+  - As a result, `CaseInsensitiveInputStream` is deprecated and should no longer be required.
+
 - Added `ApexParserFactory` class to create parsers, token streams, and lexers.
   - Primarily for TS to avoid directly creating `antlr4` class instances.
   - In Java, it still requires passing a `CharStream` or `CommonTokenStream` to create parsers.
@@ -39,7 +42,7 @@
 
 - **(BREAKING)** Updated output to `ES2020` and increased min node version to 16.
 
-- `CaseInsensitiveInputStream` type now extends `CharStream` and can be constructed from `string`.
+- `CaseInsensitiveInputStream` (deprecated) type now extends `CharStream` and can be constructed from `string`.
   - Constructor passing in `CharStream` retained to match Java version.
 
 - Removed `node-dir` dependency - replaced with node fs api.
diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 
 Parser for Salesforce Apex (including Triggers & inline SOQL/SOSL). This is based on an [ANTLR4](https://www.antlr.org/) grammar, see [`antlr/BaseApexParser.g4`](./antlr/BaseApexParser.g4). Currently packaged for Java and JavaScript/TypeScript targets.
 
-With the ANTLR4 generated types, a `CaseInsensitiveInputStream` is included (and required) for the lexer. Type aliases and abstractions like `ApexParserFactory` and `ApexErrorListener` are also available for quick start. There are minimal examples in the test classes.
+The packages include ANTLR4 generated types plus optional extras for convenience. The TypeScript package exports type aliases for ANTLR types, while both packages have abstractions like `ApexParserFactory` and `ApexErrorListener`. There are minimal examples below and in the test classes.
 
 ## Installation
 
diff --git a/antlr/BaseApexLexer.g4 b/antlr/BaseApexLexer.g4
@@ -299,16 +299,16 @@ IntegerLiteral
     ;
 
 LongLiteral
-    : Digit Digit* [lL]
+    : Digit Digit* [l]
     ;
 
 NumberLiteral
-    :   Digit* '.' Digit Digit* [dD]?
+    :   Digit* '.' Digit Digit* [d]?
     ;
 
 fragment
 HexCharacter
-    :   [0-9a-fA-F]
+    :   [0-9a-f]
     ;
 
 fragment
@@ -424,7 +424,7 @@ Identifier
 // so we can give better error messages
 fragment
 JavaLetter
-    :   [a-zA-Z$_] // these are the "java letters" below 0xFF
+    :   [a-z$_] // these are the "java letters" below 0xFF
     |   // covers all characters above 0xFF which are not a surrogate
         ~[\u0000-\u00FF\uD800-\uDBFF]
     |   // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
@@ -433,7 +433,7 @@ JavaLetter
 
 fragment
 JavaLetterOrDigit
-    :   [a-zA-Z0-9$_] // these are the "java letters or digits" below 0xFF
+    :   [a-z0-9$_] // these are the "java letters or digits" below 0xFF
     |   // covers all characters above 0xFF which are not a surrogate
         ~[\u0000-\u00FF\uD800-\uDBFF]
     |   // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
diff --git a/jvm/antlr/ApexLexer.g4 b/jvm/antlr/ApexLexer.g4
@@ -1,7 +1,8 @@
 lexer grammar ApexLexer;
+options { caseInsensitive = true; }
 
 @lexer::members {
-public void clearCache() {_interp.clearDFA();}
+public void clearCache() { _interp.clearDFA(); }
 }
 
 import BaseApexLexer;
diff --git a/jvm/src/main/java/io/github/apexdevtools/apexparser/ApexParserFactory.java b/jvm/src/main/java/io/github/apexdevtools/apexparser/ApexParserFactory.java
@@ -51,6 +51,10 @@ public static CommonTokenStream createTokenStream(CharStream stream) {
   }
 
   public static ApexLexer createLexer(CharStream stream) {
-    return new ApexLexer(new CaseInsensitiveInputStream(stream));
+    ApexLexer lexer = new ApexLexer(stream);
+
+    // always remove default console listener
+    lexer.removeErrorListeners();
+    return lexer;
   }
 }
diff --git a/jvm/src/main/java/io/github/apexdevtools/apexparser/CaseInsensitiveInputStream.java b/jvm/src/main/java/io/github/apexdevtools/apexparser/CaseInsensitiveInputStream.java
@@ -37,8 +37,12 @@ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 
 /**
  * ANTLR4 stream handler that allows use of case insensitive handling.
+ *
+ * @deprecated since 5.0.0. ApexLexer is now generated with the
+ * {@code caseInsensitive} lexer option (added in ANTLR 4.10) enabled.
  */
-@SuppressWarnings({ "unused" })
+@SuppressWarnings("all")
+@Deprecated
 public class CaseInsensitiveInputStream implements CharStream {
 
   private final CharStream src;
diff --git a/jvm/src/test/java/io/github/apexdevtools/apexparser/ApexLexerTest.java b/jvm/src/test/java/io/github/apexdevtools/apexparser/ApexLexerTest.java
@@ -25,8 +25,7 @@ public class ApexLexerTest {
   @Test
   void testLexerGeneratesTokens() {
     Map.Entry<ApexLexer, SyntaxErrorCounter> lexerAndCounter = createLexer(
-      "public class Hello {}",
-      false
+      "public class Hello {}"
     );
     CommonTokenStream tokens = new CommonTokenStream(lexerAndCounter.getKey());
     assertEquals(6, tokens.getNumberOfOnChannelTokens());
@@ -36,8 +35,7 @@ void testLexerGeneratesTokens() {
   @Test
   void testCaseInsensitivityLowerCase() {
     Map.Entry<ApexLexer, SyntaxErrorCounter> lexerAndCounter = createLexer(
-      "public",
-      true
+      "public"
     );
     CommonTokenStream tokens = new CommonTokenStream(lexerAndCounter.getKey());
     assertEquals(2, tokens.getNumberOfOnChannelTokens());
@@ -47,8 +45,7 @@ void testCaseInsensitivityLowerCase() {
   @Test
   void testCaseInsensitivityUpperCase() {
     Map.Entry<ApexLexer, SyntaxErrorCounter> lexerAndCounter = createLexer(
-      "PUBLIC",
-      true
+      "PUBLIC"
     );
     CommonTokenStream tokens = new CommonTokenStream(lexerAndCounter.getKey());
     assertEquals(2, tokens.getNumberOfOnChannelTokens());
@@ -58,19 +55,33 @@ void testCaseInsensitivityUpperCase() {
   @Test
   void testCaseInsensitivityMixedCase() {
     Map.Entry<ApexLexer, SyntaxErrorCounter> lexerAndCounter = createLexer(
-      "PuBliC",
-      true
+      "PuBliC"
     );
     CommonTokenStream tokens = new CommonTokenStream(lexerAndCounter.getKey());
     assertEquals(2, tokens.getNumberOfOnChannelTokens());
     assertEquals(0, lexerAndCounter.getValue().getNumErrors());
   }
 
+  @Test
+  @SuppressWarnings("deprecation")
+  void testCaseInsensitivityDeprecated() {
+    // intentionally testing the deprecated type for backward compatibility
+    ApexLexer lexer = new ApexLexer(
+      new CaseInsensitiveInputStream(CharStreams.fromString("PuBliC"))
+    );
+    lexer.removeErrorListeners();
+    SyntaxErrorCounter errorCounter = new SyntaxErrorCounter();
+    lexer.addErrorListener(errorCounter);
+
+    CommonTokenStream tokens = new CommonTokenStream(lexer);
+    assertEquals(2, tokens.getNumberOfOnChannelTokens());
+    assertEquals(0, errorCounter.getNumErrors());
+  }
+
   @Test
   void testLexerUnicodeEscapes() {
     Map.Entry<ApexLexer, SyntaxErrorCounter> lexerAndCounter = createLexer(
-      "'Fran\\u00E7ois'",
-      false
+      "'Fran\\u00E7ois'"
     );
     CommonTokenStream tokens = new CommonTokenStream(lexerAndCounter.getKey());
     assertEquals(2, tokens.getNumberOfOnChannelTokens());
diff --git a/jvm/src/test/java/io/github/apexdevtools/apexparser/SyntaxErrorCounter.java b/jvm/src/test/java/io/github/apexdevtools/apexparser/SyntaxErrorCounter.java
@@ -38,15 +38,11 @@ public int getNumErrors() {
   }
 
   public static Map.Entry<ApexLexer, SyntaxErrorCounter> createLexer(
-    String input,
-    Boolean caseInsensitive
+    String input
   ) {
-    CharStream stream = CharStreams.fromString(input);
-    ApexLexer lexer = new ApexLexer(
-      caseInsensitive ? new CaseInsensitiveInputStream(stream) : stream
+    ApexLexer lexer = ApexParserFactory.createLexer(
+      CharStreams.fromString(input)
     );
-
-    lexer.removeErrorListeners();
     SyntaxErrorCounter errorCounter = new SyntaxErrorCounter();
     lexer.addErrorListener(errorCounter);
 
diff --git a/npm/antlr/ApexLexer.g4 b/npm/antlr/ApexLexer.g4
@@ -1,3 +1,4 @@
 lexer grammar ApexLexer;
+options { caseInsensitive = true; }
 
 import BaseApexLexer;
diff --git a/npm/src/ApexParserFactory.ts b/npm/src/ApexParserFactory.ts
@@ -27,6 +27,7 @@
 */
 
 import {
+  CharStreams,
   CommonTokenStream,
   ErrorNode,
   ParserRuleContext,
@@ -41,7 +42,6 @@ import {
 import ApexParserListener from "./antlr/ApexParserListener";
 import ApexParserVisitor from "./antlr/ApexParserVisitor";
 import ApexLexer from "./antlr/ApexLexer";
-import { CaseInsensitiveInputStream } from "./CaseInsensitiveInputStream";
 import ApexParser from "./antlr/ApexParser";
 import { ThrowingErrorListener } from "./ApexErrorListener";
 
@@ -85,7 +85,11 @@ export class ApexParserFactory {
   }
 
   static createLexer(source: string): ApexLexer {
-    return new ApexLexer(new CaseInsensitiveInputStream(source));
+    const lexer = new ApexLexer(CharStreams.fromString(source));
+
+    // always remove default console listener
+    lexer.removeErrorListeners();
+    return lexer;
   }
 }
 
diff --git a/npm/src/CaseInsensitiveInputStream.ts b/npm/src/CaseInsensitiveInputStream.ts
@@ -28,6 +28,12 @@
 
 import { CharStream } from "antlr4";
 
+/**
+ * Converts char stream to lower case for case insensitive usage.
+ *
+ * @deprecated since 5.0.0. ApexLexer is now generated with the
+ * `caseInsensitive` lexer option (added in ANTLR 4.10) enabled.
+ */
 export class CaseInsensitiveInputStream extends CharStream {
   constructor(data: string, decodeToUnicodeCodePoints?: boolean);
   constructor(stream: CharStream, decodeToUnicodeCodePoints?: boolean);
diff --git a/npm/src/__tests__/ApexLexerTest.ts b/npm/src/__tests__/ApexLexerTest.ts
@@ -12,7 +12,9 @@
     derived from this software without specific prior written permission.
  */
 import { CommonTokenStream } from "antlr4";
-import { createLexer } from "./SyntaxErrorCounter";
+import { createLexer, SyntaxErrorCounter } from "./SyntaxErrorCounter";
+import { CaseInsensitiveInputStream } from "../CaseInsensitiveInputStream";
+import ApexLexer from "../antlr/ApexLexer";
 
 type ExtCommonTokenStream = CommonTokenStream & {
   // This method is present but not available
@@ -21,7 +23,7 @@ type ExtCommonTokenStream = CommonTokenStream & {
 };
 
 test("Lexer generates tokens", () => {
-  const [lexer, errorCounter] = createLexer("public class Hello {}", false);
+  const [lexer, errorCounter] = createLexer("public class Hello {}");
   const tokens = new CommonTokenStream(lexer) as ExtCommonTokenStream;
   expect(tokens.getNumberOfOnChannelTokens()).toBe(6);
   expect(errorCounter.getNumErrors()).toEqual(0);
@@ -48,8 +50,20 @@ test("Case insensitivity (mixed case)", () => {
   expect(errorCounter.getNumErrors()).toEqual(0);
 });
 
+test("Case insensitivity (deprecated stream)", () => {
+  // intentionally testing the deprecated type for backward compatibility
+  const lexer = new ApexLexer(new CaseInsensitiveInputStream("PuBliC"));
+  lexer.removeErrorListeners();
+  const errorCounter = new SyntaxErrorCounter<number>();
+  lexer.addErrorListener(errorCounter);
+
+  const tokens = new CommonTokenStream(lexer) as ExtCommonTokenStream;
+  expect(tokens.getNumberOfOnChannelTokens()).toBe(2);
+  expect(errorCounter.getNumErrors()).toEqual(0);
+});
+
 test("Lexer unicode escapes", () => {
-  const [lexer, errorCounter] = createLexer("'Fran\\u00E7ois'", false);
+  const [lexer, errorCounter] = createLexer("'Fran\\u00E7ois'");
   const tokens = new CommonTokenStream(lexer) as ExtCommonTokenStream;
   expect(tokens.getNumberOfOnChannelTokens()).toBe(2);
   expect(errorCounter.getNumErrors()).toEqual(0);
diff --git a/npm/src/__tests__/SyntaxErrorCounter.ts b/npm/src/__tests__/SyntaxErrorCounter.ts
@@ -11,16 +11,9 @@
  3. The name of the author may not be used to endorse or promote products
     derived from this software without specific prior written permission.
  */
-import {
-  ErrorListener,
-  CharStreams,
-  RecognitionException,
-  Recognizer,
-  Token,
-} from "antlr4";
+import { ErrorListener, RecognitionException, Recognizer, Token } from "antlr4";
 import ApexLexer from "../antlr/ApexLexer";
 import ApexParser from "../antlr/ApexParser";
-import { CaseInsensitiveInputStream } from "../CaseInsensitiveInputStream";
 import { ApexParserFactory } from "../ApexParserFactory";
 
 export class SyntaxErrorCounter<T = Token> extends ErrorListener<T> {
@@ -43,16 +36,9 @@ export class SyntaxErrorCounter<T = Token> extends ErrorListener<T> {
 }
 
 export function createLexer(
-  input: string,
-  caseInsensitive: boolean = true
+  input: string
 ): [ApexLexer, SyntaxErrorCounter<number>] {
-  const lexer = new ApexLexer(
-    caseInsensitive
-      ? new CaseInsensitiveInputStream(input)
-      : CharStreams.fromString(input)
-  );
-
-  lexer.removeErrorListeners();
+  const lexer = ApexParserFactory.createLexer(input);
   const errorCounter = new SyntaxErrorCounter<number>();
   lexer.addErrorListener(errorCounter);
 

Original file line number	Diff line number	Diff line change
`@@ -1,7 +1,8 @@`
`1`	`1`	`lexer grammar ApexLexer;`
	`2`	`+options { caseInsensitive = true; }`
`2`	`3`
`3`	`4`	`@lexer::members {`
`4`		`-public void clearCache() {_interp.clearDFA();}`
	`5`	`+public void clearCache() { _interp.clearDFA(); }`
`5`	`6`	`}`
`6`	`7`
`7`	`8`	`import BaseApexLexer;`
Original file line number	Diff line number	Diff line change
`@@ -51,6 +51,10 @@ public static CommonTokenStream createTokenStream(CharStream stream) {`
`51`	`51`	`}`
`52`	`52`
`53`	`53`	`public static ApexLexer createLexer(CharStream stream) {`
`54`		`- return new ApexLexer(new CaseInsensitiveInputStream(stream));`
	`54`	`+ ApexLexer lexer = new ApexLexer(stream);`
	`55`	`+`
	`56`	`+ // always remove default console listener`
	`57`	`+ lexer.removeErrorListeners();`
	`58`	`+ return lexer;`
`55`	`59`	`}`
`56`	`60`	`}`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
`1`	`1`	`lexer grammar ApexLexer;`
	`2`	`+options { caseInsensitive = true; }`
`2`	`3`
`3`	`4`	`import BaseApexLexer;`