KaTeX · edemaine · Oct 10, 2017 · Oct 1, 2017 · Oct 3, 2017 · Oct 9, 2017
diff --git a/src/Lexer.js b/src/Lexer.js
@@ -34,18 +34,25 @@ import {LexerInterface, Token} from "./Token";
  * still reject the input.
  */
 const commentRegexString = "%[^\n]*[\n]";
+const controlWordRegexString = "\\\\[a-zA-Z@]+";
+const controlSymbolRegexString = "\\\\[^\uD800-\uDFFF]";
 const tokenRegex = new RegExp(
     "([ \r\n\t]+)|" +                                 // whitespace
     `(${commentRegexString}|` +                       // comments
     "[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" +   // single codepoint
     "|[\uD800-\uDBFF][\uDC00-\uDFFF]" +               // surrogate pair
     "|\\\\verb\\*([^]).*?\\3" +                       // \verb*
     "|\\\\verb([^*a-zA-Z]).*?\\4" +                   // \verb unstarred
-    "|\\\\(?:[a-zA-Z@]+|[^\uD800-\uDFFF])" +          // function name
+    `|${controlWordRegexString}` +                    // \macroName
+    `|${controlSymbolRegexString}` +                  // \\, \', etc.
     ")"
 );
 
-const commentRegex = new RegExp(commentRegexString);
+// tokenRegex has no ^ marker, as required by matchAt.
+// These regexs are for matching results from tokenRegex,
+// so they do have ^ markers.
+export const controlWordRegex = new RegExp(`^${controlWordRegexString}`);
+const commentRegex = new RegExp(`^${commentRegexString}`);
 
 /** Main Lexer class */
 export default class Lexer implements LexerInterface {

diff --git a/src/MacroExpander.js b/src/MacroExpander.js
@@ -4,7 +4,7 @@
  * until only non-macro tokens remain.
  */
 
-import Lexer from "./Lexer";
+import Lexer, {controlWordRegex} from "./Lexer";
 import {Token} from "./Token";
 import builtinMacros from "./macros";
 import ParseError from "./ParseError";
@@ -82,8 +82,8 @@ export default class MacroExpander implements MacroContextInterface {
         const topToken = this.popToken();
         const name = topToken.text;
         const isMacro = (name.charAt(0) === "\\");
-        if (isMacro) {
-            // Consume all spaces after \macro
+        if (isMacro && controlWordRegex.test(name)) {
+            // Consume all spaces after \macro (but not \\, \', etc.)
             this.consumeSpaces();
         }
         if (!(isMacro && this.macros.hasOwnProperty(name))) {

diff --git a/src/Parser.js b/src/Parser.js
@@ -140,9 +140,13 @@ export default class Parser {
      * and fetches the one after that as the new look ahead.
      */
     consume() {
-        this.nextToken = this.gullet.get(this.mode === "math");
+        this.nextToken = this.gullet.get(false);
     }
 
+    /**
+     * Switches between "text" and "math" modes, reconsuming nextToken
+     * in case it would be read differently in the new mode.
+     */
     switchMode(newMode) {
         this.gullet.unget(this.nextToken);
         this.mode = newMode;
@@ -193,6 +197,10 @@ export default class Parser {
         // Keep adding atoms to the body until we can't parse any more atoms (either
         // we reached the end, a }, or a \right)
         while (true) {
+            // Ignore spaces in math mode
+            if (this.mode === "math") {
+                this.consumeSpaces();
+            }
             const lex = this.nextToken;
             if (Parser.endOfExpression.indexOf(lex.text) !== -1) {
                 break;
@@ -283,6 +291,7 @@ export default class Parser {
         const symbolToken = this.nextToken;
         const symbol = symbolToken.text;
         this.consume();
+        this.consumeSpaces(); // ignore spaces before sup/subscript argument
         const group = this.parseGroup();
 
         if (!group) {
@@ -366,6 +375,9 @@ export default class Parser {
         let superscript;
         let subscript;
         while (true) {
+            // Guaranteed in math mode, so eat any spaces first.
+            this.consumeSpaces();
+
             // Lex the first token
             const lex = this.nextToken;
 
@@ -674,9 +686,16 @@ export default class Parser {
         const optArgs = [];
 
         for (let i = 0; i < totalArgs; i++) {
-            const nextToken = this.nextToken;
             const argType = funcData.argTypes && funcData.argTypes[i];
             const isOptional = i < funcData.numOptionalArgs;
+            // Ignore spaces between arguments.  As the TeXbook says:
+            // "After you have said ‘\def\row#1#2{...}’, you are allowed to
+            //  put spaces between the arguments (e.g., ‘\row x n’), because
+            //  TeX doesn’t use single spaces as undelimited arguments."
+            if (i > 0 && !isOptional) {
+                this.consumeSpaces();
+            }
+            const nextToken = this.nextToken;
             let arg = argType ?
                 this.parseGroupOfType(argType, isOptional) :
                 this.parseGroup(isOptional);
@@ -733,14 +752,9 @@ export default class Parser {
             return this.parseSizeGroup(optional);
         }
 
-        this.switchMode(innerMode);
-        if (innerMode === "text") {
-            // text mode is special because it should ignore the whitespace before
-            // it
-            this.consumeSpaces();
-        }
         // By the time we get here, innerMode is one of "text" or "math".
         // We switch the mode of the parser, recurse, then restore the old mode.
+        this.switchMode(innerMode);
         const res = this.parseGroup(optional);
         this.switchMode(outerMode);
         return res;

diff --git a/src/functions.js b/src/functions.js
@@ -178,6 +178,7 @@ defineFunction(["\\kern", "\\mkern"], {
 // A KaTeX logo
 defineFunction(["\\KaTeX"], {
     numArgs: 0,
+    allowedInText: true,
 }, function(context) {
     return {
         type: "katex",

diff --git a/test/katex-spec.js b/test/katex-spec.js
@@ -188,9 +188,11 @@ describe("A parser", function() {
     });
 
     it("should ignore whitespace", function() {
-        const parseA = stripPositions(getParsed("    x    y    "));
-        const parseB = stripPositions(getParsed("xy"));
-        expect(parseA).toEqual(parseB);
+        expect("    x    y    ").toParseLike("xy");
+    });
+
+    it("should ignore whitespace in atom", function() {
+        expect("    x   ^ y    ").toParseLike("x^y");
     });
 });
 
@@ -2353,6 +2355,11 @@ describe("An aligned environment", function() {
             .toParse();
     });
 
+    it("should allow cells in brackets", function() {
+        expect("\\begin{aligned}[a]&[b]\\\\ [c]&[d]\\end{aligned}")
+            .toParse();
+    });
+
 });
 
 describe("A parser that does not throw on unsupported commands", function() {
@@ -2397,7 +2404,7 @@ describe("A parser that does not throw on unsupported commands", function() {
     });
 });
 
-describe("The symbol table integraty", function() {
+describe("The symbol table integrity", function() {
     it("should treat certain symbols as synonyms", function() {
         expect(getBuilt("<")).toEqual(getBuilt("\\lt"));
         expect(getBuilt(">")).toEqual(getBuilt("\\gt"));
@@ -2431,10 +2438,30 @@ describe("A macro expander", function() {
         compareParseTree("\\foo", "x", {"\\foo": " x"});
     });
 
-    it("should consume spaces after macro", function() {
+    it("should consume spaces after control-word macro", function() {
         compareParseTree("\\text{\\foo }", "\\text{x}", {"\\foo": "x"});
     });
 
+    it("should consume spaces after macro with \\relax", function() {
+        compareParseTree("\\text{\\foo }", "\\text{}", {"\\foo": "\\relax"});
+    });
+
+    it("should consume spaces after \\relax", function() {
+        compareParseTree("\\text{\\relax }", "\\text{}");
+    });
+
+    it("should consume spaces after control-word function", function() {
+        compareParseTree("\\text{\\KaTeX x}", "\\text{\\KaTeX\\relax x}");
+    });
+
+    it("should preserve spaces after control-symbol macro", function() {
+        compareParseTree("\\text{\\% y}", "\\text{x y}", {"\\%": "x"});
+    });
+
+    it("should preserve spaces after control-symbol function", function() {
+        expect("\\text{\\' }").toParse();
+    });
+
     it("should consume spaces between arguments", function() {
         compareParseTree("\\text{\\foo 1 2}", "\\text{12end}", {"\\foo": "#1#2end"});
         compareParseTree("\\text{\\foo {1} {2}}", "\\text{12end}", {"\\foo": "#1#2end"});
@@ -2475,13 +2502,37 @@ describe("A macro expander", function() {
         });
     });
 
+    it("should allow for space second argument (text version)", function() {
+        compareParseTree("\\text{\\foo\\bar\\bar}", "\\text{( , )}", {
+            "\\foo": "(#1,#2)",
+            "\\bar": " ",
+        });
+    });
+
+    it("should allow for space second argument (math version)", function() {
+        compareParseTree("\\foo\\bar\\bar", "(,)", {
+            "\\foo": "(#1,#2)",
+            "\\bar": " ",
+        });
+    });
+
     it("should allow for empty macro argument", function() {
         compareParseTree("\\foo\\bar", "()", {
             "\\foo": "(#1)",
             "\\bar": "",
         });
     });
 
+    // The following is not currently possible to get working, given that
+    // functions and macros are dealt with separately.
+/*
+    it("should allow for space function arguments", function() {
+        compareParseTree("\\frac\\bar\\bar", "\\frac{}{}", {
+            "\\bar": " ",
+        });
+    });
+*/
+
     it("should expand the \\overset macro as expected", function() {
         expect("\\overset?=").toParseLike("\\mathop{=}\\limits^{?}");
         expect("\\overset{x=y}{\\sqrt{ab}}")