Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions src/Lexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,25 @@ import {LexerInterface, Token} from "./Token";
* still reject the input.
*/
const commentRegexString = "%[^\n]*[\n]";
const controlWordRegexString = "\\\\[a-zA-Z@]+";
const controlSymbolRegexString = "\\\\[^\uD800-\uDFFF]";
const tokenRegex = new RegExp(
"([ \r\n\t]+)|" + // whitespace
`(${commentRegexString}|` + // comments
"[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
"|[\uD800-\uDBFF][\uDC00-\uDFFF]" + // surrogate pair
"|\\\\verb\\*([^]).*?\\3" + // \verb*
"|\\\\verb([^*a-zA-Z]).*?\\4" + // \verb unstarred
"|\\\\(?:[a-zA-Z@]+|[^\uD800-\uDFFF])" + // function name
`|${controlWordRegexString}` + // \macroName
`|${controlSymbolRegexString}` + // \\, \', etc.
")"
);

const commentRegex = new RegExp(commentRegexString);
// tokenRegex has no ^ marker, as required by matchAt.
// These regexs are for matching results from tokenRegex,
// so they do have ^ markers.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice comment

export const controlWordRegex = new RegExp(`^${controlWordRegexString}`);
const commentRegex = new RegExp(`^${commentRegexString}`);

/** Main Lexer class */
export default class Lexer implements LexerInterface {
Expand Down
6 changes: 3 additions & 3 deletions src/MacroExpander.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
* until only non-macro tokens remain.
*/

import Lexer from "./Lexer";
import Lexer, {controlWordRegex} from "./Lexer";
import {Token} from "./Token";
import builtinMacros from "./macros";
import ParseError from "./ParseError";
Expand Down Expand Up @@ -82,8 +82,8 @@ export default class MacroExpander implements MacroContextInterface {
const topToken = this.popToken();
const name = topToken.text;
const isMacro = (name.charAt(0) === "\\");
if (isMacro) {
// Consume all spaces after \macro
if (isMacro && controlWordRegex.test(name)) {
// Consume all spaces after \macro (but not \\, \', etc.)
this.consumeSpaces();
}
if (!(isMacro && this.macros.hasOwnProperty(name))) {
Expand Down
30 changes: 22 additions & 8 deletions src/Parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,13 @@ export default class Parser {
* and fetches the one after that as the new look ahead.
*/
consume() {
this.nextToken = this.gullet.get(this.mode === "math");
this.nextToken = this.gullet.get(false);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this is the only place where gullet.get is called and its parameter is always the same, maybe we should get rid of the parameter.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I wanted a second opinion on this. As I wrote above: "we never use MacroExpander.get with a true argument, which could simplify the code of both get and unget." We no longer need any of the space saving/restoring mechanics, so I'll get rid of that.

"It also means that switchMode no longer does anything useful." (in the parser) This one I'm less sure of. Maybe switchMode would be useful in the future, e.g. if we can ever tweak catcodes in other ways? (e.g. verbatim or url modes?)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you think switchMode will come in handy in the future let's keep it, but please add a comment as to why we're keeping it.

Copy link
Copy Markdown
Member

@k4b7 k4b7 Oct 10, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we still need switchMode because we store the current mode in the parse nodes and that value gets used in the builders. Ignore my comment about adding a comment.

}

/**
* Switches between "text" and "math" modes, reconsuming nextToken
* in case it would be read differently in the new mode.
*/
switchMode(newMode) {
this.gullet.unget(this.nextToken);
this.mode = newMode;
Expand Down Expand Up @@ -193,6 +197,10 @@ export default class Parser {
// Keep adding atoms to the body until we can't parse any more atoms (either
// we reached the end, a }, or a \right)
while (true) {
// Ignore spaces in math mode
if (this.mode === "math") {
this.consumeSpaces();
}
const lex = this.nextToken;
if (Parser.endOfExpression.indexOf(lex.text) !== -1) {
break;
Expand Down Expand Up @@ -283,6 +291,7 @@ export default class Parser {
const symbolToken = this.nextToken;
const symbol = symbolToken.text;
this.consume();
this.consumeSpaces(); // ignore spaces before sup/subscript argument
const group = this.parseGroup();

if (!group) {
Expand Down Expand Up @@ -366,6 +375,9 @@ export default class Parser {
let superscript;
let subscript;
while (true) {
// Guaranteed in math mode, so eat any spaces first.
this.consumeSpaces();

// Lex the first token
const lex = this.nextToken;

Expand Down Expand Up @@ -674,9 +686,16 @@ export default class Parser {
const optArgs = [];

for (let i = 0; i < totalArgs; i++) {
const nextToken = this.nextToken;
const argType = funcData.argTypes && funcData.argTypes[i];
const isOptional = i < funcData.numOptionalArgs;
// Ignore spaces between arguments. As the TeXbook says:
// "After you have said ‘\def\row#1#2{...}’, you are allowed to
// put spaces between the arguments (e.g., ‘\row x n’), because
// TeX doesn’t use single spaces as undelimited arguments."
if (i > 0 && !isOptional) {
this.consumeSpaces();
}
const nextToken = this.nextToken;
let arg = argType ?
this.parseGroupOfType(argType, isOptional) :
this.parseGroup(isOptional);
Expand Down Expand Up @@ -733,14 +752,9 @@ export default class Parser {
return this.parseSizeGroup(optional);
}

this.switchMode(innerMode);
if (innerMode === "text") {
// text mode is special because it should ignore the whitespace before
// it
this.consumeSpaces();
}
// By the time we get here, innerMode is one of "text" or "math".
// We switch the mode of the parser, recurse, then restore the old mode.
this.switchMode(innerMode);
const res = this.parseGroup(optional);
this.switchMode(outerMode);
return res;
Expand Down
1 change: 1 addition & 0 deletions src/functions.js
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ defineFunction(["\\kern", "\\mkern"], {
// A KaTeX logo
defineFunction(["\\KaTeX"], {
numArgs: 0,
allowedInText: true,
}, function(context) {
return {
type: "katex",
Expand Down
61 changes: 56 additions & 5 deletions test/katex-spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -188,9 +188,11 @@ describe("A parser", function() {
});

it("should ignore whitespace", function() {
const parseA = stripPositions(getParsed(" x y "));
const parseB = stripPositions(getParsed("xy"));
expect(parseA).toEqual(parseB);
expect(" x y ").toParseLike("xy");
});

it("should ignore whitespace in atom", function() {
expect(" x ^ y ").toParseLike("x^y");
});
});

Expand Down Expand Up @@ -2353,6 +2355,11 @@ describe("An aligned environment", function() {
.toParse();
});

it("should allow cells in brackets", function() {
expect("\\begin{aligned}[a]&[b]\\\\ [c]&[d]\\end{aligned}")
.toParse();
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And if there's no space between \\\\and [c] would it try to parse [c] as a measurement?

Copy link
Copy Markdown
Member Author

@edemaine edemaine Oct 9, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. This matches LaTeX behavior, based on testing. (\@ifnextchar[ must get a space instead of a [.) I'll add an error test for the no-space case.

});

});

describe("A parser that does not throw on unsupported commands", function() {
Expand Down Expand Up @@ -2397,7 +2404,7 @@ describe("A parser that does not throw on unsupported commands", function() {
});
});

describe("The symbol table integraty", function() {
describe("The symbol table integrity", function() {
it("should treat certain symbols as synonyms", function() {
expect(getBuilt("<")).toEqual(getBuilt("\\lt"));
expect(getBuilt(">")).toEqual(getBuilt("\\gt"));
Expand Down Expand Up @@ -2431,10 +2438,30 @@ describe("A macro expander", function() {
compareParseTree("\\foo", "x", {"\\foo": " x"});
});

it("should consume spaces after macro", function() {
it("should consume spaces after control-word macro", function() {
compareParseTree("\\text{\\foo }", "\\text{x}", {"\\foo": "x"});
});

it("should consume spaces after macro with \\relax", function() {
compareParseTree("\\text{\\foo }", "\\text{}", {"\\foo": "\\relax"});
});

it("should consume spaces after \\relax", function() {
compareParseTree("\\text{\\relax }", "\\text{}");
});

it("should consume spaces after control-word function", function() {
compareParseTree("\\text{\\KaTeX x}", "\\text{\\KaTeX\\relax x}");
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So these two things parse the same, but I thought control words weren't allowed inside \text{}. Why is it important that these two parse the same?

Copy link
Copy Markdown
Member Author

@edemaine edemaine Oct 9, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted to make sure that \text{\KaTeX x} does not render a space. Ideally I'd say that \text{\KaTeX x} renders the same as \text{{\KaTeX}x} but that generates another group... perhaps I should tweak the test to actually look for features in the parse tree. (Control words like \KaTeX definitely work in text mode.) Ah, I can just test for \text{\KaTeX } vs. \text{\KaTeX}.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I got an error when I tried \text{\KaTeX} on the demo page so maybe it's just some control words don't work.

});

it("should preserve spaces after control-symbol macro", function() {
compareParseTree("\\text{\\% y}", "\\text{x y}", {"\\%": "x"});
});

it("should preserve spaces after control-symbol function", function() {
expect("\\text{\\' }").toParse();
});

it("should consume spaces between arguments", function() {
compareParseTree("\\text{\\foo 1 2}", "\\text{12end}", {"\\foo": "#1#2end"});
compareParseTree("\\text{\\foo {1} {2}}", "\\text{12end}", {"\\foo": "#1#2end"});
Expand Down Expand Up @@ -2475,13 +2502,37 @@ describe("A macro expander", function() {
});
});

it("should allow for space second argument (text version)", function() {
compareParseTree("\\text{\\foo\\bar\\bar}", "\\text{( , )}", {
"\\foo": "(#1,#2)",
"\\bar": " ",
});
});

it("should allow for space second argument (math version)", function() {
compareParseTree("\\foo\\bar\\bar", "(,)", {
"\\foo": "(#1,#2)",
"\\bar": " ",
});
});

it("should allow for empty macro argument", function() {
compareParseTree("\\foo\\bar", "()", {
"\\foo": "(#1)",
"\\bar": "",
});
});

// The following is not currently possible to get working, given that
// functions and macros are dealt with separately.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the current behavior? Can you open an issue for this?

Copy link
Copy Markdown
Member Author

@edemaine edemaine Oct 9, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Opened #924. Also added some more comments in the code about this.

/*
it("should allow for space function arguments", function() {
compareParseTree("\\frac\\bar\\bar", "\\frac{}{}", {
"\\bar": " ",
});
});
*/

it("should expand the \\overset macro as expected", function() {
expect("\\overset?=").toParseLike("\\mathop{=}\\limits^{?}");
expect("\\overset{x=y}{\\sqrt{ab}}")
Expand Down