Skip to content

Commit 395c533

Browse files
committed
Lexer - Solving ambiguity on function keywords
Signed-off-by: iifawzi <[email protected]>
1 parent 8fddb4b commit 395c533

File tree

9 files changed

+1540
-8
lines changed

9 files changed

+1540
-8
lines changed

src/Lexer.php

Lines changed: 71 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,30 @@ class Lexer extends Core
8787
'parseUnknown',
8888
];
8989

90+
91+
/**
92+
* A list of keywords that indicate that the function keyword
93+
* is not used as a function
94+
*
95+
* @var string[]
96+
*/
97+
public $KEYWORD_NAME_INDICATORS = [
98+
'FROM',
99+
'SET',
100+
'WHERE',
101+
];
102+
103+
/**
104+
* A list of operators that indicate that the function keyword
105+
* is not used as a function
106+
*
107+
* @var string[]
108+
*/
109+
public $OPERATOR_NAME_INDICATORS = [
110+
',',
111+
'.',
112+
];
113+
90114
/**
91115
* The string to be parsed.
92116
*
@@ -344,6 +368,7 @@ public function lex()
344368
$this->list = $list;
345369

346370
$this->solveAmbiguityOnStarOperator();
371+
$this->solveAmbiguityOnFunctionKeywords();
347372
}
348373

349374
/**
@@ -358,10 +383,8 @@ public function lex()
358383
* - ")" (a closing parenthesis like in "COUNT(*)").
359384
* This method will change the flag of the "*" tokens when any of those conditions above is true. Otherwise, the
360385
* default flag (arithmetic) will be kept.
361-
*
362-
* @return void
363386
*/
364-
private function solveAmbiguityOnStarOperator()
387+
private function solveAmbiguityOnStarOperator(): void
365388
{
366389
$iBak = $this->list->idx;
367390
while (($starToken = $this->list->getNextOfTypeAndValue(Token::TYPE_OPERATOR, '*')) !== null) {
@@ -385,6 +408,51 @@ private function solveAmbiguityOnStarOperator()
385408
$this->list->idx = $iBak;
386409
}
387410

411+
/**
412+
* Resolves the ambiguity when dealing with function keywords.
413+
*
414+
* In SQL statements, function keywords might be used as table names or column names.
415+
* To solve this ambiguity, the solution is to find the next token, excluding whitespaces and
416+
* comments, right after the function keyword position. The function keyword is for sure used
417+
* as column name or table name if the next token found is any of:
418+
*
419+
* - "FROM" (the FROM keyword like in "SELECT Country x, AverageSalary avg FROM...");
420+
* - "WHERE" (the WHERE keyword like in "DELETE FROM emp x WHERE x.salary = 20");
421+
* - "SET" (the SET keyword like in "UPDATE Country x, City y set x.Name=x.Name");
422+
* - "," (a comma separator like 'x,' in "UPDATE Country x, City y set x.Name=x.Name");
423+
* - "." (a dot separator like in "x.asset_id FROM (SELECT evt.asset_id FROM evt)");
424+
* - NULL (the next token has no value, as when the keyword ends the statement as a table alias, like in "avg.col FROM (SELECT ev.col FROM ev) avg").
425+
*
426+
* This method will reset the type and flags of the function keyword tokens when any of those
427+
* conditions above is true. Otherwise, the
428+
* default flag (function keyword) will be kept.
429+
*/
430+
private function solveAmbiguityOnFunctionKeywords(): void
431+
{
432+
$iBak = $this->list->idx;
433+
$keywordFunction = Token::TYPE_KEYWORD | Token::FLAG_KEYWORD_FUNCTION;
434+
while (($keywordToken = $this->list->getNextOfTypeAndFlag(Token::TYPE_KEYWORD, $keywordFunction)) !== null) {
435+
$next = $this->list->getNext();
436+
if (
437+
($next->type !== Token::TYPE_KEYWORD
438+
|| ! in_array($next->value, $this->KEYWORD_NAME_INDICATORS, true)
439+
)
440+
&& ($next->type !== Token::TYPE_OPERATOR
441+
|| ! in_array($next->value, $this->OPERATOR_NAME_INDICATORS, true)
442+
)
443+
&& ($next->value !== null)
444+
) {
445+
continue;
446+
}
447+
448+
$keywordToken->type = Token::TYPE_NONE;
449+
$keywordToken->flags = Token::TYPE_NONE;
450+
$keywordToken->keyword = $keywordToken->value;
451+
}
452+
453+
$this->list->idx = $iBak;
454+
}
455+
388456
/**
389457
* Creates a new error log.
390458
*

src/TokensList.php

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,23 @@ public function getNextOfTypeAndValue($type, $value)
168168
return null;
169169
}
170170

171+
/**
172+
* Gets the next token whose type and flags are exactly equal to the given ones.
173+
*
174+
* @param int $type the type of the token
175+
* @param int $flag the exact flags value the token must have
176+
*/
177+
public function getNextOfTypeAndFlag(int $type, int $flag): ?Token
178+
{
179+
for (; $this->idx < $this->count; ++$this->idx) {
180+
if (($this->tokens[$this->idx]->type === $type) && ($this->tokens[$this->idx]->flags === $flag)) {
181+
return $this->tokens[$this->idx++];
182+
}
183+
}
184+
185+
return null;
186+
}
187+
171188
/**
172189
* Sets a value inside the container.
173190
*

tests/Components/ExpressionTest.php

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,23 @@ public function testParse2(): void
2222
$this->assertEquals($component->expr, 'col');
2323
}
2424

25+
public function testParse3(): void
26+
{
27+
$component = Expression::parse(new Parser(), $this->getTokensList('col xx'));
28+
$this->assertEquals($component->alias, 'xx');
29+
30+
$component = Expression::parse(new Parser(), $this->getTokensList('col y'));
31+
$this->assertEquals($component->alias, 'y');
32+
33+
$component = Expression::parse(new Parser(), $this->getTokensList('avg.col FROM (SELECT ev.col FROM ev)'));
34+
$this->assertEquals($component->table, 'avg');
35+
$this->assertEquals($component->expr, 'avg.col');
36+
37+
$component = Expression::parse(new Parser(), $this->getTokensList('x.id FROM (SELECT a.id FROM a) x'));
38+
$this->assertEquals($component->table, 'x');
39+
$this->assertEquals($component->expr, 'x.id');
40+
}
41+
2542
/**
2643
* @dataProvider parseErrProvider
2744
*/

tests/Lexer/TokensListTest.php

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,17 +30,22 @@ public function setUp(): void
3030
new Token(' ', Token::TYPE_WHITESPACE),
3131
new Token('*', Token::TYPE_OPERATOR),
3232
new Token(' ', Token::TYPE_WHITESPACE),
33-
new Token('FROM', Token::TYPE_KEYWORD),
33+
new Token('FROM', Token::TYPE_KEYWORD, Token::FLAG_KEYWORD_RESERVED),
3434
new Token(' ', Token::TYPE_WHITESPACE),
3535
new Token('`test`', Token::TYPE_SYMBOL),
3636
new Token(' ', Token::TYPE_WHITESPACE),
37+
new Token('WHERE', Token::TYPE_KEYWORD, Token::FLAG_KEYWORD_RESERVED),
38+
new Token(' ', Token::TYPE_WHITESPACE),
39+
new Token('name', Token::TYPE_NONE),
40+
new Token('=', Token::TYPE_OPERATOR),
41+
new Token('fa', Token::TYPE_NONE),
3742
];
3843
}
3944

4045
public function testBuild(): void
4146
{
4247
$list = new TokensList($this->tokens);
43-
$this->assertEquals('SELECT * FROM `test` ', TokensList::build($list));
48+
$this->assertEquals('SELECT * FROM `test` WHERE name=fa', TokensList::build($list));
4449
}
4550

4651
public function testAdd(): void
@@ -60,6 +65,10 @@ public function testGetNext(): void
6065
$this->assertEquals($this->tokens[2], $list->getNext());
6166
$this->assertEquals($this->tokens[4], $list->getNext());
6267
$this->assertEquals($this->tokens[6], $list->getNext());
68+
$this->assertEquals($this->tokens[8], $list->getNext());
69+
$this->assertEquals($this->tokens[10], $list->getNext());
70+
$this->assertEquals($this->tokens[11], $list->getNext());
71+
$this->assertEquals($this->tokens[12], $list->getNext());
6372
$this->assertNull($list->getNext());
6473
}
6574

@@ -78,9 +87,24 @@ public function testGetNextOfType(): void
7887
$list = new TokensList($this->tokens);
7988
$this->assertEquals($this->tokens[0], $list->getNextOfType(Token::TYPE_KEYWORD));
8089
$this->assertEquals($this->tokens[4], $list->getNextOfType(Token::TYPE_KEYWORD));
90+
$this->assertEquals($this->tokens[8], $list->getNextOfType(Token::TYPE_KEYWORD));
8191
$this->assertNull($list->getNextOfType(Token::TYPE_KEYWORD));
8292
}
8393

94+
public function testGetNextOfTypeAndFlag(): void
95+
{
96+
$list = new TokensList($this->tokens);
97+
$this->assertEquals($this->tokens[4], $list->getNextOfTypeAndFlag(
98+
Token::TYPE_KEYWORD,
99+
Token::FLAG_KEYWORD_RESERVED
100+
));
101+
$this->assertEquals($this->tokens[8], $list->getNextOfTypeAndFlag(
102+
Token::TYPE_KEYWORD,
103+
Token::FLAG_KEYWORD_RESERVED
104+
));
105+
$this->assertNull($list->getNextOfTypeAndFlag(Token::TYPE_KEYWORD, Token::FLAG_KEYWORD_RESERVED));
106+
}
107+
84108
public function testGetNextOfTypeAndValue(): void
85109
{
86110
$list = new TokensList($this->tokens);
@@ -107,7 +131,7 @@ public function testArrayAccess(): void
107131

108132
// offsetExists($offset)
109133
$this->assertArrayHasKey(2, $list);
110-
$this->assertArrayNotHasKey(8, $list);
134+
$this->assertArrayNotHasKey(13, $list);
111135

112136
// offsetUnset($offset)
113137
unset($list[2]);

tests/data/parser/parseDelete13.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
DELETE FROM emp x WHERE x.salary = 20

0 commit comments

Comments
 (0)