Skip to content

Commit a10babb

Browse files
authored
Implement soft keywords (hand-written and code generation) (python#129)
1 parent 6c50468 commit a10babb

File tree

5 files changed

+58
-10
lines changed

5 files changed

+58
-10
lines changed

Parser/pegen/vm.c

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -153,6 +153,10 @@ run_vm(Parser *p, Rule rules[], int root)
153153
oparg = f->rule->opcodes[f->iop++];
154154
v = _PyPegen_expect_token(p, oparg);
155155
break;
156+
case OP_SOFT_KEYWORD:
157+
oparg = f->rule->opcodes[f->iop++];
158+
v = _PyPegen_expect_soft_keyword(p, soft_keywords[oparg]);
159+
break;
156160
case OP_RULE:
157161
oparg = f->rule->opcodes[f->iop++];
158162
Rule *rule = &rules[oparg];

Parser/pegen/vm.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@ typedef enum _opcodes {
1212
OP_SUCCESS,
1313
OP_FAILURE,
1414
// The rest have an argument
15+
OP_SOFT_KEYWORD,
1516
OP_TOKEN,
1617
OP_RULE,
1718
OP_RETURN,
@@ -31,6 +32,7 @@ static char *opcode_names[] = {
3132
"OP_SUCCESS",
3233
"OP_FAILURE",
3334
// The rest have an argument
35+
"OP_SOFT_KEYWORD",
3436
"OP_TOKEN",
3537
"OP_RULE",
3638
"OP_RETURN",

Parser/pegen/vmparse.h

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@ static KeywordToken *reserved_keywords[] = {
88
},
99
};
1010

11+
enum {
12+
SK___PEG_PARSER__,
13+
};
14+
15+
static const char *soft_keywords[] = {
16+
"__peg_parser__",
17+
};
18+
1119
enum {
1220
R_START,
1321
R_STMT,
@@ -33,6 +41,7 @@ enum {
3341
A_FACTOR_1,
3442
A_FACTOR_2,
3543
A_FACTOR_3,
44+
A_FACTOR_4,
3645
A__GATHER_2_0,
3746
A__GATHER_2_1,
3847
};
@@ -78,12 +87,13 @@ static Rule all_rules[] = {
7887
},
7988
{"factor",
8089
R_FACTOR,
81-
{0, 8, 16, 19, -1},
90+
{0, 8, 16, 19, 23, -1},
8291
{
8392
OP_TOKEN, 7, OP_RULE, R_EXPR, OP_TOKEN, 8, OP_RETURN, A_FACTOR_0,
8493
OP_TOKEN, 9, OP_RULE, R__GATHER_2, OP_TOKEN, 10, OP_RETURN, A_FACTOR_1,
8594
OP_NUMBER, OP_RETURN, A_FACTOR_2,
86-
OP_NAME, OP_RETURN, A_FACTOR_3,
95+
OP_SOFT_KEYWORD, SK___PEG_PARSER__, OP_RETURN, A_FACTOR_3,
96+
OP_NAME, OP_RETURN, A_FACTOR_4,
8797
},
8898
},
8999
{"root",
@@ -132,7 +142,7 @@ call_action(Parser *p, Frame *_f, int _iaction)
132142
case A_EXPR_1:
133143
case A_TERM_1:
134144
case A_FACTOR_2:
135-
case A_FACTOR_3:
145+
case A_FACTOR_4:
136146
case A__GATHER_2_0:
137147
case A__GATHER_2_1:
138148
return _f->vals[0];
@@ -146,6 +156,8 @@ call_action(Parser *p, Frame *_f, int _iaction)
146156
return _f->vals[1];
147157
case A_FACTOR_1:
148158
return _Py_List ( _f->vals[1] , Load , EXTRA );
159+
case A_FACTOR_3:
160+
return RAISE_SYNTAX_ERROR("You found it!");
149161
default:
150162
assert(0);
151163
}

Tools/peg_generator/data/simple.gram

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,4 +14,5 @@ factor:
1414
| '(' a=expr ')' { a }
1515
| '[' a=','.expr+ ']' { _Py_List(a, Load, EXTRA) }
1616
| NUMBER
17+
| "__peg_parser__" { RAISE_SYNTAX_ERROR("You found it!") }
1718
| NAME

Tools/peg_generator/pegen/vm_generator.py

Lines changed: 36 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -68,17 +68,27 @@ def __init__(
6868
self.gen = parser_generator
6969
self.cache: Dict[Any, Any] = {}
7070
self.keyword_cache: Dict[str, int] = {}
71+
self.soft_keyword_cache: List[str] = []
7172

72-
def keyword_helper(self, keyword: str) -> int:
73+
def keyword_helper(self, keyword: str) -> Tuple[str, int]:
7374
if keyword not in self.keyword_cache:
7475
self.keyword_cache[keyword] = self.gen.keyword_type()
75-
return self.keyword_cache[keyword]
76+
return "OP_TOKEN", self.keyword_cache[keyword]
7677

77-
def visit_StringLeaf(self, node: StringLeaf) -> int:
78+
def soft_keyword_helper(self, keyword: str) -> Tuple[str, str]:
79+
if keyword not in self.soft_keyword_cache:
80+
self.soft_keyword_cache.append(keyword)
81+
return "OP_SOFT_KEYWORD", f"SK_{keyword.upper()}"
82+
83+
def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, Union[str, int]]:
7884
val = ast.literal_eval(node.value)
7985
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
80-
return self.keyword_helper(val)
81-
return token.EXACT_TOKEN_TYPES[val] # type: ignore [attr-defined]
86+
if node.value.endswith("'"):
87+
return self.keyword_helper(val)
88+
else:
89+
return self.soft_keyword_helper(val)
90+
tok_num = token.EXACT_TOKEN_TYPES[val]
91+
return "OP_TOKEN", token.tok_name[tok_num]
8292

8393
def visit_Repeat0(self, node: Repeat0) -> None:
8494
if node in self.cache:
@@ -133,6 +143,7 @@ def generate(self, filename: str) -> None:
133143
self.collect_todo()
134144
self.gather_actions()
135145
self._setup_keywords()
146+
self._setup_soft_keywords()
136147

137148
self.print("enum {")
138149
with self.indent():
@@ -194,6 +205,24 @@ def _setup_keywords(self) -> None:
194205
self.print("};")
195206
self.print()
196207

208+
def _setup_soft_keywords(self) -> None:
209+
soft_keywords = self.callmakervisitor.soft_keyword_cache
210+
if not soft_keywords:
211+
return
212+
213+
self.print("enum {")
214+
with self.indent():
215+
for soft_keyword in soft_keywords:
216+
self.print(f"SK_{soft_keyword.upper()},")
217+
self.print("};")
218+
self.print()
219+
self.print("static const char *soft_keywords[] = {")
220+
with self.indent():
221+
for soft_keyword in soft_keywords:
222+
self.print(f'"{soft_keyword}",')
223+
self.print("};")
224+
self.print()
225+
197226
def print_action_cases(self) -> None:
198227
unique_actions: Dict[str, List[str]] = defaultdict(list)
199228
for actionname, action in self.actions.items():
@@ -310,8 +339,8 @@ def visit_NameLeaf(self, node: NameLeaf) -> None:
310339
self.add_opcode("OP_RULE", self._get_rule_opcode(name))
311340

312341
def visit_StringLeaf(self, node: StringLeaf) -> None:
313-
token_type = self.callmakervisitor.visit(node)
314-
self.add_opcode("OP_TOKEN", token_type)
342+
op_pair = self.callmakervisitor.visit(node)
343+
self.add_opcode(*op_pair)
315344

316345
def handle_loop_rhs(
317346
self, node: Rhs, opcodes_by_alt: Dict[int, List[str]], collect_opcode: str,

0 commit comments

Comments
 (0)