-
-
Notifications
You must be signed in to change notification settings - Fork 32.7k
bpo-44317: Improve tokenizer errors with more informative locations #26555
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
e9b20c5
ec8a7c5
ffc3279
c66a1f0
4efe8e3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Improve tokenizer errors with more informative locations. Patch by Pablo Galindo. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1067,19 +1067,13 @@ tok_backup(struct tok_state *tok, int c) | |
} | ||
} | ||
|
||
|
||
static int | ||
syntaxerror(struct tok_state *tok, const char *format, ...) | ||
_syntaxerror_range(struct tok_state *tok, const char *format, | ||
int col_offset, int end_col_offset, | ||
va_list vargs) | ||
{ | ||
PyObject *errmsg, *errtext, *args; | ||
va_list vargs; | ||
#ifdef HAVE_STDARG_PROTOTYPES | ||
va_start(vargs, format); | ||
#else | ||
va_start(vargs); | ||
#endif | ||
errmsg = PyUnicode_FromFormatV(format, vargs); | ||
va_end(vargs); | ||
if (!errmsg) { | ||
goto error; | ||
} | ||
|
@@ -1089,7 +1083,14 @@ syntaxerror(struct tok_state *tok, const char *format, ...) | |
if (!errtext) { | ||
goto error; | ||
} | ||
int offset = (int)PyUnicode_GET_LENGTH(errtext); | ||
|
||
if (col_offset == 0) { | ||
col_offset = (int)PyUnicode_GET_LENGTH(errtext); | ||
} | ||
if (end_col_offset == 0) { | ||
col_offset = col_offset; | ||
} | ||
|
||
Py_ssize_t line_len = strcspn(tok->line_start, "\n"); | ||
if (line_len != tok->cur - tok->line_start) { | ||
Py_DECREF(errtext); | ||
|
@@ -1100,8 +1101,8 @@ syntaxerror(struct tok_state *tok, const char *format, ...) | |
goto error; | ||
} | ||
|
||
args = Py_BuildValue("(O(OiiN))", errmsg, | ||
tok->filename, tok->lineno, offset, errtext); | ||
args = Py_BuildValue("(O(OiiNii))", errmsg, tok->filename, tok->lineno, | ||
col_offset, errtext, tok->lineno, end_col_offset); | ||
if (args) { | ||
PyErr_SetObject(PyExc_SyntaxError, args); | ||
Py_DECREF(args); | ||
|
@@ -1113,6 +1114,38 @@ syntaxerror(struct tok_state *tok, const char *format, ...) | |
return ERRORTOKEN; | ||
} | ||
|
||
/* Report a tokenizer error whose message is built from a printf-style
 * `format` plus variadic arguments.
 *
 * This is a thin varargs front end: it packages the arguments into a va_list,
 * forwards them to _syntaxerror_range() with both column offsets passed as 0,
 * and returns that helper's result unchanged. */
static int | ||
syntaxerror(struct tok_state *tok, const char *format, ...) | ||
{ | ||
va_list vargs; | ||
/* va_start is given the last named parameter only when
 * HAVE_STDARG_PROTOTYPES is defined; otherwise the one-argument form is used. */
#ifdef HAVE_STDARG_PROTOTYPES | ||
va_start(vargs, format); | ||
#else | ||
va_start(vargs); | ||
#endif | ||
/* 0, 0: this entry point supplies no explicit range.
 * NOTE(review): the visible part of _syntaxerror_range() replaces a zero
 * col_offset with the length of errtext; 0 is also a valid column, so the
 * choice of sentinel should be confirmed against that helper. */
int ret = _syntaxerror_range(tok, format, 0, 0, vargs); | ||
/* Pairs with the va_start above. */
va_end(vargs); | ||
return ret; | ||
} | ||
|
||
/* Report a tokenizer error like syntaxerror(), but with a caller-supplied
 * column range.
 *
 * `col_offset` and `end_col_offset` are passed through unchanged to
 * _syntaxerror_range(); `format` and the variadic arguments build the message.
 * Returns whatever _syntaxerror_range() returns.
 *
 * NOTE(review): the caller visible in this file passes pointer differences
 * measured from tok->line_start; the exact unit/base expected by the helper
 * should be confirmed there. */
static int | ||
syntaxerror_known_range(struct tok_state *tok, | ||
int col_offset, int end_col_offset, | ||
const char *format, ...) | ||
{ | ||
va_list vargs; | ||
/* `format` is the last named parameter (the offsets precede it), so it is
 * the anchor handed to va_start when HAVE_STDARG_PROTOTYPES is defined. */
#ifdef HAVE_STDARG_PROTOTYPES | ||
va_start(vargs, format); | ||
#else | ||
va_start(vargs); | ||
#endif | ||
int ret = _syntaxerror_range(tok, format, col_offset, end_col_offset, vargs); | ||
/* Pairs with the va_start above. */
va_end(vargs); | ||
return ret; | ||
} | ||
|
||
|
||
|
||
static int | ||
indenterror(struct tok_state *tok) | ||
{ | ||
|
@@ -1552,6 +1585,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) | |
/* Number */ | ||
if (isdigit(c)) { | ||
if (c == '0') { | ||
const char* number_start = tok->cur; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Cat |
||
/* Hex, octal or binary -- maybe. */ | ||
c = tok_nextc(tok); | ||
if (c == 'x' || c == 'X') { | ||
|
@@ -1578,12 +1612,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) | |
c = tok_nextc(tok); | ||
} | ||
if (c < '0' || c >= '8') { | ||
tok_backup(tok, c); | ||
if (isdigit(c)) { | ||
return syntaxerror(tok, | ||
"invalid digit '%c' in octal literal", c); | ||
} | ||
else { | ||
tok_backup(tok, c); | ||
return syntaxerror(tok, "invalid octal literal"); | ||
} | ||
} | ||
|
@@ -1606,6 +1640,8 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) | |
if (c != '0' && c != '1') { | ||
tok_backup(tok, c); | ||
if (isdigit(c)) { | ||
// Move to the actual current token that is incorrect | ||
tok_nextc(tok); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same as above, |
||
return syntaxerror(tok, | ||
"invalid digit '%c' in binary literal", c); | ||
} | ||
|
@@ -1639,6 +1675,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) | |
} | ||
c = tok_nextc(tok); | ||
} | ||
char* zeros_end = tok->cur; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Cat There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not here because we want to highlight only the zeros and |
||
if (isdigit(c)) { | ||
nonzero = 1; | ||
c = tok_decimal_tail(tok); | ||
|
@@ -1659,10 +1696,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) | |
else if (nonzero) { | ||
/* Old-style octal: now disallowed. */ | ||
tok_backup(tok, c); | ||
return syntaxerror(tok, | ||
"leading zeros in decimal integer " | ||
"literals are not permitted; " | ||
"use an 0o prefix for octal integers"); | ||
return syntaxerror_known_range( | ||
tok, number_start - tok->line_start, | ||
pablogsal marked this conversation as resolved.
Show resolved
Hide resolved
|
||
zeros_end - tok->line_start, | ||
pablogsal marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"leading zeros in decimal integer " | ||
"literals are not permitted; " | ||
"use an 0o prefix for octal integers"); | ||
} | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What if error occurred at the beginning of the line?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, I was thinking here about line numbers. This should have been `-1`. Thanks for the catch.