Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Lib/test/test_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,9 +224,9 @@ def testSyntaxErrorOffset(self):
# Errors thrown by tokenizer.c
check('(0x+1)', 1, 3)
check('x = 0xI', 1, 6)
check('0010 + 2', 1, 4)
check('0010 + 2', 1, 1)
check('x = 32e-+4', 1, 8)
check('x = 0o9', 1, 6)
check('x = 0o9', 1, 7)
check('\u03b1 = 0xI', 1, 6)
check(b'\xce\xb1 = 0xI', 1, 6)
check(b'# -*- coding: iso8859-7 -*-\n\xe1 = 0xI', 2, 6,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improve tokenizer errors with more accurate locations. Patch by Pablo Galindo.
73 changes: 56 additions & 17 deletions Parser/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -1067,19 +1067,13 @@ tok_backup(struct tok_state *tok, int c)
}
}


static int
syntaxerror(struct tok_state *tok, const char *format, ...)
_syntaxerror_range(struct tok_state *tok, const char *format,
int col_offset, int end_col_offset,
va_list vargs)
{
PyObject *errmsg, *errtext, *args;
va_list vargs;
#ifdef HAVE_STDARG_PROTOTYPES
va_start(vargs, format);
#else
va_start(vargs);
#endif
errmsg = PyUnicode_FromFormatV(format, vargs);
va_end(vargs);
if (!errmsg) {
goto error;
}
Expand All @@ -1089,7 +1083,14 @@ syntaxerror(struct tok_state *tok, const char *format, ...)
if (!errtext) {
goto error;
}
int offset = (int)PyUnicode_GET_LENGTH(errtext);

if (col_offset == 0) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if error occurred at the beginning of the line?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if error occurred at the beginning of the line?

Oh, I was thinking here about line numbers. This should have been -1. Thanks for the catch.

col_offset = (int)PyUnicode_GET_LENGTH(errtext);
}
if (end_col_offset == 0) {
col_offset = col_offset;
}

Py_ssize_t line_len = strcspn(tok->line_start, "\n");
if (line_len != tok->cur - tok->line_start) {
Py_DECREF(errtext);
Expand All @@ -1100,8 +1101,8 @@ syntaxerror(struct tok_state *tok, const char *format, ...)
goto error;
}

args = Py_BuildValue("(O(OiiN))", errmsg,
tok->filename, tok->lineno, offset, errtext);
args = Py_BuildValue("(O(OiiNii))", errmsg, tok->filename, tok->lineno,
col_offset, errtext, tok->lineno, end_col_offset);
if (args) {
PyErr_SetObject(PyExc_SyntaxError, args);
Py_DECREF(args);
Expand All @@ -1113,6 +1114,38 @@ syntaxerror(struct tok_state *tok, const char *format, ...)
return ERRORTOKEN;
}

/* Report a tokenizer syntax error without an explicit column range.
 *
 * Collects the variadic arguments that go with `format` and hands them to
 * _syntaxerror_range(), passing 0 for both the start and end column offsets.
 * The value returned by _syntaxerror_range() is returned unchanged.
 */
static int
syntaxerror(struct tok_state *tok, const char *format, ...)
{
    int status;
    va_list ap;

#ifdef HAVE_STDARG_PROTOTYPES
    va_start(ap, format);
#else
    va_start(ap);
#endif
    /* No caller-supplied range: both offsets are passed as 0. */
    status = _syntaxerror_range(tok, format, 0, 0, ap);
    va_end(ap);

    return status;
}

/* Report a tokenizer syntax error for a caller-supplied column range.
 *
 * `col_offset` and `end_col_offset` are forwarded as-is to
 * _syntaxerror_range(), together with `format` and its variadic arguments.
 * The value returned by _syntaxerror_range() is returned unchanged.
 */
static int
syntaxerror_known_range(struct tok_state *tok,
                        int col_offset, int end_col_offset,
                        const char *format, ...)
{
    int status;
    va_list ap;

#ifdef HAVE_STDARG_PROTOTYPES
    va_start(ap, format);
#else
    va_start(ap);
#endif
    status = _syntaxerror_range(tok, format,
                                col_offset, end_col_offset, ap);
    va_end(ap);

    return status;
}



static int
indenterror(struct tok_state *tok)
{
Expand Down Expand Up @@ -1552,6 +1585,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
/* Number */
if (isdigit(c)) {
if (c == '0') {
const char* number_start = tok->cur;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can tok->start be used instead of a new variable?

/* Hex, octal or binary -- maybe. */
c = tok_nextc(tok);
if (c == 'x' || c == 'X') {
Expand All @@ -1578,12 +1612,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
c = tok_nextc(tok);
}
if (c < '0' || c >= '8') {
tok_backup(tok, c);
if (isdigit(c)) {
return syntaxerror(tok,
"invalid digit '%c' in octal literal", c);
}
else {
tok_backup(tok, c);
return syntaxerror(tok, "invalid octal literal");
}
}
Expand All @@ -1606,6 +1640,8 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
if (c != '0' && c != '1') {
tok_backup(tok, c);
if (isdigit(c)) {
// Move to the actual current token that is incorrect
tok_nextc(tok);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above, tok_nextc() cancels tok_backup().

return syntaxerror(tok,
"invalid digit '%c' in binary literal", c);
}
Expand Down Expand Up @@ -1639,6 +1675,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
}
c = tok_nextc(tok);
}
char* zeros_end = tok->cur;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can tok->start be used instead of a new variable?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not here, because we want to highlight only the zeros, and tok->cur points at the end of the number.

if (isdigit(c)) {
nonzero = 1;
c = tok_decimal_tail(tok);
Expand All @@ -1659,10 +1696,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
else if (nonzero) {
/* Old-style octal: now disallowed. */
tok_backup(tok, c);
return syntaxerror(tok,
"leading zeros in decimal integer "
"literals are not permitted; "
"use an 0o prefix for octal integers");
return syntaxerror_known_range(
tok, number_start - tok->line_start,
zeros_end - tok->line_start,
"leading zeros in decimal integer "
"literals are not permitted; "
"use an 0o prefix for octal integers");
}
}
}
Expand Down