Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/ruff_python_parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ ruff_source_file = { workspace = true }

anyhow = { workspace = true }
insta = { workspace = true, features = ["glob"] }
itertools = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
walkdir = { workspace = true }
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Regression test for https://github.com/astral-sh/ty/issues/1828
(c: int = 1,f"""{d=[
def a(
class A:
pass
57 changes: 45 additions & 12 deletions crates/ruff_python_parser/src/token_source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,26 +67,59 @@ impl<'src> TokenSource<'src> {
///
/// [`re_lex_logical_token`]: Lexer::re_lex_logical_token
pub(crate) fn re_lex_logical_token(&mut self) {
let mut non_logical_newline_start = None;
for token in self.tokens.iter().rev() {
let mut non_logical_newline = None;

#[cfg(debug_assertions)]
let last_non_trivia_end_before = {
self.tokens
.iter()
.rev()
.find(|tok| !tok.kind().is_trivia())
.map(ruff_text_size::Ranged::end)
};

for (index, token) in self.tokens.iter().enumerate().rev() {
match token.kind() {
TokenKind::NonLogicalNewline => {
non_logical_newline_start = Some(token.start());
non_logical_newline = Some((index, token.start()));
}
TokenKind::Comment => continue,
_ => break,
}
}

if self.lexer.re_lex_logical_token(non_logical_newline_start) {
let current_start = self.current_range().start();
while self
.tokens
.last()
.is_some_and(|last| last.start() >= current_start)
{
self.tokens.pop();
}
if !self
.lexer
.re_lex_logical_token(non_logical_newline.map(|(_, start)| start))
{
return;
}

let non_logical_line_index = non_logical_newline
.expect(
"`re_lex_logical_token` should only return `true` if `non_logical_line` is `Some`",
)
.0;

// Trim the already bumped logical line token (and comments coming after it) as it might now have become a logical line token
self.tokens.truncate(non_logical_line_index);

#[cfg(debug_assertions)]
{
let last_non_trivia_end_now = {
self.tokens
.iter()
.rev()
.find(|tok| !tok.kind().is_trivia())
.map(ruff_text_size::Ranged::end)
};

assert_eq!(last_non_trivia_end_before, last_non_trivia_end_now);
}

// Ensure `current` is positioned at a non-trivia token.
if self.current_kind().is_trivia() {
self.bump(self.current_kind());
}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this the main part of the fix based on the PR description?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the other changes are mainly added assertions and some safeguarding to ensure that we never remove any tokens that come before the non-logical newline

}

Expand Down
65 changes: 58 additions & 7 deletions crates/ruff_python_parser/tests/fixtures.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,16 @@ use std::fmt::{Formatter, Write};
use std::fs;
use std::path::Path;

use itertools::Itertools;
use ruff_annotate_snippets::{Level, Renderer, Snippet};
use ruff_python_ast::token::Token;
use ruff_python_ast::token::{Token, Tokens};
use ruff_python_ast::visitor::Visitor;
use ruff_python_ast::visitor::source_order::{SourceOrderVisitor, TraversalSignal, walk_module};
use ruff_python_ast::{self as ast, AnyNodeRef, Mod, PythonVersion};
use ruff_python_parser::semantic_errors::{
SemanticSyntaxChecker, SemanticSyntaxContext, SemanticSyntaxError,
};
use ruff_python_parser::{Mode, ParseErrorType, ParseOptions, parse_unchecked};
use ruff_python_parser::{Mode, ParseErrorType, ParseOptions, Parsed, parse_unchecked};
use ruff_source_file::{LineIndex, OneIndexed, SourceCode};
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};

Expand Down Expand Up @@ -81,7 +82,7 @@ fn test_valid_syntax(input_path: &Path) {
}

validate_tokens(parsed.tokens(), source.text_len(), input_path);
validate_ast(parsed.syntax(), source.text_len(), input_path);
validate_ast(&parsed, source.text_len(), input_path);

let mut output = String::new();
writeln!(&mut output, "## AST").unwrap();
Expand Down Expand Up @@ -139,7 +140,7 @@ fn test_invalid_syntax(input_path: &Path) {
let parsed = parse_unchecked(&source, options.clone());

validate_tokens(parsed.tokens(), source.text_len(), input_path);
validate_ast(parsed.syntax(), source.text_len(), input_path);
validate_ast(&parsed, source.text_len(), input_path);

let mut output = String::new();
writeln!(&mut output, "## AST").unwrap();
Expand Down Expand Up @@ -402,21 +403,26 @@ Tokens: {tokens:#?}
/// * the range of the parent node fully encloses all its child nodes
/// * the ranges are strictly increasing when traversing the nodes in pre-order.
/// * all ranges are within the length of the source code.
fn validate_ast(root: &Mod, source_len: TextSize, test_path: &Path) {
walk_module(&mut ValidateAstVisitor::new(source_len, test_path), root);
fn validate_ast(parsed: &Parsed<Mod>, source_len: TextSize, test_path: &Path) {
walk_module(
&mut ValidateAstVisitor::new(parsed.tokens(), source_len, test_path),
parsed.syntax(),
);
}

/// Source-order visitor that validates structural invariants of a parsed AST
/// (nesting, ordering, and that node boundaries don't fall inside tokens).
#[derive(Debug)]
struct ValidateAstVisitor<'a> {
    /// Peekable cursor over the file's token stream; advanced as nodes are
    /// entered/left to check that node boundaries never split a token.
    tokens: std::iter::Peekable<std::slice::Iter<'a, Token>>,
    /// Stack of ancestors of the node currently being visited.
    parents: Vec<AnyNodeRef<'a>>,
    /// The most recently left node (set in `leave_node`).
    previous: Option<AnyNodeRef<'a>>,
    /// Total length of the source text; node ranges must stay within it.
    source_length: TextSize,
    /// Path of the fixture under test, included in assertion messages.
    test_path: &'a Path,
}

impl<'a> ValidateAstVisitor<'a> {
fn new(source_length: TextSize, test_path: &'a Path) -> Self {
fn new(tokens: &'a Tokens, source_length: TextSize, test_path: &'a Path) -> Self {
Self {
tokens: tokens.iter().peekable(),
parents: Vec::new(),
previous: None,
source_length,
Expand All @@ -425,6 +431,47 @@ impl<'a> ValidateAstVisitor<'a> {
}
}

impl ValidateAstVisitor<'_> {
    /// Verify that the node's start position lands on a token boundary
    /// (i.e. it doesn't cut a token in half). Invoked from `enter_node`
    /// before the node's children are traversed.
    fn assert_start_boundary(&mut self, node: AnyNodeRef<'_>) {
        // Consume every token that ends at or before the node's start.
        while self
            .tokens
            .peek()
            .is_some_and(|token| token.end() <= node.start())
        {
            self.tokens.next();
        }

        // The first remaining token (if any) satisfies `token.end() > node.start()`,
        // so it must begin at or after the node's start.
        if let Some(next) = self.tokens.peek() {
            assert!(
                next.start() >= node.start(),
                "{path}: The start of the node falls within a token.\nNode: {node:#?}\n\nToken: {next:#?}\n\nRoot: {root:#?}",
                path = self.test_path.display(),
                root = self.parents.first()
            );
        }
    }

    /// Verify that the node's end position lands on a token boundary.
    /// Invoked from `leave_node` after the children have been visited,
    /// so every token lying inside the node has already been consumed.
    fn assert_end_boundary(&mut self, node: AnyNodeRef<'_>) {
        // Consume every token that ends at or before the node's end.
        while self
            .tokens
            .peek()
            .is_some_and(|token| token.end() <= node.end())
        {
            self.tokens.next();
        }

        // The first remaining token (if any) satisfies `token.end() > node.end()`,
        // so it must begin at or after the node's end.
        if let Some(next) = self.tokens.peek() {
            assert!(
                next.start() >= node.end(),
                "{path}: The end of the node falls within a token.\nNode: {node:#?}\n\nToken: {next:#?}\n\nRoot: {root:#?}",
                path = self.test_path.display(),
                root = self.parents.first()
            );
        }
    }
}

impl<'ast> SourceOrderVisitor<'ast> for ValidateAstVisitor<'ast> {
fn enter_node(&mut self, node: AnyNodeRef<'ast>) -> TraversalSignal {
assert!(
Expand Down Expand Up @@ -452,12 +499,16 @@ impl<'ast> SourceOrderVisitor<'ast> for ValidateAstVisitor<'ast> {
);
}

self.assert_start_boundary(node);

self.parents.push(node);

TraversalSignal::Traverse
}

fn leave_node(&mut self, node: AnyNodeRef<'ast>) {
self.assert_end_boundary(node);

self.parents.pop().expect("Expected tree to be balanced");

self.previous = Some(node);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ Module(
test: Call(
ExprCall {
node_index: NodeIndex(None),
range: 456..472,
range: 456..471,
func: Name(
ExprName {
node_index: NodeIndex(None),
Expand All @@ -306,7 +306,7 @@ Module(
},
),
arguments: Arguments {
range: 460..472,
range: 460..471,
node_index: NodeIndex(None),
args: [
Name(
Expand Down Expand Up @@ -581,7 +581,7 @@ Module(
test: Call(
ExprCall {
node_index: NodeIndex(None),
range: 890..906,
range: 890..905,
func: Name(
ExprName {
node_index: NodeIndex(None),
Expand All @@ -591,7 +591,7 @@ Module(
},
),
arguments: Arguments {
range: 894..906,
range: 894..905,
node_index: NodeIndex(None),
args: [
FString(
Expand Down Expand Up @@ -832,7 +832,16 @@ Module(
|
28 | # The lexer is nested with multiple levels of parentheses
29 | if call(foo, [a, b
| ^ Syntax Error: Expected `]`, found NonLogicalNewline
30 | def bar():
| ^^^ Syntax Error: Expected `]`, found `def`
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find those error messages more accurate. We only expect a `]` because `def` is a keyword that starts a new statement. It's otherwise perfectly fine to write a list over multiple lines.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, but then the following error message points at this location "expecting ), found Newline". I think this is still an improvement but just found this a bit inconsistent. No need to change anything here.

31 | pass
|


|
28 | # The lexer is nested with multiple levels of parentheses
29 | if call(foo, [a, b
| ^ Syntax Error: Expected `)`, found newline
30 | def bar():
31 | pass
|
Expand All @@ -857,11 +866,10 @@ Module(


|
41 | # test is to make sure it emits a `NonLogicalNewline` token after `b`.
42 | if call(foo, [a,
43 | b
| ^ Syntax Error: Expected `]`, found NonLogicalNewline
44 | )
| ^ Syntax Error: Expected `]`, found `)`
45 | def bar():
46 | pass
|
Expand Down Expand Up @@ -898,7 +906,7 @@ Module(
|
49 | # F-strings uses normal list parsing, so test those as well
50 | if call(f"hello {x
| ^ Syntax Error: Expected FStringEnd, found NonLogicalNewline
| ^ Syntax Error: Expected `)`, found newline
51 | def bar():
52 | pass
|
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Module(
test: Call(
ExprCall {
node_index: NodeIndex(None),
range: 3..19,
range: 3..18,
func: Name(
ExprName {
node_index: NodeIndex(None),
Expand All @@ -27,7 +27,7 @@ Module(
},
),
arguments: Arguments {
range: 7..19,
range: 7..18,
node_index: NodeIndex(None),
args: [
Name(
Expand Down Expand Up @@ -113,5 +113,11 @@ Module(

|
1 | if call(foo, [a, b def bar(): pass
| ^ Syntax Error: Expected `]`, found NonLogicalNewline
| ^^^ Syntax Error: Expected `]`, found `def`
|


|
1 | if call(foo, [a, b def bar(): pass
| ^ Syntax Error: Expected `)`, found newline
Comment on lines +121 to +122
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is bad but it's inherent to how we do recovery. The issue is that nesting is still greater than 1 when we're recovering in [ so that the NonLogicalNewline isn't relexed as a Newline. However, it is later re-lexed when we recover from the unclosed (, which feels a bit inconsistent.

|
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Module(
test: Call(
ExprCall {
node_index: NodeIndex(None),
range: 3..20,
range: 3..18,
func: Name(
ExprName {
node_index: NodeIndex(None),
Expand All @@ -27,7 +27,7 @@ Module(
},
),
arguments: Arguments {
range: 7..20,
range: 7..18,
node_index: NodeIndex(None),
args: [
Name(
Expand Down Expand Up @@ -113,7 +113,15 @@ Module(

|
1 | if call(foo, [a, b
| ^ Syntax Error: Expected `]`, found NonLogicalNewline
2 | def bar():
| ^^^ Syntax Error: Expected `]`, found `def`
3 | pass
|


|
1 | if call(foo, [a, b
| ^ Syntax Error: Expected `)`, found newline
2 | def bar():
3 | pass
|
Loading