Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 18 additions & 10 deletions src/ast/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ impl fmt::Display for Query {
format.fmt(f)?;
}
for pipe_operator in &self.pipe_operators {
f.write_str(" |> ")?;
f.write_str(" |>")?;
pipe_operator.fmt(f)?;
}
Ok(())
Expand Down Expand Up @@ -2680,28 +2680,32 @@ pub enum PipeOperator {
full_table_exprs: Vec<ExprWithAliasAndOrderBy>,
group_by_expr: Vec<ExprWithAliasAndOrderBy>,
},
/// Selects a random sample of rows from the input table.
/// Syntax: `|> TABLESAMPLE <method> (<size> {ROWS | PERCENT})`
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the officially supported spec from the paper but doesn't cover everything that is technically supported in this PR. I'm wondering how to best deal with that (see the open question in the PR description).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the doc comment doesn't necessarily need to spell out the full spec. Usually it's enough to give a rough example of what the statement looks like, e.g.

Syntax: `|> TABLESAMPLE BERNOULLI(50)`

/// See more at <https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#tablesample_pipe_operator>
TableSample { sample: Box <TableSample> },
}

impl fmt::Display for PipeOperator {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
PipeOperator::Select { exprs } => {
write!(f, "SELECT {}", display_comma_separated(exprs.as_slice()))
write!(f, " SELECT {}", display_comma_separated(exprs.as_slice()))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I think we'd ideally undo these changes. It's better for the operator display to be standalone where possible (i.e. operators shouldn't assume surrounding space formatting; rather, that should be left up to the caller). This makes it easier to compose nodes in the AST when displaying.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, I found a way to fix the formatting for TableSample in a different way that requires fewer changes and follows this general principle.

}
PipeOperator::Extend { exprs } => {
write!(f, "EXTEND {}", display_comma_separated(exprs.as_slice()))
write!(f, " EXTEND {}", display_comma_separated(exprs.as_slice()))
}
PipeOperator::Set { assignments } => {
write!(f, "SET {}", display_comma_separated(assignments.as_slice()))
write!(f, " SET {}", display_comma_separated(assignments.as_slice()))
}
PipeOperator::Drop { columns } => {
write!(f, "DROP {}", display_comma_separated(columns.as_slice()))
write!(f, " DROP {}", display_comma_separated(columns.as_slice()))
}
PipeOperator::As { alias } => {
write!(f, "AS {}", alias)
write!(f, " AS {}", alias)
}
PipeOperator::Limit { expr, offset } => {
write!(f, "LIMIT {}", expr)?;
write!(f, " LIMIT {}", expr)?;
if let Some(offset) = offset {
write!(f, " OFFSET {}", offset)?;
}
Expand All @@ -2711,7 +2715,7 @@ impl fmt::Display for PipeOperator {
full_table_exprs,
group_by_expr,
} => {
write!(f, "AGGREGATE")?;
write!(f, " AGGREGATE")?;
if !full_table_exprs.is_empty() {
write!(
f,
Expand All @@ -2726,10 +2730,14 @@ impl fmt::Display for PipeOperator {
}

PipeOperator::Where { expr } => {
write!(f, "WHERE {}", expr)
write!(f, " WHERE {}", expr)
}
PipeOperator::OrderBy { exprs } => {
write!(f, "ORDER BY {}", display_comma_separated(exprs.as_slice()))
write!(f, " ORDER BY {}", display_comma_separated(exprs.as_slice()))
}

PipeOperator::TableSample { sample } => {
write!(f, "{}", sample)
}
}
}
Expand Down
14 changes: 11 additions & 3 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11047,6 +11047,7 @@ impl<'a> Parser<'a> {
Keyword::LIMIT,
Keyword::AGGREGATE,
Keyword::ORDER,
Keyword::TABLESAMPLE,
])?;
match kw {
Keyword::SELECT => {
Expand Down Expand Up @@ -11109,6 +11110,10 @@ impl<'a> Parser<'a> {
let exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?;
pipe_operators.push(PipeOperator::OrderBy { exprs })
}
Keyword::TABLESAMPLE => {
let sample = self.parse_table_sample(TableSampleModifier::TableSample)?;
pipe_operators.push(PipeOperator::TableSample { sample });
}
unhandled => {
return Err(ParserError::ParserError(format!(
"`expect_one_of_keywords` further up allowed unhandled keyword: {unhandled:?}"
Expand Down Expand Up @@ -12753,8 +12758,11 @@ impl<'a> Parser<'a> {
} else {
return Ok(None);
};
self.parse_table_sample(modifier).map(|sample| Some(sample))
}

let name = match self.parse_one_of_keywords(&[
fn parse_table_sample(&mut self, modifier: TableSampleModifier ) -> Result<Box<TableSample>, ParserError> {
let name = match self.parse_one_of_keywords(&[
Keyword::BERNOULLI,
Keyword::ROW,
Keyword::SYSTEM,
Expand Down Expand Up @@ -12835,14 +12843,14 @@ impl<'a> Parser<'a> {
None
};

Ok(Some(Box::new(TableSample {
Ok(Box::new(TableSample {
modifier,
name,
quantity,
seed,
bucket,
offset,
})))
}))
}

fn parse_table_sample_seed(
Expand Down
6 changes: 6 additions & 0 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15155,6 +15155,12 @@ fn parse_pipeline_operator() {
dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC");
dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC, name ASC");

// tablesample pipe operator
dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE BERNOULLI (50)");
dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50)");
// TODO: Technically, REPEATABLE is not available in BigQuery, but it is used with TABLESAMPLE in other dialects
dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50) REPEATABLE (10)");

// many pipes
dialects.verified_stmt(
"SELECT * FROM CustomerOrders |> AGGREGATE SUM(cost) AS total_cost GROUP BY customer_id, state, item_type |> EXTEND COUNT(*) OVER (PARTITION BY customer_id) AS num_orders |> WHERE num_orders > 1 |> AGGREGATE AVG(total_cost) AS average GROUP BY state DESC, item_type ASC",
Expand Down