apache · iffyio · May 30, 2025 · May 25, 2025 · May 26, 2025 · May 28, 2025
diff --git a/src/ast/query.rs b/src/ast/query.rs
@@ -104,7 +104,7 @@ impl fmt::Display for Query {
             format.fmt(f)?;
         }
         for pipe_operator in &self.pipe_operators {
-            f.write_str(" |> ")?;
+            f.write_str(" |>")?;
             pipe_operator.fmt(f)?;
         }
         Ok(())
@@ -2680,28 +2680,32 @@ pub enum PipeOperator {
         full_table_exprs: Vec<ExprWithAliasAndOrderBy>,
         group_by_expr: Vec<ExprWithAliasAndOrderBy>,
     },
+    /// Selects a random sample of rows from the input table.
+    /// Syntax: `|> TABLESAMPLE <method> (<size> {ROWS | PERCENT})`
+    /// See more at <https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#tablesample_pipe_operator>
+    TableSample { sample: Box<TableSample> },
 }
 
 impl fmt::Display for PipeOperator {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         match self {
             PipeOperator::Select { exprs } => {
-                write!(f, "SELECT {}", display_comma_separated(exprs.as_slice()))
+                write!(f, " SELECT {}", display_comma_separated(exprs.as_slice()))
             }
             PipeOperator::Extend { exprs } => {
-                write!(f, "EXTEND {}", display_comma_separated(exprs.as_slice()))
+                write!(f, " EXTEND {}", display_comma_separated(exprs.as_slice()))
             }
             PipeOperator::Set { assignments } => {
-                write!(f, "SET {}", display_comma_separated(assignments.as_slice()))
+                write!(f, " SET {}", display_comma_separated(assignments.as_slice()))
             }
             PipeOperator::Drop { columns } => {
-                write!(f, "DROP {}", display_comma_separated(columns.as_slice()))
+                write!(f, " DROP {}", display_comma_separated(columns.as_slice()))
             }
             PipeOperator::As { alias } => {
-                write!(f, "AS {}", alias)
+                write!(f, " AS {}", alias)
             }
             PipeOperator::Limit { expr, offset } => {
-                write!(f, "LIMIT {}", expr)?;
+                write!(f, " LIMIT {}", expr)?;
                 if let Some(offset) = offset {
                     write!(f, " OFFSET {}", offset)?;
                 }
@@ -2711,7 +2715,7 @@ impl fmt::Display for PipeOperator {
                 full_table_exprs,
                 group_by_expr,
             } => {
-                write!(f, "AGGREGATE")?;
+                write!(f, " AGGREGATE")?;
                 if !full_table_exprs.is_empty() {
                     write!(
                         f,
@@ -2726,10 +2730,14 @@ impl fmt::Display for PipeOperator {
             }
 
             PipeOperator::Where { expr } => {
-                write!(f, "WHERE {}", expr)
+                write!(f, " WHERE {}", expr)
             }
             PipeOperator::OrderBy { exprs } => {
-                write!(f, "ORDER BY {}", display_comma_separated(exprs.as_slice()))
+                write!(f, " ORDER BY {}", display_comma_separated(exprs.as_slice()))
+            }
+            // No leading space, unlike other arms: `TableSample`'s Display presumably adds it.
+            PipeOperator::TableSample { sample } => {
+                write!(f, "{}", sample)
             }
         }
     }

diff --git a/src/parser/mod.rs b/src/parser/mod.rs
@@ -11047,6 +11047,7 @@ impl<'a> Parser<'a> {
                 Keyword::LIMIT,
                 Keyword::AGGREGATE,
                 Keyword::ORDER,
+                Keyword::TABLESAMPLE,
             ])?;
             match kw {
                 Keyword::SELECT => {
@@ -11109,6 +11110,10 @@ impl<'a> Parser<'a> {
                     let exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?;
                     pipe_operators.push(PipeOperator::OrderBy { exprs })
                 }
+                Keyword::TABLESAMPLE => {
+                    let sample = self.parse_table_sample(TableSampleModifier::TableSample)?;
+                    pipe_operators.push(PipeOperator::TableSample { sample });
+                }
                 unhandled => {
                     return Err(ParserError::ParserError(format!(
                     "`expect_one_of_keywords` further up allowed unhandled keyword: {unhandled:?}"
@@ -12753,8 +12758,16 @@ impl<'a> Parser<'a> {
         } else {
             return Ok(None);
         };
+        self.parse_table_sample(modifier).map(Some)
+    }
 
+    /// Parses the remainder of a table sample clause once its leading keyword
+    /// (given as `modifier`) has been consumed, e.g. by the `|> TABLESAMPLE` pipe operator.
+    fn parse_table_sample(
+        &mut self,
+        modifier: TableSampleModifier,
+    ) -> Result<Box<TableSample>, ParserError> {
         let name = match self.parse_one_of_keywords(&[
             Keyword::BERNOULLI,
             Keyword::ROW,
             Keyword::SYSTEM,
@@ -12835,14 +12848,14 @@ impl<'a> Parser<'a> {
             None
         };
 
-        Ok(Some(Box::new(TableSample {
+        Ok(Box::new(TableSample {
             modifier,
             name,
             quantity,
             seed,
             bucket,
             offset,
-        })))
+        }))
     }
 
     fn parse_table_sample_seed(

diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
@@ -15155,6 +15155,12 @@ fn parse_pipeline_operator() {
     dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC");
     dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC, name ASC");
 
+    // tablesample pipe operator
+    dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE BERNOULLI (50)");
+    dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50)");
+    // TODO: Technically, REPEATABLE is not available in BigQuery, but it is used with TABLESAMPLE in other dialects
+    dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50) REPEATABLE (10)");
+
     // many pipes
     dialects.verified_stmt(
         "SELECT * FROM CustomerOrders |> AGGREGATE SUM(cost) AS total_cost GROUP BY customer_id, state, item_type |> EXTEND COUNT(*) OVER (PARTITION BY customer_id) AS num_orders |> WHERE num_orders > 1 |> AGGREGATE AVG(total_cost) AS average GROUP BY state DESC, item_type ASC",