Skip to content

Fix: JOIN should require ON condition #1552

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions src/ast/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1852,6 +1852,27 @@ pub enum JoinOperator {
},
}

impl JoinOperator {
    /// Returns an owned copy of the [`JoinConstraint`] attached to this operator.
    ///
    /// `CrossJoin`, `CrossApply` and `OuterApply` carry no constraint, so
    /// [`JoinConstraint::None`] is returned for them. Every other variant
    /// yields a clone of the constraint it stores.
    pub fn constraint(&self) -> JoinConstraint {
        let carried = match self {
            // Constraint-less operators: nothing to clone.
            Self::CrossJoin | Self::CrossApply | Self::OuterApply => {
                return JoinConstraint::None;
            }
            Self::AsOf { constraint: c, .. }
            | Self::Inner(c)
            | Self::LeftOuter(c)
            | Self::RightOuter(c)
            | Self::FullOuter(c)
            | Self::Semi(c)
            | Self::LeftSemi(c)
            | Self::RightSemi(c)
            | Self::Anti(c)
            | Self::LeftAnti(c)
            | Self::RightAnti(c) => c,
        };
        carried.clone()
    }
}

#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
Expand Down
13 changes: 12 additions & 1 deletion src/dialect/ansi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::dialect::Dialect;
use crate::{ast::JoinOperator, dialect::Dialect};

/// A [`Dialect`] for [ANSI SQL](https://en.wikipedia.org/wiki/SQL:2011).
#[derive(Debug)]
Expand All @@ -33,4 +33,15 @@ impl Dialect for AnsiDialect {
fn require_interval_qualifier(&self) -> bool {
// The ANSI dialect answers `true` unconditionally.
// NOTE(review): how the parser consumes this flag is not visible in this
// hunk — confirm against the `Dialect` trait documentation.
true
}

/// Reports whether `join_operator` is accepted by the ANSI dialect.
///
/// Inner, left/right/full outer and cross joins are accepted; every other
/// operator is rejected.
fn verify_join_operator(&self, join_operator: &JoinOperator) -> bool {
    match join_operator {
        JoinOperator::CrossJoin
        | JoinOperator::Inner(_)
        | JoinOperator::LeftOuter(_)
        | JoinOperator::RightOuter(_)
        | JoinOperator::FullOuter(_) => true,
        JoinOperator::Semi(_)
        | JoinOperator::LeftSemi(_)
        | JoinOperator::RightSemi(_)
        | JoinOperator::Anti(_)
        | JoinOperator::LeftAnti(_)
        | JoinOperator::RightAnti(_)
        | JoinOperator::AsOf { .. }
        | JoinOperator::CrossApply
        | JoinOperator::OuterApply => false,
    }
}
}
31 changes: 30 additions & 1 deletion src/dialect/bigquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@
// specific language governing permissions and limitations
// under the License.

use crate::dialect::Dialect;
use crate::{
ast::{JoinConstraint, JoinOperator},
dialect::Dialect,
};

/// A [`Dialect`] for [Google Bigquery](https://cloud.google.com/bigquery/)
#[derive(Debug, Default)]
Expand Down Expand Up @@ -72,4 +75,30 @@ impl Dialect for BigQueryDialect {
fn require_interval_qualifier(&self) -> bool {
// The BigQuery dialect answers `true` unconditionally.
// NOTE(review): how the parser consumes this flag is not visible in this
// hunk — confirm against the `Dialect` trait documentation.
true
}

/// Reports whether `join_operator` is accepted by the BigQuery dialect.
///
/// Inner, left/right/full outer and cross joins are accepted; every other
/// operator is rejected.
///
/// See <https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#join_types>
fn verify_join_operator(&self, join_operator: &JoinOperator) -> bool {
    match join_operator {
        JoinOperator::CrossJoin
        | JoinOperator::Inner(_)
        | JoinOperator::LeftOuter(_)
        | JoinOperator::RightOuter(_)
        | JoinOperator::FullOuter(_) => true,
        JoinOperator::Semi(_)
        | JoinOperator::LeftSemi(_)
        | JoinOperator::RightSemi(_)
        | JoinOperator::Anti(_)
        | JoinOperator::LeftAnti(_)
        | JoinOperator::RightAnti(_)
        | JoinOperator::AsOf { .. }
        | JoinOperator::CrossApply
        | JoinOperator::OuterApply => false,
    }
}

/// Reports whether the constraint carried by `join_operator` is valid for
/// the BigQuery dialect.
///
/// * `NATURAL` is always rejected.
/// * `ON` / `USING` is accepted on inner and left/right/full outer joins.
/// * A missing constraint is accepted only on `CrossJoin`.
fn verify_join_constraint(&self, join_operator: &JoinOperator) -> bool {
    // Operators that may be written with an explicit ON / USING clause.
    let takes_condition = matches!(
        join_operator,
        JoinOperator::Inner(_)
            | JoinOperator::LeftOuter(_)
            | JoinOperator::RightOuter(_)
            | JoinOperator::FullOuter(_)
    );
    // Operators that may be written with no constraint at all.
    let may_omit_condition = matches!(join_operator, JoinOperator::CrossJoin);

    match join_operator.constraint() {
        JoinConstraint::Natural => false,
        JoinConstraint::On(_) | JoinConstraint::Using(_) => takes_condition,
        JoinConstraint::None => may_omit_condition,
    }
}
}
21 changes: 20 additions & 1 deletion src/dialect/clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::dialect::Dialect;
use crate::{ast::JoinOperator, dialect::Dialect};

/// A [`Dialect`] for [ClickHouse](https://clickhouse.com/).
#[derive(Debug)]
Expand Down Expand Up @@ -50,4 +50,23 @@ impl Dialect for ClickHouseDialect {
fn supports_limit_comma(&self) -> bool {
// The ClickHouse dialect answers `true` unconditionally.
// NOTE(review): presumably enables the `LIMIT <a>, <b>` form — confirm
// against the `Dialect` trait documentation.
true
}

/// Reports whether `join_operator` is accepted by the ClickHouse dialect.
///
/// Every operator except `CrossApply` and `OuterApply` is accepted.
///
/// See <https://clickhouse.com/docs/en/sql-reference/statements/select/join>
fn verify_join_operator(&self, join_operator: &JoinOperator) -> bool {
    match join_operator {
        JoinOperator::CrossApply | JoinOperator::OuterApply => false,
        JoinOperator::CrossJoin
        | JoinOperator::Inner(_)
        | JoinOperator::LeftOuter(_)
        | JoinOperator::RightOuter(_)
        | JoinOperator::FullOuter(_)
        | JoinOperator::Semi(_)
        | JoinOperator::LeftSemi(_)
        | JoinOperator::RightSemi(_)
        | JoinOperator::Anti(_)
        | JoinOperator::LeftAnti(_)
        | JoinOperator::RightAnti(_)
        | JoinOperator::AsOf { .. } => true,
    }
}
}
18 changes: 17 additions & 1 deletion src/dialect/databricks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::dialect::Dialect;
use crate::{ast::JoinOperator, dialect::Dialect};

/// A [`Dialect`] for [Databricks SQL](https://www.databricks.com/)
///
Expand Down Expand Up @@ -59,4 +59,20 @@ impl Dialect for DatabricksDialect {
fn require_interval_qualifier(&self) -> bool {
// The Databricks dialect answers `true` unconditionally.
// NOTE(review): how the parser consumes this flag is not visible in this
// hunk — confirm against the `Dialect` trait documentation.
true
}

/// Reports whether `join_operator` is accepted by the Databricks dialect.
///
/// Inner, left/right/full outer and cross joins are accepted, together with
/// `Semi`, `LeftSemi`, `Anti` and `LeftAnti`. The right-hand semi/anti
/// joins, `AsOf` and the `APPLY` operators are rejected.
///
/// See <https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-join.html>
fn verify_join_operator(&self, join_operator: &JoinOperator) -> bool {
    match join_operator {
        JoinOperator::CrossJoin
        | JoinOperator::Inner(_)
        | JoinOperator::LeftOuter(_)
        | JoinOperator::RightOuter(_)
        | JoinOperator::FullOuter(_)
        | JoinOperator::Semi(_)
        | JoinOperator::LeftSemi(_)
        | JoinOperator::Anti(_)
        | JoinOperator::LeftAnti(_) => true,
        JoinOperator::RightSemi(_)
        | JoinOperator::RightAnti(_)
        | JoinOperator::AsOf { .. }
        | JoinOperator::CrossApply
        | JoinOperator::OuterApply => false,
    }
}
}
21 changes: 20 additions & 1 deletion src/dialect/duckdb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::dialect::Dialect;
use crate::{ast::JoinOperator, dialect::Dialect};

/// A [`Dialect`] for [DuckDB](https://duckdb.org/)
#[derive(Debug, Default)]
Expand Down Expand Up @@ -75,4 +75,23 @@ impl Dialect for DuckDbDialect {
fn supports_load_extension(&self) -> bool {
// The DuckDB dialect answers `true` unconditionally.
// NOTE(review): the statement form gated by this flag is not visible in
// this hunk — confirm against the `Dialect` trait documentation.
true
}

/// Reports whether `join_operator` is accepted by the DuckDB dialect.
///
/// Every operator except `CrossApply` and `OuterApply` is accepted.
///
/// See <https://duckdb.org/docs/sql/query_syntax/from.html#joins>
fn verify_join_operator(&self, join_operator: &JoinOperator) -> bool {
    match join_operator {
        JoinOperator::CrossApply | JoinOperator::OuterApply => false,
        JoinOperator::CrossJoin
        | JoinOperator::Inner(_)
        | JoinOperator::LeftOuter(_)
        | JoinOperator::RightOuter(_)
        | JoinOperator::FullOuter(_)
        | JoinOperator::Semi(_)
        | JoinOperator::LeftSemi(_)
        | JoinOperator::RightSemi(_)
        | JoinOperator::Anti(_)
        | JoinOperator::LeftAnti(_)
        | JoinOperator::RightAnti(_)
        | JoinOperator::AsOf { .. } => true,
    }
}
}
38 changes: 37 additions & 1 deletion src/dialect/hive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@
// specific language governing permissions and limitations
// under the License.

use crate::dialect::Dialect;
use crate::{
ast::{JoinConstraint, JoinOperator},
dialect::Dialect,
};

/// A [`Dialect`] for [Hive](https://hive.apache.org/).
#[derive(Debug)]
Expand Down Expand Up @@ -61,4 +64,37 @@ impl Dialect for HiveDialect {
fn supports_load_data(&self) -> bool {
// The Hive dialect answers `true` unconditionally.
// NOTE(review): the statement form gated by this flag is not visible in
// this hunk — confirm against the `Dialect` trait documentation.
true
}

/// Reports whether `join_operator` is accepted by the Hive dialect.
///
/// Inner, left/right/full outer and cross joins are accepted, together with
/// `Semi` and `LeftSemi`. `RightSemi`, all anti joins, `AsOf` and the
/// `APPLY` operators are rejected.
///
/// See <https://cwiki.apache.org/confluence/display/hive/languagemanual+joins>
fn verify_join_operator(&self, join_operator: &JoinOperator) -> bool {
    match join_operator {
        JoinOperator::CrossJoin
        | JoinOperator::Inner(_)
        | JoinOperator::LeftOuter(_)
        | JoinOperator::RightOuter(_)
        | JoinOperator::FullOuter(_)
        | JoinOperator::Semi(_)
        | JoinOperator::LeftSemi(_) => true,
        JoinOperator::RightSemi(_)
        | JoinOperator::Anti(_)
        | JoinOperator::LeftAnti(_)
        | JoinOperator::RightAnti(_)
        | JoinOperator::AsOf { .. }
        | JoinOperator::CrossApply
        | JoinOperator::OuterApply => false,
    }
}

/// Reports whether the constraint carried by `join_operator` is valid for
/// the Hive dialect.
///
/// * `NATURAL` is always rejected.
/// * `ON` / `USING` is accepted on inner, left/right/full outer, `Semi`
///   and `LeftSemi` joins.
/// * A missing constraint is accepted on `Inner` and `CrossJoin`.
fn verify_join_constraint(&self, join_operator: &JoinOperator) -> bool {
    // Operators that may be written with an explicit ON / USING clause.
    let takes_condition = matches!(
        join_operator,
        JoinOperator::Inner(_)
            | JoinOperator::LeftOuter(_)
            | JoinOperator::RightOuter(_)
            | JoinOperator::FullOuter(_)
            | JoinOperator::Semi(_)
            | JoinOperator::LeftSemi(_)
    );
    // Operators that may be written with no constraint at all.
    let may_omit_condition = matches!(
        join_operator,
        JoinOperator::Inner(_) | JoinOperator::CrossJoin
    );

    match join_operator.constraint() {
        JoinConstraint::Natural => false,
        JoinConstraint::On(_) | JoinConstraint::Using(_) => takes_condition,
        JoinConstraint::None => may_omit_condition,
    }
}
}
37 changes: 36 additions & 1 deletion src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ pub use self::postgresql::PostgreSqlDialect;
pub use self::redshift::RedshiftSqlDialect;
pub use self::snowflake::SnowflakeDialect;
pub use self::sqlite::SQLiteDialect;
use crate::ast::{ColumnOption, Expr, Statement};
use crate::ast::{ColumnOption, Expr, JoinConstraint, JoinOperator, Statement};
pub use crate::keywords;
use crate::keywords::Keyword;
use crate::parser::{Parser, ParserError};
Expand Down Expand Up @@ -687,6 +687,41 @@ pub trait Dialect: Debug + Any {
fn is_reserved_for_identifier(&self, kw: Keyword) -> bool {
// Default: `kw` is reserved exactly when it appears in
// `keywords::RESERVED_FOR_IDENTIFIER`.
keywords::RESERVED_FOR_IDENTIFIER.contains(&kw)
}

/// Verifies whether the provided `JoinOperator` is supported by this SQL dialect.
/// Returns `true` if the `JoinOperator` is supported, otherwise `false`.
///
/// The default implementation accepts every operator; dialects override
/// this method to restrict the set.
fn verify_join_operator(&self, _join_operator: &JoinOperator) -> bool {
true
}

/// Verifies if the given `JoinOperator`'s constraint is valid for this SQL dialect.
/// Returns `true` if the join constraint is valid, otherwise `false`.
fn verify_join_constraint(&self, join_operator: &JoinOperator) -> bool {
Comment on lines +691 to +699
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, so since the behavior seems to vary a bit across dialects, I'm thinking it could make sense after all if we let the parser continue to be permissive in syntax, and downstream crates can perform the additional checks in the cases where specific combinations need to be enforced?

Copy link
Contributor Author

@dmitriibugakov dmitriibugakov Nov 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand your idea, and it makes sense. But the "issue" is based on the idea that the parser should restrict cases like “INNER JOIN” without an ON condition.

For me, as a developer, it would feel strange if I pick a parser, for example, the PostgreSQL dialect, and it allows “ANTI JOIN” without any error. This would mean I have to check all the statements again to find mistakes. It seems like a trade-off, and we need to decide which approach works best.

Or are you suggesting moving this specific implementation directly to GenericDialect?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah it is indeed a tradeoff, the expectation currently is that downstream crates further validate the output of the parser against any dialect specific requirements/invariants, see note here for example

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@alamb, could you determine whether the issue outlined in #13486 is valid?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my opinion:

  1. if there are any dialects that support <left> JOIN <right> without an ON clause then so should sqlparser
  2. If there are no dialects that support such syntax, then erroring is a good idea

I have not done the research to know if there are any dialects that support such syntax

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently, DataFusion processes SELECT ... FROM l JOIN r as a CROSS JOIN for all dialects, including DuckDB, where an INNER JOIN explicitly requires an ON clause.

@Dandandan proposed addressing this issue in sqlparser-rs, which seems reasonable to me. However, after discussing it with @iffyio, I see

indeed a tradeoff, the expectation currently is that downstream crates further validate the output of the parser

My questions are:

  1. Should this behavior be considered a bug?
  2. If yes, at which level should it be addressed: datafusion or sqlparser-rs?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • Should this behavior be considered a bug?

I don't have a strong opinion -- is it causing anyone problems? It seems like the largest implication of allowing JOIN ... to be treated as CROSS JOIN is that DataFusion now effectively has its own new dialect.

If it isn't causing problems, my suggestion is to do nothing until someone has a concrete use case.

// Dispatch on the constraint reported by `JoinOperator::constraint()`.
match join_operator.constraint() {
// NATURAL is accepted regardless of the operator.
JoinConstraint::Natural => true,
// ON / USING is accepted on every operator listed below; the cross join
// and APPLY operators are not in the list.
JoinConstraint::On(_) | JoinConstraint::Using(_) => matches!(
join_operator,
JoinOperator::Inner(_)
| JoinOperator::LeftOuter(_)
| JoinOperator::RightOuter(_)
| JoinOperator::FullOuter(_)
| JoinOperator::Semi(_)
| JoinOperator::LeftSemi(_)
| JoinOperator::RightSemi(_)
| JoinOperator::Anti(_)
| JoinOperator::LeftAnti(_)
| JoinOperator::RightAnti(_)
| JoinOperator::AsOf { .. }
),
// A missing constraint is accepted only for cross join, the APPLY
// operators and ASOF; notably `Inner` is not in this list.
JoinConstraint::None => matches!(
join_operator,
JoinOperator::CrossJoin
| JoinOperator::CrossApply
| JoinOperator::OuterApply
| JoinOperator::AsOf { .. }
),
}
}
}

/// This represents the operators for which precedence must be defined
Expand Down
36 changes: 35 additions & 1 deletion src/dialect/mssql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@
// specific language governing permissions and limitations
// under the License.

use crate::dialect::Dialect;
use crate::{
ast::{JoinConstraint, JoinOperator},
dialect::Dialect,
};

/// A [`Dialect`] for [Microsoft SQL Server](https://www.microsoft.com/en-us/sql-server/)
#[derive(Debug)]
Expand Down Expand Up @@ -78,4 +81,35 @@ impl Dialect for MsSqlDialect {
fn supports_named_fn_args_with_rarrow_operator(&self) -> bool {
// The MSSQL dialect answers `false` unconditionally.
// NOTE(review): presumably disables `name => value` named arguments —
// confirm against the `Dialect` trait documentation.
false
}

/// Reports whether `join_operator` is accepted by the MSSQL dialect.
///
/// Inner, left/right/full outer and cross joins are accepted, together with
/// `CrossApply` and `OuterApply`. Semi joins, anti joins and `AsOf` are
/// rejected.
///
/// See <https://learn.microsoft.com/en-us/sql/relational-databases/performance/joins?view=sql-server-ver16>
fn verify_join_operator(&self, join_operator: &JoinOperator) -> bool {
    match join_operator {
        JoinOperator::CrossJoin
        | JoinOperator::CrossApply
        | JoinOperator::OuterApply
        | JoinOperator::Inner(_)
        | JoinOperator::LeftOuter(_)
        | JoinOperator::RightOuter(_)
        | JoinOperator::FullOuter(_) => true,
        JoinOperator::Semi(_)
        | JoinOperator::LeftSemi(_)
        | JoinOperator::RightSemi(_)
        | JoinOperator::Anti(_)
        | JoinOperator::LeftAnti(_)
        | JoinOperator::RightAnti(_)
        | JoinOperator::AsOf { .. } => false,
    }
}

/// Reports whether the constraint carried by `join_operator` is valid for
/// the MSSQL dialect.
///
/// * `NATURAL` and `USING` are always rejected: the T-SQL `<joined_table>`
///   grammar only provides `ON <search_condition>`.
/// * `ON` is accepted on inner and left/right/full outer joins.
/// * A missing constraint is accepted only on `CrossJoin`, `CrossApply`
///   and `OuterApply`.
fn verify_join_constraint(&self, join_operator: &JoinOperator) -> bool {
    match join_operator.constraint() {
        // T-SQL has neither NATURAL joins nor a USING clause.
        JoinConstraint::Natural | JoinConstraint::Using(_) => false,
        JoinConstraint::On(_) => matches!(
            join_operator,
            JoinOperator::Inner(_)
                | JoinOperator::LeftOuter(_)
                | JoinOperator::RightOuter(_)
                | JoinOperator::FullOuter(_)
        ),
        JoinConstraint::None => matches!(
            join_operator,
            JoinOperator::CrossJoin | JoinOperator::CrossApply | JoinOperator::OuterApply
        ),
    }
}
}
31 changes: 30 additions & 1 deletion src/dialect/mysql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
use alloc::boxed::Box;

use crate::{
ast::{BinaryOperator, Expr, LockTable, LockTableType, Statement},
ast::{
BinaryOperator, Expr, JoinConstraint, JoinOperator, LockTable, LockTableType, Statement,
},
dialect::Dialect,
keywords::Keyword,
parser::{Parser, ParserError},
Expand Down Expand Up @@ -102,6 +104,33 @@ impl Dialect for MySqlDialect {
fn supports_create_table_select(&self) -> bool {
// The MySQL dialect answers `true` unconditionally.
// NOTE(review): presumably enables `CREATE TABLE ... SELECT` — confirm
// against the `Dialect` trait documentation.
true
}

/// Reports whether `join_operator` is accepted by the MySQL dialect.
///
/// Inner, left outer, right outer and cross joins are accepted. Full outer
/// joins, semi/anti joins, `AsOf` and the `APPLY` operators are rejected.
fn verify_join_operator(&self, join_operator: &JoinOperator) -> bool {
    match join_operator {
        JoinOperator::CrossJoin
        | JoinOperator::Inner(_)
        | JoinOperator::LeftOuter(_)
        | JoinOperator::RightOuter(_) => true,
        JoinOperator::FullOuter(_)
        | JoinOperator::Semi(_)
        | JoinOperator::LeftSemi(_)
        | JoinOperator::RightSemi(_)
        | JoinOperator::Anti(_)
        | JoinOperator::LeftAnti(_)
        | JoinOperator::RightAnti(_)
        | JoinOperator::AsOf { .. }
        | JoinOperator::CrossApply
        | JoinOperator::OuterApply => false,
    }
}

/// Reports whether the constraint carried by `join_operator` is valid for
/// the MySQL dialect.
///
/// * `NATURAL` is accepted.
/// * `ON` / `USING` is accepted on inner, left outer and right outer joins.
/// * A missing constraint is accepted only on `Inner` and `CrossJoin`.
///   The MySQL grammar makes `join_specification` optional for
///   `[INNER | CROSS] JOIN` but mandatory for `{LEFT | RIGHT} [OUTER] JOIN`.
fn verify_join_constraint(&self, join_operator: &JoinOperator) -> bool {
    match join_operator.constraint() {
        JoinConstraint::Natural => true,
        JoinConstraint::On(_) | JoinConstraint::Using(_) => matches!(
            join_operator,
            JoinOperator::Inner(_) | JoinOperator::LeftOuter(_) | JoinOperator::RightOuter(_)
        ),
        // LEFT / RIGHT joins are deliberately absent: they require ON or USING.
        JoinConstraint::None => matches!(
            join_operator,
            JoinOperator::Inner(_) | JoinOperator::CrossJoin
        ),
    }
}
}

/// `LOCK TABLES`
Expand Down
Loading