From 7995c73fa3b3773be5ad5c725ca7caa25d5555ef Mon Sep 17 00:00:00 2001 From: Carl Meyer Date: Mon, 14 Apr 2025 18:27:58 -0700 Subject: [PATCH 1/3] [red-knot] optimize building large unions of literals --- .../resources/mdtest/annotations/literal.md | 4 +- .../resources/mdtest/annotations/string.md | 2 +- .../src/types/builder.rs | 167 +++++++++++++++--- 3 files changed, 150 insertions(+), 23 deletions(-) diff --git a/crates/red_knot_python_semantic/resources/mdtest/annotations/literal.md b/crates/red_knot_python_semantic/resources/mdtest/annotations/literal.md index 75c311800b6850..138478dbdf6771 100644 --- a/crates/red_knot_python_semantic/resources/mdtest/annotations/literal.md +++ b/crates/red_knot_python_semantic/resources/mdtest/annotations/literal.md @@ -68,7 +68,7 @@ def x( a3: Literal[Literal["w"], Literal["r"], Literal[Literal["w+"]]], a4: Literal[True] | Literal[1, 2] | Literal["foo"], ): - reveal_type(a1) # revealed: Literal[1, 2, 3, "foo", 5] | None + reveal_type(a1) # revealed: Literal[1, 2, 3, 5, "foo"] | None reveal_type(a2) # revealed: Literal["w", "r"] reveal_type(a3) # revealed: Literal["w", "r", "w+"] reveal_type(a4) # revealed: Literal[True, 1, 2, "foo"] @@ -108,7 +108,7 @@ def union_example( None, ], ): - reveal_type(x) # revealed: Unknown | Literal[-1, "A", b"A", b"\x00", b"\x07", 0, 1, "B", "foo", "bar", True] | None + reveal_type(x) # revealed: Unknown | Literal[-1, 0, 1, "A", "B", "foo", "bar", b"A", b"\x00", b"\x07", True] | None ``` ## Detecting Literal outside typing and typing_extensions diff --git a/crates/red_knot_python_semantic/resources/mdtest/annotations/string.md b/crates/red_knot_python_semantic/resources/mdtest/annotations/string.md index a1b47a30c7747e..f44c000855ea67 100644 --- a/crates/red_knot_python_semantic/resources/mdtest/annotations/string.md +++ b/crates/red_knot_python_semantic/resources/mdtest/annotations/string.md @@ -105,7 +105,7 @@ def f1( from typing import Literal def f(v: Literal["a", r"b", b"c", "d" "e", "\N{LATIN SMALL LETTER F}", "\x67", """h"""]): - reveal_type(v) # revealed: Literal["a", "b", b"c", "de", "f", "g", "h"] + reveal_type(v) # revealed: Literal["a", "b", "de", "f", "g", "h", b"c"] ``` ## Class variables diff --git a/crates/red_knot_python_semantic/src/types/builder.rs b/crates/red_knot_python_semantic/src/types/builder.rs index c9324747035001..2537fc266cfb09 100644 --- a/crates/red_knot_python_semantic/src/types/builder.rs +++ b/crates/red_knot_python_semantic/src/types/builder.rs @@ -12,6 +12,11 @@ //! flattens into the outer one), intersections cannot contain other intersections (also //! flattens), and intersections cannot contain unions (the intersection distributes over the //! union, inverting it into a union-of-intersections). +//! * No type in a union can be a subtype of any other type in the union (just eliminate the +//! subtype from the union). +//! * No type in an intersection can be a supertype of any other type in the intersection (just +//! eliminate the supertype from the intersection). +//! * An intersection containing two non-overlapping types simplifies to [`Type::Never`]. //! //! The implication of these invariants is that a [`UnionBuilder`] does not necessarily build a //! [`Type::Union`]. For example, if only one type is added to the [`UnionBuilder`], `build()` will @@ -19,19 +24,35 @@ //! union type is added to the intersection, it will distribute and [`IntersectionBuilder::build`] //! may end up returning a [`Type::Union`] of intersections. //! -//! In the future we should have these additional invariants, but they aren't implemented yet: -//! * No type in a union can be a subtype of any other type in the union (just eliminate the -//! subtype from the union). -//! * No type in an intersection can be a supertype of any other type in the intersection (just -//! eliminate the supertype from the intersection). -//! * An intersection containing two non-overlapping types should simplify to [`Type::Never`]. - -use crate::types::{IntersectionType, KnownClass, Type, TypeVarBoundOrConstraints, UnionType}; +//! ## Performance +//! +//! In practice, there are two kinds of unions found in the wild: relatively-small unions made up +//! of normal user types (classes, etc), and large unions made up of literals, which can occur via +//! large enums (not yet implemented) or from string/integer/bytes literals, which can grow due to +//! literal arithmetic or operations on literal strings/bytes. For normal unions, it's most +//! efficient to just store the member types in a vector, and do O(n^2) `is_subtype_of` checks to +//! maintain the union in simplified form. But literal unions can grow to a size where this becomes +//! a performance problem. For this reason, we group literal types in `UnionBuilder`. Since every +//! different string literal type shares exactly the same possible super-types, and none of them +//! are subtypes of each other (unless exactly the same literal type), we can avoid many +//! unnecessary `is_subtype_of` checks. + +use crate::types::{ + BytesLiteralType, IntersectionType, KnownClass, StringLiteralType, Type, + TypeVarBoundOrConstraints, UnionType, +}; use crate::{Db, FxOrderSet}; use smallvec::SmallVec; +enum UnionElement<'db> { + IntLiterals(FxOrderSet), + StringLiterals(FxOrderSet>), + BytesLiterals(FxOrderSet>), + Type(Type<'db>), +} + pub(crate) struct UnionBuilder<'db> { - elements: Vec>, + elements: Vec>, db: &'db dyn Db, } @@ -50,7 +71,8 @@ impl<'db> UnionBuilder<'db> { /// Collapse the union to a single type: `object`. fn collapse_to_object(mut self) -> Self { self.elements.clear(); - self.elements.push(Type::object(self.db)); + self.elements + .push(UnionElement::Type(Type::object(self.db))); self } @@ -66,6 +88,75 @@ impl<'db> UnionBuilder<'db> { } // Adding `Never` to a union is a no-op. Type::Never => {} + // If adding a string literal, look for an existing `UnionElement::StringLiterals` to + // add it to, or an existing element that is a super-type of string literals, which + // means we shouldn't add it. Otherwise, add a new `UnionElement::StringLiterals` + // containing it. + Type::StringLiteral(literal) => { + let mut found = false; + for element in &mut self.elements { + match element { + UnionElement::StringLiterals(literals) => { + literals.insert(literal); + found = true; + break; + } + UnionElement::Type(existing) if ty.is_subtype_of(self.db, *existing) => { + return self; + } + _ => {} + } + } + if !found { + let mut literals = FxOrderSet::default(); + literals.insert(literal); + self.elements.push(UnionElement::StringLiterals(literals)); + } + } + // Same for bytes literals as for string literals, above. + Type::BytesLiteral(literal) => { + let mut found = false; + for element in &mut self.elements { + match element { + UnionElement::BytesLiterals(literals) => { + literals.insert(literal); + found = true; + break; + } + UnionElement::Type(existing) if ty.is_subtype_of(self.db, *existing) => { + return self; + } + _ => {} + } + } + if !found { + let mut literals = FxOrderSet::default(); + literals.insert(literal); + self.elements.push(UnionElement::BytesLiterals(literals)); + } + } + // And same for int literals as well. + Type::IntLiteral(literal) => { + let mut found = false; + for element in &mut self.elements { + match element { + UnionElement::IntLiterals(literals) => { + literals.insert(literal); + found = true; + break; + } + UnionElement::Type(existing) if ty.is_subtype_of(self.db, *existing) => { + return self; + } + _ => {} + } + } + if !found { + let mut literals = FxOrderSet::default(); + literals.insert(literal); + self.elements.push(UnionElement::IntLiterals(literals)); + } + } // Adding `object` to a union results in `object`. ty if ty.is_object(self.db) => { return self.collapse_to_object(); @@ -81,8 +172,29 @@ impl<'db> UnionBuilder<'db> { let mut to_remove = SmallVec::<[usize; 2]>::new(); let ty_negated = ty.negate(self.db); - for (index, element) in self.elements.iter().enumerate() { - if Some(*element) == bool_pair { + for (index, element) in self + .elements + .iter() + .map(|element| { + // For literals, the first element in the set can stand in for all the rest, + // since they all have the same super-types. SAFETY: a `UnionElement` of + // literal kind must always have at least one element in it. + match element { + UnionElement::IntLiterals(literals) => { + Type::IntLiteral(*literals.iter().next().unwrap()) + } + UnionElement::StringLiterals(literals) => { + Type::StringLiteral(*literals.iter().next().unwrap()) + } + UnionElement::BytesLiterals(literals) => { + Type::BytesLiteral(*literals.iter().next().unwrap()) + } + UnionElement::Type(ty) => *ty, + } + }) + .enumerate() + { + if Some(element) == bool_pair { to_add = KnownClass::Bool.to_instance(self.db); to_remove.push(index); // The type we are adding is a BooleanLiteral, which doesn't have any @@ -92,14 +204,14 @@ impl<'db> UnionBuilder<'db> { break; } - if ty.is_same_gradual_form(*element) - || ty.is_subtype_of(self.db, *element) + if ty.is_same_gradual_form(element) + || ty.is_subtype_of(self.db, element) || element.is_object(self.db) { return self; } else if element.is_subtype_of(self.db, ty) { to_remove.push(index); - } else if ty_negated.is_subtype_of(self.db, *element) { + } else if ty_negated.is_subtype_of(self.db, element) { // We add `ty` to the union. We just checked that `~ty` is a subtype of an existing `element`. // This also means that `~ty | ty` is a subtype of `element | ty`, because both elements in the // first union are subtypes of the corresponding elements in the second union. But `~ty | ty` is @@ -111,13 +223,13 @@ impl<'db> UnionBuilder<'db> { } } if let Some((&first, rest)) = to_remove.split_first() { - self.elements[first] = to_add; + self.elements[first] = UnionElement::Type(to_add); // We iterate in descending order to keep remaining indices valid after `swap_remove`. for &index in rest.iter().rev() { self.elements.swap_remove(index); } } else { - self.elements.push(to_add); + self.elements.push(UnionElement::Type(to_add)); } } } @@ -125,10 +237,25 @@ impl<'db> UnionBuilder<'db> { } pub(crate) fn build(self) -> Type<'db> { - match self.elements.len() { + let mut types = vec![]; + for element in self.elements { + match element { + UnionElement::IntLiterals(literals) => { + types.extend(literals.into_iter().map(Type::IntLiteral)); + } + UnionElement::StringLiterals(literals) => { + types.extend(literals.into_iter().map(Type::StringLiteral)); + } + UnionElement::BytesLiterals(literals) => { + types.extend(literals.into_iter().map(Type::BytesLiteral)); + } + UnionElement::Type(ty) => types.push(ty), + } + } + match types.len() { 0 => Type::Never, - 1 => self.elements[0], - _ => Type::Union(UnionType::new(self.db, self.elements.into_boxed_slice())), + 1 => types[0], + _ => Type::Union(UnionType::new(self.db, types.into_boxed_slice())), } } } From 412d4bf04027629c74bdb56a268ef17c0b4bc523 Mon Sep 17 00:00:00 2001 From: Carl Meyer Date: Wed, 16 Apr 2025 06:43:40 -0700 Subject: [PATCH 2/3] Apply suggestions from code review Co-authored-by: Alex Waygood --- .../src/types/builder.rs | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/crates/red_knot_python_semantic/src/types/builder.rs b/crates/red_knot_python_semantic/src/types/builder.rs index 2537fc266cfb09..d0c3c512b89c54 100644 --- a/crates/red_knot_python_semantic/src/types/builder.rs +++ b/crates/red_knot_python_semantic/src/types/builder.rs @@ -108,9 +108,10 @@ impl<'db> UnionBuilder<'db> { } } if !found { - let mut literals = FxOrderSet::default(); - literals.insert(literal); - self.elements.push(UnionElement::StringLiterals(literals)); + self.elements + .push(UnionElement::StringLiterals(FxOrderSet::from_iter([ + literal, + ]))); } } // Same for bytes literals as for string literals, above. @@ -130,9 +131,10 @@ impl<'db> UnionBuilder<'db> { } } if !found { - let mut literals = FxOrderSet::default(); - literals.insert(literal); - self.elements.push(UnionElement::BytesLiterals(literals)); + self.elements + .push(UnionElement::BytesLiterals(FxOrderSet::from_iter([ + literal, + ]))); } } // And same for int literals as well. @@ -152,9 +154,10 @@ impl<'db> UnionBuilder<'db> { } } if !found { - let mut literals = FxOrderSet::default(); - literals.insert(literal); - self.elements.push(UnionElement::IntLiterals(literals)); + self.elements + .push(UnionElement::IntLiterals(FxOrderSet::from_iter([ + literal, + ]))); } } // Adding `object` to a union results in `object`. @@ -180,14 +183,12 @@ impl<'db> UnionBuilder<'db> { // since they all have the same super-types. SAFETY: a `UnionElement` of // literal kind must always have at least one element in it. match element { - UnionElement::IntLiterals(literals) => { - Type::IntLiteral(*literals.iter().next().unwrap()) - } + UnionElement::IntLiterals(literals) => Type::IntLiteral(literals[0]), UnionElement::StringLiterals(literals) => { - Type::StringLiteral(*literals.iter().next().unwrap()) + Type::StringLiteral(literals[0]) } UnionElement::BytesLiterals(literals) => { - Type::BytesLiteral(*literals.iter().next().unwrap()) + Type::BytesLiteral(literals[0]) } UnionElement::Type(ty) => *ty, } From a99bdc32d1e8ce93b71d47df1cc37b50e9fca0cd Mon Sep 17 00:00:00 2001 From: Carl Meyer Date: Wed, 16 Apr 2025 06:52:08 -0700 Subject: [PATCH 3/3] cargo fmt --- crates/red_knot_python_semantic/src/types/builder.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/crates/red_knot_python_semantic/src/types/builder.rs b/crates/red_knot_python_semantic/src/types/builder.rs index d0c3c512b89c54..43f46cf715c505 100644 --- a/crates/red_knot_python_semantic/src/types/builder.rs +++ b/crates/red_knot_python_semantic/src/types/builder.rs @@ -155,9 +155,7 @@ impl<'db> UnionBuilder<'db> { } if !found { self.elements - .push(UnionElement::IntLiterals(FxOrderSet::from_iter([ - literal, - ]))); + .push(UnionElement::IntLiterals(FxOrderSet::from_iter([literal]))); } } // Adding `object` to a union results in `object`.