@@ -3,9 +3,10 @@ use std::collections::HashMap;
33use tantivy_fst:: raw:: CompiledAddr ;
44use tantivy_fst:: { Automaton , Map } ;
55
6+ use super :: term_set_query_fastfield:: FastFieldTermSetWeight ;
67use crate :: query:: score_combiner:: DoNothingCombiner ;
78use crate :: query:: { AutomatonWeight , BooleanWeight , EnableScoring , Occur , Query , Weight } ;
8- use crate :: schema:: { Field , Schema } ;
9+ use crate :: schema:: { Field , Schema , Type } ;
910use crate :: { SegmentReader , Term } ;
1011
1112/// A Term Set Query matches all of the documents containing any of the Term provided
@@ -44,20 +45,41 @@ impl TermSetQuery {
4445 return Err ( crate :: TantivyError :: SchemaError ( error_msg) ) ;
4546 }
4647
47- // In practice this won't fail because:
48- // - we are writing to memory, so no IoError
49- // - Terms are ordered
50- let map = Map :: from_iter (
51- sorted_terms
52- . iter ( )
53- . map ( |key| ( key. serialized_value_bytes ( ) , 0 ) ) ,
54- )
55- . map_err ( std:: io:: Error :: other) ?;
56-
57- sub_queries. push ( (
58- Occur :: Should ,
59- Box :: new ( AutomatonWeight :: new ( field, SetDfaWrapper ( map) ) ) ,
60- ) ) ;
48+ let supported_for_ff = sorted_terms
49+ . get ( 0 )
50+ . map ( |term| match term. typ ( ) {
51+ Type :: U64 | Type :: I64 | Type :: F64 | Type :: Bool | Type :: Date | Type :: IpAddr => {
52+ true
53+ }
54+ Type :: Json | Type :: Str => {
55+ // Explicitly not supported yet: see `term_set_query_fastfield.rs`.
56+ false
57+ }
58+ _ => false ,
59+ } )
60+ . unwrap_or ( false ) ;
61+
62+ if field_type. is_fast ( ) && supported_for_ff {
63+ sub_queries. push ( (
64+ Occur :: Should ,
65+ Box :: new ( FastFieldTermSetWeight :: new ( field, sorted_terms. to_vec ( ) ) ) ,
66+ ) ) ;
67+ } else {
68+ // In practice this won't fail because:
69+ // - we are writing to memory, so no IoError
70+ // - Terms are ordered
71+ let map = Map :: from_iter (
72+ sorted_terms
73+ . iter ( )
74+ . map ( |key| ( key. serialized_value_bytes ( ) , 0 ) ) ,
75+ )
76+ . map_err ( |e| std:: io:: Error :: new ( std:: io:: ErrorKind :: Other , e) ) ?;
77+
78+ sub_queries. push ( (
79+ Occur :: Should ,
80+ Box :: new ( AutomatonWeight :: new ( field, SetDfaWrapper ( map) ) ) ,
81+ ) ) ;
82+ }
6183 }
6284
6385 Ok ( BooleanWeight :: new (
@@ -87,6 +109,59 @@ impl Query for TermSetQuery {
87109 }
88110}
89111
112+ /// `InvertedIndexTermSetQuery` is the same as [TermSetQuery] but only uses the inverted index.
113+ #[ derive( Debug , Clone ) ]
114+ pub struct InvertedIndexTermSetQuery {
115+ terms_map : HashMap < Field , Vec < Term > > ,
116+ }
117+
118+ impl InvertedIndexTermSetQuery {
119+ /// Create a new `InvertedIndexTermSetQuery`.
120+ pub fn new < T : IntoIterator < Item = Term > > ( terms : T ) -> Self {
121+ let mut terms_map: HashMap < _ , Vec < _ > > = HashMap :: new ( ) ;
122+ for term in terms {
123+ terms_map. entry ( term. field ( ) ) . or_default ( ) . push ( term) ;
124+ }
125+
126+ for terms in terms_map. values_mut ( ) {
127+ terms. sort_unstable ( ) ;
128+ terms. dedup ( ) ;
129+ }
130+
131+ InvertedIndexTermSetQuery { terms_map }
132+ }
133+ }
134+
135+ impl Query for InvertedIndexTermSetQuery {
136+ fn weight ( & self , enable_scoring : EnableScoring < ' _ > ) -> crate :: Result < Box < dyn Weight > > {
137+ let mut sub_queries: Vec < ( _ , Box < dyn Weight > ) > = Vec :: with_capacity ( self . terms_map . len ( ) ) ;
138+ for ( & field, sorted_terms) in & self . terms_map {
139+ let schema = enable_scoring. schema ( ) ;
140+ let field_entry = schema. get_field_entry ( field) ;
141+ if !field_entry. field_type ( ) . is_indexed ( ) {
142+ let error_msg = format ! ( "Field {:?} is not indexed." , field_entry. name( ) ) ;
143+ return Err ( crate :: TantivyError :: SchemaError ( error_msg) ) ;
144+ }
145+ let map = Map :: from_iter (
146+ sorted_terms
147+ . iter ( )
148+ . map ( |key| ( key. serialized_value_bytes ( ) , 0 ) ) ,
149+ )
150+ . map_err ( |e| std:: io:: Error :: new ( std:: io:: ErrorKind :: Other , e) ) ?;
151+
152+ sub_queries. push ( (
153+ Occur :: Should ,
154+ Box :: new ( AutomatonWeight :: new ( field, SetDfaWrapper ( map) ) ) ,
155+ ) ) ;
156+ }
157+ Ok ( Box :: new ( BooleanWeight :: new (
158+ sub_queries,
159+ false ,
160+ Box :: new ( DoNothingCombiner :: default) ,
161+ ) ) )
162+ }
163+ }
164+
90165struct SetDfaWrapper ( Map < Vec < u8 > > ) ;
91166
92167impl Automaton for SetDfaWrapper {
0 commit comments