@@ -58,6 +58,8 @@ impl Client {
5858
5959 /// Search songs satisfying a given `query`.
6060 pub async fn search_songs ( & self , query : & str ) -> anyhow:: Result < Vec < search:: Result > > {
61+ let query = improve_query ( query) ;
62+
6163 log:: debug!( "search songs: query={query}" ) ;
6264
6365 let body = self
@@ -136,6 +138,60 @@ impl Default for Client {
136138 }
137139}
138140
/// Returns a lowercase copy of `query` with `remaster` & `remix` information removed.
///
/// Such suffixes caused wildly invalid lyrics to be found
/// (try adding "remastered 2011" to a song's name when searching in Genius!).
///
/// * Remaster: the keyword (including longer variants such as `remastered`), the single
///   character in front of it, an optional all-numeric word (the year) before that, and
///   any spaces/dashes preceding the removed part are cut.
/// * Remix: everything from the last dash in front of the keyword up to the end of the
///   keyword (including longer variants such as `remixed`) is cut, unless that dash sits
///   within the first five bytes of the query.
///
/// Only the first occurrence of each keyword is handled. All offsets are kept on `char`
/// boundaries, so queries containing multi-byte characters never cause a panic.
fn improve_query(query: &str) -> String {
    const REMASTER: &str = "remaster";
    const REMIX: &str = "remix";

    fn is_dash(c: char) -> bool {
        matches!(c, '-' | '—' | '–')
    }

    // Length of `s` once trailing filler (whitespace and dashes) has been trimmed off.
    fn filler_trimmed_len(s: &str) -> usize {
        s.trim_end_matches(|c: char| is_dash(c) || c.is_whitespace())
            .len()
    }

    // Byte index at which the alphanumeric run starting at `from` ends; used to handle
    // longer variants of words: `remixed`, `remastered`, etc.
    // A word running up to the end of `s` ends at `s.len()`.
    fn word_end(s: &str, from: usize) -> usize {
        let Some(rest) = s.get(from..) else { return from };
        rest.find(|c: char| !c.is_alphanumeric())
            .map_or(s.len(), |offset| from + offset)
    }

    let mut query = query.to_lowercase();

    // just cut `xxxx Remaster` from query
    if let Some(keyword_start) = query.find(REMASTER) {
        let end = word_end(&query, keyword_start + REMASTER.len());

        // The character right in front of the keyword (normally a space) is always
        // dropped. Locate it via `char_indices` so a multi-byte character is not split.
        let separator_start = query[..keyword_start]
            .char_indices()
            .next_back()
            .map_or(0, |(idx, _)| idx);

        // If the word in front of the separator is purely numeric, treat it as the
        // remaster year and cut it as well.
        let head = query[..separator_start].trim_end();
        let word_start = head
            .char_indices()
            .rev()
            .find(|(_, c)| c.is_whitespace())
            .map_or(0, |(idx, c)| idx + c.len_utf8());
        let word = &head[word_start..];
        let cut_from = if !word.is_empty() && word.chars().all(char::is_numeric) {
            word_start
        } else {
            separator_start
        };

        let start = filler_trimmed_len(&query[..cut_from]);
        query.drain(start..end);
    }

    // just cut anything off after - for remix
    if let Some(keyword_start) = query.find(REMIX) {
        let end = word_end(&query, keyword_start + REMIX.len());
        // Only a dash located *before* the keyword can start the remix metadata; a later
        // dash would yield an inverted range. A dash too close to the start of the query
        // is ignored.
        let metadata_start = query[..keyword_start]
            .rfind(is_dash)
            .filter(|&dash| dash > 4);
        if let Some(dash) = metadata_start {
            let start = filler_trimmed_len(&query[..dash]);
            query.drain(start..end);
        }
    }

    query
}
194+
139195mod parse {
140196 use html5ever:: tendril:: TendrilSink ;
141197 use html5ever:: * ;
0 commit comments