@@ -58,6 +58,8 @@ impl Client {
5858
5959 /// Search songs satisfying a given `query`.
6060 pub async fn search_songs ( & self , query : & str ) -> anyhow:: Result < Vec < search:: Result > > {
61+ let query = improve_query ( query) ;
62+
6163 log:: debug!( "search songs: query={query}" ) ;
6264
6365 let body = self
@@ -136,6 +138,66 @@ impl Default for Client {
136138 }
137139}
138140
/// Returns `query` lowercased, with `remaster` & `remix` metadata removed from the
/// track/artist query.
///
/// Such metadata caused wildly invalid lyrics to be found
/// (try adding "remastered 2011" to a song's name when searching in Genius!).
///
/// Two rewrites are applied, each to the first occurrence of its keyword:
/// * `{song} xxxx Remastered {artists}` becomes `{song} {artists}`, where `xxxx` is an
///   optional, purely numeric word (the year) directly before the keyword;
/// * `{song} - xxxx yyy remix {artists}` becomes `{song} {artists}`; everything from the
///   last dash in front of the keyword up to the end of the keyword is dropped.
///
/// Longer variants of the keywords (`remastered`, `remixed`, ...) are removed as a whole,
/// and spaces/dashes left dangling in front of the removed span are trimmed as well.
///
/// All offsets are computed on `char` boundaries, so the function never panics,
/// whatever the query contains.
fn improve_query(query: &str) -> String {
    // Guard against wiping the song name: remix metadata is only removed when the dash
    // that introduces it sits at least this many bytes into the query.
    const SONG_MIN_LENGTH_WO_REMIX_METADATA: usize = 3;

    let is_dash = |c: char| c == '-';

    // Reverse finder for non-filler chars before `idx`: returns the byte offset just past
    // the last char of `s[..idx]` that is neither whitespace nor a dash.
    // Acts like a trim to remove undesired spaces and dashes; `idx` is returned unchanged
    // when `s[..idx]` holds nothing but filler (or `idx` is not a valid offset).
    let rfind_non_filler = |s: &str, idx: usize| {
        let Some(head) = s.get(..idx) else { return idx };
        head.char_indices()
            .rfind(|&(_, c)| !(is_dash(c) || c.is_whitespace()))
            .map_or(idx, |(pos, c)| pos + c.len_utf8())
    };
    // Byte offset of the end of the alphanumeric run starting at `idx`.
    // Used to handle longer variants of words: `remixed`, `remastered`, etc.
    // A word running up to the end of the string ends at the end of the string.
    let end_of_word = |s: &str, idx: usize| {
        let Some(tail) = s.get(idx..) else { return idx };
        idx + tail
            .find(|c: char| !c.is_alphanumeric())
            .unwrap_or(tail.len())
    };

    let mut query = query.to_lowercase();

    // Remove "xxxx Remaster" from the query.
    // For example, `{song} xxxx Remastered {artists}` becomes `{song} {artists}`.
    if let Some(remaster_start) = query.find("remaster") {
        let end = end_of_word(&query, remaster_start + "remaster".len());

        let before = &query[..remaster_start];
        // The char right in front of the keyword (normally a separator) goes away too.
        // Its offset is taken from `char_indices` so that a multi-byte char is never split.
        let mut start = before
            .char_indices()
            .next_back()
            .map_or(0, |(pos, _)| pos);

        // A non-empty, purely numeric word directly before the keyword is the year.
        let head = before[..start].trim_end();
        let year_start = head.rfind(' ').map_or(0, |space| space + 1);
        let year = &head[year_start..];
        if !year.is_empty() && year.chars().all(char::is_numeric) {
            start = year_start;
        }

        start = rfind_non_filler(&query, start);
        query.replace_range(start..end, "");
    }

    // Remove "- xxxx yyy remix" from the query.
    // For example, `{song} - xxxx yyy remix {artists}` becomes `{song} {artists}`.
    if let Some(remix_start) = query.find("remix") {
        let end = end_of_word(&query, remix_start + "remix".len());

        // Only a dash located before the keyword can introduce the remix metadata;
        // a later dash (e.g. inside an artist name) must be ignored.
        if let Some(metadata_start) = query[..remix_start].rfind(is_dash) {
            if metadata_start >= SONG_MIN_LENGTH_WO_REMIX_METADATA {
                let start = rfind_non_filler(&query, metadata_start);
                query.replace_range(start..end, "");
            }
        }
    }

    query
}
200+
139201mod parse {
140202 use html5ever:: tendril:: TendrilSink ;
141203 use html5ever:: * ;
0 commit comments