11use std:: {
2+ borrow:: Cow ,
23 collections:: { HashMap , HashSet , hash_map:: Entry } ,
34 path:: Path ,
45 sync:: Arc ,
@@ -29,6 +30,61 @@ impl FragmentInput {
2930 }
3031}
3132
/// Expands one URL fragment into every spelling that should be looked up
/// among the fragments extracted from a document.
struct FragmentBuilder {
    /// The fragment exactly as given, followed by any prefixed forms of it.
    variants: Vec<String>,
    /// Decoded forms of the entries in `variants`, kept only when decoding
    /// actually produced a different string.
    decoded: Vec<String>,
}
38+
39+ impl FragmentBuilder {
40+ fn new ( fragment : & str , url : & Url , file_type : FileType ) -> Result < Self > {
41+ let mut variants = vec ! [ fragment. into( ) ] ;
42+ // For GitHub links, add "user-content-" prefix to the fragments.
43+ // The following cases cannot be handled unless we simulate with a headless browser:
44+ // - markdown files from any specific path (includes "blob/master/README.md")
45+ // - "issuecomment" fragments from the GitHub issue pages
46+ if url
47+ . host_str ( )
48+ . is_some_and ( |host| host. ends_with ( "github.com" ) )
49+ {
50+ variants. push ( format ! ( "user-content-{fragment}" ) ) ;
51+ }
52+
53+ // Only store the percent-decoded variants if it's different from the original
54+ // fragment. This avoids storing and comparing the same fragment twice.
55+ let mut decoded = Vec :: new ( ) ;
56+ for frag in & variants {
57+ let mut require_alloc = false ;
58+ let mut fragment_decoded: Cow < ' _ , str > = match percent_decode_str ( frag) . decode_utf8 ( ) ? {
59+ Cow :: Borrowed ( s) => s. into ( ) ,
60+ Cow :: Owned ( s) => {
61+ require_alloc = true ;
62+ s. into ( )
63+ }
64+ } ;
65+ if file_type == FileType :: Markdown {
66+ let lowercase = fragment_decoded. to_lowercase ( ) ;
67+ if lowercase != fragment_decoded {
68+ fragment_decoded = lowercase. into ( ) ;
69+ require_alloc = true ;
70+ }
71+ }
72+ if require_alloc {
73+ decoded. push ( fragment_decoded. into ( ) ) ;
74+ }
75+ }
76+
77+ Ok ( Self { variants, decoded } )
78+ }
79+
80+ fn any_matches ( & self , fragments : & HashSet < String > ) -> bool {
81+ self . variants
82+ . iter ( )
83+ . chain ( self . decoded . iter ( ) )
84+ . any ( |frag| fragments. contains ( frag) )
85+ }
86+ }
87+
3288/// Holds a cache of fragments for a given URL.
3389///
3490/// Fragments, also known as anchors, are used to link to a specific
@@ -67,7 +123,7 @@ impl FragmentChecker {
67123 if fragment. is_empty ( ) || fragment. eq_ignore_ascii_case ( "top" ) {
68124 return Ok ( true ) ;
69125 }
70- let mut fragment_decoded = percent_decode_str ( fragment ) . decode_utf8 ( ) ? ;
126+
71127 let url_without_frag = Self :: remove_fragment ( url. clone ( ) ) ;
72128
73129 let FragmentInput { content, file_type } = input;
@@ -76,20 +132,18 @@ impl FragmentChecker {
76132 FileType :: Html => extract_html_fragments,
77133 FileType :: Plaintext => return Ok ( true ) ,
78134 } ;
79- if file_type == FileType :: Markdown {
80- fragment_decoded = fragment_decoded. to_lowercase ( ) . into ( ) ;
81- }
135+
136+ let fragment_candidates = FragmentBuilder :: new ( fragment, url, file_type) ?;
82137 match self . cache . lock ( ) . await . entry ( url_without_frag) {
83138 Entry :: Vacant ( entry) => {
84139 let file_frags = extractor ( & content) ;
85- let contains_fragment =
86- file_frags. contains ( fragment) || file_frags. contains ( & fragment_decoded as & str ) ;
140+ let contains_fragment = fragment_candidates. any_matches ( & file_frags) ;
87141 entry. insert ( file_frags) ;
88142 Ok ( contains_fragment)
89143 }
90144 Entry :: Occupied ( entry) => {
91- Ok ( entry. get ( ) . contains ( fragment )
92- || entry . get ( ) . contains ( & fragment_decoded as & str ) )
145+ let file_frags = entry. get ( ) ;
146+ Ok ( fragment_candidates . any_matches ( file_frags ) )
93147 }
94148 }
95149 }
0 commit comments