Skip to content

Commit 02f6f5c

Browse files
authored
feat: add 'user-content-' prefix to support github markdown fragment (#1750)
1 parent 81f2605 commit 02f6f5c

File tree

3 files changed

+64
-10
lines changed

3 files changed

+64
-10
lines changed

fixtures/fragments/file.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
<a href="#in-the-end">doesn't exist</a><br>
2525
<a href="#">To the top</a><br>
2626
<a href="#top">To the top alt</a><br>
27-
<a href="https://github.com/lycheeverse/lychee#user-content-table-of-contents">To the lychee readme license fragment.</a>
27+
<a href="https://github.com/lycheeverse/lychee#table-of-contents">To the lychee readme license fragment.</a>
2828
</section>
2929
</body>
3030
</html>

lychee-bin/tests/cli.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1890,7 +1890,7 @@ mod cli {
18901890
.stderr(contains("fixtures/fragments/file.html#top"))
18911891
.stderr(contains("fixtures/fragments/file2.md#top"))
18921892
.stderr(contains(
1893-
"https://github.com/lycheeverse/lychee#user-content-table-of-contents",
1893+
"https://github.com/lycheeverse/lychee#table-of-contents",
18941894
))
18951895
.stderr(contains(
18961896
"https://github.com/lycheeverse/lychee#non-existent-anchor",

lychee-lib/src/utils/fragment_checker.rs

Lines changed: 62 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use std::{
2+
borrow::Cow,
23
collections::{HashMap, HashSet, hash_map::Entry},
34
path::Path,
45
sync::Arc,
@@ -29,6 +30,61 @@ impl FragmentInput {
2930
}
3031
}
3132

33+
/// A fragment builder that expands the given fragments into a list of candidates.
34+
struct FragmentBuilder {
35+
variants: Vec<String>,
36+
decoded: Vec<String>,
37+
}
38+
39+
impl FragmentBuilder {
40+
fn new(fragment: &str, url: &Url, file_type: FileType) -> Result<Self> {
41+
let mut variants = vec![fragment.into()];
42+
// For GitHub links, add "user-content-" prefix to the fragments.
43+
// The following cases cannot be handled unless we simulate with a headless browser:
44+
// - markdown files from any specific path (includes "blob/master/README.md")
45+
// - "issuecomment" fragments from the GitHub issue pages
46+
if url
47+
.host_str()
48+
.is_some_and(|host| host.ends_with("github.com"))
49+
{
50+
variants.push(format!("user-content-{fragment}"));
51+
}
52+
53+
// Only store the percent-decoded variants if it's different from the original
54+
// fragment. This avoids storing and comparing the same fragment twice.
55+
let mut decoded = Vec::new();
56+
for frag in &variants {
57+
let mut require_alloc = false;
58+
let mut fragment_decoded: Cow<'_, str> = match percent_decode_str(frag).decode_utf8()? {
59+
Cow::Borrowed(s) => s.into(),
60+
Cow::Owned(s) => {
61+
require_alloc = true;
62+
s.into()
63+
}
64+
};
65+
if file_type == FileType::Markdown {
66+
let lowercase = fragment_decoded.to_lowercase();
67+
if lowercase != fragment_decoded {
68+
fragment_decoded = lowercase.into();
69+
require_alloc = true;
70+
}
71+
}
72+
if require_alloc {
73+
decoded.push(fragment_decoded.into());
74+
}
75+
}
76+
77+
Ok(Self { variants, decoded })
78+
}
79+
80+
fn any_matches(&self, fragments: &HashSet<String>) -> bool {
81+
self.variants
82+
.iter()
83+
.chain(self.decoded.iter())
84+
.any(|frag| fragments.contains(frag))
85+
}
86+
}
87+
3288
/// Holds a cache of fragments for a given URL.
3389
///
3490
/// Fragments, also known as anchors, are used to link to a specific
@@ -67,7 +123,7 @@ impl FragmentChecker {
67123
if fragment.is_empty() || fragment.eq_ignore_ascii_case("top") {
68124
return Ok(true);
69125
}
70-
let mut fragment_decoded = percent_decode_str(fragment).decode_utf8()?;
126+
71127
let url_without_frag = Self::remove_fragment(url.clone());
72128

73129
let FragmentInput { content, file_type } = input;
@@ -76,20 +132,18 @@ impl FragmentChecker {
76132
FileType::Html => extract_html_fragments,
77133
FileType::Plaintext => return Ok(true),
78134
};
79-
if file_type == FileType::Markdown {
80-
fragment_decoded = fragment_decoded.to_lowercase().into();
81-
}
135+
136+
let fragment_candidates = FragmentBuilder::new(fragment, url, file_type)?;
82137
match self.cache.lock().await.entry(url_without_frag) {
83138
Entry::Vacant(entry) => {
84139
let file_frags = extractor(&content);
85-
let contains_fragment =
86-
file_frags.contains(fragment) || file_frags.contains(&fragment_decoded as &str);
140+
let contains_fragment = fragment_candidates.any_matches(&file_frags);
87141
entry.insert(file_frags);
88142
Ok(contains_fragment)
89143
}
90144
Entry::Occupied(entry) => {
91-
Ok(entry.get().contains(fragment)
92-
|| entry.get().contains(&fragment_decoded as &str))
145+
let file_frags = entry.get();
146+
Ok(fragment_candidates.any_matches(file_frags))
93147
}
94148
}
95149
}

0 commit comments

Comments
 (0)