Skip to content

Commit 2e03f16

Browse files
authored
Merge pull request #789 from gjtorikian/external-hash-crash
Improved PDF hash handling
2 parents 3aa7073 + d0809a7 commit 2e03f16

25 files changed

+578 / -162 lines changed

html-proofer.gemspec

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ Gem::Specification.new do |spec|
2727
spec.add_dependency("addressable", "~> 2.3")
2828
spec.add_dependency("async", "~> 2.1")
2929
spec.add_dependency("nokogiri", "~> 1.13")
30+
spec.add_dependency("pdf-reader", "~> 2.11")
3031
spec.add_dependency("rainbow", "~> 3.0")
3132
spec.add_dependency("typhoeus", "~> 1.3")
3233
spec.add_dependency("yell", "~> 2.0")

lib/html_proofer/attribute/url.rb

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -141,12 +141,16 @@ def file_path
141141
# either overwrite with root_dir; or, if source is directory, use that; or, just get the current file's dirname
142142
@runner.options[:root_dir] || (File.directory?(@runner.current_source) ? @runner.current_source : File.dirname(@runner.current_source))
143143
# relative links, path is a file
144-
elsif File.exist?(File.expand_path(path,
145-
@runner.current_source)) || File.exist?(File.expand_path(path_dot_ext, @runner.current_source))
144+
elsif File.exist?(File.expand_path(
145+
path,
146+
@runner.current_source,
147+
)) || File.exist?(File.expand_path(path_dot_ext, @runner.current_source))
146148
File.dirname(@runner.current_filename)
147149
# relative links in nested dir, path is a file
148-
elsif File.exist?(File.join(File.dirname(@runner.current_filename),
149-
path)) || File.exist?(File.join(File.dirname(@runner.current_filename), path_dot_ext))
150+
elsif File.exist?(File.join(
151+
File.dirname(@runner.current_filename),
152+
path,
153+
)) || File.exist?(File.join(File.dirname(@runner.current_filename), path_dot_ext))
150154
File.dirname(@runner.current_filename)
151155
# relative link, path is a directory
152156
else

lib/html_proofer/check.rb

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,8 +25,14 @@ def run
2525
end
2626

2727
def add_failure(description, line: nil, status: nil, content: nil)
28-
@failures << Failure.new(@runner.current_filename, short_name, description, line: line, status: status,
29-
content: content)
28+
@failures << Failure.new(
29+
@runner.current_filename,
30+
short_name,
31+
description,
32+
line: line,
33+
status: status,
34+
content: content,
35+
)
3036
end
3137

3238
def short_name

lib/html_proofer/check/favicon.rb

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -17,13 +17,19 @@ def run
1717

1818
if found
1919
if @favicon.url.protocol_relative?
20-
add_failure("favicon link #{@favicon.url} is a protocol-relative URL, use explicit https:// instead",
21-
line: @favicon.line, content: @favicon.content)
20+
add_failure(
21+
"favicon link #{@favicon.url} is a protocol-relative URL, use explicit https:// instead",
22+
line: @favicon.line,
23+
content: @favicon.content,
24+
)
2225
elsif @favicon.url.remote?
2326
add_to_external_urls(@favicon.url, @favicon.line)
2427
elsif !@favicon.url.exists?
25-
add_failure("internal favicon #{@favicon.url.raw_attribute} does not exist", line: @favicon.line,
26-
content: @favicon.content)
28+
add_failure(
29+
"internal favicon #{@favicon.url.raw_attribute} does not exist",
30+
line: @favicon.line,
31+
content: @favicon.content,
32+
)
2733
end
2834
else
2935
add_failure("no favicon provided")

lib/html_proofer/check/images.rb

Lines changed: 35 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -12,27 +12,39 @@ def run
1212
next if @img.ignore?
1313

1414
# screenshot filenames should return because of terrible names
15-
add_failure("image has a terrible filename (#{@img.url.raw_attribute})", line: @img.line,
16-
content: @img.content) if terrible_filename?
15+
add_failure(
16+
"image has a terrible filename (#{@img.url.raw_attribute})",
17+
line: @img.line,
18+
content: @img.content,
19+
) if terrible_filename?
1720

1821
# does the image exist?
1922
if missing_src?
2023
add_failure("image has no src or srcset attribute", line: @img.line, content: @img.content)
2124
elsif @img.url.protocol_relative?
22-
add_failure("image link #{@img.url} is a protocol-relative URL, use explicit https:// instead",
23-
line: @img.line, content: @img.content)
25+
add_failure(
26+
"image link #{@img.url} is a protocol-relative URL, use explicit https:// instead",
27+
line: @img.line,
28+
content: @img.content,
29+
)
2430
elsif @img.url.remote?
2531
add_to_external_urls(@img.url, @img.line)
2632
elsif !@img.url.exists? && !@img.multiple_srcsets? && !@img.multiple_sizes?
27-
add_failure("internal image #{@img.url.raw_attribute} does not exist", line: @img.line,
28-
content: @img.content)
33+
add_failure(
34+
"internal image #{@img.url.raw_attribute} does not exist",
35+
line: @img.line,
36+
content: @img.content,
37+
)
2938
elsif @img.multiple_srcsets? || @img.multiple_sizes?
3039
@img.srcsets_wo_sizes.each do |srcset|
3140
srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url, extract_size: true)
3241

3342
if srcset_url.protocol_relative?
34-
add_failure("image link #{srcset_url.url} is a protocol-relative URL, use explicit https:// instead",
35-
line: @img.line, content: @img.content)
43+
add_failure(
44+
"image link #{srcset_url.url} is a protocol-relative URL, use explicit https:// instead",
45+
line: @img.line,
46+
content: @img.content,
47+
)
3648
elsif srcset_url.remote?
3749
add_to_external_urls(srcset_url.url, @img.line)
3850
elsif !srcset_url.exists?
@@ -44,16 +56,25 @@ def run
4456
# if this is an img element, check that the alt attribute is present
4557
if @img.img_tag? && !ignore_element?
4658
if missing_alt_tag? && !ignore_missing_alt?
47-
add_failure("image #{@img.url.raw_attribute} does not have an alt attribute", line: @img.line,
48-
content: @img.content)
59+
add_failure(
60+
"image #{@img.url.raw_attribute} does not have an alt attribute",
61+
line: @img.line,
62+
content: @img.content,
63+
)
4964
elsif (empty_alt_tag? || alt_all_spaces?) && !ignore_empty_alt?
50-
add_failure("image #{@img.url.raw_attribute} has an alt attribute, but no content", line: @img.line,
51-
content: @img.content)
65+
add_failure(
66+
"image #{@img.url.raw_attribute} has an alt attribute, but no content",
67+
line: @img.line,
68+
content: @img.content,
69+
)
5270
end
5371
end
5472

55-
add_failure("image #{@img.url.raw_attribute} uses the http scheme", line: @img.line,
56-
content: @img.content) if @runner.enforce_https? && @img.url.http?
73+
add_failure(
74+
"image #{@img.url.raw_attribute} uses the http scheme",
75+
line: @img.line,
76+
content: @img.content,
77+
) if @runner.enforce_https? && @img.url.http?
5778
end
5879

5980
external_urls

lib/html_proofer/check/links.rb

Lines changed: 35 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -29,8 +29,11 @@ def run
2929
end
3030

3131
if @link.url.protocol_relative?
32-
add_failure("#{@link.url} is a protocol-relative URL, use explicit https:// instead",
33-
line: @link.line, content: @link.content)
32+
add_failure(
33+
"#{@link.url} is a protocol-relative URL, use explicit https:// instead",
34+
line: @link.line,
35+
content: @link.content,
36+
)
3437
next
3538
end
3639

@@ -55,8 +58,11 @@ def run
5558
elsif @link.url.internal?
5659
# does the local directory have a trailing slash?
5760
if @link.url.unslashed_directory?(@link.url.absolute_path)
58-
add_failure("internally linking to a directory #{@link.url.raw_attribute} without trailing slash",
59-
line: @link.line, content: @link.content)
61+
add_failure(
62+
"internally linking to a directory #{@link.url.raw_attribute} without trailing slash",
63+
line: @link.line,
64+
content: @link.content,
65+
)
6066
next
6167
end
6268

@@ -88,17 +94,26 @@ def check_schemes
8894

8995
def handle_mailto
9096
if @link.url.path.empty?
91-
add_failure("#{@link.url.raw_attribute} contains no email address", line: @link.line,
92-
content: @link.content) unless ignore_empty_mailto?
97+
add_failure(
98+
"#{@link.url.raw_attribute} contains no email address",
99+
line: @link.line,
100+
content: @link.content,
101+
) unless ignore_empty_mailto?
93102
elsif !/#{URI::MailTo::EMAIL_REGEXP}/o.match?(@link.url.path)
94-
add_failure("#{@link.url.raw_attribute} contains an invalid email address", line: @link.line,
95-
content: @link.content)
103+
add_failure(
104+
"#{@link.url.raw_attribute} contains an invalid email address",
105+
line: @link.line,
106+
content: @link.content,
107+
)
96108
end
97109
end
98110

99111
def handle_tel
100-
add_failure("#{@link.url.raw_attribute} contains no phone number", line: @link.line,
101-
content: @link.content) if @link.url.path.empty?
112+
add_failure(
113+
"#{@link.url.raw_attribute} contains no phone number",
114+
line: @link.line,
115+
content: @link.content,
116+
) if @link.url.path.empty?
102117
end
103118

104119
def ignore_empty_mailto?
@@ -113,13 +128,19 @@ def check_sri
113128
return unless SRI_REL_TYPES.include?(@link.node["rel"])
114129

115130
if blank?(@link.node["integrity"]) && blank?(@link.node["crossorigin"])
116-
add_failure("SRI and CORS not provided in: #{@link.url.raw_attribute}", line: @link.line,
117-
content: @link.content)
131+
add_failure(
132+
"SRI and CORS not provided in: #{@link.url.raw_attribute}",
133+
line: @link.line,
134+
content: @link.content,
135+
)
118136
elsif blank?(@link.node["integrity"])
119137
add_failure("Integrity is missing in: #{@link.url.raw_attribute}", line: @link.line, content: @link.content)
120138
elsif blank?(@link.node["crossorigin"])
121-
add_failure("CORS not provided for external resource in: #{@link.link.url.raw_attribute}", line: @link.line,
122-
content: @link.content)
139+
add_failure(
140+
"CORS not provided for external resource in: #{@link.link.url.raw_attribute}",
141+
line: @link.line,
142+
content: @link.content,
143+
)
123144
end
124145
end
125146

lib/html_proofer/check/open_graph.rb

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -17,13 +17,19 @@ def run
1717
elsif !@open_graph.url.valid?
1818
add_failure("#{@open_graph.src} is an invalid URL", line: @open_graph.line)
1919
elsif @open_graph.url.protocol_relative?
20-
add_failure("open graph link #{@open_graph.url} is a protocol-relative URL, use explicit https:// instead",
21-
line: @open_graph.line, content: @open_graph.content)
20+
add_failure(
21+
"open graph link #{@open_graph.url} is a protocol-relative URL, use explicit https:// instead",
22+
line: @open_graph.line,
23+
content: @open_graph.content,
24+
)
2225
elsif @open_graph.url.remote?
2326
add_to_external_urls(@open_graph.url, @open_graph.line)
2427
else
25-
add_failure("internal open graph #{@open_graph.url.raw_attribute} does not exist", line: @open_graph.line,
26-
content: @open_graph.content) unless @open_graph.url.exists?
28+
add_failure(
29+
"internal open graph #{@open_graph.url.raw_attribute} does not exist",
30+
line: @open_graph.line,
31+
content: @open_graph.content,
32+
) unless @open_graph.url.exists?
2733
end
2834
end
2935

lib/html_proofer/check/scripts.rb

Lines changed: 25 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -14,14 +14,20 @@ def run
1414
if missing_src?
1515
add_failure("script is empty and has no src attribute", line: @script.line, content: @script.content)
1616
elsif @script.url.protocol_relative?
17-
add_failure("script link #{@script.url} is a protocol-relative URL, use explicit https:// instead",
18-
line: @script.line, content: @script.content)
17+
add_failure(
18+
"script link #{@script.url} is a protocol-relative URL, use explicit https:// instead",
19+
line: @script.line,
20+
content: @script.content,
21+
)
1922
elsif @script.url.remote?
2023
add_to_external_urls(@script.url, @script.line)
2124
check_sri if @runner.check_sri?
2225
elsif !@script.url.exists?
23-
add_failure("internal script reference #{@script.src} does not exist", line: @script.line,
24-
content: @script.content)
26+
add_failure(
27+
"internal script reference #{@script.src} does not exist",
28+
line: @script.line,
29+
content: @script.content,
30+
)
2531
end
2632
end
2733

@@ -34,14 +40,23 @@ def missing_src?
3440

3541
def check_sri
3642
if blank?(@script.node["integrity"]) && blank?(@script.node["crossorigin"])
37-
add_failure("SRI and CORS not provided in: #{@script.url.raw_attribute}", line: @script.line,
38-
content: @script.content)
43+
add_failure(
44+
"SRI and CORS not provided in: #{@script.url.raw_attribute}",
45+
line: @script.line,
46+
content: @script.content,
47+
)
3948
elsif blank?(@script.node["integrity"])
40-
add_failure("Integrity is missing in: #{@script.url.raw_attribute}", line: @script.line,
41-
content: @script.content)
49+
add_failure(
50+
"Integrity is missing in: #{@script.url.raw_attribute}",
51+
line: @script.line,
52+
content: @script.content,
53+
)
4254
elsif blank?(@script.node["crossorigin"])
43-
add_failure("CORS not provided for external resource in: #{@script.url.raw_attribute}", line: @script.line,
44-
content: @script.content)
55+
add_failure(
56+
"CORS not provided for external resource in: #{@script.url.raw_attribute}",
57+
line: @script.line,
58+
content: @script.content,
59+
)
4560
end
4661
end
4762
end

0 commit comments

Comments
 (0)