Skip to content

Commit dd45f68

Browse files
committed
Fix ReDoS caused by very large character references using repeated 0s
This patch will fix the ReDoS that is caused by large string of 0s on a character reference (like `&#00000000...`). ## Proof of Concept ```ruby require "rexml" require "benchmark" def test(benchmark, payload) benchmark.report { begin REXML::Document.new(payload) rescue Exception end } end Benchmark.bm do |x| test(x, '<test testing="&#' + "0" * 20000 + '"/>') test(x, '<test testing="&#' + "0" * 40000 + '"/>') test(x, '<test testing="&#' + "0" * 60000 + '"/>') test(x, '<test testing="&#' + "0" * 80000 + '"/>') test(x, '<test testing="&#' + "0" * 100000 + '"/>') end ``` 4ebf21
1 parent 4ebf21f commit dd45f68

File tree

2 files changed

+54
-14
lines changed

2 files changed

+54
-14
lines changed

lib/rexml/text.rb

Lines changed: 34 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -151,25 +151,45 @@ def Text.check string, pattern, doctype
151151
end
152152
end
153153

154-
# context sensitive
155-
string.scan(pattern) do
156-
if $1[-1] != ?;
157-
raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
158-
elsif $1[0] == ?&
159-
if $5 and $5[0] == ?#
160-
case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
161-
when *VALID_CHAR
154+
pos = 0
155+
while (index = string.index(/<|&/, pos))
156+
if string[index] == "<"
157+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
158+
end
159+
160+
unless (end_index = string.index(/[^\s];/, index + 1))
161+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
162+
end
163+
164+
value = string[(index + 1)..end_index]
165+
if /\s/.match?(value)
166+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
167+
end
168+
169+
if value[0] == "#"
170+
character_reference = value[1..-1]
171+
172+
unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
173+
if character_reference[0] == "x" || character_reference[-1] == "x"
174+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
162175
else
163-
raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
176+
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
164177
end
165-
# FIXME: below can't work but this needs API change.
166-
# elsif @parent and $3 and !SUBSTITUTES.include?($1)
167-
# if !doctype or !doctype.entities.has_key?($3)
168-
# raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
169-
# end
170178
end
179+
180+
case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
181+
when *VALID_CHAR
182+
else
183+
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
184+
end
185+
elsif !(/\A#{Entity::NAME}\z/um.match?(value))
186+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
171187
end
188+
189+
pos = end_index + 1
172190
end
191+
192+
string
173193
end
174194

175195
def node_type
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
require "test/unit"
2+
require "core_assertions"
3+
4+
require "rexml/document"
5+
6+
module REXMLTests
7+
class TestParseCharacterReference < Test::Unit::TestCase
8+
include Test::Unit::CoreAssertions
9+
10+
def test_gt_linear_performance_malformed_character_reference
11+
seq = [10000, 50000, 100000, 150000, 200000]
12+
assert_linear_performance(seq, rehearsal: 10) do |n|
13+
begin
14+
REXML::Document.new('<test testing="&#' + "0" * n + '"/>')
15+
rescue
16+
end
17+
end
18+
end
19+
end
20+
end

0 commit comments

Comments
 (0)