Skip to content

Commit 68b4488

Browse files
authored
Add CSV::InvalidEncodingError (#287)
To handle encoding errors in CSV parsing with the appropriate error class
1 parent 281b1a5 commit 68b4488

File tree

4 files changed

+17
-10
lines changed

4 files changed

+17
-10
lines changed

lib/csv.rb

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -854,6 +854,15 @@ def initialize(message, line_number)
854854
end
855855
end
856856

857+
# The error raised when the parser encounters an invalid byte sequence for the expected encoding in CSV data.
858+
class InvalidEncodingError < MalformedCSVError
859+
attr_reader :encoding
860+
def initialize(encoding, line_number)
861+
@encoding = encoding
862+
super("Invalid byte sequence in #{encoding}", line_number)
863+
end
864+
end
865+
857866
#
858867
# A FieldInfo Struct contains details about a field's position in the data
859868
# source it was read from. CSV will pass this Struct to some blocks that make

lib/csv/parser.rb

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -414,8 +414,7 @@ def parse(&block)
414414
else
415415
lineno = @lineno + 1
416416
end
417-
message = "Invalid byte sequence in #{@encoding}"
418-
raise MalformedCSVError.new(message, lineno)
417+
raise InvalidEncodingError.new(@encoding, lineno)
419418
rescue UnexpectedError => error
420419
if @scanner
421420
ignore_broken_line
@@ -876,8 +875,7 @@ def build_scanner
876875
!line.valid_encoding?
877876
end
878877
if index
879-
message = "Invalid byte sequence in #{@encoding}"
880-
raise MalformedCSVError.new(message, @lineno + index + 1)
878+
raise InvalidEncodingError.new(@encoding, @lineno + index + 1)
881879
end
882880
end
883881
Scanner.new(string)

test/csv/interface/test_read.rb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,11 +113,11 @@ def test_open_encoding_invalid
113113
file << "\u{1F600},\u{1F601}"
114114
end
115115
CSV.open(@input.path, encoding: "EUC-JP") do |csv|
116-
error = assert_raise(CSV::MalformedCSVError) do
116+
error = assert_raise(CSV::InvalidEncodingError) do
117117
csv.shift
118118
end
119-
assert_equal("Invalid byte sequence in EUC-JP in line 1.",
120-
error.message)
119+
assert_equal([Encoding::EUC_JP, "Invalid byte sequence in EUC-JP in line 1."],
120+
[error.encoding, error.message])
121121
end
122122
end
123123

test/csv/test_encodings.rb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -280,12 +280,12 @@ def test_row_separator_detection_with_invalid_encoding
280280
def test_invalid_encoding_row_error
281281
csv = CSV.new("valid,x\rinvalid,\xF8\r".force_encoding("UTF-8"),
282282
encoding: "UTF-8", row_sep: "\r")
283-
error = assert_raise(CSV::MalformedCSVError) do
283+
error = assert_raise(CSV::InvalidEncodingError) do
284284
csv.shift
285285
csv.shift
286286
end
287-
assert_equal("Invalid byte sequence in UTF-8 in line 2.",
288-
error.message)
287+
assert_equal([Encoding::UTF_8, "Invalid byte sequence in UTF-8 in line 2."],
288+
[error.encoding, error.message])
289289
end
290290

291291
def test_string_input_transcode

0 commit comments

Comments
 (0)