Skip to content

Commit 1a6ca6e

Browse files
kmuto and arp242 authored
skip UTF-8 BOM also (#381)
Co-authored-by: Martin Tournoij <[email protected]>
1 parent bd94408 commit 1a6ca6e

File tree

2 files changed

+6
-2
lines changed

2 files changed

+6
-2
lines changed

decode_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ func TestDecodeBOM(t *testing.T) {
6666
for _, tt := range [][]byte{
6767
[]byte("\xff\xfea = \"b\""),
6868
[]byte("\xfe\xffa = \"b\""),
69+
[]byte("\xef\xbb\xbfa = \"b\""),
6970
} {
7071
t.Run("", func(t *testing.T) {
7172
var s struct{ A string }

parse.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,12 @@ func parse(data string) (p *parser, err error) {
4747
}()
4848

4949
// Read over BOM; do this here as the lexer calls utf8.DecodeRuneInString()
50-
// which mangles stuff.
51-
if strings.HasPrefix(data, "\xff\xfe") || strings.HasPrefix(data, "\xfe\xff") {
50+
// which mangles stuff. UTF-16 BOM isn't strictly valid, but some tools add
51+
// it anyway.
52+
if strings.HasPrefix(data, "\xff\xfe") || strings.HasPrefix(data, "\xfe\xff") { // UTF-16
5253
data = data[2:]
54+
} else if strings.HasPrefix(data, "\xef\xbb\xbf") { // UTF-8
55+
data = data[3:]
5356
}
5457

5558
// Examine first few bytes for NULL bytes; this probably means it's a UTF-16

0 commit comments

Comments
 (0)