Skip to content

Commit d4dd8a3

Browse files
committed
Merge pull request #184 from ambroff/issue-2303
[RECREATED] Deal with different file encodings when submitting solutions
2 parents 24ed0f0 + d7d8d13 commit d4dd8a3

7 files changed

Lines changed: 67 additions & 9 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,6 @@ _testmain.go
2424
out/
2525
release/
2626
go-exercism
27+
28+
# Intellij
29+
/.idea

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ go:
77
install: true
88

99
before_install:
10-
go get github.com/codegangsta/cli && go get github.com/stretchr/testify/assert
10+
bin/deps
1111

1212
script:
1313
- go test ./...

api/iteration.go

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,24 @@
11
package api
22

33
import (
4+
"bytes"
45
"errors"
56
"io/ioutil"
67
"path/filepath"
78
"strings"
9+
10+
"golang.org/x/net/html/charset"
11+
"golang.org/x/text/transform"
12+
)
13+
14+
const (
15+
// mimeType is the content type passed to charset.DetermineEncoding when
// readFileAsUTF8String detects the encoding of a submitted file.
mimeType = "text/plain"
816
)
917

1018
var (
1119
// errUnidentifiable reports that the track and problem could not be
// identified.
errUnidentifiable = errors.New("unable to identify track and problem")
1220
// errNoFiles reports that no files were submitted.
errNoFiles = errors.New("no files submitted")
21+
// utf8BOM is the three-byte UTF-8 byte order mark (EF BB BF);
// readFileAsUTF8String trims it from the front of decoded file contents.
utf8BOM = []byte{0xef, 0xbb, 0xbf}
1322
)
1423

1524
// Iteration represents a version of a particular exercise.
@@ -53,12 +62,13 @@ func NewIteration(dir string, filenames []string) (*Iteration, error) {
5362
iter.Problem = segments[2]
5463

5564
for _, filename := range filenames {
56-
b, err := ioutil.ReadFile(filename)
65+
fileContents, err := readFileAsUTF8String(filename)
5766
if err != nil {
5867
return nil, err
5968
}
69+
6070
path := filename[len(iter.RelativePath()):]
61-
iter.Solution[path] = string(b)
71+
iter.Solution[path] = *fileContents
6272
}
6373
return iter, nil
6474
}
@@ -73,3 +83,28 @@ func (iter *Iteration) isValidFilepath(path string) bool {
7383
}
7484
return strings.HasPrefix(strings.ToLower(path), strings.ToLower(iter.Dir))
7585
}
86+
87+
// readFileAsUTF8String reads the file at filename and returns its contents
// decoded to a string.
//
// The source encoding is chosen by charset.DetermineEncoding from the file's
// bytes and the package-level mimeType; the other two results of that call are
// discarded. The bytes are then run through that encoding's decoder, and a
// leading UTF-8 BOM, if present, is removed from the result.
//
// It returns a pointer to the decoded string, or nil and the error when either
// reading the file or transforming its bytes fails.
func readFileAsUTF8String(filename string) (*string, error) {
88+
b, err := ioutil.ReadFile(filename)
89+
if err != nil {
90+
return nil, err
91+
}
92+
93+
encoding, _, _ := charset.DetermineEncoding(b, mimeType)
94+
decoder := encoding.NewDecoder()
95+
decodedBytes, _, err := transform.Bytes(decoder, b)
96+
if err != nil {
97+
return nil, err
98+
}
99+
100+
// Drop the UTF-8 BOM that may have been added. A BOM isn't necessary, and
101+
// the text is going to be written into another UTF-8 buffer anyway once it's
102+
// JSON serialized.
103+
//
104+
// The standard recommends omitting the BOM. See
105+
// http://www.unicode.org/versions/Unicode5.0.0/ch02.pdf
106+
decodedBytes = bytes.TrimPrefix(decodedBytes, utf8BOM)
107+
108+
s := string(decodedBytes)
109+
return &s, nil
110+
}

api/iteration_test.go

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@ package api
33
import (
44
"path/filepath"
55
"runtime"
6+
"strings"
67
"testing"
8+
"unicode/utf8"
79
)
810

911
func TestNewIteration(t *testing.T) {
@@ -14,6 +16,8 @@ func TestNewIteration(t *testing.T) {
1416
filepath.Join(dir, "python", "leap", "one.py"),
1517
filepath.Join(dir, "python", "leap", "two.py"),
1618
filepath.Join(dir, "python", "leap", "lib", "three.py"),
19+
filepath.Join(dir, "python", "leap", "utf16le.py"),
20+
filepath.Join(dir, "python", "leap", "utf16be.py"),
1721
}
1822

1923
iter, err := NewIteration(dir, files)
@@ -28,18 +32,25 @@ func TestNewIteration(t *testing.T) {
2832
t.Errorf("Expected problem to be leap, was %s", iter.Problem)
2933
}
3034

31-
if len(iter.Solution) != 3 {
35+
if len(iter.Solution) != 5 {
3236
t.Fatalf("Expected solution to have 3 files, had %d", len(iter.Solution))
3337
}
3438

3539
expected := map[string]string{
36-
"one.py": "# one\n",
37-
"two.py": "# two\n",
38-
"lib/three.py": "# three\n",
40+
"one.py": "# one",
41+
"two.py": "# two",
42+
filepath.Join("lib", "three.py"): "# three",
43+
"utf16le.py": "# utf16le",
44+
"utf16be.py": "# utf16be",
3945
}
46+
4047
for filename, code := range expected {
41-
if iter.Solution[filename] != code {
42-
t.Errorf("Expected %s to contain %s, had %s", filename, code, iter.Solution[filename])
48+
if !utf8.ValidString(iter.Solution[filename]) {
49+
t.Errorf("Iteration content is not valid UTF-8 data: %s", iter.Solution[filename])
50+
}
51+
52+
if !strings.HasPrefix(iter.Solution[filename], code) {
53+
t.Errorf("Expected %s to contain `%s', had `%s'", filename, code, iter.Solution[filename])
4354
}
4455
}
4556
}

bin/deps

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#!/bin/bash
# Fetches the Go packages listed in LIBRARIES with `go get`.
# $LIBRARIES is expanded unquoted below so that the shell splits it on
# whitespace and each package path becomes a separate argument.
2+
3+
LIBRARIES="\
4+
github.com/codegangsta/cli \
5+
github.com/stretchr/testify/assert \
6+
golang.org/x/net/html/charset \
7+
golang.org/x/text/transform"
8+
9+
go get $LIBRARIES
20 Bytes
Binary file not shown.
20 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)