exercism · kytrinyx · Sep 26, 2016 · Sep 5, 2016 · Sep 5, 2016 · Apr 25, 2016
diff --git a/api/iteration.go b/api/iteration.go
@@ -137,7 +137,16 @@ func readFileAsUTF8String(filename string) (*string, error) {
 		return nil, err
 	}
 
-	encoding, _, _ := charset.DetermineEncoding(b, mimeType)
+	encoding, _, certain := charset.DetermineEncoding(b, mimeType)
+	if !certain {
+		// We don't want to use an uncertain encoding.
+		// In particular, doing that may mangle UTF-8 files
+		// that have only ASCII in their first 1024 bytes.
+		// See https://github.com/exercism/cli/issues/309.
+		// So if we're unsure, use UTF-8 (no transformation).
+		s := string(b)
+		return &s, nil
+	}
 	decoder := encoding.NewDecoder()
 	decodedBytes, _, err := transform.Bytes(decoder, b)
 	if err != nil {

diff --git a/api/iteration_test.go b/api/iteration_test.go
@@ -18,6 +18,7 @@ func TestNewIteration(t *testing.T) {
 		filepath.Join(dir, "python", "leap", "lib", "three.py"),
 		filepath.Join(dir, "python", "leap", "utf16le.py"),
 		filepath.Join(dir, "python", "leap", "utf16be.py"),
+		filepath.Join(dir, "python", "leap", "long-utf8.py"),
 	}
 
 	iter, err := NewIteration(dir, files)
@@ -32,25 +33,32 @@ func TestNewIteration(t *testing.T) {
 		t.Errorf("Expected problem to be leap, was %s", iter.Problem)
 	}
 
-	if len(iter.Solution) != 5 {
-		t.Fatalf("Expected solution to have 3 files, had %d", len(iter.Solution))
+	if len(iter.Solution) != 6 {
+		t.Fatalf("Expected solution to have 6 files, had %d", len(iter.Solution))
 	}
 
-	expected := map[string]string{
-		"one.py": "# one",
-		"two.py": "# two",
-		filepath.Join("lib", "three.py"): "# three",
-		"utf16le.py":                     "# utf16le",
-		"utf16be.py":                     "# utf16be",
+	expected := map[string]struct {
+		prefix string
+		suffix string
+	}{
+		"one.py": {prefix: "# one"},
+		"two.py": {prefix: "# two"},
+		filepath.Join("lib", "three.py"): {prefix: "# three"},
+		"utf16le.py":                     {prefix: "# utf16le"},
+		"utf16be.py":                     {prefix: "# utf16be"},
+		"long-utf8.py":                   {prefix: "# The first 1024", suffix: "👍\n"},
 	}
 
 	for filename, code := range expected {
 		if !utf8.ValidString(iter.Solution[filename]) {
 			t.Errorf("Iteration content is not valid UTF-8 data: %s", iter.Solution[filename])
 		}
 
-		if !strings.HasPrefix(iter.Solution[filename], code) {
-			t.Errorf("Expected %s to contain `%s', had `%s'", filename, code, iter.Solution[filename])
+		if !strings.HasPrefix(iter.Solution[filename], code.prefix) {
+			t.Errorf("Expected %s to start with `%s', had `%s'", filename, code.prefix, iter.Solution[filename])
+		}
+		if !strings.HasSuffix(iter.Solution[filename], code.suffix) {
+			t.Errorf("Expected %s to end with `%s', had `%s'", filename, code.suffix, iter.Solution[filename])
 		}
 	}
 }

diff --git a/fixtures/iteration/python/leap/long-utf8.py b/fixtures/iteration/python/leap/long-utf8.py
@@ -0,0 +1,31 @@
+# The first 1024 bytes of this file need to contain only ASCII characters.
+# After the first 1024 bytes, there should be a non-ASCII character.
+#
+# Explanation:
+# We use golang.org/x/net/html/charset.DetermineEncoding to guess file encoding.
+# DetermineEncoding checks the first 1024 bytes of a file.
+# If it can't determine the encoding and saw no non-ASCII characters,
+# it declares the file to have windows-1252 encoding.
+# This mangles the submitted file if it should have been UTF-8.
+# We test to make sure we use UTF-8 for such files, instead of windows-1252.
+
+lipsum = """
+Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nam condimentum vitae
+ipsum eget tempor. Morbi sed ex quis orci vulputate cursus quis non massa.
+Vestibulum quam nibh, elementum in justo in, venenatis tristique nisl. Morbi
+sagittis elit id velit ultricies, sed rutrum augue posuere. Donec nec nulla nec
+eros fringilla pellentesque. Duis at dictum justo. Nunc ut magna felis. Aliquam
+volutpat, lectus et molestie porttitor, est orci malesuada erat, ac pretium
+eros ligula vel erat. Nullam venenatis dui eget sapien semper lobortis. Aenean
+ac eros eget neque porta auctor in nec erat. Phasellus ac nulla ac turpis
+porttitor auctor. Etiam eget posuere diam, ac feugiat lacus. Curabitur ornare
+justo ut nulla congue, vitae posuere erat venenatis. Aliquam pulvinar eleifend
+faucibus.
+
+Etiam justo sem, faucibus malesuada purus a, ultrices efficitur ex.
+Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac
+turpis egestas. Duis maximus dapibus mattis. Quisque sem ex, convallis eu
+ultricies posuere.
+"""
+
+# 👍