@@ -25,28 +25,30 @@ def _get_mock_dataset(root_dir, base_dir_name):
25
25
file_names = ("wiki.train.tokens" , "wiki.valid.tokens" , "wiki.test.tokens" )
26
26
for file_name in file_names :
27
27
csv_file = os .path .join (temp_dataset_dir , file_name )
28
- mocked_lines = mocked_data [os . path . splitext ( file_name )[ 0 ]]
28
+ mocked_lines = mocked_data [file_name . split ( "." )[ 1 ]]
29
29
with open (csv_file , "w" , encoding = "utf-8" ) as f :
30
30
for i in range (5 ):
31
31
rand_string = get_random_unicode (seed )
32
- dataset_line = rand_string
33
- f .write (f" { rand_string } \n " )
32
+ dataset_line = f" { rand_string } \n "
33
+ f .write (dataset_line )
34
34
35
35
# append line to correct dataset split
36
36
mocked_lines .append (dataset_line )
37
37
seed += 1
38
38
39
39
if base_dir_name == WikiText103 .__name__ :
40
40
compressed_file = "wikitext-103-v1"
41
+ arcname_folder = "wikitext-103"
41
42
else :
42
43
compressed_file = "wikitext-2-v1"
44
+ arcname_folder = "wikitext-2"
43
45
44
46
compressed_dataset_path = os .path .join (base_dir , compressed_file + ".zip" )
45
47
# create zip file from dataset folder
46
48
with zipfile .ZipFile (compressed_dataset_path , "w" ) as zip_file :
47
49
for file_name in file_names :
48
50
txt_file = os .path .join (temp_dataset_dir , file_name )
49
- zip_file .write (txt_file , arcname = compressed_file )
51
+ zip_file .write (txt_file , arcname = os . path . join ( arcname_folder , file_name ) )
50
52
51
53
return mocked_data
52
54
0 commit comments