
Commit 3aa53d2

TEST-#2288: addressing review comments
Signed-off-by: Alexander Myskov <[email protected]>
1 parent 1f81eb1 commit 3aa53d2

File tree

2 files changed: +134 -125 lines changed


modin/pandas/test/test_io.py

Lines changed: 7 additions & 125 deletions
@@ -25,7 +25,6 @@
 import shutil
 import sqlalchemy as sa
 import csv
-from string import ascii_letters
 
 from .utils import (
     df_equals,
@@ -35,6 +34,10 @@
     json_long_bytes,
     random_state,
     eval_io,
+    get_unique_filename,
+    get_random_string,
+    insert_lines_to_csv,
+    IO_OPS_DATA_DIR,
 )
 
 from modin.config import Engine, Backend
@@ -62,9 +65,9 @@
 TEST_GBQ_FILENAME = "test_gbq."
 SMALL_ROW_SIZE = 2000
 
-test_data_dir = os.path.join(os.path.dirname(__file__), "read_csv_data")
-if not os.path.exists(test_data_dir):
-    os.mkdir(test_data_dir)
+
+if not os.path.exists(IO_OPS_DATA_DIR):
+    os.mkdir(IO_OPS_DATA_DIR)
 
 
 @pytest.fixture
@@ -159,76 +162,6 @@ def teardown_test_file(test_path):
         os.remove(test_path)
 
 
-def get_random_string():
-    random_string = "".join(
-        random_state.choice([x for x in ascii_letters], size=10).tolist()
-    )
-    return random_string
-
-
-def insert_lines_to_csv(
-    csv_name: str,
-    lines_positions: list,
-    lines_type: str = "blank",
-    encoding: str = None,
-    **csv_reader_writer_params,
-):
-    """Insert lines into a ".csv" file.
-
-    Parameters
-    ----------
-    csv_name: str
-        ".csv" file that should be modified.
-    lines_positions: list of ints
-        Positions of the lines that should be modified (serial number
-        of a line - begins from 0, ends at <rows_number> - 1).
-    lines_type: str
-        Type of lines that should be inserted into the ".csv" file. Possible types:
-        "blank" - empty line without any delimiters/separators,
-        "bad" - lines with len(lines_data) > cols_number
-    encoding: str
-        Encoding type that should be used during file reading and writing.
-    """
-    cols_number = len(pandas.read_csv(csv_name, nrows=1).columns)
-    if lines_type == "blank":
-        lines_data = []
-    elif lines_type == "bad":
-        cols_number = len(pandas.read_csv(csv_name, nrows=1).columns)
-        lines_data = [x for x in range(cols_number + 1)]
-    else:
-        raise ValueError(
-            f"acceptable values for parameter are ['blank', 'bad'], actually passed {lines_type}"
-        )
-    lines = []
-    dialect = "excel"
-    with open(csv_name, "r", encoding=encoding, newline="") as read_file:
-        try:
-            dialect = csv.Sniffer().sniff(read_file.read())
-            read_file.seek(0)
-        except Exception:
-            dialect = None
-
-        reader = csv.reader(
-            read_file,
-            dialect=dialect if dialect is not None else "excel",
-            **csv_reader_writer_params,
-        )
-        counter = 0
-        for row in reader:
-            if counter in lines_positions:
-                lines.append(lines_data)
-            else:
-                lines.append(row)
-            counter += 1
-    with open(csv_name, "w", encoding=encoding, newline="") as write_file:
-        writer = csv.writer(
-            write_file,
-            dialect=dialect if dialect is not None else "excel",
-            **csv_reader_writer_params,
-        )
-        writer.writerows(lines)
-
-
 def _make_csv_file(filenames):
     def _csv_file_maker(
         filename=TEST_CSV_FILENAME,
@@ -562,57 +495,6 @@ def teardown_fwf_file():
             pass
 
 
-def get_unique_filename(
-    test_name: str,
-    kwargs: dict = {},
-    extension: str = "csv",
-    data_dir: str = test_data_dir,
-    suffix: str = "",
-):
-    """Returns a unique file name built from the specified parameters.
-
-    Parameters
-    ----------
-    test_name: str
-        Name of the test for which the unique file name is needed.
-    kwargs: dict
-        Unique combination of test parameters for creation of the unique name.
-    extension: str
-        Extension of the unique file.
-    data_dir: str
-        Data directory where test files will be created.
-    suffix: str
-        String to append to the resulting name.
-
-    Returns
-    -------
-    Unique file name.
-    """
-    # shortcut if kwargs parameter is not provided
-    if len(kwargs) == 0 and extension == "csv" and suffix == "":
-        return os.path.join(data_dir, (test_name + f"_{suffix}" + f".{extension}"))
-
-    assert "." not in extension, "please provide pure extension name without '.'"
-    prohibited_chars = ['"', "\n"]
-    non_prohibited_char = "np_char"
-    char_counter = 0
-    kwargs_name = dict(kwargs)
-    for key, value in kwargs_name.items():
-        for char in prohibited_chars:
-            if isinstance(value, str) and char in value or callable(value):
-                kwargs_name[key] = non_prohibited_char + str(char_counter)
-                char_counter += 1
-    parameters_values = "_".join(
-        [
-            str(value)
-            if not isinstance(value, (list, tuple))
-            else "_".join([str(x) for x in value])
-            for value in kwargs_name.values()
-        ]
-    )
-    return os.path.join(data_dir, parameters_values + f"_{suffix}" + f".{extension}")
-
-
 class TestReadCSV:
     # delimiter tests
     @pytest.mark.parametrize("sep", ["_", ",", ".", "\n"])

modin/pandas/test/utils.py

Lines changed: 127 additions & 0 deletions
@@ -25,6 +25,9 @@
 from modin.utils import to_pandas
 from modin.config import TestDatasetSize
 from io import BytesIO
+import os
+from string import ascii_letters
+import csv
 
 random_state = np.random.RandomState(seed=42)
 
@@ -41,6 +44,9 @@
 RAND_LOW = 0
 RAND_HIGH = 100
 
+# Directory for storing I/O operations test data
+IO_OPS_DATA_DIR = os.path.join(os.path.dirname(__file__), "read_csv_data")
+
 # Input data and functions for the tests
 # The test data that we will test our code against
 test_data = {
@@ -832,3 +838,124 @@ def generate_none_dfs():
         }
     )
     return df, df2
+
+
+def get_unique_filename(
+    test_name: str,
+    kwargs: dict = {},
+    extension: str = "csv",
+    data_dir: str = IO_OPS_DATA_DIR,
+    suffix: str = "",
+):
+    """Returns a unique file name built from the specified parameters.
+
+    Parameters
+    ----------
+    test_name: str
+        Name of the test for which the unique file name is needed.
+    kwargs: dict
+        Unique combination of test parameters for creation of the unique name.
+    extension: str
+        Extension of the unique file.
+    data_dir: str
+        Data directory where test files will be created.
+    suffix: str
+        String to append to the resulting name.
+
+    Returns
+    -------
+    Unique file name.
+    """
+    # shortcut if kwargs parameter is not provided
+    if len(kwargs) == 0 and extension == "csv" and suffix == "":
+        return os.path.join(data_dir, (test_name + f"_{suffix}" + f".{extension}"))
+
+    assert "." not in extension, "please provide pure extension name without '.'"
+    prohibited_chars = ['"', "\n"]
+    non_prohibited_char = "np_char"
+    char_counter = 0
+    kwargs_name = dict(kwargs)
+    for key, value in kwargs_name.items():
+        for char in prohibited_chars:
+            if isinstance(value, str) and char in value or callable(value):
+                kwargs_name[key] = non_prohibited_char + str(char_counter)
+                char_counter += 1
+    parameters_values = "_".join(
+        [
+            str(value)
+            if not isinstance(value, (list, tuple))
+            else "_".join([str(x) for x in value])
+            for value in kwargs_name.values()
+        ]
+    )
+    return os.path.join(data_dir, parameters_values + f"_{suffix}" + f".{extension}")
+
+
+def get_random_string():
+    random_string = "".join(
+        random_state.choice([x for x in ascii_letters], size=10).tolist()
+    )
+    return random_string
+
+
+def insert_lines_to_csv(
+    csv_name: str,
+    lines_positions: list,
+    lines_type: str = "blank",
+    encoding: str = None,
+    **csv_reader_writer_params,
+):
+    """Insert lines into a ".csv" file.
+
+    Parameters
+    ----------
+    csv_name: str
+        ".csv" file that should be modified.
+    lines_positions: list of ints
+        Positions of the lines that should be modified (serial number
+        of a line - begins from 0, ends at <rows_number> - 1).
+    lines_type: str
+        Type of lines that should be inserted into the ".csv" file. Possible types:
+        "blank" - empty line without any delimiters/separators,
+        "bad" - lines with len(lines_data) > cols_number
+    encoding: str
+        Encoding type that should be used during file reading and writing.
+    """
+    cols_number = len(pandas.read_csv(csv_name, nrows=1).columns)
+    if lines_type == "blank":
+        lines_data = []
+    elif lines_type == "bad":
+        cols_number = len(pandas.read_csv(csv_name, nrows=1).columns)
+        lines_data = [x for x in range(cols_number + 1)]
+    else:
+        raise ValueError(
+            f"acceptable values for parameter are ['blank', 'bad'], actually passed {lines_type}"
+        )
+    lines = []
+    dialect = "excel"
+    with open(csv_name, "r", encoding=encoding, newline="") as read_file:
+        try:
+            dialect = csv.Sniffer().sniff(read_file.read())
+            read_file.seek(0)
+        except Exception:
+            dialect = None
+
+        reader = csv.reader(
+            read_file,
+            dialect=dialect if dialect is not None else "excel",
+            **csv_reader_writer_params,
+        )
+        counter = 0
+        for row in reader:
+            if counter in lines_positions:
+                lines.append(lines_data)
+            else:
+                lines.append(row)
+            counter += 1
+    with open(csv_name, "w", encoding=encoding, newline="") as write_file:
+        writer = csv.writer(
+            write_file,
+            dialect=dialect if dialect is not None else "excel",
+            **csv_reader_writer_params,
+        )
+        writer.writerows(lines)
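For context, here is one way these relocated helpers could be combined when preparing a malformed CSV fixture. This is an illustrative sketch, not code from this commit: the test name, column names, and data are hypothetical, and the absolute import assumes the module is importable as modin.pandas.test.utils.

import os

import pandas

from modin.pandas.test.utils import (
    IO_OPS_DATA_DIR,
    get_unique_filename,
    insert_lines_to_csv,
)

# Make sure the shared I/O test-data directory exists (test_io.py does this at import time).
os.makedirs(IO_OPS_DATA_DIR, exist_ok=True)

# Write a small, well-formed CSV fixture first.
filename = get_unique_filename(test_name="test_read_csv_bad_lines")  # hypothetical test name
pandas.DataFrame({"a": range(10), "b": range(10)}).to_csv(filename, index=False)

# Corrupt rows 2 and 5: "bad" inserts rows with more fields than there are columns,
# while "blank" would insert completely empty lines instead.
insert_lines_to_csv(csv_name=filename, lines_positions=[2, 5], lines_type="bad")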

0 commit comments
