|
25 | 25 | import shutil
|
26 | 26 | import sqlalchemy as sa
|
27 | 27 | import csv
|
28 |
| -from string import ascii_letters |
29 | 28 |
|
30 | 29 | from .utils import (
|
31 | 30 | df_equals,
|
|
35 | 34 | json_long_bytes,
|
36 | 35 | random_state,
|
37 | 36 | eval_io,
|
| 37 | + get_unique_filename, |
| 38 | + get_random_string, |
| 39 | + insert_lines_to_csv, |
| 40 | + IO_OPS_DATA_DIR, |
38 | 41 | )
|
39 | 42 |
|
40 | 43 | from modin.config import Engine, Backend
|
|
62 | 65 | TEST_GBQ_FILENAME = "test_gbq."
|
63 | 66 | SMALL_ROW_SIZE = 2000
|
64 | 67 |
|
65 |
| -test_data_dir = os.path.join(os.path.dirname(__file__), "read_csv_data") |
66 |
| -if not os.path.exists(test_data_dir): |
67 |
| - os.mkdir(test_data_dir) |
| 68 | + |
| 69 | +if not os.path.exists(IO_OPS_DATA_DIR): |
| 70 | + os.mkdir(IO_OPS_DATA_DIR) |
68 | 71 |
|
69 | 72 |
|
70 | 73 | @pytest.fixture
|
@@ -159,76 +162,6 @@ def teardown_test_file(test_path):
|
159 | 162 | os.remove(test_path)
|
160 | 163 |
|
161 | 164 |
|
162 |
| -def get_random_string(): |
163 |
| - random_string = "".join( |
164 |
| - random_state.choice([x for x in ascii_letters], size=10).tolist() |
165 |
| - ) |
166 |
| - return random_string |
167 |
| - |
168 |
| - |
169 |
| -def insert_lines_to_csv( |
170 |
| - csv_name: str, |
171 |
| - lines_positions: list, |
172 |
| - lines_type: str = "blank", |
173 |
| - encoding: str = None, |
174 |
| - **csv_reader_writer_params, |
175 |
| -): |
176 |
| - """Insert lines to ".csv" file. |
177 |
| -
|
178 |
| - Parameters |
179 |
| - ---------- |
180 |
| - csv_name: str |
181 |
| - ".csv" file that should be modified. |
182 |
| - lines_positions: list of ints |
183 |
| - Lines postions that sghould be modified (serial number |
184 |
| - of line - begins from 0, ends in <rows_number> - 1). |
185 |
| - lines_type: str |
186 |
| - Lines types that should be inserted to ".csv" file. Possible types: |
187 |
| - "blank" - empty line without any delimiters/separators, |
188 |
| - "bad" - lines with len(lines_data) > cols_number |
189 |
| - encoding: str |
190 |
| - Encoding type that should be used during file reading and writing. |
191 |
| - """ |
192 |
| - cols_number = len(pandas.read_csv(csv_name, nrows=1).columns) |
193 |
| - if lines_type == "blank": |
194 |
| - lines_data = [] |
195 |
| - elif lines_type == "bad": |
196 |
| - cols_number = len(pandas.read_csv(csv_name, nrows=1).columns) |
197 |
| - lines_data = [x for x in range(cols_number + 1)] |
198 |
| - else: |
199 |
| - raise ValueError( |
200 |
| - f"acceptable values for parameter are ['blank', 'bad'], actually passed {lines_type}" |
201 |
| - ) |
202 |
| - lines = [] |
203 |
| - dialect = "excel" |
204 |
| - with open(csv_name, "r", encoding=encoding, newline="") as read_file: |
205 |
| - try: |
206 |
| - dialect = csv.Sniffer().sniff(read_file.read()) |
207 |
| - read_file.seek(0) |
208 |
| - except Exception: |
209 |
| - dialect = None |
210 |
| - |
211 |
| - reader = csv.reader( |
212 |
| - read_file, |
213 |
| - dialect=dialect if dialect is not None else "excel", |
214 |
| - **csv_reader_writer_params, |
215 |
| - ) |
216 |
| - counter = 0 |
217 |
| - for row in reader: |
218 |
| - if counter in lines_positions: |
219 |
| - lines.append(lines_data) |
220 |
| - else: |
221 |
| - lines.append(row) |
222 |
| - counter += 1 |
223 |
| - with open(csv_name, "w", encoding=encoding, newline="") as write_file: |
224 |
| - writer = csv.writer( |
225 |
| - write_file, |
226 |
| - dialect=dialect if dialect is not None else "excel", |
227 |
| - **csv_reader_writer_params, |
228 |
| - ) |
229 |
| - writer.writerows(lines) |
230 |
| - |
231 |
| - |
232 | 165 | def _make_csv_file(filenames):
|
233 | 166 | def _csv_file_maker(
|
234 | 167 | filename=TEST_CSV_FILENAME,
|
@@ -562,57 +495,6 @@ def teardown_fwf_file():
|
562 | 495 | pass
|
563 | 496 |
|
564 | 497 |
|
565 |
| -def get_unique_filename( |
566 |
| - test_name: str, |
567 |
| - kwargs: dict = {}, |
568 |
| - extension: str = "csv", |
569 |
| - data_dir: str = test_data_dir, |
570 |
| - suffix: str = "", |
571 |
| -): |
572 |
| - """Returns unique file name with specified parameters. |
573 |
| -
|
574 |
| - Parameters |
575 |
| - ---------- |
576 |
| - test_name: str |
577 |
| - name of the test for which the unique file name is needed. |
578 |
| - kwargs: list of ints |
579 |
| - Unique combiantion of test parameters for creation of unique name. |
580 |
| - extension: str |
581 |
| - Extension of unique file. |
582 |
| - data_dir: str |
583 |
| - Data directory where test files will be created. |
584 |
| - suffix: str |
585 |
| - String to append to the resulted name. |
586 |
| -
|
587 |
| - Returns |
588 |
| - ------- |
589 |
| - Unique file name. |
590 |
| - """ |
591 |
| - # shortcut if kwargs parameter os not provided |
592 |
| - if len(kwargs) == 0 and extension == "csv" and suffix == "": |
593 |
| - return os.path.join(data_dir, (test_name + f"_{suffix}" + f".{extension}")) |
594 |
| - |
595 |
| - assert "." not in extension, "please provide pure extension name without '.'" |
596 |
| - prohibited_chars = ['"', "\n"] |
597 |
| - non_prohibited_char = "np_char" |
598 |
| - char_counter = 0 |
599 |
| - kwargs_name = dict(kwargs) |
600 |
| - for key, value in kwargs_name.items(): |
601 |
| - for char in prohibited_chars: |
602 |
| - if isinstance(value, str) and char in value or callable(value): |
603 |
| - kwargs_name[key] = non_prohibited_char + str(char_counter) |
604 |
| - char_counter += 1 |
605 |
| - parameters_values = "_".join( |
606 |
| - [ |
607 |
| - str(value) |
608 |
| - if not isinstance(value, (list, tuple)) |
609 |
| - else "_".join([str(x) for x in value]) |
610 |
| - for value in kwargs_name.values() |
611 |
| - ] |
612 |
| - ) |
613 |
| - return os.path.join(data_dir, parameters_values + f"_{suffix}" + f".{extension}") |
614 |
| - |
615 |
| - |
616 | 498 | class TestReadCSV:
|
617 | 499 | # delimiter tests
|
618 | 500 | @pytest.mark.parametrize("sep", ["_", ",", ".", "\n"])
|
|
0 commit comments