11import csv
22import os
3- from typing import Iterable , Sequence
3+ from typing import Any , Iterable , Iterator , Sequence , overload
44import pandas as pd
55import pyfastx
66import python_calamine as calamine
@@ -26,40 +26,60 @@ class InvalidDelimiter(Exception):
2626 pass
2727
2828
29+ # Overloads for different return types based on chunk_size
30+ @overload
2931def read_metadata (
30- metadata_file ,
31- delimiters = DEFAULT_DELIMITERS ,
32- columns = None ,
33- id_columns = DEFAULT_ID_COLUMNS ,
34- keep_id_as_column = False ,
35- chunk_size = None ,
36- dtype = None ,
32+ metadata_file : str ,
33+ delimiters : Sequence [str ] = ...,
34+ columns : list [str ] | None = ...,
35+ id_columns : Sequence [str ] = ...,
36+ keep_id_as_column : bool = ...,
37+ chunk_size : None = None ,
38+ dtype : dict [str , Any ] | str | None = ...,
39+ ) -> pd .DataFrame : ...
40+
41+ @overload
42+ def read_metadata (
43+ metadata_file : str ,
44+ delimiters : Sequence [str ] = ...,
45+ columns : list [str ] | None = ...,
46+ id_columns : Sequence [str ] = ...,
47+ keep_id_as_column : bool = ...,
48+ chunk_size : int = ...,
49+ dtype : dict [str , Any ] | str | None = ...,
50+ ) -> Iterator [pd .DataFrame ]: ...
51+
52+ def read_metadata (
53+ metadata_file : str ,
54+ delimiters : Sequence [str ] = DEFAULT_DELIMITERS ,
55+ columns : list [str ] | None = None ,
56+ id_columns : Sequence [str ] = DEFAULT_ID_COLUMNS ,
57+ keep_id_as_column : bool = False ,
58+ chunk_size : int | None = None ,
59+ dtype : dict [str , Any ] | str | None = None ,
3760 ):
3861 r"""Read metadata from a given filename and into a pandas `DataFrame` or
39- `TextFileReader` object .
62+ iterator of DataFrames when `chunk_size` is specified .
4063
4164 Parameters
4265 ----------
43- metadata_file : str
66+ metadata_file
4467 Path to a metadata file to load.
45- delimiters : list of str
68+ delimiters
4669 List of possible delimiters to check for between columns in the metadata.
4770 Only one delimiter will be inferred.
48- columns : list of str
71+ columns
4972 List of columns to read. If unspecified, read all columns.
50- id_columns : list of str
73+ id_columns
5174 List of possible id column names to check for, ordered by priority.
5275 Only one id column will be inferred.
53- keep_id_as_column : bool
76+ keep_id_as_column
5477 If true, keep the resolved id column as a column in addition to setting it as the DataFrame index.
55- chunk_size : int
78+ chunk_size
5679 Size of chunks to stream from disk with an iterator instead of loading the entire input file into memory.
57- dtype : dict or str
80+ dtype
5881 Data types to apply to columns in metadata. If unspecified, pandas data type inference will be used.
5982 See documentation for an argument of the same name to `pandas.read_csv()`.
60- Returns
61- -------
62- pandas.DataFrame or `pandas.io.parsers.TextFileReader`
6383
6484 Raises
6585 ------
@@ -97,6 +117,7 @@ def read_metadata(
97117 "skipinitialspace" : True ,
98118 "na_filter" : False ,
99119 "low_memory" : False ,
120+ ** PANDAS_READ_CSV_OPTIONS ,
100121 }
101122
102123 if chunk_size :
@@ -107,7 +128,6 @@ def read_metadata(
107128 metadata_file ,
108129 iterator = True ,
109130 ** kwargs ,
110- ** PANDAS_READ_CSV_OPTIONS ,
111131 )
112132 chunk = metadata .read (nrows = 1 )
113133 metadata .close ()
@@ -168,13 +188,11 @@ def read_metadata(
168188 return read_csv_with_index_col (
169189 metadata_file ,
170190 ** kwargs ,
171- ** PANDAS_READ_CSV_OPTIONS ,
172191 )
173192 else :
174193 return pd .read_csv (
175194 metadata_file ,
176195 ** kwargs ,
177- ** PANDAS_READ_CSV_OPTIONS ,
178196 )
179197
180198
0 commit comments