Skip to content

Commit a974866

Browse files
authored
Merge pull request #1923: Add types for read_metadata()
2 parents 0d91730 + 9312413 commit a974866

File tree

2 files changed

+46
-22
lines changed

2 files changed

+46
-22
lines changed

augur/io/metadata.py

Lines changed: 40 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import csv
22
import os
3-
from typing import Iterable, Sequence
3+
from typing import Any, Iterable, Iterator, Sequence, overload
44
import pandas as pd
55
import pyfastx
66
import python_calamine as calamine
@@ -26,40 +26,60 @@ class InvalidDelimiter(Exception):
2626
pass
2727

2828

29+
# Overloads for different return types based on chunk_size
30+
@overload
2931
def read_metadata(
30-
metadata_file,
31-
delimiters=DEFAULT_DELIMITERS,
32-
columns=None,
33-
id_columns=DEFAULT_ID_COLUMNS,
34-
keep_id_as_column=False,
35-
chunk_size=None,
36-
dtype=None,
32+
metadata_file: str,
33+
delimiters: Sequence[str] = ...,
34+
columns: list[str] | None = ...,
35+
id_columns: Sequence[str] = ...,
36+
keep_id_as_column: bool = ...,
37+
chunk_size: None = None,
38+
dtype: dict[str, Any] | str | None = ...,
39+
) -> pd.DataFrame: ...
40+
41+
@overload
42+
def read_metadata(
43+
metadata_file: str,
44+
delimiters: Sequence[str] = ...,
45+
columns: list[str] | None = ...,
46+
id_columns: Sequence[str] = ...,
47+
keep_id_as_column: bool = ...,
48+
chunk_size: int = ...,
49+
dtype: dict[str, Any] | str | None = ...,
50+
) -> Iterator[pd.DataFrame]: ...
51+
52+
def read_metadata(
53+
metadata_file: str,
54+
delimiters: Sequence[str] = DEFAULT_DELIMITERS,
55+
columns: list[str] | None = None,
56+
id_columns: Sequence[str] = DEFAULT_ID_COLUMNS,
57+
keep_id_as_column: bool = False,
58+
chunk_size: int | None = None,
59+
dtype: dict[str, Any] | str | None = None,
3760
):
3861
r"""Read metadata from a given filename and into a pandas `DataFrame` or
39-
`TextFileReader` object.
62+
iterator of DataFrames when `chunk_size` is specified.
4063
4164
Parameters
4265
----------
43-
metadata_file : str
66+
metadata_file
4467
Path to a metadata file to load.
45-
delimiters : list of str
68+
delimiters
4669
List of possible delimiters to check for between columns in the metadata.
4770
Only one delimiter will be inferred.
48-
columns : list of str
71+
columns
4972
List of columns to read. If unspecified, read all columns.
50-
id_columns : list of str
73+
id_columns
5174
List of possible id column names to check for, ordered by priority.
5275
Only one id column will be inferred.
53-
keep_id_as_column : bool
76+
keep_id_as_column
5477
If true, keep the resolved id column as a column in addition to setting it as the DataFrame index.
55-
chunk_size : int
78+
chunk_size
5679
Size of chunks to stream from disk with an iterator instead of loading the entire input file into memory.
57-
dtype : dict or str
80+
dtype
5881
Data types to apply to columns in metadata. If unspecified, pandas data type inference will be used.
5982
See documentation for an argument of the same name to `pandas.read_csv()`.
60-
Returns
61-
-------
62-
pandas.DataFrame or `pandas.io.parsers.TextFileReader`
6383
6484
Raises
6585
------
@@ -97,6 +117,7 @@ def read_metadata(
97117
"skipinitialspace": True,
98118
"na_filter": False,
99119
"low_memory": False,
120+
**PANDAS_READ_CSV_OPTIONS,
100121
}
101122

102123
if chunk_size:
@@ -107,7 +128,6 @@ def read_metadata(
107128
metadata_file,
108129
iterator=True,
109130
**kwargs,
110-
**PANDAS_READ_CSV_OPTIONS,
111131
)
112132
chunk = metadata.read(nrows=1)
113133
metadata.close()
@@ -168,13 +188,11 @@ def read_metadata(
168188
return read_csv_with_index_col(
169189
metadata_file,
170190
**kwargs,
171-
**PANDAS_READ_CSV_OPTIONS,
172191
)
173192
else:
174193
return pd.read_csv(
175194
metadata_file,
176195
**kwargs,
177-
**PANDAS_READ_CSV_OPTIONS,
178196
)
179197

180198

docs/conf.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,12 @@ def prose_list(items):
137137
# This class can't be referenced.
138138
# <https://github.com/python/cpython/issues/101503>
139139
("py:class", "argparse._SubParsersAction"),
140+
141+
# sphinx-autodoc-typehints resolves pd.DataFrame to the internal path
142+
# pandas.core.frame.DataFrame, which is not in pandas' intersphinx
143+
# inventory (only pandas.DataFrame is).
144+
# <https://github.com/tox-dev/sphinx-autodoc-typehints/issues/47#issuecomment-401403609>
145+
("py:class", "pandas.core.frame.DataFrame"),
140146
]
141147

142148
# -- Cross-project references ------------------------------------------------

0 commit comments

Comments (0)