Skip to content

Commit afd3c40

Browse files
authored
ARROW-16838: [Python] Improve schema inference for pandas indexes with extension dtypes (#14080)
Possible fix for https://issues.apache.org/jira/browse/ARROW-16838. `pd.Index` objects don't have a `.head` method, while `pd.DataFrame`, `pd.Series`, and `pd.Index` all support indexing with `[:0]` to return an empty object of the same type. Authored-by: James Bourbeau <[email protected]> Signed-off-by: Joris Van den Bossche <[email protected]>
1 parent 8ad5e59 commit afd3c40

File tree

2 files changed

+4
-2
lines changed

2 files changed

+4
-2
lines changed

python/pyarrow/pandas_compat.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -541,7 +541,9 @@ def dataframe_to_types(df, preserve_index, columns=None):
541541
if _pandas_api.is_categorical(values):
542542
type_ = pa.array(c, from_pandas=True).type
543543
elif _pandas_api.is_extension_array_dtype(values):
544-
type_ = pa.array(c.head(0), from_pandas=True).type
544+
empty = c.head(0) if isinstance(
545+
c, _pandas_api.pd.Series) else c[:0]
546+
type_ = pa.array(empty, from_pandas=True).type
545547
else:
546548
values, type_ = get_datetimetz_type(values, c.dtype, None)
547549
type_ = pa.lib._ndarray_to_arrow_type(values, type_)

python/pyarrow/tests/test_schema.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -663,7 +663,7 @@ def test_schema_from_pandas():
663663
if Version(pd.__version__) >= Version('1.0.0'):
664664
inputs.append(pd.array([1, 2, None], dtype=pd.Int32Dtype()))
665665
for data in inputs:
666-
df = pd.DataFrame({'a': data})
666+
df = pd.DataFrame({'a': data}, index=data)
667667
schema = pa.Schema.from_pandas(df)
668668
expected = pa.Table.from_pandas(df).schema
669669
assert schema == expected

0 commit comments

Comments
 (0)