Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/user_guide/categorical.rst
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,7 @@ Pivot tables:

raw_cat = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "c"])
df = pd.DataFrame({"A": raw_cat, "B": ["c", "d", "c", "d"], "values": [1, 2, 3, 4]})
pd.pivot_table(df, values="values", index=["A", "B"])
pd.pivot_table(df, values="values", index=["A", "B"], observed=False)

Data munging
------------
Expand Down
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.23.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@ To show only observed values:
For pivoting operations, this behavior is *already* controlled by the ``dropna`` keyword:

.. ipython:: python
:okwarning:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you turn these into code-blocks?


cat1 = pd.Categorical(["a", "a", "b", "b"],
categories=["a", "b", "z"], ordered=True)
Expand All @@ -287,6 +288,7 @@ For pivoting operations, this behavior is *already* controlled by the ``dropna``
df

.. ipython:: python
:okwarning:

pd.pivot_table(df, values='values', index=['A', 'B'],
dropna=True)
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,7 @@ Other Deprecations
- Deprecated the ``fastpath`` keyword in the :class:`Series` constructor (:issue:`20110`)
- Deprecated the ``ordinal`` keyword in :class:`PeriodIndex`, use :meth:`PeriodIndex.from_ordinals` instead (:issue:`55960`)
- Deprecated the behavior of :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype; in a future version these will not perform dtype inference on the resulting :class:`Index`, do ``result.index = result.index.infer_objects()`` to retain the old behavior (:issue:`56161`)
- Deprecated the default of ``observed=False`` in :meth:`DataFrame.pivot_table`; the default will change to ``True`` in a future version (:issue:`56236`)
- Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)
- Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`)
- Deprecated the previous implementation of :class:`DataFrame.stack`; specify ``future_stack=True`` to adopt the future version (:issue:`53515`)
Expand Down
7 changes: 6 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9262,6 +9262,11 @@ def pivot(
If True: only show observed values for categorical groupers.
If False: show all values for categorical groupers.

.. deprecated:: 2.2.0

The default value of ``False`` is deprecated and will change to
``True`` in a future version of pandas.

sort : bool, default True
Specifies if the result should be sorted.

Expand Down Expand Up @@ -9372,7 +9377,7 @@ def pivot_table(
margins: bool = False,
dropna: bool = True,
margins_name: Level = "All",
observed: bool = False,
observed: bool | lib.NoDefault = lib.no_default,
sort: bool = True,
) -> DataFrame:
from pandas.core.reshape.pivot import pivot_table
Expand Down
20 changes: 17 additions & 3 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
Literal,
cast,
)
import warnings

import numpy as np

Expand All @@ -18,6 +19,7 @@
Appender,
Substitution,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.cast import maybe_downcast_to_dtype
from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -68,7 +70,7 @@ def pivot_table(
margins: bool = False,
dropna: bool = True,
margins_name: Hashable = "All",
observed: bool = False,
observed: bool | lib.NoDefault = lib.no_default,
sort: bool = True,
) -> DataFrame:
index = _convert_by(index)
Expand Down Expand Up @@ -123,7 +125,7 @@ def __internal_pivot_table(
margins: bool,
dropna: bool,
margins_name: Hashable,
observed: bool,
observed: bool | lib.NoDefault,
sort: bool,
) -> DataFrame:
"""
Expand Down Expand Up @@ -166,7 +168,18 @@ def __internal_pivot_table(
pass
values = list(values)

grouped = data.groupby(keys, observed=observed, sort=sort, dropna=dropna)
observed_bool = False if observed is lib.no_default else observed
grouped = data.groupby(keys, observed=observed_bool, sort=sort, dropna=dropna)
if observed is lib.no_default and any(
ping._passed_categorical for ping in grouped.grouper.groupings
):
warnings.warn(
"The default value of observed=False is deprecated and will change "
"to observed=True in a future version of pandas. Specify "
"observed=False to silence this warning and retain the current behavior",
category=FutureWarning,
stacklevel=find_stack_level(),
)
agged = grouped.agg(aggfunc)

if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
Expand Down Expand Up @@ -719,6 +732,7 @@ def crosstab(
margins=margins,
margins_name=margins_name,
dropna=dropna,
observed=False,
**kwargs, # type: ignore[arg-type]
)

Expand Down
41 changes: 29 additions & 12 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,9 @@ def test_pivot_table_categorical(self):
["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True
)
df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
result = pivot_table(df, values="values", index=["A", "B"], dropna=True)
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = pivot_table(df, values="values", index=["A", "B"], dropna=True)

exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"])
expected = DataFrame({"values": [1.0, 2.0, 3.0, 4.0]}, index=exp_index)
Expand All @@ -220,7 +222,9 @@ def test_pivot_table_dropna_categoricals(self, dropna):
)

df["A"] = df["A"].astype(CategoricalDtype(categories, ordered=False))
result = df.pivot_table(index="B", columns="A", values="C", dropna=dropna)
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.pivot_table(index="B", columns="A", values="C", dropna=dropna)
expected_columns = Series(["a", "b", "c"], name="A")
expected_columns = expected_columns.astype(
CategoricalDtype(categories, ordered=False)
Expand Down Expand Up @@ -250,7 +254,9 @@ def test_pivot_with_non_observable_dropna(self, dropna):
}
)

result = df.pivot_table(index="A", values="B", dropna=dropna)
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.pivot_table(index="A", values="B", dropna=dropna)
if dropna:
values = [2.0, 3.0]
codes = [0, 1]
Expand Down Expand Up @@ -283,7 +289,9 @@ def test_pivot_with_non_observable_dropna_multi_cat(self, dropna):
}
)

result = df.pivot_table(index="A", values="B", dropna=dropna)
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.pivot_table(index="A", values="B", dropna=dropna)
expected = DataFrame(
{"B": [2.0, 3.0, 0.0]},
index=Index(
Expand All @@ -301,7 +309,10 @@ def test_pivot_with_non_observable_dropna_multi_cat(self, dropna):
def test_pivot_with_interval_index(self, interval_values, dropna):
# GH 25814
df = DataFrame({"A": interval_values, "B": 1})
result = df.pivot_table(index="A", values="B", dropna=dropna)

msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.pivot_table(index="A", values="B", dropna=dropna)
expected = DataFrame(
{"B": 1.0}, index=Index(interval_values.unique(), name="A")
)
Expand All @@ -322,9 +333,11 @@ def test_pivot_with_interval_index_margins(self):
}
)

pivot_tab = pivot_table(
df, index="C", columns="B", values="A", aggfunc="sum", margins=True
)
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
pivot_tab = pivot_table(
df, index="C", columns="B", values="A", aggfunc="sum", margins=True
)

result = pivot_tab["All"]
expected = Series(
Expand Down Expand Up @@ -1825,7 +1838,9 @@ def test_categorical_margins_category(self, observed):

df.y = df.y.astype("category")
df.z = df.z.astype("category")
table = df.pivot_table("x", "y", "z", dropna=observed, margins=True)
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
table = df.pivot_table("x", "y", "z", dropna=observed, margins=True)
tm.assert_frame_equal(table, expected)

def test_margins_casted_to_float(self):
Expand Down Expand Up @@ -1887,9 +1902,11 @@ def test_categorical_aggfunc(self, observed):
{"C1": ["A", "B", "C", "C"], "C2": ["a", "a", "b", "b"], "V": [1, 2, 3, 4]}
)
df["C1"] = df["C1"].astype("category")
result = df.pivot_table(
"V", index="C1", columns="C2", dropna=observed, aggfunc="count"
)
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.pivot_table(
"V", index="C1", columns="C2", dropna=observed, aggfunc="count"
)

expected_index = pd.CategoricalIndex(
["A", "B", "C"], categories=["A", "B", "C"], ordered=False, name="C1"
Expand Down