Skip to content

Commit 25b58e2

Browse files
additional negative indexing support, and frame-level "take_every" (#3888)
1 parent 585c10c commit 25b58e2

File tree

6 files changed: 109 additions (+109) and 6 deletions (-6)

py-polars/docs/source/reference/dataframe.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -125,6 +125,7 @@ Manipulation/ selection
125125
DataFrame.slice
126126
DataFrame.sort
127127
DataFrame.tail
128+
DataFrame.take_every
128129
DataFrame.to_dummies
129130
DataFrame.to_series
130131
DataFrame.transpose

py-polars/docs/source/reference/lazyframe.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,7 @@ Manipulation/ selection
7575
LazyFrame.slice
7676
LazyFrame.sort
7777
LazyFrame.tail
78+
LazyFrame.take_every
7879
LazyFrame.unique
7980
LazyFrame.unnest
8081
LazyFrame.with_column

py-polars/polars/internals/frame.py

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1882,6 +1882,8 @@ def to_series(self, index: int = 0) -> pli.Series:
18821882
]
18831883
18841884
"""
1885+
if index < 0:
1886+
index = len(self.columns) + index
18851887
return pli.wrap_s(self._df.select_at_idx(index))
18861888

18871889
def reverse(self: DF) -> DF:
@@ -1974,6 +1976,8 @@ def insert_at_idx(self, index: int, series: pli.Series) -> None:
19741976
└─────┴─────┴─────┘
19751977
19761978
"""
1979+
if index < 0:
1980+
index = len(self.columns) + index
19771981
self._df.insert_at_idx(index, series._s)
19781982

19791983
def filter(self: DF, predicate: pli.Expr) -> DF:
@@ -2275,6 +2279,8 @@ def replace_at_idx(self, index: int, series: pli.Series) -> None:
22752279
└───────┴─────┴─────┘
22762280
22772281
"""
2282+
if index < 0:
2283+
index = len(self.columns) + index
22782284
self._df.replace_at_idx(index, series._s)
22792285

22802286
@overload
@@ -4079,6 +4085,8 @@ def select_at_idx(self, idx: int) -> pli.Series:
40794085
]
40804086
40814087
"""
4088+
if idx < 0:
4089+
idx = len(self.columns) + idx
40824090
return pli.wrap_s(self._df.select_at_idx(idx))
40834091

40844092
def cleared(self: DF) -> DF:
@@ -5462,6 +5470,27 @@ def shrink_to_fit(self: DF, in_place: bool = False) -> DF | None:
54625470
df._df.shrink_to_fit()
54635471
return df
54645472

5473+
def take_every(self: DF, n: int) -> DF:
5474+
"""
5475+
Take every nth row in the DataFrame and return as a new DataFrame.
5476+
5477+
Examples
5478+
--------
5479+
>>> s = pl.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
5480+
>>> s.take_every(2)
5481+
shape: (2, 2)
5482+
┌─────┬─────┐
5483+
│ a ┆ b │
5484+
│ --- ┆ --- │
5485+
│ i64 ┆ i64 │
5486+
╞═════╪═════╡
5487+
│ 1 ┆ 5 │
5488+
├╌╌╌╌╌┼╌╌╌╌╌┤
5489+
│ 3 ┆ 7 │
5490+
└─────┴─────┘
5491+
"""
5492+
return self.select(pli.col("*").take_every(n))
5493+
54655494
def hash_rows(
54665495
self, k0: int = 0, k1: int = 1, k2: int = 2, k3: int = 3
54675496
) -> pli.Series:

py-polars/polars/internals/lazy_frame.py

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1782,6 +1782,27 @@ def with_row_count(self: LDF, name: str = "row_nr", offset: int = 0) -> LDF:
17821782
"""
17831783
return self._from_pyldf(self._ldf.with_row_count(name, offset))
17841784

1785+
def take_every(self: LDF, n: int) -> LDF:
1786+
"""
1787+
Take every nth row in the LazyFrame and return as a new LazyFrame.
1788+
1789+
Examples
1790+
--------
1791+
>>> s = pl.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}).lazy()
1792+
>>> s.take_every(2).collect()
1793+
shape: (2, 2)
1794+
┌─────┬─────┐
1795+
│ a ┆ b │
1796+
│ --- ┆ --- │
1797+
│ i64 ┆ i64 │
1798+
╞═════╪═════╡
1799+
│ 1 ┆ 5 │
1800+
├╌╌╌╌╌┼╌╌╌╌╌┤
1801+
│ 3 ┆ 7 │
1802+
└─────┴─────┘
1803+
"""
1804+
return self.select(pli.col("*").take_every(n))
1805+
17851806
def fill_null(self: LDF, fill_value: int | str | pli.Expr) -> LDF:
17861807
"""
17871808
Fill missing values with a literal or Expr.

py-polars/tests/test_df.py

Lines changed: 50 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
from hypothesis import given
1515

1616
import polars as pl
17-
from polars.testing import assert_series_equal, columns, dataframes
17+
from polars.testing import assert_frame_equal, assert_series_equal, columns, dataframes
1818

1919
if sys.version_info >= (3, 8):
2020
from typing import Literal
@@ -32,8 +32,7 @@ def test_repr(df: pl.DataFrame) -> None:
3232
# print(df)
3333

3434

35-
# note: *temporarily* constraining dtypes this test until #3843 and a windows-specific
36-
# fixfor a related date bug is merged (tblocking the PR to merge hypothesis code).
35+
# note: temporarily constraining dtypes for this test (possible windows-specific date bug)
3736
@given(df=dataframes(allowed_dtypes=[pl.Boolean, pl.UInt64, pl.Utf8]))
3837
def test_null_count(df: pl.DataFrame) -> None:
3938
null_count, ncols = df.null_count(), len(df.columns)
@@ -548,6 +547,54 @@ def test_assignment() -> None:
548547
assert df["foo"].to_list() == [1, 9, 9]
549548

550549

550+
def test_select_at_idx() -> None:
551+
df = pl.DataFrame({"x": [1, 2, 3], "y": [2, 3, 4], "z": [3, 4, 5]})
552+
for idx in range(len(df.columns)):
553+
assert_series_equal(
554+
df.select_at_idx(idx), # regular positive indexing
555+
df.select_at_idx(idx - len(df.columns)), # equivalent negative index
556+
)
557+
558+
559+
def test_insert_at_idx() -> None:
560+
df = pl.DataFrame({"z": [3, 4, 5]})
561+
df.insert_at_idx(0, pl.Series("x", [1, 2, 3]))
562+
df.insert_at_idx(-1, pl.Series("y", [2, 3, 4]))
563+
564+
expected_df = pl.DataFrame({"x": [1, 2, 3], "y": [2, 3, 4], "z": [3, 4, 5]})
565+
assert_frame_equal(expected_df, df)
566+
567+
568+
def test_replace_at_idx() -> None:
569+
df = pl.DataFrame({"x": [1, 2, 3], "y": [2, 3, 4], "z": [3, 4, 5]})
570+
df.replace_at_idx(0, pl.Series("a", [4, 5, 6]))
571+
df.replace_at_idx(-2, pl.Series("b", [5, 6, 7]))
572+
df.replace_at_idx(-1, pl.Series("c", [6, 7, 8]))
573+
574+
expected_df = pl.DataFrame({"a": [4, 5, 6], "b": [5, 6, 7], "c": [6, 7, 8]})
575+
assert_frame_equal(expected_df, df)
576+
577+
578+
def test_to_series() -> None:
579+
df = pl.DataFrame({"x": [1, 2, 3], "y": [2, 3, 4], "z": [3, 4, 5]})
580+
581+
assert_series_equal(df.to_series(), df["x"])
582+
assert_series_equal(df.to_series(0), df["x"])
583+
assert_series_equal(df.to_series(-3), df["x"])
584+
585+
assert_series_equal(df.to_series(1), df["y"])
586+
assert_series_equal(df.to_series(-2), df["y"])
587+
588+
assert_series_equal(df.to_series(2), df["z"])
589+
assert_series_equal(df.to_series(-1), df["z"])
590+
591+
592+
def test_take_every() -> None:
593+
df = pl.DataFrame({"a": [1, 2, 3, 4], "b": ["w", "x", "y", "z"]})
594+
expected_df = pl.DataFrame({"a": [1, 3], "b": ["w", "y"]})
595+
assert_frame_equal(expected_df, df.take_every(2))
596+
597+
551598
def test_slice() -> None:
552599
df = pl.DataFrame({"a": [2, 1, 3], "b": ["a", "b", "c"]})
553600
expected = pl.DataFrame({"a": [1, 3], "b": ["b", "c"]})
@@ -1219,9 +1266,6 @@ def test_lazy_functions() -> None:
12191266
expected = 3
12201267
assert np.isclose(out.select_at_idx(9), expected)
12211268
assert np.isclose(pl.last(df["b"]), expected)
1222-
expected = 3
1223-
assert np.isclose(out.select_at_idx(9), expected)
1224-
assert np.isclose(pl.last(df["b"]), expected)
12251269

12261270

12271271
def test_multiple_column_sort() -> None:

py-polars/tests/test_lazy.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
import polars as pl
66
from polars import col, lit, map_binary, when
7+
from polars.testing import assert_frame_equal
78

89

910
def test_lazy() -> None:
@@ -50,6 +51,12 @@ def test_set_null() -> None:
5051
assert s[2] is None
5152

5253

54+
def test_take_every() -> None:
55+
df = pl.DataFrame({"a": [1, 2, 3, 4], "b": ["w", "x", "y", "z"]}).lazy()
56+
expected_df = pl.DataFrame({"a": [1, 3], "b": ["w", "y"]})
57+
assert_frame_equal(expected_df, df.take_every(2).collect())
58+
59+
5360
def test_agg() -> None:
5461
df = pl.DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})
5562
ldf = df.lazy().min()

0 commit comments

Comments
 (0)