ml31415 · ml31415 · Apr 19, 2024 · Apr 19, 2024 · Apr 19, 2024
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -27,9 +27,9 @@ jobs:
         os: ["ubuntu-latest"]
         python-version: ["3.9", "3.10", "3.11", "3.12"]
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
         with:
-          fetch-depth: 10 # Fetch all history for all branches and tags.
+          fetch-depth: 1
       - name: Set environment variables
         run: |
           echo "CONDA_ENV_FILE=ci/environment.yml" >> $GITHUB_ENV
@@ -49,7 +49,7 @@ jobs:
       # We only want to install this on one run, because otherwise we'll have
       # duplicate annotations.
       - name: Install error reporter
-        if: ${{ matrix.os }} == 'ubuntu-latest' and ${{ matrix.python-version }} == '3.10'
+        if: ${{ matrix.os == 'ubuntu-latest' && matrix.python-version == '3.11' }}
         run: |
           python -m pip install pytest-github-actions-annotate-failures
 

diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml
@@ -12,13 +12,13 @@ jobs:
     runs-on: ubuntu-latest
     if: github.repository == 'ml31415/numpy-groupies'
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
         with:
           fetch-depth: 0
-      - uses: actions/setup-python@v4
+      - uses: actions/setup-python@v5
         name: Install Python
         with:
-          python-version: "3.10"
+          python-version: "3.11"
 
       - name: Install dependencies
         run: |
@@ -39,7 +39,7 @@ jobs:
           else
             echo "✅ Looks good"
           fi
-      - uses: actions/upload-artifact@v3
+      - uses: actions/upload-artifact@v4
         with:
           name: releases
           path: dist
@@ -48,11 +48,11 @@ jobs:
     needs: build-artifacts
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/setup-python@v4
+      - uses: actions/setup-python@v5
         name: Install Python
         with:
-          python-version: "3.10"
-      - uses: actions/download-artifact@v3
+          python-version: "3.11"
+      - uses: actions/download-artifact@v4
         with:
           name: releases
           path: dist
@@ -80,7 +80,7 @@ jobs:
     if: github.event_name == 'release'
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/download-artifact@v3
+      - uses: actions/download-artifact@v4
         with:
           name: releases
           path: dist
@@ -89,4 +89,4 @@ jobs:
         with:
           user: __token__
           password: ${{ secrets.PYPI_TOKEN }}
-          verbose: true
+          verbose: true
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,27 @@
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: debug-statements
+      - id: detect-private-key
+      - id: check-builtin-literals
+      - id: check-case-conflict
+      - id: check-executables-have-shebangs
+      - id: check-json
+      - id: check-merge-conflict
+      - id: check-symlinks
+      - id: check-toml
+      - id: check-xml
+      - id: check-yaml
+        exclude: (\.pre-commit-config\.yaml)
+
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.3.7
+    hooks:
+      - id: ruff
+      - id: ruff-format
+
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.2.6
+    hooks:
+      - id: codespell
diff --git a/README.md b/README.md
@@ -1,8 +1,7 @@
 [![GitHub Workflow CI Status](https://img.shields.io/github/actions/workflow/status/ml31415/numpy-groupies/ci.yaml?branch=master&logo=github&style=flat)](https://github.com/ml31415/numpy-groupies/actions)
 [![PyPI](https://img.shields.io/pypi/v/numpy-groupies.svg?style=flat)](https://pypi.org/project/numpy-groupies/)
 [![Conda-forge](https://img.shields.io/conda/vn/conda-forge/numpy_groupies.svg?style=flat)](https://anaconda.org/conda-forge/numpy_groupies)
-[![Supported Versions](https://img.shields.io/pypi/pyversions/numpy-groupies.svg)](https://pypi.org/project/numpy-groupies)
-[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+![Python Version from PEP 621 TOML](https://img.shields.io/python/required-version-toml?tomlFilePath=https%3A%2F%2Fraw.githubusercontent.com%2Fml31415%2Fnumpy-groupies%2Fmaster%2Fpyproject.toml)
 ![PyPI - Downloads](https://img.shields.io/pypi/dm/numpy-groupies)
 
 # numpy-groupies

diff --git a/conftest.py b/conftest.py
@@ -5,7 +5,9 @@
 
 
 def pytest_configure(config):
-    config.addinivalue_line("markers", "deselect_if(func): function to deselect tests from parametrization")
+    config.addinivalue_line(
+        "markers", "deselect_if(func): function to deselect tests from parametrization"
+    )
 
 
 def pytest_collection_modifyitems(config, items):

diff --git a/numpy_groupies/aggregate_numba.py b/numpy_groupies/aggregate_numba.py
@@ -68,7 +68,9 @@ def __call__(
         dtype = check_dtype(dtype, self.func, a, len(group_idx))
         check_fill_value(fill_value, dtype, func=self.func)
         input_dtype = type(a) if np.isscalar(a) else a.dtype
-        ret, counter, mean, outer = self._initialize(flat_size, fill_value, dtype, input_dtype, group_idx.size)
+        ret, counter, mean, outer = self._initialize(
+            flat_size, fill_value, dtype, input_dtype, group_idx.size
+        )
         group_idx = np.ascontiguousarray(group_idx)
 
         if not np.isscalar(a):
@@ -141,7 +143,9 @@ def inner(ri, val, ret, counter, mean, fill_value):
         def loop(group_idx, a, ret, counter, mean, outer, fill_value, ddof):
             # ddof needs to be present for being exchangeable with loop_2pass
             size = len(ret)
-            rng = range(len(group_idx) - 1, -1, -1) if reverse else range(len(group_idx))
+            rng = (
+                range(len(group_idx) - 1, -1, -1) if reverse else range(len(group_idx))
+            )
             for i in rng:
                 ri = group_idx[i]
                 if ri < 0:
@@ -242,14 +246,18 @@ def __call__(
         axis=None,
         ddof=0,
     ):
-        iv = input_validation(group_idx, a, size=size, order=order, axis=axis, check_bounds=False)
+        iv = input_validation(
+            group_idx, a, size=size, order=order, axis=axis, check_bounds=False
+        )
         group_idx, a, flat_size, ndim_idx, size, _ = iv
 
         # TODO: The typecheck should be done by the class itself, not by check_dtype
         dtype = check_dtype(dtype, self.func, a, len(group_idx))
         check_fill_value(fill_value, dtype, func=self.func)
         input_dtype = type(a) if np.isscalar(a) else a.dtype
-        ret, _, _, _ = self._initialize(flat_size, fill_value, dtype, input_dtype, group_idx.size)
+        ret, _, _, _ = self._initialize(
+            flat_size, fill_value, dtype, input_dtype, group_idx.size
+        )
         group_idx = np.ascontiguousarray(group_idx)
 
         sortidx = np.argsort(group_idx, kind="mergesort")
@@ -493,7 +501,7 @@ class CumMin(AggregateNtoN, Min):
 
 
 def get_funcs():
-    funcs = dict()
+    funcs = {}
     for op in (
         Sum,
         Prod,
@@ -530,7 +538,16 @@ def get_funcs():
 
 
 def aggregate(
-    group_idx, a, func="sum", size=None, fill_value=0, order="C", dtype=None, axis=None, cache=True, **kwargs
+    group_idx,
+    a,
+    func="sum",
+    size=None,
+    fill_value=0,
+    order="C",
+    dtype=None,
+    axis=None,
+    cache=True,
+    **kwargs,
 ):
     func = get_func(func, aliasing, _impl_dict)
     if not isinstance(func, str):
@@ -541,7 +558,9 @@ def aggregate(
             if cache is True:
                 cache = _default_cache
             aggregate_op = cache.setdefault(func, AggregateGeneric(func))
-        return aggregate_op(group_idx, a, size, fill_value, order, dtype, axis, **kwargs)
+        return aggregate_op(
+            group_idx, a, size, fill_value, order, dtype, axis, **kwargs
+        )
     else:
         func = _impl_dict[func]
         return func(group_idx, a, size, fill_value, order, dtype, axis, **kwargs)

diff --git a/numpy_groupies/aggregate_numpy.py b/numpy_groupies/aggregate_numpy.py
@@ -30,7 +30,9 @@ def _sum(group_idx, a, size, fill_value, dtype=None):
             ret.real = np.bincount(group_idx, weights=a.real, minlength=size)
             ret.imag = np.bincount(group_idx, weights=a.imag, minlength=size)
         else:
-            ret = np.bincount(group_idx, weights=a, minlength=size).astype(dtype, copy=False)
+            ret = np.bincount(group_idx, weights=a, minlength=size).astype(
+                dtype, copy=False
+            )
 
     if fill_value != 0:
         _fill_untouched(group_idx, ret, fill_value)
@@ -117,7 +119,9 @@ def _argmax(group_idx, a, size, fill_value, dtype=int, _nansqueeze=False):
     ret = np.full(size, fill_value, dtype=dtype)
     group_idx_max = group_idx[is_max]
     (argmax,) = is_max.nonzero()
-    ret[group_idx_max[::-1]] = argmax[::-1]  # reverse to ensure first value for each group wins
+    ret[group_idx_max[::-1]] = argmax[
+        ::-1
+    ]  # reverse to ensure first value for each group wins
     return ret
 
 
@@ -129,7 +133,9 @@ def _argmin(group_idx, a, size, fill_value, dtype=int, _nansqueeze=False):
     ret = np.full(size, fill_value, dtype=dtype)
     group_idx_min = group_idx[is_min]
     (argmin,) = is_min.nonzero()
-    ret[group_idx_min[::-1]] = argmin[::-1]  # reverse to ensure first value for each group wins
+    ret[group_idx_min[::-1]] = argmin[
+        ::-1
+    ]  # reverse to ensure first value for each group wins
     return ret
 
 
@@ -143,7 +149,9 @@ def _mean(group_idx, a, size, fill_value, dtype=np.dtype(np.float64)):
         sums.real = np.bincount(group_idx, weights=a.real, minlength=size)
         sums.imag = np.bincount(group_idx, weights=a.imag, minlength=size)
     else:
-        sums = np.bincount(group_idx, weights=a, minlength=size).astype(dtype, copy=False)
+        sums = np.bincount(group_idx, weights=a, minlength=size).astype(
+            dtype, copy=False
+        )
 
     with np.errstate(divide="ignore", invalid="ignore"):
         ret = sums.astype(dtype, copy=False) / counts
@@ -160,15 +168,19 @@ def _sum_of_squres(group_idx, a, size, fill_value, dtype=np.dtype(np.float64)):
     return ret
 
 
-def _var(group_idx, a, size, fill_value, dtype=np.dtype(np.float64), sqrt=False, ddof=0):
+def _var(
+    group_idx, a, size, fill_value, dtype=np.dtype(np.float64), sqrt=False, ddof=0
+):
     if np.ndim(a) == 0:
         raise ValueError("cannot take variance with scalar a")
     counts = np.bincount(group_idx, minlength=size)
     sums = np.bincount(group_idx, weights=a, minlength=size)
     with np.errstate(divide="ignore", invalid="ignore"):
         means = sums.astype(dtype, copy=False) / counts
         counts = np.where(counts > ddof, counts - ddof, 0)
-        ret = np.bincount(group_idx, (a - means[group_idx]) ** 2, minlength=size) / counts
+        ret = (
+            np.bincount(group_idx, (a - means[group_idx]) ** 2, minlength=size) / counts
+        )
     if sqrt:
         ret = np.sqrt(ret)  # this is now std not var
     if not np.isnan(fill_value):
@@ -208,7 +220,9 @@ def _array(group_idx, a, size, fill_value, dtype=None):
     return ret
 
 
-def _generic_callable(group_idx, a, size, fill_value, dtype=None, func=lambda g: g, **kwargs):
+def _generic_callable(
+    group_idx, a, size, fill_value, dtype=None, func=lambda g: g, **kwargs
+):
     """groups a by inds, and then applies foo to each group in turn, placing
     the results in an array."""
     groups = _array(group_idx, a, size, ())
@@ -244,7 +258,9 @@ def _cumsum(group_idx, a, size, fill_value=None, dtype=None):
 
 def _nancumsum(group_idx, a, size, fill_value=None, dtype=None):
     a_nonans = np.where(np.isnan(a), 0, a)
-    group_idx_nonans = np.where(np.isnan(group_idx), np.nanmax(group_idx) + 1, group_idx)
+    group_idx_nonans = np.where(
+        np.isnan(group_idx), np.nanmax(group_idx) + 1, group_idx
+    )
     return _cumsum(group_idx_nonans, a_nonans, size, fill_value=fill_value, dtype=dtype)
 
 
@@ -271,7 +287,11 @@ def _nancumsum(group_idx, a, size, fill_value=None, dtype=None):
     sumofsquares=_sum_of_squres,
     generic=_generic_callable,
 )
-_impl_dict.update(("nan" + k, v) for k, v in list(_impl_dict.items()) if k not in funcs_no_separate_nan)
+_impl_dict.update(
+    ("nan" + k, v)
+    for k, v in list(_impl_dict.items())
+    if k not in funcs_no_separate_nan
+)
 _impl_dict["nancumsum"] = _nancumsum
 
 
@@ -321,7 +341,9 @@ def _aggregate_base(
         dtype = check_dtype(dtype, func, a, flat_size)
         check_fill_value(fill_value, dtype, func=func)
         func = _impl_dict[func]
-        ret = func(group_idx, a, flat_size, fill_value=fill_value, dtype=dtype, **kwargs)
+        ret = func(
+            group_idx, a, flat_size, fill_value=fill_value, dtype=dtype, **kwargs
+        )
 
     # deal with ndimensional indexing
     if ndim_idx > 1:
@@ -335,7 +357,17 @@ def _aggregate_base(
     return ret
 
 
-def aggregate(group_idx, a, func="sum", size=None, fill_value=0, order="C", dtype=None, axis=None, **kwargs):
+def aggregate(
+    group_idx,
+    a,
+    func="sum",
+    size=None,
+    fill_value=0,
+    order="C",
+    dtype=None,
+    axis=None,
+    **kwargs,
+):
     return _aggregate_base(
         group_idx,
         a,

diff --git a/numpy_groupies/aggregate_numpy_ufunc.py b/numpy_groupies/aggregate_numpy_ufunc.py
@@ -97,7 +97,17 @@ def _max(group_idx, a, size, fill_value, dtype=None):
 )
 
 
-def aggregate(group_idx, a, func="sum", size=None, fill_value=0, order="C", dtype=None, axis=None, **kwargs):
+def aggregate(
+    group_idx,
+    a,
+    func="sum",
+    size=None,
+    fill_value=0,
+    order="C",
+    dtype=None,
+    axis=None,
+    **kwargs,
+):
     func = get_func(func, aliasing, _impl_dict)
     if not isinstance(func, str):
         raise NotImplementedError("No such ufunc available")

diff --git a/numpy_groupies/aggregate_pandas.py b/numpy_groupies/aggregate_pandas.py
@@ -15,7 +15,7 @@
 
 def _wrapper(group_idx, a, size, fill_value, func="sum", dtype=None, ddof=0, **kwargs):
     funcname = func.__name__ if callable(func) else func
-    kwargs = dict()
+    kwargs = {}
     if funcname in ("var", "std"):
         kwargs["ddof"] = ddof
     df = pd.DataFrame({"group_idx": group_idx, "a": a})
@@ -37,7 +37,9 @@ def _wrapper(group_idx, a, size, fill_value, func="sum", dtype=None, ddof=0, **k
 _supported_funcs = "sum prod all any min max mean var std first last cumsum cumprod cummax cummin".split()
 _impl_dict = {fn: partial(_wrapper, func=fn) for fn in _supported_funcs}
 _impl_dict.update(
-    ("nan" + fn, partial(_wrapper, func=fn)) for fn in _supported_funcs if fn not in funcs_no_separate_nan
+    ("nan" + fn, partial(_wrapper, func=fn))
+    for fn in _supported_funcs
+    if fn not in funcs_no_separate_nan
 )
 _impl_dict.update(
     allnan=partial(_wrapper, func=allnan),
@@ -52,7 +54,17 @@ def _wrapper(group_idx, a, size, fill_value, func="sum", dtype=None, ddof=0, **k
 )
 
 
-def aggregate(group_idx, a, func="sum", size=None, fill_value=0, order="C", dtype=None, axis=None, **kwargs):
+def aggregate(
+    group_idx,
+    a,
+    func="sum",
+    size=None,
+    fill_value=0,
+    order="C",
+    dtype=None,
+    axis=None,
+    **kwargs,
+):
     return _aggregate_base(
         group_idx,
         a,