Skip to content

Routine maintenance #86

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ jobs:
os: ["ubuntu-latest"]
python-version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 10 # Fetch all history for all branches and tags.
fetch-depth: 1
- name: Set environment variables
run: |
echo "CONDA_ENV_FILE=ci/environment.yml" >> $GITHUB_ENV
Expand All @@ -49,7 +49,7 @@ jobs:
# We only want to install this on one run, because otherwise we'll have
# duplicate annotations.
- name: Install error reporter
if: ${{ matrix.os }} == 'ubuntu-latest' and ${{ matrix.python-version }} == '3.10'
if: ${{ matrix.os }} == 'ubuntu-latest' and ${{ matrix.python-version }} == '3.11'
run: |
python -m pip install pytest-github-actions-annotate-failures

Expand Down
18 changes: 9 additions & 9 deletions .github/workflows/pypi-release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@ jobs:
runs-on: ubuntu-latest
if: github.repository == 'ml31415/numpy-groupies'
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: actions/setup-python@v4
- uses: actions/setup-python@v5
name: Install Python
with:
python-version: "3.10"
python-version: "3.11"

- name: Install dependencies
run: |
Expand All @@ -39,7 +39,7 @@ jobs:
else
echo "✅ Looks good"
fi
- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
with:
name: releases
path: dist
Expand All @@ -48,11 +48,11 @@ jobs:
needs: build-artifacts
runs-on: ubuntu-latest
steps:
- uses: actions/setup-python@v4
- uses: actions/setup-python@v5
name: Install Python
with:
python-version: "3.10"
- uses: actions/download-artifact@v3
python-version: "3.11"
- uses: actions/download-artifact@v4
with:
name: releases
path: dist
Expand Down Expand Up @@ -80,7 +80,7 @@ jobs:
if: github.event_name == 'release'
runs-on: ubuntu-latest
steps:
- uses: actions/download-artifact@v3
- uses: actions/download-artifact@v4
with:
name: releases
path: dist
Expand All @@ -89,4 +89,4 @@ jobs:
with:
user: __token__
password: ${{ secrets.PYPI_TOKEN }}
verbose: true
verbose: true
27 changes: 27 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
- id: debug-statements
- id: detect-private-key
- id: check-builtin-literals
- id: check-case-conflict
- id: check-executables-have-shebangs
- id: check-json
- id: check-merge-conflict
- id: check-symlinks
- id: check-toml
- id: check-xml
- id: check-yaml
exclude: (.pre-commit-config\.yaml)

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.7
hooks:
- id: ruff
- id: ruff-format

- repo: https://github.com/codespell-project/codespell
rev: v2.2.6
hooks:
- id: codespell
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
[![GitHub Workflow CI Status](https://img.shields.io/github/actions/workflow/status/ml31415/numpy-groupies/ci.yaml?branch=master&logo=github&style=flat)](https://github.com/ml31415/numpy-groupies/actions)
[![PyPI](https://img.shields.io/pypi/v/numpy-groupies.svg?style=flat)](https://pypi.org/project/numpy-groupies/)
[![Conda-forge](https://img.shields.io/conda/vn/conda-forge/numpy_groupies.svg?style=flat)](https://anaconda.org/conda-forge/numpy_groupies)
[![Supported Versions](https://img.shields.io/pypi/pyversions/numpy-groupies.svg)](https://pypi.org/project/numpy-groupies)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
![Python Version from PEP 621 TOML](https://img.shields.io/python/required-version-toml?tomlFilePath=https%3A%2F%2Fraw.githubusercontent.com%2Fml31415%2Fnumpy-groupies%2Fmaster%2Fpyproject.toml)
![PyPI - Downloads](https://img.shields.io/pypi/dm/numpy-groupies)

# numpy-groupies
Expand Down
4 changes: 3 additions & 1 deletion conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@


def pytest_configure(config):
config.addinivalue_line("markers", "deselect_if(func): function to deselect tests from parametrization")
config.addinivalue_line(
"markers", "deselect_if(func): function to deselect tests from parametrization"
)


def pytest_collection_modifyitems(config, items):
Expand Down
33 changes: 26 additions & 7 deletions numpy_groupies/aggregate_numba.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,9 @@ def __call__(
dtype = check_dtype(dtype, self.func, a, len(group_idx))
check_fill_value(fill_value, dtype, func=self.func)
input_dtype = type(a) if np.isscalar(a) else a.dtype
ret, counter, mean, outer = self._initialize(flat_size, fill_value, dtype, input_dtype, group_idx.size)
ret, counter, mean, outer = self._initialize(
flat_size, fill_value, dtype, input_dtype, group_idx.size
)
group_idx = np.ascontiguousarray(group_idx)

if not np.isscalar(a):
Expand Down Expand Up @@ -141,7 +143,9 @@ def inner(ri, val, ret, counter, mean, fill_value):
def loop(group_idx, a, ret, counter, mean, outer, fill_value, ddof):
# ddof needs to be present for being exchangeable with loop_2pass
size = len(ret)
rng = range(len(group_idx) - 1, -1, -1) if reverse else range(len(group_idx))
rng = (
range(len(group_idx) - 1, -1, -1) if reverse else range(len(group_idx))
)
for i in rng:
ri = group_idx[i]
if ri < 0:
Expand Down Expand Up @@ -242,14 +246,18 @@ def __call__(
axis=None,
ddof=0,
):
iv = input_validation(group_idx, a, size=size, order=order, axis=axis, check_bounds=False)
iv = input_validation(
group_idx, a, size=size, order=order, axis=axis, check_bounds=False
)
group_idx, a, flat_size, ndim_idx, size, _ = iv

# TODO: The typecheck should be done by the class itself, not by check_dtype
dtype = check_dtype(dtype, self.func, a, len(group_idx))
check_fill_value(fill_value, dtype, func=self.func)
input_dtype = type(a) if np.isscalar(a) else a.dtype
ret, _, _, _ = self._initialize(flat_size, fill_value, dtype, input_dtype, group_idx.size)
ret, _, _, _ = self._initialize(
flat_size, fill_value, dtype, input_dtype, group_idx.size
)
group_idx = np.ascontiguousarray(group_idx)

sortidx = np.argsort(group_idx, kind="mergesort")
Expand Down Expand Up @@ -493,7 +501,7 @@ class CumMin(AggregateNtoN, Min):


def get_funcs():
funcs = dict()
funcs = {}
for op in (
Sum,
Prod,
Expand Down Expand Up @@ -530,7 +538,16 @@ def get_funcs():


def aggregate(
group_idx, a, func="sum", size=None, fill_value=0, order="C", dtype=None, axis=None, cache=True, **kwargs
group_idx,
a,
func="sum",
size=None,
fill_value=0,
order="C",
dtype=None,
axis=None,
cache=True,
**kwargs,
):
func = get_func(func, aliasing, _impl_dict)
if not isinstance(func, str):
Expand All @@ -541,7 +558,9 @@ def aggregate(
if cache is True:
cache = _default_cache
aggregate_op = cache.setdefault(func, AggregateGeneric(func))
return aggregate_op(group_idx, a, size, fill_value, order, dtype, axis, **kwargs)
return aggregate_op(
group_idx, a, size, fill_value, order, dtype, axis, **kwargs
)
else:
func = _impl_dict[func]
return func(group_idx, a, size, fill_value, order, dtype, axis, **kwargs)
Expand Down
54 changes: 43 additions & 11 deletions numpy_groupies/aggregate_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ def _sum(group_idx, a, size, fill_value, dtype=None):
ret.real = np.bincount(group_idx, weights=a.real, minlength=size)
ret.imag = np.bincount(group_idx, weights=a.imag, minlength=size)
else:
ret = np.bincount(group_idx, weights=a, minlength=size).astype(dtype, copy=False)
ret = np.bincount(group_idx, weights=a, minlength=size).astype(
dtype, copy=False
)

if fill_value != 0:
_fill_untouched(group_idx, ret, fill_value)
Expand Down Expand Up @@ -117,7 +119,9 @@ def _argmax(group_idx, a, size, fill_value, dtype=int, _nansqueeze=False):
ret = np.full(size, fill_value, dtype=dtype)
group_idx_max = group_idx[is_max]
(argmax,) = is_max.nonzero()
ret[group_idx_max[::-1]] = argmax[::-1] # reverse to ensure first value for each group wins
ret[group_idx_max[::-1]] = argmax[
::-1
] # reverse to ensure first value for each group wins
return ret


Expand All @@ -129,7 +133,9 @@ def _argmin(group_idx, a, size, fill_value, dtype=int, _nansqueeze=False):
ret = np.full(size, fill_value, dtype=dtype)
group_idx_min = group_idx[is_min]
(argmin,) = is_min.nonzero()
ret[group_idx_min[::-1]] = argmin[::-1] # reverse to ensure first value for each group wins
ret[group_idx_min[::-1]] = argmin[
::-1
] # reverse to ensure first value for each group wins
return ret


Expand All @@ -143,7 +149,9 @@ def _mean(group_idx, a, size, fill_value, dtype=np.dtype(np.float64)):
sums.real = np.bincount(group_idx, weights=a.real, minlength=size)
sums.imag = np.bincount(group_idx, weights=a.imag, minlength=size)
else:
sums = np.bincount(group_idx, weights=a, minlength=size).astype(dtype, copy=False)
sums = np.bincount(group_idx, weights=a, minlength=size).astype(
dtype, copy=False
)

with np.errstate(divide="ignore", invalid="ignore"):
ret = sums.astype(dtype, copy=False) / counts
Expand All @@ -160,15 +168,19 @@ def _sum_of_squres(group_idx, a, size, fill_value, dtype=np.dtype(np.float64)):
return ret


def _var(group_idx, a, size, fill_value, dtype=np.dtype(np.float64), sqrt=False, ddof=0):
def _var(
group_idx, a, size, fill_value, dtype=np.dtype(np.float64), sqrt=False, ddof=0
):
if np.ndim(a) == 0:
raise ValueError("cannot take variance with scalar a")
counts = np.bincount(group_idx, minlength=size)
sums = np.bincount(group_idx, weights=a, minlength=size)
with np.errstate(divide="ignore", invalid="ignore"):
means = sums.astype(dtype, copy=False) / counts
counts = np.where(counts > ddof, counts - ddof, 0)
ret = np.bincount(group_idx, (a - means[group_idx]) ** 2, minlength=size) / counts
ret = (
np.bincount(group_idx, (a - means[group_idx]) ** 2, minlength=size) / counts
)
if sqrt:
ret = np.sqrt(ret) # this is now std not var
if not np.isnan(fill_value):
Expand Down Expand Up @@ -208,7 +220,9 @@ def _array(group_idx, a, size, fill_value, dtype=None):
return ret


def _generic_callable(group_idx, a, size, fill_value, dtype=None, func=lambda g: g, **kwargs):
def _generic_callable(
group_idx, a, size, fill_value, dtype=None, func=lambda g: g, **kwargs
):
"""groups a by inds, and then applies foo to each group in turn, placing
the results in an array."""
groups = _array(group_idx, a, size, ())
Expand Down Expand Up @@ -244,7 +258,9 @@ def _cumsum(group_idx, a, size, fill_value=None, dtype=None):

def _nancumsum(group_idx, a, size, fill_value=None, dtype=None):
a_nonans = np.where(np.isnan(a), 0, a)
group_idx_nonans = np.where(np.isnan(group_idx), np.nanmax(group_idx) + 1, group_idx)
group_idx_nonans = np.where(
np.isnan(group_idx), np.nanmax(group_idx) + 1, group_idx
)
return _cumsum(group_idx_nonans, a_nonans, size, fill_value=fill_value, dtype=dtype)


Expand All @@ -271,7 +287,11 @@ def _nancumsum(group_idx, a, size, fill_value=None, dtype=None):
sumofsquares=_sum_of_squres,
generic=_generic_callable,
)
_impl_dict.update(("nan" + k, v) for k, v in list(_impl_dict.items()) if k not in funcs_no_separate_nan)
_impl_dict.update(
("nan" + k, v)
for k, v in list(_impl_dict.items())
if k not in funcs_no_separate_nan
)
_impl_dict["nancumsum"] = _nancumsum


Expand Down Expand Up @@ -321,7 +341,9 @@ def _aggregate_base(
dtype = check_dtype(dtype, func, a, flat_size)
check_fill_value(fill_value, dtype, func=func)
func = _impl_dict[func]
ret = func(group_idx, a, flat_size, fill_value=fill_value, dtype=dtype, **kwargs)
ret = func(
group_idx, a, flat_size, fill_value=fill_value, dtype=dtype, **kwargs
)

# deal with ndimensional indexing
if ndim_idx > 1:
Expand All @@ -335,7 +357,17 @@ def _aggregate_base(
return ret


def aggregate(group_idx, a, func="sum", size=None, fill_value=0, order="C", dtype=None, axis=None, **kwargs):
def aggregate(
group_idx,
a,
func="sum",
size=None,
fill_value=0,
order="C",
dtype=None,
axis=None,
**kwargs,
):
return _aggregate_base(
group_idx,
a,
Expand Down
12 changes: 11 additions & 1 deletion numpy_groupies/aggregate_numpy_ufunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,17 @@ def _max(group_idx, a, size, fill_value, dtype=None):
)


def aggregate(group_idx, a, func="sum", size=None, fill_value=0, order="C", dtype=None, axis=None, **kwargs):
def aggregate(
group_idx,
a,
func="sum",
size=None,
fill_value=0,
order="C",
dtype=None,
axis=None,
**kwargs,
):
func = get_func(func, aliasing, _impl_dict)
if not isinstance(func, str):
raise NotImplementedError("No such ufunc available")
Expand Down
18 changes: 15 additions & 3 deletions numpy_groupies/aggregate_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

def _wrapper(group_idx, a, size, fill_value, func="sum", dtype=None, ddof=0, **kwargs):
funcname = func.__name__ if callable(func) else func
kwargs = dict()
kwargs = {}
if funcname in ("var", "std"):
kwargs["ddof"] = ddof
df = pd.DataFrame({"group_idx": group_idx, "a": a})
Expand All @@ -37,7 +37,9 @@ def _wrapper(group_idx, a, size, fill_value, func="sum", dtype=None, ddof=0, **k
_supported_funcs = "sum prod all any min max mean var std first last cumsum cumprod cummax cummin".split()
_impl_dict = {fn: partial(_wrapper, func=fn) for fn in _supported_funcs}
_impl_dict.update(
("nan" + fn, partial(_wrapper, func=fn)) for fn in _supported_funcs if fn not in funcs_no_separate_nan
("nan" + fn, partial(_wrapper, func=fn))
for fn in _supported_funcs
if fn not in funcs_no_separate_nan
)
_impl_dict.update(
allnan=partial(_wrapper, func=allnan),
Expand All @@ -52,7 +54,17 @@ def _wrapper(group_idx, a, size, fill_value, func="sum", dtype=None, ddof=0, **k
)


def aggregate(group_idx, a, func="sum", size=None, fill_value=0, order="C", dtype=None, axis=None, **kwargs):
def aggregate(
group_idx,
a,
func="sum",
size=None,
fill_value=0,
order="C",
dtype=None,
axis=None,
**kwargs,
):
return _aggregate_base(
group_idx,
a,
Expand Down
Loading