-
-
Notifications
You must be signed in to change notification settings - Fork 18.9k
FIX: fix interpolate with kwarg limit area and limit direction using pad or bfill #31048
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
9afe992
3a191b9
fd5d8e8
26d88ed
6597aca
c536d3c
ed9cf21
2980325
ecf428e
f8a3423
6733186
c5b77d2
0bb36de
a467afd
5466d8c
3e968fc
556a3cf
6c1e429
767b0ca
b82aaff
26ef7b5
b4b6b5a
e259549
8ceff58
7c5ad7d
92148ff
d62e02e
c2473f2
610e347
570e3c2
721304a
73ab1bf
a33f629
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -222,40 +222,14 @@ def interpolate_1d( | |||||
# default limit is unlimited GH #16282 | ||||||
limit = algos._validate_limit(nobs=None, limit=limit) | ||||||
|
||||||
# These are sets of index pointers to invalid values... i.e. {0, 1, etc... | ||||||
all_nans = set(np.flatnonzero(invalid)) | ||||||
start_nans = set(range(find_valid_index(yvalues, "first"))) | ||||||
end_nans = set(range(1 + find_valid_index(yvalues, "last"), len(valid))) | ||||||
mid_nans = all_nans - start_nans - end_nans | ||||||
|
||||||
# Like the sets above, preserve_nans contains indices of invalid values, | ||||||
# but in this case, it is the final set of indices that need to be | ||||||
# preserved as NaN after the interpolation. | ||||||
|
||||||
# For example if limit_direction='forward' then preserve_nans will | ||||||
# contain indices of NaNs at the beginning of the series, and NaNs that | ||||||
# are more than 'limit' away from the prior non-NaN. | ||||||
|
||||||
# set preserve_nans based on direction using _interp_limit | ||||||
if limit_direction == "forward": | ||||||
preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0)) | ||||||
elif limit_direction == "backward": | ||||||
preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit)) | ||||||
else: | ||||||
# both directions... just use _interp_limit | ||||||
preserve_nans = set(_interp_limit(invalid, limit, limit)) | ||||||
|
||||||
# if limit_area is set, add either mid or outside indices | ||||||
# to preserve_nans GH #16284 | ||||||
if limit_area == "inside": | ||||||
# preserve NaNs on the outside | ||||||
preserve_nans |= start_nans | end_nans | ||||||
elif limit_area == "outside": | ||||||
# preserve NaNs on the inside | ||||||
preserve_nans |= mid_nans | ||||||
|
||||||
# sort preserve_nans and convert to list | ||||||
preserve_nans = sorted(preserve_nans) | ||||||
preserve_nans = _derive_indices_of_nans_to_preserve( | ||||||
yvalues=yvalues, | ||||||
valid=valid, | ||||||
invalid=invalid, | ||||||
limit=limit, | ||||||
limit_area=limit_area, | ||||||
limit_direction=limit_direction, | ||||||
) | ||||||
|
||||||
xvalues = getattr(xvalues, "values", xvalues) | ||||||
yvalues = getattr(yvalues, "values", yvalues) | ||||||
|
@@ -313,6 +287,51 @@ def interpolate_1d( | |||||
result[preserve_nans] = np.nan | ||||||
return result | ||||||
|
||||||
def _derive_indices_of_nans_to_preserve(
    yvalues, valid, invalid, limit, limit_area, limit_direction,
):
    """
    Derive the indices of NaNs that shall be preserved after interpolation.

    This function is called by `interpolate_1d` and takes the arguments with
    the same name from there. In `interpolate_1d`, after performing the
    interpolation, the list of indices of NaNs to preserve is used to put
    NaNs in the desired locations.

    Parameters
    ----------
    yvalues : array-like
        Values being interpolated. Only used to locate the first and the
        last valid entry via `find_valid_index`.
    valid : array-like
        Only its length is used here (as the total number of positions).
        Presumably the boolean mask of non-NaN positions; confirm with caller.
    invalid : array-like
        Mask whose non-zero positions are treated as NaN positions.
    limit
        Maximum number of consecutive NaNs to fill; forwarded unchanged to
        `_interp_limit`.
    limit_area : {"inside", "outside"} or any other value
        "inside" additionally preserves leading and trailing NaNs,
        "outside" additionally preserves NaNs between the first and last
        valid entry. Any other value adds no restriction.
    limit_direction : {"forward", "backward"} or any other value
        Any value other than "forward" or "backward" is treated as both
        directions.

    Returns
    -------
    list
        Sorted indices of the positions that must stay NaN.
    """
    # These are sets of index pointers to invalid values... i.e. {0, 1, etc...
    all_nans = set(np.flatnonzero(invalid))

    first_valid = find_valid_index(yvalues, "first")
    last_valid = find_valid_index(yvalues, "last")
    if first_valid is None or last_valid is None:
        # NOTE(review): guards against `find_valid_index` reporting "no valid
        # entry" as None (confirm against its definition). Without this,
        # range(None) / 1 + None below raise TypeError. With nothing to
        # interpolate from, every NaN has to stay NaN.
        return sorted(all_nans)

    start_nans = set(range(first_valid))
    end_nans = set(range(1 + last_valid, len(valid)))
    mid_nans = all_nans - start_nans - end_nans

    # Like the sets above, preserve_nans contains indices of invalid values,
    # but in this case, it is the final set of indices that need to be
    # preserved as NaN after the interpolation.

    # For example if limit_direction='forward' then preserve_nans will
    # contain indices of NaNs at the beginning of the series, and NaNs that
    # are more than 'limit' away from the prior non-NaN.

    # set preserve_nans based on direction using _interp_limit
    if limit_direction == "forward":
        preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0))
    elif limit_direction == "backward":
        preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit))
    else:
        # both directions... just use _interp_limit
        preserve_nans = set(_interp_limit(invalid, limit, limit))

    # if limit_area is set, add either mid or outside indices
    # to preserve_nans GH #16284
    if limit_area == "inside":
        # preserve NaNs on the outside
        preserve_nans |= start_nans | end_nans
    elif limit_area == "outside":
        # preserve NaNs on the inside
        preserve_nans |= mid_nans

    # sort preserve_nans and convert to list
    return sorted(preserve_nans)
|
||||||
def _interpolate_scipy_wrapper( | ||||||
x, y, new_x, method, fill_value=None, bounds_error=False, order=None, **kwargs | ||||||
|
@@ -477,6 +496,65 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0): | |||||
else: | ||||||
return [P(x, nu) for nu in der] | ||||||
|
||||||
def interpolate_1d_fill( | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you move this under interpolate_1d There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
You request, moving it to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @cchwala interpolate_2d will work on a 1d array. did you investigate applying it along an axis (with masking logic) rather than creating a 1d version. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. see #34749 for alternative implementation calling interpolate_2d instead. interpolate_2d already has the limit logic so no need to use the preserve_nans set based logic. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I used the nice solution from #34749 as suggested above There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. as a consequence there is no |
||||||
values, | ||||||
method="pad", | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
axis=0, | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
(import from pandas._typing) |
||||||
limit=None, | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
limit_area=None, | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
fill_value=None, | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
dtype=None, | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
(import from pandas._typing) |
||||||
): | ||||||
""" | ||||||
This is a 1D-version of `interpolate_2d`, which is used for methods `pad` | ||||||
and `backfill` when interpolating. This 1D-version is necessary to be | ||||||
able to handle kwarg `limit_area` via the function | ||||||
`_derive_indices_of_nans_to_preserve`. It is used the same way as the | ||||||
1D-interpolation functions which are based on scipy-interpolation, i.e. | ||||||
via np.apply_along_axis. | ||||||
""" | ||||||
if method == "pad": | ||||||
limit_direction = "forward" | ||||||
elif method == "backfill": | ||||||
limit_direction = "backward" | ||||||
else: | ||||||
raise ValueError("`method` must be either 'pad' or 'backfill'.") | ||||||
|
||||||
orig_values = values | ||||||
|
||||||
yvalues = values | ||||||
invalid = isna(yvalues) | ||||||
valid = ~invalid | ||||||
|
||||||
if values.ndim > 1: | ||||||
raise AssertionError("This only works with 1D data.") | ||||||
|
||||||
if fill_value is None: | ||||||
mask = None | ||||||
else: # todo create faster fill func without masking | ||||||
mask = mask_missing(values, fill_value) | ||||||
|
||||||
preserve_nans = _derive_indices_of_nans_to_preserve( | ||||||
yvalues=yvalues, | ||||||
valid=valid, | ||||||
invalid=invalid, | ||||||
limit=limit, | ||||||
limit_area=limit_area, | ||||||
limit_direction=limit_direction, | ||||||
) | ||||||
|
||||||
method = clean_fill_method(method) | ||||||
if method == "pad": | ||||||
values = pad_1d(values, limit=limit, mask=mask, dtype=dtype) | ||||||
else: | ||||||
values = backfill_1d(values, limit=limit, mask=mask, dtype=dtype) | ||||||
|
||||||
if orig_values.dtype.kind == "M": | ||||||
# convert float back to datetime64 | ||||||
values = values.astype(orig_values.dtype) | ||||||
|
||||||
values[preserve_nans] = fill_value | ||||||
return values | ||||||
|
||||||
def interpolate_2d( | ||||||
values, method="pad", axis=0, limit=None, fill_value=None, dtype=None | ||||||
|
Uh oh!
There was an error while loading. Please reload this page.