Skip to content

Consensus Motifs: Ostinato algorithm; most central motif; reproduce Fig 1, 2, 9 from paper Matrix Profile XV #279

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 26 commits into from
Dec 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
31fbd9f
Reproduce Fig 9 from paper Matrix Profile XV Consensus Motifs
refactoriel Nov 4, 2020
9bfd6bc
Faster implementation of Ostinato
refactoriel Nov 4, 2020
5a5b622
Merge branch 'master' into ostinato
refactoriel Nov 11, 2020
2f1fe19
Change from numpy.convolve to scipy.signal.convolve
refactoriel Nov 11, 2020
675f057
add test_ostinato
refactoriel Nov 12, 2020
b467e36
naive consensus search
refactoriel Nov 12, 2020
6315bec
add ostinato function
refactoriel Nov 12, 2020
1778d0f
Fix bug in naive implementation
refactoriel Nov 12, 2020
520f140
test_ostinato: ignore DeprecationWarning
refactoriel Nov 12, 2020
cac801e
Notebook: add EOG data, polish mtDNA, some more explantory text
refactoriel Nov 13, 2020
6d2d07b
expand range of tested random seeds
refactoriel Nov 13, 2020
6be27e8
Black'd and flake8'd
refactoriel Nov 13, 2020
37232d2
only test radius for now
refactoriel Nov 16, 2020
0d74094
black 20.8b1
refactoriel Nov 16, 2020
9f281a1
put back test of time series and subsequence indices
refactoriel Nov 17, 2020
11936fa
Private function _get_central_motif
refactoriel Nov 17, 2020
309b8d8
Complete _get_central_motif
refactoriel Nov 18, 2020
bd7e622
Ostinato: main computation in private function, add wrapper
refactoriel Nov 18, 2020
6bc4583
Consistent variable naming, improved docstrings and central motif search
refactoriel Nov 19, 2020
4c5b90d
Implement Sean's suggestions to notebook
refactoriel Nov 24, 2020
fb30fdf
Retrieve clean CSVs from Zenodo, remove unnecessary imports
refactoriel Nov 25, 2020
4bfec22
Edge case: same radius, same mean distance: default to first
refactoriel Nov 26, 2020
cff76f2
docstring formatting
refactoriel Nov 26, 2020
eca2af7
naive implementation of get_central_motif
refactoriel Nov 30, 2020
4a8603e
remove nested conditional
refactoriel Nov 30, 2020
31dbae7
oops, forgot black
refactoriel Nov 30, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
451 changes: 451 additions & 0 deletions docs/Tutorial_Consensus_Motif.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions stumpy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from .aampi import aampi # noqa: F401
from .chains import atsc, allc # noqa: F401
from .floss import floss, fluss, _nnmark, _iac, _cac, _rea # noqa: F401
from .ostinato import ostinato # noqa: F401
from .scrump import scrump # noqa: F401
from .stumpi import stumpi # noqa: F401
from numba import cuda
Expand Down
3 changes: 2 additions & 1 deletion stumpy/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import numpy as np
from numba import njit, prange
from scipy.signal import convolve
import tempfile
import math

Expand Down Expand Up @@ -265,7 +266,7 @@ def sliding_dot_product(Q, T):
n = T.shape[0]
m = Q.shape[0]
Qr = np.flipud(Q) # Reverse/flip Q
QT = np.convolve(Qr, T)
QT = convolve(Qr, T)

return QT.real[m - 1 : n]

Expand Down
267 changes: 267 additions & 0 deletions stumpy/ostinato.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
import numpy as np
from . import core, stump


def ostinato(tss, m):
"""
Find the consensus motif of multiple time series

This is a wrapper around the vanilla version of the ostinato algorithm
which finds the best radius and a helper function that finds the most
central conserved motif.

Parameters
----------
tss : list
List of time series for which to find the consensus motif

m : int
Window size

Returns
-------
rad : float
Radius of the most central consensus motif

ts_ind : int
Index of time series which contains the most central consensus motif

ss_ind : int
Start index of the most central consensus motif within the time series
`ts_ind` that contains it

Notes
-----
<https://www.cs.ucr.edu/~eamonn/consensus_Motif_ICDM_Long_version.pdf>

See Table 2

The ostinato algorithm proposed in the paper finds the best radius
in `tss`. Intuitively, the radius is the minimum distance of a
subsequence to encompass at least one nearest neighbor subsequence
from all other time series. The best radius in `tss` is the minimum
radius amongst all radii. Some data sets might contain multiple
subsequences which have the same optimal radius.
The greedy Ostinato algorithm only finds one of them, which might
not be the most central motif. The most central motif amongst the
subsequences with the best radius is the one with the smallest mean
distance to nearest neighbors in all other time series. To find this
central motif it is necessary to search the subsequences with the
best radius via `stumpy.ostinato._get_central_motif`
"""
rad, ts_ind, ss_ind = _ostinato(tss, m)
return _get_central_motif(tss, rad, ts_ind, ss_ind, m)


def _ostinato(tss, m):
"""
Find the consensus motif of multiple time series

Parameters
----------
tss : list
List of time series for which to find the consensus motif

m : int
Window size

Returns
-------
bsf_rad : float
Radius of the consensus motif

ts_ind : int
Index of time series which contains the consensus motif

ss_ind : int
Start index of consensus motif within the time series ts_ind
that contains it

Notes
-----
<https://www.cs.ucr.edu/~eamonn/consensus_Motif_ICDM_Long_version.pdf>

See Table 2

The ostinato algorithm proposed in the paper finds the best radius
in `tss`. Intuitively, the radius is the minimum distance of a
subsequence to encompass at least one nearest neighbor subsequence
from all other time series. The best radius in `tss` is the minimum
radius amongst all radii. Some data sets might contain multiple
subsequences which have the same optimal radius.
The greedy Ostinato algorithm only finds one of them, which might
not be the most central motif. The most central motif amongst the
subsequences with the best radius is the one with the smallest mean
distance to nearest neighbors in all other time series. To find this
central motif it is necessary to search the subsequences with the
best radius via `stumpy.ostinato._get_central_motif`
"""
# Preprocess means and stddevs and handle np.nan/np.inf
Ts = [None] * len(tss)
M_Ts = [None] * len(tss)
Σ_Ts = [None] * len(tss)
for i, T in enumerate(tss):
Ts[i], M_Ts[i], Σ_Ts[i] = core.preprocess(T, m)

bsf_rad, ts_ind, ss_ind = np.inf, 0, 0
k = len(Ts)
for j in range(k):
if j < (k - 1):
h = j + 1
else:
h = 0

mp = stump(Ts[j], m, Ts[h], ignore_trivial=False)
si = np.argsort(mp[:, 0])
for q in si:
rad = mp[q, 0]
if rad >= bsf_rad:
break
for i in range(k):
if ~np.isin(i, [j, h]):
QT = core.sliding_dot_product(Ts[j][q : q + m], Ts[i])
rad = np.max(
(
rad,
np.min(
core._mass(
Ts[j][q : q + m],
Ts[i],
QT,
M_Ts[j][q],
Σ_Ts[j][q],
M_Ts[i],
Σ_Ts[i],
)
),
)
)
if rad >= bsf_rad:
break
if rad < bsf_rad:
bsf_rad, ts_ind, ss_ind = rad, j, q

return bsf_rad, ts_ind, ss_ind


def _get_central_motif(tss, rad, ts_ind, ss_ind, m):
"""
Compare subsequences with the same radius and return the most central motif

Parameters
----------
tss : list
List of time series for which to find the most central motif

rad : float
Best radius found by a consensus search algorithm

ts_ind : int
Index of time series in which `rad` was found first

ss_ind : int
Start index of subsequence in `ts_ind` that has radius `rad`

m : int
Window size

Returns
-------
rad : float
Radius of the most central consensus motif

ts_ind : int
Index of time series which contains the most central consensus motif

ss_ind : int
Start index of most central consensus motif within the time series `ts_ind`
that contains it

Notes
-----
<https://www.cs.ucr.edu/~eamonn/consensus_Motif_ICDM_Long_version.pdf>

See Table 2

The ostinato algorithm proposed in the paper finds the best radius
in `tss`. Intuitively, the radius is the minimum distance of a
subsequence to encompass at least one nearest neighbor subsequence
from all other time series. The best radius in `tss` is the minimum
radius amongst all radii. Some data sets might contain multiple
subsequences which have the same optimal radius.
The greedy Ostinato algorithm only finds one of them, which might
not be the most central motif. The most central motif amongst the
subsequences with the best radius is the one with the smallest mean
distance to nearest neighbors in all other time series. To find this
central motif it is necessary to search the subsequences with the
best radius via `stumpy.ostinato._get_central_motif`
"""
k = len(tss)

# Ostinato hit: get nearest neighbors' distances and indices
q_ost = tss[ts_ind][ss_ind : ss_ind + m]
ss_ind_nn_ost, d_ost = _across_series_nearest_neighbors(q_ost, tss)

# Alternative candidates: Distance to ostinato hit equals best radius
ts_ind_alt = np.flatnonzero(np.isclose(d_ost, rad))
ss_ind_alt = ss_ind_nn_ost[ts_ind_alt]
d_alt = np.zeros((len(ts_ind_alt), k), dtype=float)
for i, (tsi, ssi) in enumerate(zip(ts_ind_alt, ss_ind_alt)):
q = tss[tsi][ssi : ssi + m]
_, d_alt[i] = _across_series_nearest_neighbors(q, tss)
rad_alt = np.max(d_alt, axis=1)
d_mean_alt = np.mean(d_alt, axis=1)

# Alternatives with same radius and lower mean distance
alt_better = np.logical_and(
np.isclose(rad_alt, rad),
d_mean_alt < d_ost.mean(),
)
# Alternatives with same radius and same mean distance
alt_same = np.logical_and(
np.isclose(rad_alt, rad),
np.isclose(d_mean_alt, d_ost.mean()),
)
if np.any(alt_better):
ts_ind_alt = ts_ind_alt[alt_better]
d_mean_alt = d_mean_alt[alt_better]
i_alt_best = np.argmin(d_mean_alt)
ts_ind = ts_ind_alt[i_alt_best]
elif np.any(alt_same):
# Default to the first match in the list of time series
ts_ind_alt = ts_ind_alt[alt_same]
i_alt_first = np.argmin(ts_ind_alt)
ts_ind = np.min((ts_ind, ts_ind_alt[i_alt_first]))
ss_ind = ss_ind_nn_ost[ts_ind]
return rad, ts_ind, ss_ind


def _across_series_nearest_neighbors(q, tss):
"""
For multiple time series find, per individual time series, the subsequences closest
to a query.

Parameters
----------
q : ndarray
Query array or subsequence

tss : list
List of time series for which to the nearest neighbors to `q`

Returns
-------
ss_ind_nn : ndarray
Indices to subsequences in `tss` that are closest to `q`

d : ndarray
Distances to subsequences in `tss` that are closest to `q`
"""
k = len(tss)
d = np.zeros(k, dtype=float)
ss_ind_nn = np.zeros(k, dtype=int)
for i in range(k):
dp = core.mass(q, tss[i])
ss_ind_nn[i] = np.argmin(dp)
d[i] = dp[ss_ind_nn[i]]
return ss_ind_nn, d
16 changes: 9 additions & 7 deletions test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ if [ $# -gt 0 ]; then
else
echo "Using default test_mode=\"all\""
fi
fi
fi

###############
# Functions #
Expand Down Expand Up @@ -63,11 +63,11 @@ test_unit()
check_errs $?
py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_stump.py tests/test_mstump.py tests/test_scrump.py tests/test_stumpi.py
check_errs $?
py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_stumped.py
py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_stumped.py
check_errs $?
py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_stumped_one_constant_subsequence.py
py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_stumped_one_constant_subsequence.py
check_errs $?
py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_stumped_two_constant_subsequences.py
py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_stumped_two_constant_subsequences.py
check_errs $?
py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_stumped_two_constant_subsequences_swap.py
check_errs $?
Expand All @@ -87,7 +87,7 @@ test_unit()
check_errs $?
py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_stumped_two_subsequences_nan_inf_A_B_join_swap.py
check_errs $?
py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_mstumped.py
py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_mstumped.py
check_errs $?
py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_mstumped_one_subsequence_nan_self_join.py
check_errs $?
Expand All @@ -99,9 +99,9 @@ test_unit()
check_errs $?
py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamped.py
check_errs $?
py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamped_one_constant_subsequence.py
py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamped_one_constant_subsequence.py
check_errs $?
py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamped_two_constant_subsequences.py
py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamped_two_constant_subsequences.py
check_errs $?
py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamped_two_constant_subsequences_swap.py
check_errs $?
Expand All @@ -121,6 +121,8 @@ test_unit()
check_errs $?
py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamped_two_subsequences_nan_inf_A_B_join_swap.py
check_errs $?
py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_ostinato.py
check_errs $?
}

test_coverage()
Expand Down
Loading