33import logging
44import numpy as np
55import pandas as pd
6+ from math import isclose
67import time as _time
78import bisect
89
@@ -1376,9 +1377,26 @@ def _fix_bad_div_adjust(self, df, interval, currency):
13761377 # div_too_big_improvement_threshold = 1
13771378 div_too_big_improvement_threshold = 2
13781379
1379- drop_c2l = df2 ['Close' ].iloc [div_idx - 1 ] - df2 ['Low' ].iloc [div_idx ]
1380- # drop_c2c = df2['Close'].iloc[div_idx-1] - df2['Close'].iloc[div_idx]
1381- # drop = drop_c2c
1380+ if isclose (df2 ['Low' ].iloc [div_idx ], df2 ['Close' ].iloc [div_idx - 1 ]* 100 , rel_tol = 0.025 ):
1381+ # Price has jumped ~100x on ex-div day, need to fix immediately.
1382+ drop_c2l = df2 ['Close' ].iloc [div_idx - 1 ]* 100 - df2 ['Low' ].iloc [div_idx ]
1383+ div_pct = div / (df2 ['Close' ].iloc [div_idx - 1 ]* 100 )
1384+ true_adjust = 1.0 - div / (df2 ['Close' ].iloc [div_idx - 1 ]* 100 )
1385+ present_adj = df2 ['Adj Close' ].iloc [div_idx - 1 ] / df2 ['Close' ].iloc [div_idx - 1 ]
1386+ if not isclose (present_adj , true_adjust , rel_tol = 0.025 ):
1387+ df2 .loc [:dt - _datetime .timedelta (seconds = 1 ), 'Adj Close' ] = true_adjust * df2 ['Close' ].loc [:dt - _datetime .timedelta (seconds = 1 )]
1388+ df2 .loc [:dt - _datetime .timedelta (seconds = 1 ), 'Repaired?' ] = True
1389+ elif isclose (df2 ['Low' ].iloc [div_idx ], df2 ['Close' ].iloc [div_idx - 1 ]* 0.01 , rel_tol = 0.025 ):
1390+ # Price has dropped ~100x on ex-div day, need to fix immediately.
1391+ drop_c2l = df2 ['Close' ].iloc [div_idx - 1 ]* 0.01 - df2 ['Low' ].iloc [div_idx ]
1392+ div_pct = div / (df2 ['Close' ].iloc [div_idx - 1 ]* 0.01 )
1393+ true_adjust = 1.0 - div / (df2 ['Close' ].iloc [div_idx - 1 ]* 100 )
1394+ present_adj = df2 ['Adj Close' ].iloc [div_idx - 1 ] / df2 ['Close' ].iloc [div_idx - 1 ]
1395+ if not isclose (present_adj , true_adjust , rel_tol = 0.025 ):
1396+ df2 .loc [:dt - _datetime .timedelta (seconds = 1 ), 'Adj Close' ] = true_adjust * df2 ['Close' ].loc [:dt - _datetime .timedelta (seconds = 1 )]
1397+ df2 .loc [:dt - _datetime .timedelta (seconds = 1 ), 'Repaired?' ] = True
1398+ else :
1399+ drop_c2l = df2 ['Close' ].iloc [div_idx - 1 ] - df2 ['Low' ].iloc [div_idx ]
13821400 drop = drop_c2l
13831401 if div_idx < len (df2 )- 1 :
13841402 # # In low-volume scenarios, the price drop is day after not today.
@@ -1388,8 +1406,10 @@ def _fix_bad_div_adjust(self, df, interval, currency):
13881406 # elif df2['Volume'].iloc[div_idx]==0:
13891407 # if drop == 0.0:
13901408 # drop = np.max(df2['Close'].iloc[div_idx-1:div_idx+1].to_numpy() - df2['Low'].iloc[div_idx:div_idx+2].to_numpy())
1409+ #
13911410 # Hmm, can I always look ahead 1 day? Catch: increases FP rate of div-too-small for tiny divs.
1392- drops = df2 ['Close' ].iloc [div_idx - 1 :div_idx + 1 ].to_numpy () - df2 ['Low' ].iloc [div_idx :div_idx + 2 ].to_numpy ()
1411+ # drops = df2['Close'].iloc[div_idx-1:div_idx+1].to_numpy() - df2['Low'].iloc[div_idx:div_idx+2].to_numpy()
1412+ drops = np .array ([drop , df2 ['Close' ].iloc [div_idx ] - df2 ['Low' ].iloc [div_idx + 1 ]])
13931413 drop_2Dmax = np .max (drops )
13941414 else :
13951415 drops = np .array ([drop ])
@@ -1728,21 +1748,27 @@ def cluster_dividends(df, column='div', threshold=7):
17281748 adjDeltas = x ['Adj Low' ].iloc [1 :].to_numpy () - x ['Adj Close' ].iloc [:- 1 ].to_numpy ()
17291749 adjDeltas = np .append ([0.0 ], adjDeltas )
17301750 x ['adjDelta' ] = adjDeltas
1751+ for i in np .where (x ['Dividends' ]> 0 )[0 ]:
1752+ x .loc [x .index [i ], 'adjDelta' ] += x ['Dividends' ].iloc [i ]* x ['Adj' ].iloc [i ]
17311753 deltas = x [['delta' , 'adjDelta' ]]
1732- if div_pct > 0.15 and div_pct < 1.0 : # avoid analysing impossibly-big dividends here
1754+ if div_pct > 0.05 and div_pct < 1.0 :
17331755 adjDiv = div * x ['Adj' ].iloc [0 ]
17341756 f = deltas ['adjDelta' ] > (adjDiv * 0.6 )
17351757 if f .any ():
1736- for idx in np .where (f )[0 ]:
1737- adjDelta_max_drop_idx = idx
1738- adjDelta_max_drop = deltas ['adjDelta' ].iloc [idx ]
1739- if adjDelta_max_drop > 1.001 * deltas ['delta' ].iloc [adjDelta_max_drop_idx ]:
1758+ indices = np .where (f )[0 ]
1759+ for idx in indices :
1760+ adjDelta_drop = deltas ['adjDelta' ].iloc [idx ]
1761+ if adjDelta_drop > 1.001 * deltas ['delta' ].iloc [idx ]:
17401762 # Adjusted price has risen by more than unadjusted, should not happen.
17411763 # See if Adjusted price later falls by a similar amount. This would mean
17421764 # dividend has been applied too early.
1743- ratios = (- 1 * deltas ['adjDelta' ])/ adjDelta_max_drop
1765+ ratios = (- 1 * deltas ['adjDelta' ])/ adjDelta_drop
17441766 f_near1_or_above = ratios >= 0.8
1745- if f_near1_or_above .any ():
1767+ # Update: only check for wrong date if no coincident split.
1768+ # Because when there is a coincident split, it is more likely that the div is missing the split adjustment.
1769+ split = df2 ['Stock Splits' ].loc [dt ]
1770+ pre_split = div_status_df ['div_pre_split' ].loc [dt ]
1771+ if (split == 0.0 or (not pre_split )) and f_near1_or_above .any ():
17461772 near_indices = np .where (f_near1_or_above )[0 ]
17471773 if len (near_indices ) > 1 :
17481774 penalties = np .zeros (len (near_indices ))
@@ -1760,7 +1786,7 @@ def cluster_dividends(df, column='div', threshold=7):
17601786 div_date_wrong = True
17611787 div_true_date = ratios .index [reversal_idx ]
17621788 break
1763- elif adjDelta_max_drop > 0.39 * adjDiv :
1789+ elif adjDelta_drop > 0.39 * adjDiv :
17641790 # Still true that applied adjustment exceeds price action,
17651791 # just not clear what solution is (if any).
17661792 div_adj_exceeds_prices = True
@@ -1777,6 +1803,27 @@ def cluster_dividends(df, column='div', threshold=7):
17771803 div_status ['div_date_wrong' ] = div_date_wrong
17781804 div_status ['div_true_date' ] = div_true_date
17791805
1806+ if div_adj_exceeds_prices :
1807+ split = df2 ['Stock Splits' ].loc [dt ]
1808+ if split != 0.0 :
1809+ # Check again if div missing split. Use looser tolerance
1810+ # as we know the adjustment seems wrong.
1811+ div_postSplit = div / split
1812+ if div_postSplit > div :
1813+ # Use volatility-adjusted drop
1814+ typical_volatility = div_status_df ['vol' ].loc [dt ]
1815+ drop = div_status_df ['drop' ].loc [dt ]
1816+ _drop = drop - typical_volatility
1817+ else :
1818+ drop_2Dmax = div_status_df ['drop_2Dmax' ].loc [dt ]
1819+ _drop = drop_2Dmax
1820+ if _drop > 0 :
1821+ diff = abs (div - _drop )
1822+ diff_postSplit = abs (div_postSplit - _drop )
1823+ if diff_postSplit <= (diff * 1.1 ):
1824+ # possibilities.append({'state':'div-pre-split', 'diff':diff_postSplit})
1825+ div_status_df .loc [dt , 'div_pre_split' ] = True
1826+
17801827 for k ,v in div_status .items ():
17811828 if k not in div_status_df :
17821829 if isinstance (v , (bool , np .bool_ )):
@@ -1852,7 +1899,7 @@ def cluster_dividends(df, column='div', threshold=7):
18521899 if 'div_date_wrong' in cluster .columns and (cluster [c ] == cluster ['div_date_wrong' ]).all ():
18531900 continue
18541901
1855- if 'adj_exceeds_prices' in cluster .columns and (cluster [c ] == cluster ['adj_exceeds_prices' ]).all ():
1902+ if 'adj_exceeds_prices' in cluster .columns and (cluster [c ] == ( cluster [c ] & cluster [ 'adj_exceeds_prices' ]) ).all ():
18561903 # More likely that true-positive. Maybe the div never happened
18571904 continue
18581905
@@ -1876,6 +1923,11 @@ def cluster_dividends(df, column='div', threshold=7):
18761923
18771924 if pct_fail >= true_threshold :
18781925 div_status_df .loc [fc , c ] = True
1926+ if 'div_date_wrong' in div_status_df .columns :
1927+ # reset this as well
1928+ div_status_df .loc [fc , 'div_date_wrong' ] = False
1929+ div_status_df .loc [fc , 'div_true_date' ] = pd .NaT
1930+ cluster = div_status_df [fc ].sort_index ()
18791931 continue
18801932 elif pct_fail <= fals_threshold :
18811933 div_status_df .loc [fc , c ] = False
@@ -1961,6 +2013,11 @@ def cluster_dividends(df, column='div', threshold=7):
19612013 div_too_big = False
19622014 cluster .loc [dt , 'div_too_big' ] = False
19632015 n_failed_checks -= 1
2016+ if div_exceeds_adj :
2017+ # false-positive
2018+ div_exceeds_adj = False
2019+ cluster .loc [dt , 'div_exceeds_adj' ] = False
2020+ n_failed_checks -= 1
19642021
19652022 if div_pre_split :
19662023 if adj_exceeds_prices :
0 commit comments