Commit 8daa477

Merge pull request #2090 from ranaroussi/feature/divident-repair-improve
Feature/dividend repair improve
2 parents ee657b2 + 3152715 commit 8daa477

yfinance/scrapers/history.py

Lines changed: 70 additions & 13 deletions
@@ -3,6 +3,7 @@
 import logging
 import numpy as np
 import pandas as pd
+from math import isclose
 import time as _time
 import bisect
 
@@ -1376,9 +1377,26 @@ def _fix_bad_div_adjust(self, df, interval, currency):
 # div_too_big_improvement_threshold = 1
 div_too_big_improvement_threshold = 2
 
-drop_c2l = df2['Close'].iloc[div_idx-1] - df2['Low'].iloc[div_idx]
-# drop_c2c = df2['Close'].iloc[div_idx-1] - df2['Close'].iloc[div_idx]
-# drop = drop_c2c
+if isclose(df2['Low'].iloc[div_idx], df2['Close'].iloc[div_idx-1]*100, rel_tol = 0.025):
+    # Price has jumped ~100x on ex-div day, need to fix immediately.
+    drop_c2l = df2['Close'].iloc[div_idx-1]*100 - df2['Low'].iloc[div_idx]
+    div_pct = div / (df2['Close'].iloc[div_idx-1]*100)
+    true_adjust = 1.0 - div / (df2['Close'].iloc[div_idx-1]*100)
+    present_adj = df2['Adj Close'].iloc[div_idx-1] / df2['Close'].iloc[div_idx-1]
+    if not isclose(present_adj, true_adjust, rel_tol = 0.025):
+        df2.loc[:dt-_datetime.timedelta(seconds=1), 'Adj Close'] = true_adjust * df2['Close'].loc[:dt-_datetime.timedelta(seconds=1)]
+        df2.loc[:dt-_datetime.timedelta(seconds=1), 'Repaired?'] = True
+elif isclose(df2['Low'].iloc[div_idx], df2['Close'].iloc[div_idx-1]*0.01, rel_tol = 0.025):
+    # Price has dropped ~100x on ex-div day, need to fix immediately.
+    drop_c2l = df2['Close'].iloc[div_idx-1]*0.01 - df2['Low'].iloc[div_idx]
+    div_pct = div / (df2['Close'].iloc[div_idx-1]*0.01)
+    true_adjust = 1.0 - div / (df2['Close'].iloc[div_idx-1]*100)
+    present_adj = df2['Adj Close'].iloc[div_idx-1] / df2['Close'].iloc[div_idx-1]
+    if not isclose(present_adj, true_adjust, rel_tol = 0.025):
+        df2.loc[:dt-_datetime.timedelta(seconds=1), 'Adj Close'] = true_adjust * df2['Close'].loc[:dt-_datetime.timedelta(seconds=1)]
+        df2.loc[:dt-_datetime.timedelta(seconds=1), 'Repaired?'] = True
+else:
+    drop_c2l = df2['Close'].iloc[div_idx-1] - df2['Low'].iloc[div_idx]
 drop = drop_c2l
 if div_idx < len(df2)-1:
     # # In low-volume scenarios, the price drop is day after not today.
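
A rough standalone sketch of the scale-glitch check above (a hypothetical helper, not the commit's code, which repairs df2 in place): math.isclose with rel_tol=0.025 catches an ex-div bar quoted ~100x too high or too low against the prior close, and the dividend adjustment factor is then recomputed against the rescaled close.

    from math import isclose

    def detect_scale_glitch(prev_close, low, div, rel_tol=0.025):
        # Hypothetical helper: if the ex-div bar's low sits ~100x above or below
        # the prior close, return the apparent scale and the recomputed
        # adjustment factor 1 - div / (prev_close * scale); otherwise scale is 1.
        for scale in (100.0, 0.01):
            if isclose(low, prev_close * scale, rel_tol=rel_tol):
                return scale, 1.0 - div / (prev_close * scale)
        return 1.0, 1.0 - div / prev_close

    # e.g. a $0.50 dividend on an ex-div bar accidentally quoted ~100x too high:
    print(detect_scale_glitch(prev_close=20.0, low=1985.0, div=0.5))  # (100.0, 0.99975)
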
@@ -1388,8 +1406,10 @@ def _fix_bad_div_adjust(self, df, interval, currency):
     # elif df2['Volume'].iloc[div_idx]==0:
     # if drop == 0.0:
     # drop = np.max(df2['Close'].iloc[div_idx-1:div_idx+1].to_numpy() - df2['Low'].iloc[div_idx:div_idx+2].to_numpy())
+    #
     # Hmm, can I always look ahead 1 day? Catch: increases FP rate of div-too-small for tiny divs.
-    drops = df2['Close'].iloc[div_idx-1:div_idx+1].to_numpy() - df2['Low'].iloc[div_idx:div_idx+2].to_numpy()
+    # drops = df2['Close'].iloc[div_idx-1:div_idx+1].to_numpy() - df2['Low'].iloc[div_idx:div_idx+2].to_numpy()
+    drops = np.array([drop, df2['Close'].iloc[div_idx] - df2['Low'].iloc[div_idx+1]])
     drop_2Dmax = np.max(drops)
 else:
     drops = np.array([drop])
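
To illustrate the reworked two-day drop with made-up numbers (a sketch, not the commit's code): the candidates are now the already-computed close-to-low drop on the ex-div day plus the drop from the ex-div close to the next day's low, and the larger of the two becomes drop_2Dmax.

    import numpy as np

    # Hypothetical bars; index 1 is the ex-div day
    close = np.array([50.0, 49.0, 48.0])
    low = np.array([49.5, 48.75, 48.5])

    drop = close[0] - low[1]                     # 1.25, close-to-low on the ex-div day
    drops = np.array([drop, close[1] - low[2]])  # plus the drop into the following day
    drop_2Dmax = np.max(drops)                   # 1.25
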
@@ -1728,21 +1748,27 @@ def cluster_dividends(df, column='div', threshold=7):
 adjDeltas = x['Adj Low'].iloc[1:].to_numpy() - x['Adj Close'].iloc[:-1].to_numpy()
 adjDeltas = np.append([0.0], adjDeltas)
 x['adjDelta'] = adjDeltas
+for i in np.where(x['Dividends']>0)[0]:
+    x.loc[x.index[i], 'adjDelta'] += x['Dividends'].iloc[i]*x['Adj'].iloc[i]
 deltas = x[['delta', 'adjDelta']]
-if div_pct > 0.15 and div_pct < 1.0: # avoid analysing impossibly-big dividends here
+if div_pct > 0.05 and div_pct < 1.0:
     adjDiv = div * x['Adj'].iloc[0]
     f = deltas['adjDelta'] > (adjDiv*0.6)
     if f.any():
-        for idx in np.where(f)[0]:
-            adjDelta_max_drop_idx = idx
-            adjDelta_max_drop = deltas['adjDelta'].iloc[idx]
-            if adjDelta_max_drop > 1.001*deltas['delta'].iloc[adjDelta_max_drop_idx]:
+        indices = np.where(f)[0]
+        for idx in indices:
+            adjDelta_drop = deltas['adjDelta'].iloc[idx]
+            if adjDelta_drop > 1.001*deltas['delta'].iloc[idx]:
                 # Adjusted price has risen by more than unadjusted, should not happen.
                 # See if Adjusted price later falls by a similar amount. This would mean
                 # dividend has been applied too early.
-                ratios = (-1*deltas['adjDelta'])/adjDelta_max_drop
+                ratios = (-1*deltas['adjDelta'])/adjDelta_drop
                 f_near1_or_above = ratios>=0.8
-                if f_near1_or_above.any():
+                # Update: only check for wrong date if no coincident split.
+                # Because if a split, more likely the div is missing split
+                split = df2['Stock Splits'].loc[dt]
+                pre_split = div_status_df['div_pre_split'].loc[dt]
+                if (split==0.0 or (not pre_split)) and f_near1_or_above.any():
                     near_indices = np.where(f_near1_or_above)[0]
                     if len(near_indices) > 1:
                         penalties = np.zeros(len(near_indices))
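
A minimal, self-contained sketch of the adjDelta bookkeeping added above (synthetic numbers, column names as in the hunk): the dividend, scaled into adjusted units via the 'Adj' factor, is folded back into the ex-div row's delta before the later adjDelta > 0.6*adjDiv comparison.

    import numpy as np
    import pandas as pd

    # Hypothetical window around a $1 dividend paid on the middle row
    x = pd.DataFrame({
        'Adj Close': [99.0, 98.5, 98.7],
        'Adj Low':   [98.5, 98.0, 98.2],
        'Adj':       [0.99, 1.0,  1.0],
        'Dividends': [0.0,  1.0,  0.0],
    })

    # Adjusted move: today's low vs yesterday's close, first row padded with 0
    adjDeltas = x['Adj Low'].iloc[1:].to_numpy() - x['Adj Close'].iloc[:-1].to_numpy()
    x['adjDelta'] = np.append([0.0], adjDeltas)

    # Fold the dividend (in adjusted units) back into the ex-div row
    for i in np.where(x['Dividends'] > 0)[0]:
        x.loc[x.index[i], 'adjDelta'] += x['Dividends'].iloc[i] * x['Adj'].iloc[i]

    print(x['adjDelta'].to_numpy())  # ex-div row: -1.0 + 1.0*1.0 = 0.0
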
@@ -1760,7 +1786,7 @@ def cluster_dividends(df, column='div', threshold=7):
                         div_date_wrong = True
                         div_true_date = ratios.index[reversal_idx]
                         break
-            elif adjDelta_max_drop > 0.39*adjDiv:
+            elif adjDelta_drop > 0.39*adjDiv:
                 # Still true that applied adjustment exceeds price action,
                 # just not clear what solution is (if any).
                 div_adj_exceeds_prices = True
@@ -1777,6 +1803,27 @@ def cluster_dividends(df, column='div', threshold=7):
 div_status['div_date_wrong'] = div_date_wrong
 div_status['div_true_date'] = div_true_date
 
+if div_adj_exceeds_prices:
+    split = df2['Stock Splits'].loc[dt]
+    if split != 0.0:
+        # Check again if div missing split. Use looser tolerance
+        # as we know the adjustment seems wrong.
+        div_postSplit = div / split
+        if div_postSplit > div:
+            # Use volatility-adjusted drop
+            typical_volatility = div_status_df['vol'].loc[dt]
+            drop = div_status_df['drop'].loc[dt]
+            _drop = drop - typical_volatility
+        else:
+            drop_2Dmax = div_status_df['drop_2Dmax'].loc[dt]
+            _drop = drop_2Dmax
+        if _drop > 0:
+            diff = abs(div-_drop)
+            diff_postSplit = abs(div_postSplit-_drop)
+            if diff_postSplit <= (diff*1.1):
+                # possibilities.append({'state':'div-pre-split', 'diff':diff_postSplit})
+                div_status_df.loc[dt, 'div_pre_split'] = True
+
 for k,v in div_status.items():
     if k not in div_status_df:
         if isinstance(v, (bool, np.bool_)):
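
The arithmetic of the new div_pre_split re-check, with made-up numbers (a sketch, not the commit's code): when a split lands on the ex-div date, the observed drop is compared against both the reported dividend and the dividend divided by the split; if the post-split dividend fits the drop at least as well (within the 10% slack), the dividend is flagged as pre-split.

    # Hypothetical: a 2:1 split on the ex-div date, dividend reported pre-split
    div, split = 1.0, 2.0
    drop_2Dmax = 0.52            # observed two-day max drop

    div_pre_split = False
    div_postSplit = div / split  # 0.5
    _drop = drop_2Dmax           # div_postSplit < div here, so no volatility adjustment
    if _drop > 0:
        diff = abs(div - _drop)                      # ~0.48
        diff_postSplit = abs(div_postSplit - _drop)  # ~0.02
        div_pre_split = diff_postSplit <= diff * 1.1
    print(div_pre_split)  # True: the drop matches the post-split dividend better
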
@@ -1852,7 +1899,7 @@ def cluster_dividends(df, column='div', threshold=7):
 if 'div_date_wrong' in cluster.columns and (cluster[c] == cluster['div_date_wrong']).all():
     continue
 
-if 'adj_exceeds_prices' in cluster.columns and (cluster[c] == cluster['adj_exceeds_prices']).all():
+if 'adj_exceeds_prices' in cluster.columns and (cluster[c] == (cluster[c] & cluster['adj_exceeds_prices'])).all():
     # More likely that true-positive. Maybe the div never happened
     continue
 
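
The reworked condition above turns an exact-match test into an implication test: the check may be True only on rows where adj_exceeds_prices is also True, while adj_exceeds_prices may be True elsewhere. A toy illustration (values made up):

    import pandas as pd

    c_flags = pd.Series([True, False, True])    # rows where check c fired
    adj_flags = pd.Series([True, True, True])   # rows where adj_exceeds_prices fired

    print((c_flags == adj_flags).all())              # old test: False (not identical)
    print((c_flags == (c_flags & adj_flags)).all())  # new test: True (c implies adj)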

@@ -1876,6 +1923,11 @@ def cluster_dividends(df, column='div', threshold=7):
 
 if pct_fail >= true_threshold:
     div_status_df.loc[fc, c] = True
+    if 'div_date_wrong' in div_status_df.columns:
+        # reset this as well
+        div_status_df.loc[fc, 'div_date_wrong'] = False
+        div_status_df.loc[fc, 'div_true_date'] = pd.NaT
+        cluster = div_status_df[fc].sort_index()
     continue
 elif pct_fail <= fals_threshold:
     div_status_df.loc[fc, c] = False
@@ -1961,6 +2013,11 @@ def cluster_dividends(df, column='div', threshold=7):
 div_too_big = False
 cluster.loc[dt, 'div_too_big'] = False
 n_failed_checks -= 1
+if div_exceeds_adj:
+    # false-positive
+    div_exceeds_adj = False
+    cluster.loc[dt, 'div_exceeds_adj'] = False
+    n_failed_checks -= 1
 
 if div_pre_split:
     if adj_exceeds_prices:
