Skip to content

Commit d3dfb4c

Browse files
committed
Fix merging events with intraday prices
If Yahoo returns intraday price data together with a dividend or stock-split event dated in the future, the merge breaks. The fix is to discard out-of-range events, on the assumption that a user requesting intraday data is not interested in events.
1 parent 6cae6d4 commit d3dfb4c

File tree

2 files changed

+82
-36
lines changed

2 files changed

+82
-36
lines changed

tests/prices.py

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -114,6 +114,43 @@ def test_duplicatingWeekly(self):
114114
if not test_run:
115115
self.skipTest("Skipping test_duplicatingWeekly() because not possible to fail Monday/weekend")
116116

117+
def test_pricesEventsMerge(self):
118+
# Test case: dividend occurs after last row in price data
119+
tkr = 'INTC'
120+
start_d = _dt.date(2022, 1, 1)
121+
end_d = _dt.date(2023, 1, 1)
122+
df = yf.Ticker(tkr, session=self.session).history(interval='1d', start=start_d, end=end_d)
123+
div = 1.0
124+
future_div_dt = df.index[-1] + _dt.timedelta(days=1)
125+
if future_div_dt.weekday() in [5, 6]:
126+
future_div_dt += _dt.timedelta(days=1) * (7 - future_div_dt.weekday())
127+
divs = _pd.DataFrame(data={"Dividends":[div]}, index=[future_div_dt])
128+
df2 = yf.utils.safe_merge_dfs(df.drop(['Dividends', 'Stock Splits'], axis=1), divs, '1d')
129+
self.assertIn(future_div_dt, df2.index)
130+
self.assertIn("Dividends", df2.columns)
131+
self.assertEqual(df2['Dividends'].iloc[-1], div)
132+
133+
def test_pricesEventsMerge_bug(self):
134+
# Reproduce exception when merging intraday prices with future dividend
135+
tkr = 'S32.AX'
136+
interval = '30m'
137+
df_index = []
138+
d = 13
139+
for h in range(0, 16):
140+
for m in [0, 30]:
141+
df_index.append(_dt.datetime(2023, 9, d, h, m))
142+
df_index.append(_dt.datetime(2023, 9, d, 16))
143+
df = _pd.DataFrame(index=df_index)
144+
df.index = _pd.to_datetime(df.index)
145+
df['Close'] = 1.0
146+
147+
div = 1.0
148+
future_div_dt = _dt.datetime(2023, 9, 14, 10)
149+
divs = _pd.DataFrame(data={"Dividends":[div]}, index=[future_div_dt])
150+
151+
df2 = yf.utils.safe_merge_dfs(df, divs, interval)
152+
# No exception = test pass
153+
117154
def test_intraDayWithEvents(self):
118155
tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]
119156
test_run = False

yfinance/utils.py

Lines changed: 45 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -650,8 +650,10 @@ def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
650650

651651

652652
def safe_merge_dfs(df_main, df_sub, interval):
653-
if df_sub.shape[0] == 0:
653+
if df_sub.empty:
654654
raise Exception("No data to merge")
655+
if df_main.empty:
656+
return df_main
655657

656658
df_sub_backup = df_sub.copy()
657659
data_cols = [c for c in df_sub.columns if c not in df_main]
@@ -675,47 +677,54 @@ def safe_merge_dfs(df_main, df_sub, interval):
675677
else:
676678
indices = _np.searchsorted(_np.append(df_main.index, df_main.index[-1] + td), df_sub.index, side='right')
677679
indices -= 1 # Convert from [[i-1], [i]) to [[i], [i+1])
678-
# Numpy.searchsorted does not handle out-of-range well, so handle manually:
679-
for i in range(len(df_sub.index)):
680-
dt = df_sub.index[i]
681-
if dt < df_main.index[0] or dt >= df_main.index[-1] + td:
682-
# Out-of-range
683-
indices[i] = -1
680+
# Numpy.searchsorted does not handle out-of-range well, so handle manually:
681+
for i in range(len(df_sub.index)):
682+
dt = df_sub.index[i]
683+
if dt < df_main.index[0] or dt >= df_main.index[-1] + td:
684+
# Out-of-range
685+
indices[i] = -1
684686

685687
f_outOfRange = indices == -1
686-
if f_outOfRange.any() and not intraday:
687-
empty_row_data = {c:[_np.nan] for c in const.price_colnames}|{'Volume':[0]}
688-
if interval == '1d':
689-
# For 1d, add all out-of-range event dates
690-
for i in _np.where(f_outOfRange)[0]:
691-
dt = df_sub.index[i]
692-
get_yf_logger().debug(f"Adding out-of-range {data_col} @ {dt.date()} in new prices row of NaNs")
693-
empty_row = _pd.DataFrame(data=empty_row_data, index=[dt])
694-
df_main = _pd.concat([df_main, empty_row], sort=True)
688+
if f_outOfRange.any():
689+
if intraday:
690+
# Discard out-of-range dividends in intraday data, assume user not interested
691+
df_sub = df_sub[~f_outOfRange]
692+
if df_sub.empty:
693+
df_main['Dividends'] = 0.0
694+
return df_main
695695
else:
696-
# Else, only add out-of-range event dates if occurring in interval
697-
# immediately after last pricfe row
698-
last_dt = df_main.index[-1]
699-
next_interval_start_dt = last_dt + td
700-
next_interval_end_dt = next_interval_start_dt + td
701-
for i in _np.where(f_outOfRange)[0]:
702-
dt = df_sub.index[i]
703-
if next_interval_start_dt <= dt < next_interval_end_dt:
704-
new_dt = next_interval_start_dt
696+
empty_row_data = {c:[_np.nan] for c in const.price_colnames}|{'Volume':[0]}
697+
if interval == '1d':
698+
# For 1d, add all out-of-range event dates
699+
for i in _np.where(f_outOfRange)[0]:
700+
dt = df_sub.index[i]
705701
get_yf_logger().debug(f"Adding out-of-range {data_col} @ {dt.date()} in new prices row of NaNs")
706702
empty_row = _pd.DataFrame(data=empty_row_data, index=[dt])
707703
df_main = _pd.concat([df_main, empty_row], sort=True)
708-
df_main = df_main.sort_index()
709-
710-
# Re-calculate indices
711-
indices = _np.searchsorted(_np.append(df_main.index, df_main.index[-1] + td), df_sub.index, side='right')
712-
indices -= 1 # Convert from [[i-1], [i]) to [[i], [i+1])
713-
# Numpy.searchsorted does not handle out-of-range well, so handle manually:
714-
for i in range(len(df_sub.index)):
715-
dt = df_sub.index[i]
716-
if dt < df_main.index[0] or dt >= df_main.index[-1] + td:
717-
# Out-of-range
718-
indices[i] = -1
704+
else:
705+
# Else, only add out-of-range event dates if occurring in interval
706+
# immediately after last price row
707+
last_dt = df_main.index[-1]
708+
next_interval_start_dt = last_dt + td
709+
next_interval_end_dt = next_interval_start_dt + td
710+
for i in _np.where(f_outOfRange)[0]:
711+
dt = df_sub.index[i]
712+
if next_interval_start_dt <= dt < next_interval_end_dt:
713+
new_dt = next_interval_start_dt
714+
get_yf_logger().debug(f"Adding out-of-range {data_col} @ {dt.date()} in new prices row of NaNs")
715+
empty_row = _pd.DataFrame(data=empty_row_data, index=[dt])
716+
df_main = _pd.concat([df_main, empty_row], sort=True)
717+
df_main = df_main.sort_index()
718+
719+
# Re-calculate indices
720+
indices = _np.searchsorted(_np.append(df_main.index, df_main.index[-1] + td), df_sub.index, side='right')
721+
indices -= 1 # Convert from [[i-1], [i]) to [[i], [i+1])
722+
# Numpy.searchsorted does not handle out-of-range well, so handle manually:
723+
for i in range(len(df_sub.index)):
724+
dt = df_sub.index[i]
725+
if dt < df_main.index[0] or dt >= df_main.index[-1] + td:
726+
# Out-of-range
727+
indices[i] = -1
719728

720729
f_outOfRange = indices == -1
721730
if f_outOfRange.any():

0 commit comments

Comments
 (0)