Skip to content

Commit 2ba5503

Browse files
committed
Fix merging events with intraday prices
If Yahoo returns intraday price data together with a dividend or stock-split event dated in the future, the merge breaks. The fix is to discard out-of-range events, on the assumption that a user requesting intraday data is not interested in events.
1 parent 6cae6d4 commit 2ba5503

File tree

1 file changed

+45
-36
lines changed

1 file changed

+45
-36
lines changed

yfinance/utils.py

Lines changed: 45 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -650,8 +650,10 @@ def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
650650

651651

652652
def safe_merge_dfs(df_main, df_sub, interval):
653-
if df_sub.shape[0] == 0:
653+
if df_sub.empty:
654654
raise Exception("No data to merge")
655+
if df_main.empty:
656+
return df_main
655657

656658
df_sub_backup = df_sub.copy()
657659
data_cols = [c for c in df_sub.columns if c not in df_main]
@@ -675,47 +677,54 @@ def safe_merge_dfs(df_main, df_sub, interval):
675677
else:
676678
indices = _np.searchsorted(_np.append(df_main.index, df_main.index[-1] + td), df_sub.index, side='right')
677679
indices -= 1 # Convert from [[i-1], [i]) to [[i], [i+1])
678-
# Numpy.searchsorted does not handle out-of-range well, so handle manually:
679-
for i in range(len(df_sub.index)):
680-
dt = df_sub.index[i]
681-
if dt < df_main.index[0] or dt >= df_main.index[-1] + td:
682-
# Out-of-range
683-
indices[i] = -1
680+
# Numpy.searchsorted does not handle out-of-range well, so handle manually:
681+
for i in range(len(df_sub.index)):
682+
dt = df_sub.index[i]
683+
if dt < df_main.index[0] or dt >= df_main.index[-1] + td:
684+
# Out-of-range
685+
indices[i] = -1
684686

685687
f_outOfRange = indices == -1
686-
if f_outOfRange.any() and not intraday:
687-
empty_row_data = {c:[_np.nan] for c in const.price_colnames}|{'Volume':[0]}
688-
if interval == '1d':
689-
# For 1d, add all out-of-range event dates
690-
for i in _np.where(f_outOfRange)[0]:
691-
dt = df_sub.index[i]
692-
get_yf_logger().debug(f"Adding out-of-range {data_col} @ {dt.date()} in new prices row of NaNs")
693-
empty_row = _pd.DataFrame(data=empty_row_data, index=[dt])
694-
df_main = _pd.concat([df_main, empty_row], sort=True)
688+
if f_outOfRange.any():
689+
if intraday:
690+
# Discard out-of-range dividends in intraday data, assume user not interested
691+
df_sub = df_sub[~f_outOfRange]
692+
if df_sub.empty:
693+
df_main['Dividends'] = 0.0
694+
return df_main
695695
else:
696-
# Else, only add out-of-range event dates if occurring in interval
697-
# immediately after last pricfe row
698-
last_dt = df_main.index[-1]
699-
next_interval_start_dt = last_dt + td
700-
next_interval_end_dt = next_interval_start_dt + td
701-
for i in _np.where(f_outOfRange)[0]:
702-
dt = df_sub.index[i]
703-
if next_interval_start_dt <= dt < next_interval_end_dt:
704-
new_dt = next_interval_start_dt
696+
empty_row_data = {c:[_np.nan] for c in const.price_colnames}|{'Volume':[0]}
697+
if interval == '1d':
698+
# For 1d, add all out-of-range event dates
699+
for i in _np.where(f_outOfRange)[0]:
700+
dt = df_sub.index[i]
705701
get_yf_logger().debug(f"Adding out-of-range {data_col} @ {dt.date()} in new prices row of NaNs")
706702
empty_row = _pd.DataFrame(data=empty_row_data, index=[dt])
707703
df_main = _pd.concat([df_main, empty_row], sort=True)
708-
df_main = df_main.sort_index()
709-
710-
# Re-calculate indices
711-
indices = _np.searchsorted(_np.append(df_main.index, df_main.index[-1] + td), df_sub.index, side='right')
712-
indices -= 1 # Convert from [[i-1], [i]) to [[i], [i+1])
713-
# Numpy.searchsorted does not handle out-of-range well, so handle manually:
714-
for i in range(len(df_sub.index)):
715-
dt = df_sub.index[i]
716-
if dt < df_main.index[0] or dt >= df_main.index[-1] + td:
717-
# Out-of-range
718-
indices[i] = -1
704+
else:
705+
# Else, only add out-of-range event dates if occurring in interval
706+
# immediately after last price row
707+
last_dt = df_main.index[-1]
708+
next_interval_start_dt = last_dt + td
709+
next_interval_end_dt = next_interval_start_dt + td
710+
for i in _np.where(f_outOfRange)[0]:
711+
dt = df_sub.index[i]
712+
if next_interval_start_dt <= dt < next_interval_end_dt:
713+
new_dt = next_interval_start_dt
714+
get_yf_logger().debug(f"Adding out-of-range {data_col} @ {dt.date()} in new prices row of NaNs")
715+
empty_row = _pd.DataFrame(data=empty_row_data, index=[dt])
716+
df_main = _pd.concat([df_main, empty_row], sort=True)
717+
df_main = df_main.sort_index()
718+
719+
# Re-calculate indices
720+
indices = _np.searchsorted(_np.append(df_main.index, df_main.index[-1] + td), df_sub.index, side='right')
721+
indices -= 1 # Convert from [[i-1], [i]) to [[i], [i+1])
722+
# Numpy.searchsorted does not handle out-of-range well, so handle manually:
723+
for i in range(len(df_sub.index)):
724+
dt = df_sub.index[i]
725+
if dt < df_main.index[0] or dt >= df_main.index[-1] + td:
726+
# Out-of-range
727+
indices[i] = -1
719728

720729
f_outOfRange = indices == -1
721730
if f_outOfRange.any():

0 commit comments

Comments
 (0)