Skip to content

Commit b640103

Browse files
committed
Price-repair-split-errors: stop zeroes, big-divs, and 100x errors triggering false split errors
1 parent 965d1d5 commit b640103

File tree

1 file changed

+39
-2
lines changed

1 file changed

+39
-2
lines changed

yfinance/base.py

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1291,11 +1291,25 @@ def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_v
12911291
# Avoid using 'Low' and 'High'. For multiday intervals, these can be
12921292
# very volatile, which reduces the ability to detect genuine stock split errors
12931293
_1d_change_x = np.full((n, 2), 1.0)
1294-
price_data = df2[['Open','Close']].replace(0.0, 1.0).to_numpy()
1294+
price_data = df2[['Open','Close']].to_numpy()
1295+
f_zero = price_data == 0.0
12951296
else:
12961297
_1d_change_x = np.full((n, 4), 1.0)
1297-
price_data = df2[OHLC].replace(0.0, 1.0).to_numpy()
1298+
price_data = df2[OHLC].to_numpy()
1299+
f_zero = price_data == 0.0
1300+
if f_zero.any():
1301+
price_data[f_zero] = 1.0
1302+
1303+
# Update: if a VERY large dividend is paid out, it can be mistaken for a 1:2 stock split.
1304+
# Fix = use adjusted prices
1305+
adj = df2['Adj Close'].to_numpy() / df2['Close'].to_numpy()
1306+
for j in range(price_data.shape[1]):
1307+
price_data[:,j] *= adj
1308+
12981309
_1d_change_x[1:] = price_data[1:, ] / price_data[:-1, ]
1310+
f_zero_num_denom = f_zero | np.roll(f_zero, 1, axis=0)
1311+
if f_zero_num_denom.any():
1312+
_1d_change_x[f_zero_num_denom] = 1.0
12991313
if interday and interval != '1d':
13001314
# average change
13011315
_1d_change_minx = np.average(_1d_change_x, axis=1)
@@ -1394,6 +1408,29 @@ def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_v
13941408
logger.info(f'price-repair-split: No {fix_type}s detected')
13951409
return df
13961410

1411+
# Update: if any 100x changes occur soon after a stock split, they could be confused with a split error, so abort
1412+
threshold_days = 30
1413+
f_splits = df['Stock Splits'].to_numpy() != 0.0
1414+
if change in [100.0, 0.01] and f_splits.any():
1415+
indices_A = np.where(f_splits)[0]
1416+
indices_B = np.where(f)[0]
1417+
if not len(indices_A) or not len(indices_B):
1418+
return None
1419+
gaps = indices_B[:, None] - indices_A
1420+
# Because the data is sorted in DEscending order, the gaps need to be flipped (negated)
1421+
gaps *= -1
1422+
f_pos = gaps > 0
1423+
if f_pos.any():
1424+
gap_min = gaps[f_pos].min()
1425+
gap_td = utils._interval_to_timedelta(interval) * gap_min
1426+
if isinstance(gap_td, _dateutil.relativedelta.relativedelta):
1427+
threshold = _dateutil.relativedelta.relativedelta(days=threshold_days)
1428+
else:
1429+
threshold = _datetime.timedelta(days=threshold_days)
1430+
if gap_td < threshold:
1431+
logger.info(f'price-repair-split: 100x changes are too soon after stock split events, aborting')
1432+
return df
1433+
13971434
# if logger.isEnabledFor(logging.DEBUG):
13981435
# df_debug['i'] = list(range(0, df_debug.shape[0]))
13991436
# df_debug['i_rev'] = df_debug.shape[0]-1 - df_debug['i']

0 commit comments

Comments
 (0)