@@ -1291,11 +1291,25 @@ def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_v
12911291 # Avoid using 'Low' and 'High'. For multiday intervals, these can be
12921292 # very volatile, which reduces the ability to detect genuine stock split errors
12931293 _1d_change_x = np .full ((n , 2 ), 1.0 )
1294- price_data = df2 [['Open' ,'Close' ]].replace (0.0 , 1.0 ).to_numpy ()
1294+ price_data = df2 [['Open' ,'Close' ]].to_numpy ()
1295+ f_zero = price_data == 0.0
12951296 else :
12961297 _1d_change_x = np .full ((n , 4 ), 1.0 )
1297- price_data = df2 [OHLC ].replace (0.0 , 1.0 ).to_numpy ()
1298+ price_data = df2 [OHLC ].to_numpy ()
1299+ f_zero = price_data == 0.0
1300+ if f_zero .any ():
1301+ price_data [f_zero ] = 1.0
1302+
1303+ # Update: if a VERY large dividend is paid out, the resulting price drop can be mistaken for a 1:2 stock split.
1304+ # Fix = use dividend-adjusted prices (scale each price column by 'Adj Close' / 'Close')
1305+ adj = df2 ['Adj Close' ].to_numpy () / df2 ['Close' ].to_numpy ()
1306+ for j in range (price_data .shape [1 ]):
1307+ price_data [:,j ] *= adj
1308+
12981309 _1d_change_x [1 :] = price_data [1 :, ] / price_data [:- 1 , ]
1310+ f_zero_num_denom = f_zero | np .roll (f_zero , 1 , axis = 0 )
1311+ if f_zero_num_denom .any ():
1312+ _1d_change_x [f_zero_num_denom ] = 1.0
12991313 if interday and interval != '1d' :
13001314 # average change
13011315 _1d_change_minx = np .average (_1d_change_x , axis = 1 )
@@ -1394,6 +1408,29 @@ def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_v
13941408 logger .info (f'price-repair-split: No { fix_type } s detected' )
13951409 return df
13961410
1411+ # Update: if any 100x change occurs soon after a stock split, it could be confused with a split error, so abort the repair
1412+ threshold_days = 30
1413+ f_splits = df ['Stock Splits' ].to_numpy () != 0.0
1414+ if change in [100.0 , 0.01 ] and f_splits .any ():
1415+ indices_A = np .where (f_splits )[0 ]
1416+ indices_B = np .where (f )[0 ]
1417+ if not len (indices_A ) or not len (indices_B ):
1418+ return None
1419+ gaps = indices_B [:, None ] - indices_A
1420+ # Because data is sorted in DEscending order, negate the gaps so that a positive gap means the change is after the split
1421+ gaps *= - 1
1422+ f_pos = gaps > 0
1423+ if f_pos .any ():
1424+ gap_min = gaps [f_pos ].min ()
1425+ gap_td = utils ._interval_to_timedelta (interval ) * gap_min
1426+ if isinstance (gap_td , _dateutil .relativedelta .relativedelta ):
1427+ threshold = _dateutil .relativedelta .relativedelta (days = threshold_days )
1428+ else :
1429+ threshold = _datetime .timedelta (days = threshold_days )
1430+ if gap_td < threshold :
1431+ logger .info (f'price-repair-split: 100x changes are too soon after stock split events, aborting' )
1432+ return df
1433+
13971434 # if logger.isEnabledFor(logging.DEBUG):
13981435 # df_debug['i'] = list(range(0, df_debug.shape[0]))
13991436 # df_debug['i_rev'] = df_debug.shape[0]-1 - df_debug['i']
0 commit comments