@@ -1426,7 +1426,7 @@ def _fix_bad_div_adjust(self, df, interval, currency):
14261426 typical_volatility = np .nan
14271427 else :
14281428 diffs = df2 ['Close' ].iloc [start :end - 1 ].to_numpy () - df2 ['Low' ].iloc [start + 1 :end ].to_numpy ()
1429- typical_volatility = np .median (np .abs (diffs ))
1429+ typical_volatility = np .mean (np .abs (diffs ))
14301430
14311431 possibilities = []
14321432 if (drops == 0.0 ).all () and df2 ['Volume' ].iloc [div_idx ]== 0 :
@@ -1681,10 +1681,6 @@ def cluster_dividends(df, column='div', threshold=7):
16811681 div_status_df .loc [phantom_div_dt , c ] = False
16821682 checks .append ('phantom' )
16831683
1684- if not div_status_df [checks ].any ().any ():
1685- # Perfect
1686- return df
1687-
16881684 # Remove phantoms early
16891685 if 'phantom' in div_status_df .columns :
16901686 f_phantom = div_status_df ['phantom' ]
@@ -1709,6 +1705,29 @@ def cluster_dividends(df, column='div', threshold=7):
17091705 if 'phantom' in checks :
17101706 checks .remove ('phantom' )
17111707
1708+ if not div_status_df [checks ].any ().any ():
1709+ # Maybe failed to detect a too-small div. If div is ~0.01x of previous and next, then
1710+ # treat as a 0.01x error
1711+ if len (div_status_df ) > 1 :
1712+ for i in range (0 , len (div_status_df )):
1713+ r_pre , r_post = None , None
1714+ if i > 0 :
1715+ r_pre = div_status_df ['%' ].iloc [i - 1 ] / div_status_df ['%' ].iloc [i ]
1716+ if i < (len (div_status_df )- 1 ):
1717+ r_post = div_status_df ['%' ].iloc [i + 1 ] / div_status_df ['%' ].iloc [i ]
1718+ r_pre = r_pre or r_post
1719+ r_post = r_post or r_pre
1720+ if abs (r_pre - currency_divide )< 20 and abs (r_post - currency_divide )< 20 :
1721+ div_dt = div_status_df .index [i ]
1722+ div_status_df .loc [div_dt , 'div_too_small' ] = True
1723+
1724+ if not div_status_df [checks ].any ().any ():
1725+ # Perfect
1726+ if df_modified :
1727+ return df2
1728+ else :
1729+ return df
1730+
17121731 # Check if the present div-adjustment contradicts price action
17131732 for i in range (len (div_status_df )):
17141733 div_idx = div_status_df ['idx' ].iloc [i ]
@@ -1789,7 +1808,8 @@ def cluster_dividends(df, column='div', threshold=7):
17891808 elif adjDelta_drop > 0.39 * adjDiv :
17901809 # Still true that applied adjustment exceeds price action,
17911810 # just not clear what solution is (if any).
1792- div_adj_exceeds_prices = True
1811+ if (x ['Adj' ]< 1.0 ).any ():
1812+ div_adj_exceeds_prices = True
17931813 break
17941814
17951815 # Can prune the space:
@@ -1843,22 +1863,6 @@ def cluster_dividends(df, column='div', threshold=7):
18431863
18441864 checks += ['adj_exceeds_prices' , 'div_date_wrong' ]
18451865
1846- if not div_status_df [checks ].any ().any ():
1847- # Maybe failed to detect a too-small div. If div is ~0.01x of previous and next, then
1848- # treat as a 0.01x error
1849- if len (div_status_df ) > 1 :
1850- for i in range (0 , len (div_status_df )):
1851- r_pre , r_post = None , None
1852- if i > 0 :
1853- r_pre = div_status_df ['%' ].iloc [i - 1 ] / div_status_df ['%' ].iloc [i ]
1854- if i < (len (div_status_df )- 1 ):
1855- r_post = div_status_df ['%' ].iloc [i + 1 ] / div_status_df ['%' ].iloc [i ]
1856- r_pre = r_pre or r_post
1857- r_post = r_post or r_pre
1858- if abs (r_pre - currency_divide )< 20 and abs (r_post - currency_divide )< 20 :
1859- div_dt = div_status_df .index [i ]
1860- div_status_df .loc [div_dt , 'div_too_small' ] = True
1861-
18621866 for c in checks :
18631867 if not div_status_df [c ].any ():
18641868 div_status_df = div_status_df .drop (c , axis = 1 )
@@ -1887,11 +1891,16 @@ def cluster_dividends(df, column='div', threshold=7):
18871891 div_pcts ['avg yr yield' ] = div_pcts ['%' ] / div_pcts ['period' ]
18881892
18891893 for c in checks :
1894+ if not cluster [c ].to_numpy ().any ():
1895+ cluster = cluster .drop (c , axis = 1 )
1896+ cluster_checks = [c for c in checks if c in cluster .columns ]
1897+
1898+ for c in cluster_checks :
18901899 f_fail = cluster [c ].to_numpy ()
18911900 n_fail = np .sum (f_fail )
18921901 if n_fail in [0 , n ]:
18931902 continue
1894- pct_fail = np . sum ( f_fail ) / n
1903+ pct_fail = n_fail / n
18951904 if c == 'div_too_big' :
18961905 true_threshold = 1.0
18971906 fals_threshold = 0.2
@@ -1900,7 +1909,16 @@ def cluster_dividends(df, column='div', threshold=7):
19001909 continue
19011910
19021911 if 'adj_exceeds_prices' in cluster .columns and (cluster [c ] == (cluster [c ] & cluster ['adj_exceeds_prices' ])).all ():
1903- # More likely that true-positive. Maybe the div never happened
1912+ # Treat div_too_big=False as false positives IFF adj_exceeds_prices=true AND
1913+ # true ratio above (lowered) threshold.
1914+ true_threshold = 0.5
1915+ f_adj_exceeds_prices = cluster ['adj_exceeds_prices' ].to_numpy ()
1916+ n = np .sum (f_adj_exceeds_prices )
1917+ n_fail = np .sum (f_fail [f_adj_exceeds_prices ])
1918+ pct_fail = n_fail / n
1919+ if pct_fail > true_threshold :
1920+ f = fc & div_status_df ['adj_exceeds_prices' ].to_numpy ()
1921+ div_status_df .loc [f , c ] = True
19041922 continue
19051923
19061924 if 'div_exceeds_adj' in cluster .columns and cluster ['div_exceeds_adj' ].all ():
0 commit comments