Skip to content

Commit 683064f

Browse files
authored
Merge pull request #1849 from ranaroussi/refactor/price-history
2 parents 97f93d3 + cdf897f commit 683064f

File tree

8 files changed

+1690
-1639
lines changed

8 files changed

+1690
-1639
lines changed

tests/prices.py

Lines changed: 37 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ def test_dailyWithEvents_bugs(self):
277277
# Reproduce issue #1634 - 1d dividend out-of-range, should be prepended to prices
278278
div_dt = _pd.Timestamp(2022, 7, 21).tz_localize("America/New_York")
279279
df_dividends = _pd.DataFrame(data={"Dividends":[1.0]}, index=[div_dt])
280-
df_prices = _pd.DataFrame(data={c:[1.0] for c in yf.const.price_colnames}|{'Volume':0}, index=[div_dt+_dt.timedelta(days=1)])
280+
df_prices = _pd.DataFrame(data={c:[1.0] for c in yf.const._PRICE_COLNAMES_}|{'Volume':0}, index=[div_dt+_dt.timedelta(days=1)])
281281
df_merged = yf.utils.safe_merge_dfs(df_prices, df_dividends, '1d')
282282
self.assertEqual(df_merged.shape[0], 2)
283283
self.assertTrue(df_merged[df_prices.columns].iloc[1:].equals(df_prices))
@@ -470,6 +470,18 @@ def tearDownClass(cls):
470470
if cls.session is not None:
471471
cls.session.close()
472472

473+
def test_types(self):
    """Check that fetched and reconstructed price data are non-empty DataFrames.

    Fetches 3 months of daily INTC prices (pre/post included, repair enabled),
    then passes the result through ``_reconstruct_intervals_batch`` on the
    ticker's lazily-loaded price-history object and validates both frames.
    """
    tkr = 'INTC'
    dat = yf.Ticker(tkr, session=self.session)

    data = dat.history(period="3mo", interval="1d", prepost=True, repair=True)
    self.assertIsInstance(data, _pd.DataFrame, "data has wrong type")
    self.assertFalse(data.empty, "data is empty")

    reconstructed = dat._lazy_load_price_history()._reconstruct_intervals_batch(data, "1wk", True)
    self.assertIsInstance(reconstructed, _pd.DataFrame, "data has wrong type")
    # Assert on the reconstructed frame; `data` was already checked above,
    # so re-checking it here would leave the reconstruction result untested.
    self.assertFalse(reconstructed.empty, "data is empty")
484+
473485
def test_reconstruct_2m(self):
474486
# 2m repair requires 1m data.
475487
# Yahoo restricts 1m fetches to 7 days max within last 30 days.
@@ -494,6 +506,7 @@ def test_repair_100x_random_weekly(self):
494506
tkr = "PNL.L"
495507
dat = yf.Ticker(tkr, session=self.session)
496508
tz_exchange = dat.fast_info["timezone"]
509+
hist = dat._lazy_load_price_history()
497510

498511
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
499512
df = _pd.DataFrame(data={"Open": [470.5, 473.5, 474.5, 470],
@@ -517,7 +530,7 @@ def test_repair_100x_random_weekly(self):
517530

518531
# Run test
519532

520-
df_repaired = dat._fix_unit_random_mixups(df_bad, "1wk", tz_exchange, prepost=False)
533+
df_repaired = hist._fix_unit_random_mixups(df_bad, "1wk", tz_exchange, prepost=False)
521534

522535
# First test - no errors left
523536
for c in data_cols:
@@ -548,6 +561,7 @@ def test_repair_100x_random_weekly_preSplit(self):
548561
tkr = "PNL.L"
549562
dat = yf.Ticker(tkr, session=self.session)
550563
tz_exchange = dat.fast_info["timezone"]
564+
hist = dat._lazy_load_price_history()
551565

552566
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
553567
df = _pd.DataFrame(data={"Open": [400, 398, 392.5, 417],
@@ -574,7 +588,7 @@ def test_repair_100x_random_weekly_preSplit(self):
574588
df.index = df.index.tz_localize(tz_exchange)
575589
df_bad.index = df_bad.index.tz_localize(tz_exchange)
576590

577-
df_repaired = dat._fix_unit_random_mixups(df_bad, "1wk", tz_exchange, prepost=False)
591+
df_repaired = hist._fix_unit_random_mixups(df_bad, "1wk", tz_exchange, prepost=False)
578592

579593
# First test - no errors left
580594
for c in data_cols:
@@ -606,6 +620,7 @@ def test_repair_100x_random_daily(self):
606620
tkr = "PNL.L"
607621
dat = yf.Ticker(tkr, session=self.session)
608622
tz_exchange = dat.fast_info["timezone"]
623+
hist = dat._lazy_load_price_history()
609624

610625
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
611626
df = _pd.DataFrame(data={"Open": [478, 476, 476, 472],
@@ -627,7 +642,7 @@ def test_repair_100x_random_daily(self):
627642
df.index = df.index.tz_localize(tz_exchange)
628643
df_bad.index = df_bad.index.tz_localize(tz_exchange)
629644

630-
df_repaired = dat._fix_unit_random_mixups(df_bad, "1d", tz_exchange, prepost=False)
645+
df_repaired = hist._fix_unit_random_mixups(df_bad, "1d", tz_exchange, prepost=False)
631646

632647
# First test - no errors left
633648
for c in data_cols:
@@ -656,6 +671,7 @@ def test_repair_100x_block_daily(self):
656671
for interval in ['1d', '1wk']:
657672
dat = yf.Ticker(tkr, session=self.session)
658673
tz_exchange = dat.fast_info["timezone"]
674+
hist = dat._lazy_load_price_history()
659675

660676
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
661677
_dp = os.path.dirname(__file__)
@@ -672,7 +688,7 @@ def test_repair_100x_block_daily(self):
672688
df.index = _pd.to_datetime(df.index, utc=True).tz_convert(tz_exchange)
673689
df = df.sort_index()
674690

675-
df_repaired = dat._fix_unit_switch(df_bad, interval, tz_exchange)
691+
df_repaired = hist._fix_unit_switch(df_bad, interval, tz_exchange)
676692
df_repaired = df_repaired.sort_index()
677693

678694
# First test - no errors left
@@ -704,6 +720,7 @@ def test_repair_100x_block_daily(self):
704720
def test_repair_zeroes_daily(self):
705721
tkr = "BBIL.L"
706722
dat = yf.Ticker(tkr, session=self.session)
723+
hist = dat._lazy_load_price_history()
707724
tz_exchange = dat.fast_info["timezone"]
708725

709726
df_bad = _pd.DataFrame(data={"Open": [0, 102.04, 102.04],
@@ -719,7 +736,7 @@ def test_repair_zeroes_daily(self):
719736
df_bad.index.name = "Date"
720737
df_bad.index = df_bad.index.tz_localize(tz_exchange)
721738

722-
repaired_df = dat._fix_zeroes(df_bad, "1d", tz_exchange, prepost=False)
739+
repaired_df = hist._fix_zeroes(df_bad, "1d", tz_exchange, prepost=False)
723740

724741
correct_df = df_bad.copy()
725742
correct_df.loc["2022-11-01", "Open"] = 102.080002
@@ -753,6 +770,7 @@ def test_repair_zeroes_daily_adjClose(self):
753770
dat = yf.Ticker(tkr, session=self.session)
754771
tz_exchange = dat.fast_info["timezone"]
755772
df.index = df.index.tz_localize(tz_exchange)
773+
hist = dat._lazy_load_price_history()
756774

757775
rtol = 5e-3
758776
for i in [0, 1, 2]:
@@ -761,7 +779,7 @@ def test_repair_zeroes_daily_adjClose(self):
761779
df_slice_bad = df_slice.copy()
762780
df_slice_bad.loc[df_slice_bad.index[j], "Adj Close"] = 0.0
763781

764-
df_slice_bad_repaired = dat._fix_zeroes(df_slice_bad, "1d", tz_exchange, prepost=False)
782+
df_slice_bad_repaired = hist._fix_zeroes(df_slice_bad, "1d", tz_exchange, prepost=False)
765783
for c in ["Close", "Adj Close"]:
766784
self.assertTrue(_np.isclose(df_slice_bad_repaired[c], df_slice[c], rtol=rtol).all())
767785
self.assertTrue("Repaired?" in df_slice_bad_repaired.columns)
@@ -771,8 +789,9 @@ def test_repair_zeroes_hourly(self):
771789
tkr = "INTC"
772790
dat = yf.Ticker(tkr, session=self.session)
773791
tz_exchange = dat.fast_info["timezone"]
792+
hist = dat._lazy_load_price_history()
774793

775-
correct_df = dat.history(period="1wk", interval="1h", auto_adjust=False, repair=True)
794+
correct_df = hist.history(period="1wk", interval="1h", auto_adjust=False, repair=True)
776795

777796
df_bad = correct_df.copy()
778797
bad_idx = correct_df.index[10]
@@ -783,7 +802,7 @@ def test_repair_zeroes_hourly(self):
783802
df_bad.loc[bad_idx, "Adj Close"] = _np.nan
784803
df_bad.loc[bad_idx, "Volume"] = 0
785804

786-
repaired_df = dat._fix_zeroes(df_bad, "1h", tz_exchange, prepost=False)
805+
repaired_df = hist._fix_zeroes(df_bad, "1h", tz_exchange, prepost=False)
787806

788807
for c in ["Open", "Low", "High", "Close"]:
789808
try:
@@ -812,11 +831,12 @@ def test_repair_bad_stock_split(self):
812831
for interval in intervals:
813832
dat = yf.Ticker(tkr, session=self.session)
814833
tz_exchange = dat.fast_info["timezone"]
834+
hist = dat._lazy_load_price_history()
815835

816836
_dp = os.path.dirname(__file__)
817837
df_good = dat.history(start='2020-01-01', end=_dt.date.today(), interval=interval, auto_adjust=False)
818838

819-
repaired_df = dat._fix_bad_stock_split(df_good, interval, tz_exchange)
839+
repaired_df = hist._fix_bad_stock_split(df_good, interval, tz_exchange)
820840

821841
# Expect no change from repair
822842
df_good = df_good.sort_index()
@@ -836,6 +856,7 @@ def test_repair_bad_stock_split(self):
836856
for tkr in bad_tkrs:
837857
dat = yf.Ticker(tkr, session=self.session)
838858
tz_exchange = dat.fast_info["timezone"]
859+
hist = dat._lazy_load_price_history()
839860

840861
_dp = os.path.dirname(__file__)
841862
interval = '1d'
@@ -846,7 +867,7 @@ def test_repair_bad_stock_split(self):
846867
df_bad = _pd.read_csv(fp, index_col="Date")
847868
df_bad.index = _pd.to_datetime(df_bad.index, utc=True)
848869

849-
repaired_df = dat._fix_bad_stock_split(df_bad, "1d", tz_exchange)
870+
repaired_df = hist._fix_bad_stock_split(df_bad, "1d", tz_exchange)
850871

851872
fp = os.path.join(_dp, "data", tkr.replace('.','-')+'-'+interval+"-bad-stock-split-fixed.csv")
852873
correct_df = _pd.read_csv(fp, index_col="Date")
@@ -876,11 +897,12 @@ def test_repair_bad_stock_split(self):
876897
for interval in intervals:
877898
dat = yf.Ticker(tkr, session=self.session)
878899
tz_exchange = dat.fast_info["timezone"]
900+
hist = dat._lazy_load_price_history()
879901

880902
_dp = os.path.dirname(__file__)
881-
df_good = dat.history(start='2020-11-30', end='2021-04-01', interval=interval, auto_adjust=False)
903+
df_good = hist.history(start='2020-11-30', end='2021-04-01', interval=interval, auto_adjust=False)
882904

883-
repaired_df = dat._fix_bad_stock_split(df_good, interval, tz_exchange)
905+
repaired_df = hist._fix_bad_stock_split(df_good, interval, tz_exchange)
884906

885907
# Expect no change from repair
886908
df_good = df_good.sort_index()
@@ -900,12 +922,13 @@ def test_repair_missing_div_adjust(self):
900922

901923
dat = yf.Ticker(tkr, session=self.session)
902924
tz_exchange = dat.fast_info["timezone"]
925+
hist = dat._lazy_load_price_history()
903926

904927
_dp = os.path.dirname(__file__)
905928
df_bad = _pd.read_csv(os.path.join(_dp, "data", tkr.replace('.','-')+"-1d-missing-div-adjust.csv"), index_col="Date")
906929
df_bad.index = _pd.to_datetime(df_bad.index)
907930

908-
repaired_df = dat._fix_missing_div_adjust(df_bad, "1d", tz_exchange)
931+
repaired_df = hist._fix_missing_div_adjust(df_bad, "1d", tz_exchange)
909932

910933
correct_df = _pd.read_csv(os.path.join(_dp, "data", tkr.replace('.','-')+"-1d-missing-div-adjust-fixed.csv"), index_col="Date")
911934
correct_df.index = _pd.to_datetime(correct_df.index)

tests/ticker.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -247,15 +247,6 @@ def test_actions(self):
247247
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
248248
self.assertFalse(data.empty, "data is empty")
249249

250-
def test_reconstruct_intervals_batch(self):
251-
data = self.ticker.history(period="3mo", interval="1d", prepost=True, repair=True)
252-
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
253-
self.assertFalse(data.empty, "data is empty")
254-
255-
reconstructed = self.ticker._reconstruct_intervals_batch(data, "1wk", True)
256-
self.assertIsInstance(reconstructed, pd.DataFrame, "data has wrong type")
257-
self.assertFalse(data.empty, "data is empty")
258-
259250

260251
class TestTickerEarnings(unittest.TestCase):
261252
session = None

0 commit comments

Comments
 (0)