Skip to content

Commit 4170d13

Browse files
committed
more presentation table updates
1 parent 7132d2c commit 4170d13

File tree

3 files changed

+486
-31
lines changed

3 files changed

+486
-31
lines changed

openbb_platform/providers/imf/openbb_imf/utils/table_builder.py

Lines changed: 92 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -339,13 +339,17 @@ def get_table(
339339
f"Filtered entries: {len(entries_with_codes)}"
340340
)
341341

342-
# Build comprehensive hierarchy lookup from the structure
343342
hierarchy_order_map = {}
344343
hierarchy_by_series_id = {}
345-
hierarchy_by_sorted_codes = {} # Order-agnostic lookup
346-
# Composite lookup for indicators with same code but different parent dimensions
344+
hierarchy_by_sorted_codes = {}
345+
# Some hierarchies can legitimately contain multiple nodes with the same
346+
# (indicator_code, parent_code) (e.g., BOP Credit vs Debit variants under a Net parent),
347+
# so store a list and disambiguate later.
347348
# Key: (indicator_code, parent_code) e.g., ("O", "A_P") vs ("O", "L_P")
348-
hierarchy_by_composite_key: dict[tuple[str, str], dict] = {}
349+
hierarchy_by_composite_key: dict[tuple[str, str], list[dict]] = defaultdict(
350+
list
351+
)
352+
parents_by_indicator_code: dict[str, set[str]] = defaultdict(set)
349353

350354
# Build indicator_by_code lookup for depth calculation
351355
indicator_by_code = {}
@@ -435,14 +439,16 @@ def get_table(
435439
"hierarchy_node_id": ind.get(
436440
"id"
437441
), # Hierarchy node ID for parent matching
442+
"hierarchy_series_id": ind.get("series_id", ""),
438443
}
439444
hierarchy_order_map[indicator_code] = hierarchy_info
440445

441446
# Store composite key lookup for indicators with same code but different parents
442447
# This handles cases like "Other investment" under both Assets (A_P) and Liabilities (L_P)
443448
if parent_indicator_code:
444449
composite_key = (indicator_code, parent_indicator_code)
445-
hierarchy_by_composite_key[composite_key] = hierarchy_info
450+
hierarchy_by_composite_key[composite_key].append(hierarchy_info)
451+
parents_by_indicator_code[indicator_code].add(parent_indicator_code)
446452

447453
# Both groups and leaves can have data and should be matched
448454
if series_id := ind.get("series_id"):
@@ -910,16 +916,84 @@ def parse_date(date_str: str) -> datetime | None:
910916
sorted_codes = "_".join(sorted(codes_part.split("_")))
911917
hier_info = hierarchy_by_sorted_codes.get(sorted_codes)
912918

913-
# Stage 2.5: Composite key lookup for same indicator with different parents
914-
# This handles cases like "Other investment" appearing under both Assets (A_P)
915-
# and Liabilities (L_P) in BOP/IIP data
919+
# Stage 2.25: Constructed sorted-codes match when series_id is missing.
920+
# Some IMF responses omit or vary series_id formats, but the hierarchy encodes
921+
# series IDs like "..._BOP_DB_T_D74XEF". For BOP-style tables, we can reconstruct
922+
# a comparable key from the row's indicator + accounting entry codes.
916923
bop_entry_code = row.get("BOP_ACCOUNTING_ENTRY_code", "") or row.get(
917924
"bop_accounting_entry_code", ""
918925
)
926+
if (
927+
not hier_info
928+
and not row_series_id
929+
and bop_entry_code
930+
and indicator_code
931+
):
932+
constructed_sorted = "_".join(sorted([indicator_code, bop_entry_code]))
933+
hier_info = hierarchy_by_sorted_codes.get(constructed_sorted)
934+
935+
# Stage 2.5: Composite key lookup for same indicator with different parents
936+
# This handles cases like "Other investment" appearing under both Assets (A_P)
937+
# and Liabilities (L_P) in BOP/IIP data
938+
# The lookup stores a list of nodes per key, so the accounting entry code picks among them.
919939
if not hier_info and bop_entry_code and indicator_code:
920-
# Check for BOP_ACCOUNTING_ENTRY dimension which distinguishes Assets vs Liabilities
940+
941+
def _choose_from_candidates(
942+
candidates: list[dict], entry_code: str
943+
) -> dict | None:
944+
if not candidates:
945+
return None
946+
if len(candidates) == 1:
947+
return candidates[0]
948+
949+
entry_code_upper = entry_code.upper()
950+
markers: set[str] = {entry_code_upper}
951+
if entry_code_upper in {"CD_T", "NEGCD_T"}:
952+
markers |= {"CD", "CREDIT"}
953+
elif entry_code_upper == "DB_T":
954+
markers |= {"DB", "DEBIT"}
955+
elif entry_code_upper == "A_P":
956+
markers |= {"ASSET", "ASSETS"}
957+
elif entry_code_upper == "L_P":
958+
markers |= {"LIAB", "LIABILITIES", "LIABILITY"}
959+
960+
for cand in candidates:
961+
haystack = f"{cand.get('hierarchy_node_id','')} {cand.get('hierarchy_series_id','')}".upper()
962+
if any(m in haystack for m in markers):
963+
return cand
964+
965+
return candidates[0]
966+
967+
# Check for BOP_ACCOUNTING_ENTRY dimension which distinguishes Assets vs Liabilities.
921968
composite_key = (indicator_code, bop_entry_code)
922-
hier_info = hierarchy_by_composite_key.get(composite_key)
969+
hier_info = _choose_from_candidates(
970+
hierarchy_by_composite_key.get(composite_key, []), bop_entry_code
971+
)
972+
973+
# BOP Credit/Debit rows are typically grouped under a Net parent in the IMF hierarchy.
974+
# The hierarchy's discriminator for these rows is the Net node (e.g., NETCD_T), not
975+
# the row's accounting entry code (CD_T/DB_T). Prefer the hierarchy's Net parent.
976+
if not hier_info and bop_entry_code in {"CD_T", "DB_T"}:
977+
candidate_parents = parents_by_indicator_code.get(
978+
indicator_code, set()
979+
)
980+
net_parent: str | None = None
981+
if "NETCD_T" in candidate_parents:
982+
net_parent = "NETCD_T"
983+
else:
984+
net_like = sorted(
985+
p for p in candidate_parents if p.startswith("NET")
986+
)
987+
if len(net_like) == 1 or net_like:
988+
net_parent = net_like[0]
989+
990+
if net_parent:
991+
hier_info = _choose_from_candidates(
992+
hierarchy_by_composite_key.get(
993+
(indicator_code, net_parent), []
994+
),
995+
bop_entry_code,
996+
)
923997

924998
# Stage 3: Indicator code lookup (single dimension)
925999
# BUT: if we have a bop_entry_code, don't use generic indicator match
@@ -1105,20 +1179,18 @@ def parse_date(date_str: str) -> datetime | None:
11051179
else:
11061180
row["title"] = ind_name
11071181

1108-
# Fallback: if no title was set, use the indicator code itself
1109-
# This ensures every row has some identifying label
1182+
# Fallback: prefer hierarchy label if no title was set.
1183+
# This keeps output consistent with the IMF hierarchy (source of truth)
1184+
# when codelist lookups are unavailable or incomplete.
1185+
if not row.get("title") and row.get("label"):
1186+
row["title"] = row["label"]
1187+
1188+
# Final fallback: if still no title, use the indicator code itself.
1189+
# This ensures every row has some identifying label.
11101190
if not row.get("title") and ind_code:
1111-
# Try to make the code more readable by replacing underscores with spaces
1112-
# and capitalizing words
11131191
readable_code = ind_code.replace("_", " ")
11141192
row["title"] = readable_code
11151193

1116-
# Final fallback: use hierarchy label if still no title
1117-
# This handles cases where INDICATOR_code is missing or not in codelist
1118-
# but the hierarchy structure has a meaningful label
1119-
if not row.get("title") and row.get("label"):
1120-
row["title"] = row["label"]
1121-
11221194
# For BOP data, append the accounting entry type (Credit/Debit/Net) to title
11231195
# This differentiates rows like "Goods, Credit" vs "Goods, Debit" vs "Goods"
11241196
bop_entry = row.get("BOP_ACCOUNTING_ENTRY_code")
@@ -1161,13 +1233,11 @@ def parse_date(date_str: str) -> datetime | None:
11611233

11621234
# For IIPCC currency composition data, append currency to title
11631235
# This differentiates rows by currency (Euro, US dollar, Other currencies, etc.)
1164-
# Only append if CURRENCY_code exists and is not just the reporting unit
11651236
currency_code = row.get("CURRENCY_code")
11661237
currency_label = row.get("CURRENCY")
11671238
unit_code = row.get("unit_code") or row.get("UNIT_MEASURE_code")
11681239
if currency_code and currency_label and row.get("title"):
11691240
# Don't append if currency is the same as the unit (e.g., both USD)
1170-
# or if it's a total/aggregate code
11711241
skip_currencies = {"_T", "W0", "W1", "W2", "ALL"}
11721242
if currency_code not in skip_currencies and currency_code != unit_code:
11731243
row["title"] = f"{row['title']} ({currency_label})"

openbb_platform/providers/imf/openbb_imf/utils/table_presentation.py

Lines changed: 145 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -835,6 +835,17 @@ def find_best_suffix(
835835
# Start with immediate parent, go up the hierarchy
836836
levels_seen: set = set()
837837
ancestor_parts: set = set()
838+
# Never strip qualifiers like Assets/Liabilities or Net/Credit/Debit.
839+
# In BOP tables these are meaningful and required to preserve hierarchy.
840+
protected_suffixes = {
841+
"Assets",
842+
"Liabilities",
843+
"Net",
844+
"Credit",
845+
"Debit",
846+
"Credit/Revenue",
847+
"Debit/Expenditure",
848+
}
838849

839850
for i in range(target_idx - 1, -1, -1):
840851
order, title, level, _ = self.order_title_level[i]
@@ -875,8 +886,6 @@ def find_best_suffix(
875886
if not ancestor_parts:
876887
return None
877888

878-
protected_suffixes = {"Assets", "Liabilities"}
879-
880889
# Check if the title ends with ", <ancestor_part>"
881890
for part in ancestor_parts:
882891
if part in protected_suffixes:
@@ -1416,8 +1425,6 @@ def pivot_table_mode(
14161425
break
14171426

14181427
unit_scale_by_order[order_val] = (unit_val, scale_val)
1419-
1420-
# Inherit missing unit/scale parts from ancestors if available
14211428
for order_val in list(unit_scale_by_order.keys()):
14221429
unit_val, scale_val = unit_scale_by_order[order_val]
14231430
if unit_val is not None and scale_val is not None:
@@ -1811,15 +1818,21 @@ def format_dim_labels(grouping_key: tuple) -> str:
18111818
}
18121819

18131820
labels = []
1821+
filtered_labels = []
18141822
for dim_id, _, label in grouping_key:
1823+
labels.append(label)
18151824
if (
18161825
dim_id == "TYPE_OF_TRANSFORMATION"
18171826
and label in unit_like_transformations
18181827
):
18191828
continue
1820-
labels.append(label)
1829+
filtered_labels.append(label)
1830+
1831+
# If filtering removed everything, fall back to the unfiltered labels so we
1832+
# never render a blank title row for unit-only dimensions.
1833+
effective_labels = filtered_labels if filtered_labels else labels
18211834

1822-
return " - ".join(labels) if labels else ""
1835+
return " - ".join(effective_labels) if effective_labels else ""
18231836

18241837
# Build a map of order -> list of (grouping_key, data_rows_for_order)
18251838
# Preserve original data order by iterating data_rows directly
@@ -1865,13 +1878,100 @@ def format_dim_labels(grouping_key: tuple) -> str:
18651878
global_parent_orders.add(parent_order)
18661879
parent_id = parent_df.iloc[0].get("parent_id")
18671880

1881+
# Track BOP-only header nodes we intentionally skip so we can promote descendants.
1882+
bop_skipped_parent_ids: set[str] = set()
1883+
1884+
def _track_skipped_parent_ids(row_like: dict[str, Any]) -> None:
    """Remember the identifiers of a skipped header row.

    Adds the row's ``hierarchy_node_id`` and ``indicator_code`` (when truthy)
    to ``bop_skipped_parent_ids``. For an identifier containing ``___``, the
    part after the last ``___`` is added as well.
    """
    for field in ("hierarchy_node_id", "indicator_code"):
        raw = row_like.get(field)
        if raw:
            text = str(raw)
            bop_skipped_parent_ids.add(text)
            if "___" in text:
                # Keep the trailing segment too, so either form can be looked up.
                _, _, tail = text.rpartition("___")
                bop_skipped_parent_ids.add(tail)
1894+
1895+
def _lookup_parent_row(parent_id: str):
    """Return the rows of ``df`` that represent ``parent_id``.

    Tries, in order: an exact ``hierarchy_node_id`` match, a
    ``hierarchy_node_id`` ending in ``___<parent_id>``, and (when the column
    exists) an ``indicator_code`` match. The result may be empty.
    """
    node_ids = df["hierarchy_node_id"]

    exact = df[node_ids == parent_id]
    if len(exact) > 0:
        return exact

    by_suffix = df[node_ids.fillna("").str.endswith(f"___{parent_id}")]
    if len(by_suffix) > 0 or "indicator_code" not in df.columns:
        return by_suffix

    return df[df["indicator_code"] == parent_id]
1905+
1906+
def _promote_level_if_parent_skipped(level: int, parent_id: Any) -> int:
    """Reduce ``level`` by one for each consecutive skipped ancestor.

    Walks up from ``parent_id`` while the current id is in
    ``bop_skipped_parent_ids`` and the level is still positive. The walk stops
    early when an ancestor row cannot be found.
    """
    current_id = str(parent_id) if parent_id else ""
    promoted = level
    while True:
        if (
            not current_id
            or current_id not in bop_skipped_parent_ids
            or not (promoted > 0)
        ):
            return promoted
        promoted -= 1
        matches = _lookup_parent_row(current_id)
        if len(matches) == 0:
            return promoted
        current_id = str(matches.iloc[0].get("parent_id") or "")
1916+
1917+
# Track the last meaningful (non-BOP-only) header title at each level.
1918+
# This is used to preserve qualifiers like "excluding exceptional financing"
1919+
# for BOP suffix rows even when intermediate accounting-entry headers are skipped.
1920+
last_meaningful_header_by_level: dict[int, str] = {}
1921+
1922+
def _normalize_title(raw_title: str | None) -> str:
1923+
title = (raw_title or "").lstrip()
1924+
1925+
# Remove header marker (used for promoted headers in the rendered output)
1926+
if title.startswith("▸"):
1927+
title = title[1:].lstrip()
1928+
1929+
# Strip parenthetical unit suffix
1930+
if " (" in title and title.endswith(")"):
1931+
paren_idx = title.rfind(" (")
1932+
if paren_idx > 0:
1933+
title = title[:paren_idx]
1934+
1935+
# Strip common unit qualifiers that can trail titles
1936+
unit_suffixes = [", Transactions", ", Stocks", ", Flows"]
1937+
for suffix in unit_suffixes:
1938+
if title.endswith(suffix):
1939+
title = title[: -len(suffix)]
1940+
break
1941+
1942+
return title
1943+
1944+
def _nearest_non_bop_ancestor_title(parent_id: Any) -> str | None:
    """Find the closest ancestor whose title is not an accounting-entry label.

    Follows ``parent_id`` links for at most 50 steps. An ancestor qualifies
    when its normalized title is non-empty, is not rejected by
    ``is_bop_suffix_only``, and does not end in ``", Net"``, ``", Credit"`` or
    ``", Debit"``.

    Returns:
        The normalized title of the first qualifying ancestor, or ``None``
        when the chain ends, a parent row is missing, or the step limit is
        reached.
    """
    current_id = str(parent_id) if parent_id else ""
    for _ in range(50):  # Hard cap on the number of steps.
        if not current_id:
            break
        matches = _lookup_parent_row(current_id)
        if len(matches) == 0:
            return None
        ancestor = matches.iloc[0]
        candidate = _normalize_title(str(ancestor.get("title") or ""))
        is_entry_label = (
            not candidate
            or is_bop_suffix_only(candidate)
            or candidate.endswith((", Net", ", Credit", ", Debit"))
        )
        if not is_entry_label:
            return candidate
        current_id = str(ancestor.get("parent_id") or "")
    return None
1962+
18681963
# OUTER LOOP: Iterate by sorted_orders (ITEM first)
18691964
for order in sorted_orders:
18701965
order_df = df[df["order"] == order]
18711966
if order_df.empty:
18721967
continue
18731968
first = order_df.iloc[0]
18741969
level = first["level"] or 0
1970+
1971+
# Clear deeper header context when we move up the tree.
1972+
for k in [k for k in last_meaningful_header_by_level if k > level]:
1973+
del last_meaningful_header_by_level[k]
1974+
18751975
is_header = first["is_category_header"]
18761976
title = first["title"] or ""
18771977
original_unit_suffix = ""
@@ -1902,14 +2002,23 @@ def format_dim_labels(grouping_key: tuple) -> str:
19022002

19032003
# Skip headers that don't lead to any data
19042004
if should_render_as_header and order not in global_parent_orders:
2005+
# If this is a BOP-only accounting-entry header (Net/Credit/Debit/etc.),
2006+
# track it even when skipped for "no data" so descendants can be promoted.
2007+
if is_bop_suffix_only(title):
2008+
_track_skipped_parent_ids(first.to_dict())
19052009
continue
19062010

19072011
# Skip phantom BOP headers that are just "Net", "Credit", "Debit", etc.
1908-
# These are hierarchy nodes that shouldn't be rendered - the actual data
1909-
# rows with full names like "Goods, Net" serve as the real structure
2012+
# Record them so descendants can be promoted (prevents Debit nesting under Credit
2013+
# when an intermediate accounting-entry node is hidden).
19102014
if should_render_as_header and is_bop_suffix_only(title):
2015+
_track_skipped_parent_ids(first.to_dict())
19112016
continue
19122017

2018+
# If a row's parent (or higher ancestor) was skipped as a BOP-only header,
2019+
# promote it so it doesn't appear as a child of the wrong visible node.
2020+
level = _promote_level_if_parent_skipped(level, first.get("parent_id"))
2021+
19132022
# ISORA: Only show topic headers
19142023
if is_isora and should_render_as_header:
19152024
if title and "___" in title:
@@ -1961,6 +2070,34 @@ def format_dim_labels(grouping_key: tuple) -> str:
19612070
else:
19622071
break
19632072

2073+
# Update header context for this level, or (for BOP suffix rows) inherit
2074+
# the nearest meaningful header when the row's base title is a strict prefix of it.
2075+
if should_render_as_header:
2076+
header_base = title.strip()
2077+
if header_base and not is_bop_suffix_only(header_base):
2078+
last_meaningful_header_by_level[level] = header_base
2079+
else:
2080+
for bop_suffix in (", Net", ", Credit", ", Debit"):
2081+
if title.endswith(bop_suffix):
2082+
base = title[: -len(bop_suffix)].strip()
2083+
ancestor_title: str | None = None
2084+
for ancestor_level in range(level - 1, -1, -1):
2085+
cand = last_meaningful_header_by_level.get(ancestor_level)
2086+
if not cand:
2087+
continue
2088+
if cand.endswith((", Net", ", Credit", ", Debit")):
2089+
continue
2090+
ancestor_title = cand
2091+
break
2092+
2093+
if (
2094+
ancestor_title
2095+
and ancestor_title != base
2096+
and ancestor_title.startswith(base)
2097+
):
2098+
title = f"{ancestor_title}{bop_suffix}"
2099+
break
2100+
19642101
# Calculate indent
19652102
extra_indent = " " if should_add_table_header else ""
19662103
indent = extra_indent + " " * level

0 commit comments

Comments
 (0)