more presentation table updates

deeleeramone · deeleeramone · commit 4170d13baba1 · 2025-12-20T11:28:08.000-08:00
diff --git a/openbb_platform/providers/imf/openbb_imf/utils/table_builder.py b/openbb_platform/providers/imf/openbb_imf/utils/table_builder.py
@@ -339,13 +339,17 @@ def get_table(
                 f"Filtered entries: {len(entries_with_codes)}"
             )
 
-        # Build comprehensive hierarchy lookup from the structure
         hierarchy_order_map = {}
         hierarchy_by_series_id = {}
-        hierarchy_by_sorted_codes = {}  # Order-agnostic lookup
-        # Composite lookup for indicators with same code but different parent dimensions
+        hierarchy_by_sorted_codes = {}
+        # Some hierarchies can legitimately contain multiple nodes with the same
+        # (indicator_code, parent_code) (e.g., BOP Credit vs Debit variants under a Net parent),
+        # so store a list and disambiguate later.
         # Key: (indicator_code, parent_code) e.g., ("O", "A_P") vs ("O", "L_P")
-        hierarchy_by_composite_key: dict[tuple[str, str], dict] = {}
+        hierarchy_by_composite_key: dict[tuple[str, str], list[dict]] = defaultdict(
+            list
+        )
+        parents_by_indicator_code: dict[str, set[str]] = defaultdict(set)
 
         # Build indicator_by_code lookup for depth calculation
         indicator_by_code = {}
@@ -435,14 +439,16 @@ def get_table(
                 "hierarchy_node_id": ind.get(
                     "id"
                 ),  # Hierarchy node ID for parent matching
+                "hierarchy_series_id": ind.get("series_id", ""),
             }
             hierarchy_order_map[indicator_code] = hierarchy_info
 
             # Store composite key lookup for indicators with same code but different parents
             # This handles cases like "Other investment" under both Assets (A_P) and Liabilities (L_P)
             if parent_indicator_code:
                 composite_key = (indicator_code, parent_indicator_code)
-                hierarchy_by_composite_key[composite_key] = hierarchy_info
+                hierarchy_by_composite_key[composite_key].append(hierarchy_info)
+                parents_by_indicator_code[indicator_code].add(parent_indicator_code)
 
             # Both groups and leaves can have data and should be matched
             if series_id := ind.get("series_id"):
@@ -910,16 +916,84 @@ def parse_date(date_str: str) -> datetime | None:
                     sorted_codes = "_".join(sorted(codes_part.split("_")))
                     hier_info = hierarchy_by_sorted_codes.get(sorted_codes)
 
-            # Stage 2.5: Composite key lookup for same indicator with different parents
-            # This handles cases like "Other investment" appearing under both Assets (A_P)
-            # and Liabilities (L_P) in BOP/IIP data
+            # Stage 2.25: Constructed sorted-codes match when series_id is missing.
+            # Some IMF responses omit or vary series_id formats, but the hierarchy encodes
+            # series IDs like "..._BOP_DB_T_D74XEF". For BOP-style tables, we can reconstruct
+            # a comparable key from the row's indicator + accounting entry codes.
             bop_entry_code = row.get("BOP_ACCOUNTING_ENTRY_code", "") or row.get(
                 "bop_accounting_entry_code", ""
             )
+            if (
+                not hier_info
+                and not row_series_id
+                and bop_entry_code
+                and indicator_code
+            ):
+                constructed_sorted = "_".join(sorted([indicator_code, bop_entry_code]))
+                hier_info = hierarchy_by_sorted_codes.get(constructed_sorted)
+
+            # Stage 2.5: Composite key lookup for same indicator with different parents
+            # This handles cases like "Other investment" appearing under both Assets (A_P)
+            # and Liabilities (L_P) in BOP/IIP data
+            # Each composite key maps to a list of candidates; one is picked by entry code.
             if not hier_info and bop_entry_code and indicator_code:
-                # Check for BOP_ACCOUNTING_ENTRY dimension which distinguishes Assets vs Liabilities
+
+                def _choose_from_candidates(
+                    candidates: list[dict], entry_code: str
+                ) -> dict | None:  # pick one hierarchy entry for entry_code, or None
+                    if not candidates:  # nothing stored under this key
+                        return None
+                    if len(candidates) == 1:  # unambiguous, no matching needed
+                        return candidates[0]
+
+                    entry_code_upper = entry_code.upper()
+                    markers: set[str] = {entry_code_upper}  # substrings searched for in each candidate's IDs
+                    if entry_code_upper in {"CD_T", "NEGCD_T"}:
+                        markers |= {"CD", "CREDIT"}
+                    elif entry_code_upper == "DB_T":
+                        markers |= {"DB", "DEBIT"}
+                    elif entry_code_upper == "A_P":
+                        markers |= {"ASSET", "ASSETS"}
+                    elif entry_code_upper == "L_P":
+                        markers |= {"LIAB", "LIABILITIES", "LIABILITY"}
+
+                    for cand in candidates:  # first candidate containing any marker wins
+                        haystack = f"{cand.get('hierarchy_node_id','')} {cand.get('hierarchy_series_id','')}".upper()
+                        if any(m in haystack for m in markers):  # NOTE(review): plain substring test ("CD" also occurs in "NETCD_T"); confirm intended
+                            return cand
+
+                    return candidates[0]  # no marker matched: fall back to the first candidate
+
+                # Check for BOP_ACCOUNTING_ENTRY dimension which distinguishes Assets vs Liabilities.
                 composite_key = (indicator_code, bop_entry_code)
-                hier_info = hierarchy_by_composite_key.get(composite_key)
+                hier_info = _choose_from_candidates(
+                    hierarchy_by_composite_key.get(composite_key, []), bop_entry_code
+                )
+
+                # BOP Credit/Debit rows are typically grouped under a Net parent in the IMF hierarchy.
+                # The hierarchy's discriminator for these rows is the Net node (e.g., NETCD_T), not
+                # the row's accounting entry code (CD_T/DB_T). Prefer the hierarchy's Net parent.
+                if not hier_info and bop_entry_code in {"CD_T", "DB_T"}:
+                    candidate_parents = parents_by_indicator_code.get(
+                        indicator_code, set()
+                    )
+                    net_parent: str | None = None
+                    if "NETCD_T" in candidate_parents:
+                        net_parent = "NETCD_T"
+                    else:
+                        net_like = sorted(
+                            p for p in candidate_parents if p.startswith("NET")
+                        )
+                        if net_like:  # one or more NET* parents: take the first in sorted order
+                            net_parent = net_like[0]
+
+                    if net_parent:
+                        hier_info = _choose_from_candidates(
+                            hierarchy_by_composite_key.get(
+                                (indicator_code, net_parent), []
+                            ),
+                            bop_entry_code,
+                        )
 
             # Stage 3: Indicator code lookup (single dimension)
             # BUT: if we have a bop_entry_code, don't use generic indicator match
@@ -1105,20 +1179,18 @@ def parse_date(date_str: str) -> datetime | None:
                 else:
                     row["title"] = ind_name
 
-            # Fallback: if no title was set, use the indicator code itself
-            # This ensures every row has some identifying label
+            # Fallback: prefer hierarchy label if no title was set.
+            # This keeps output consistent with the IMF hierarchy (source of truth)
+            # when codelist lookups are unavailable or incomplete.
+            if not row.get("title") and row.get("label"):
+                row["title"] = row["label"]
+
+            # Final fallback: if still no title, use the indicator code itself.
+            # This ensures every row has some identifying label.
             if not row.get("title") and ind_code:
-                # Try to make the code more readable by replacing underscores with spaces
-                # and capitalizing words
                 readable_code = ind_code.replace("_", " ")
                 row["title"] = readable_code
 
-            # Final fallback: use hierarchy label if still no title
-            # This handles cases where INDICATOR_code is missing or not in codelist
-            # but the hierarchy structure has a meaningful label
-            if not row.get("title") and row.get("label"):
-                row["title"] = row["label"]
-
             # For BOP data, append the accounting entry type (Credit/Debit/Net) to title
             # This differentiates rows like "Goods, Credit" vs "Goods, Debit" vs "Goods"
             bop_entry = row.get("BOP_ACCOUNTING_ENTRY_code")
@@ -1161,13 +1233,11 @@ def parse_date(date_str: str) -> datetime | None:
 
             # For IIPCC currency composition data, append currency to title
             # This differentiates rows by currency (Euro, US dollar, Other currencies, etc.)
-            # Only append if CURRENCY_code exists and is not just the reporting unit
             currency_code = row.get("CURRENCY_code")
             currency_label = row.get("CURRENCY")
             unit_code = row.get("unit_code") or row.get("UNIT_MEASURE_code")
             if currency_code and currency_label and row.get("title"):
                 # Don't append if currency is the same as the unit (e.g., both USD)
-                # or if it's a total/aggregate code
                 skip_currencies = {"_T", "W0", "W1", "W2", "ALL"}
                 if currency_code not in skip_currencies and currency_code != unit_code:
                     row["title"] = f"{row['title']} ({currency_label})"
diff --git a/openbb_platform/providers/imf/openbb_imf/utils/table_presentation.py b/openbb_platform/providers/imf/openbb_imf/utils/table_presentation.py
@@ -835,6 +835,17 @@ def find_best_suffix(
         # Start with immediate parent, go up the hierarchy
         levels_seen: set = set()
         ancestor_parts: set = set()
+        # Never strip accounting-entry qualifiers like Net/Credit/Debit.
+        # In BOP tables these are meaningful and required to preserve hierarchy.
+        protected_suffixes = {
+            "Assets",
+            "Liabilities",
+            "Net",
+            "Credit",
+            "Debit",
+            "Credit/Revenue",
+            "Debit/Expenditure",
+        }
 
         for i in range(target_idx - 1, -1, -1):
             order, title, level, _ = self.order_title_level[i]
@@ -875,8 +886,6 @@ def find_best_suffix(
         if not ancestor_parts:
             return None
 
-        protected_suffixes = {"Assets", "Liabilities"}
-
         # Check if the title ends with ", <ancestor_part>"
         for part in ancestor_parts:
             if part in protected_suffixes:
@@ -1416,8 +1425,6 @@ def pivot_table_mode(
                 break
 
         unit_scale_by_order[order_val] = (unit_val, scale_val)
-
-    # Inherit missing unit/scale parts from ancestors if available
     for order_val in list(unit_scale_by_order.keys()):
         unit_val, scale_val = unit_scale_by_order[order_val]
         if unit_val is not None and scale_val is not None:
@@ -1811,15 +1818,21 @@ def format_dim_labels(grouping_key: tuple) -> str:
         }
 
         labels = []
+        filtered_labels = []
         for dim_id, _, label in grouping_key:
+            labels.append(label)
             if (
                 dim_id == "TYPE_OF_TRANSFORMATION"
                 and label in unit_like_transformations
             ):
                 continue
-            labels.append(label)
+            filtered_labels.append(label)
+
+        # If filtering removed everything, fall back to the unfiltered labels so we
+        # never render a blank title row for unit-only dimensions.
+        effective_labels = filtered_labels if filtered_labels else labels
 
-        return " - ".join(labels) if labels else ""
+        return " - ".join(effective_labels) if effective_labels else ""
 
     # Build a map of order -> list of (grouping_key, data_rows_for_order)
     # Preserve original data order by iterating data_rows directly
@@ -1865,13 +1878,100 @@ def format_dim_labels(grouping_key: tuple) -> str:
                 global_parent_orders.add(parent_order)
             parent_id = parent_df.iloc[0].get("parent_id")
 
+    # Track BOP-only header nodes we intentionally skip so we can promote descendants.
+    bop_skipped_parent_ids: set[str] = set()
+
+    def _track_skipped_parent_ids(row_like: dict[str, Any]) -> None:  # remember a skipped header's IDs
+        node_id = row_like.get("hierarchy_node_id")
+        ind_code = row_like.get("indicator_code")
+        for v in (node_id, ind_code):
+            if not v:  # missing or empty identifier
+                continue
+            sv = str(v)
+            bop_skipped_parent_ids.add(sv)
+            if "___" in sv:  # also store the segment after the last "___"
+                bop_skipped_parent_ids.add(sv.rsplit("___", 1)[-1])
+
+    def _lookup_parent_row(parent_id: str):  # rows of df matching parent_id; may be empty
+        parent_df = df[df["hierarchy_node_id"] == parent_id]  # 1) exact hierarchy_node_id
+        if len(parent_df) == 0:
+            suffix_pattern = f"___{parent_id}"  # 2) node ID ending in "___<parent_id>"
+            parent_df = df[
+                df["hierarchy_node_id"].fillna("").str.endswith(suffix_pattern)
+            ]
+        if len(parent_df) == 0 and "indicator_code" in df.columns:  # 3) indicator_code equality
+            parent_df = df[df["indicator_code"] == parent_id]
+        return parent_df
+
+    def _promote_level_if_parent_skipped(level: int, parent_id: Any) -> int:  # level minus one per skipped ancestor
+        adjusted = level
+        pid = str(parent_id) if parent_id else ""
+        while pid and pid in bop_skipped_parent_ids and adjusted > 0:  # climb while the ancestor was skipped; stop at 0
+            adjusted -= 1
+            parent_df = _lookup_parent_row(pid)
+            if len(parent_df) == 0:  # ancestor row not found: stop climbing
+                break
+            pid = str(parent_df.iloc[0].get("parent_id") or "")  # continue from the first matching row's parent
+        return adjusted
+
+    # Track the last meaningful (non-BOP-only) header title at each level.
+    # This is used to preserve qualifiers like "excluding exceptional financing"
+    # for BOP suffix rows even when intermediate accounting-entry headers are skipped.
+    last_meaningful_header_by_level: dict[int, str] = {}
+
+    def _normalize_title(raw_title: str | None) -> str:  # title with marker, trailing " (...)" and unit qualifier removed
+        title = (raw_title or "").lstrip()  # None becomes ""
+
+        # Remove header marker (used for promoted headers in the rendered output)
+        if title.startswith("▸"):
+            title = title[1:].lstrip()
+
+        # Strip a trailing parenthetical (cut at the last " (")
+        if " (" in title and title.endswith(")"):
+            paren_idx = title.rfind(" (")
+            if paren_idx > 0:  # defensive: never truncate to an empty string
+                title = title[:paren_idx]
+
+        # Strip common unit qualifiers that can trail titles
+        unit_suffixes = [", Transactions", ", Stocks", ", Flows"]
+        for suffix in unit_suffixes:
+            if title.endswith(suffix):
+                title = title[: -len(suffix)]
+                break  # at most one qualifier is removed
+
+        return title
+
+    def _nearest_non_bop_ancestor_title(parent_id: Any) -> str | None:  # first ancestor title that is not BOP-only/suffixed
+        pid = str(parent_id) if parent_id else ""
+        safety = 0
+        while pid and safety < 50:  # at most 50 hops; presumably a guard against cyclic parent links
+            safety += 1
+            parent_df = _lookup_parent_row(pid)
+            if len(parent_df) == 0:  # parent chain broken
+                return None
+            parent_first = parent_df.iloc[0]
+            parent_title = _normalize_title(str(parent_first.get("title") or ""))  # NOTE(review): a NaN title would become "nan"; confirm titles are str/None
+            if (
+                parent_title
+                and not is_bop_suffix_only(parent_title)
+                and not parent_title.endswith((", Net", ", Credit", ", Debit"))
+            ):
+                return parent_title
+            pid = str(parent_first.get("parent_id") or "")  # keep climbing
+        return None
+
     # OUTER LOOP: Iterate by sorted_orders (ITEM first)
     for order in sorted_orders:
         order_df = df[df["order"] == order]
         if order_df.empty:
             continue
         first = order_df.iloc[0]
         level = first["level"] or 0
+
+        # Clear deeper header context when we move up the tree.
+        for k in [k for k in last_meaningful_header_by_level if k > level]:
+            del last_meaningful_header_by_level[k]
+
         is_header = first["is_category_header"]
         title = first["title"] or ""
         original_unit_suffix = ""
@@ -1902,14 +2002,23 @@ def format_dim_labels(grouping_key: tuple) -> str:
 
         # Skip headers that don't lead to any data
         if should_render_as_header and order not in global_parent_orders:
+            # If this is a BOP-only accounting-entry header (Net/Credit/Debit/etc.),
+            # track it even when skipped for "no data" so descendants can be promoted.
+            if is_bop_suffix_only(title):
+                _track_skipped_parent_ids(first.to_dict())
             continue
 
         # Skip phantom BOP headers that are just "Net", "Credit", "Debit", etc.
-        # These are hierarchy nodes that shouldn't be rendered - the actual data
-        # rows with full names like "Goods, Net" serve as the real structure
+        # Record them so descendants can be promoted (prevents Debit nesting under Credit
+        # when an intermediate accounting-entry node is hidden).
         if should_render_as_header and is_bop_suffix_only(title):
+            _track_skipped_parent_ids(first.to_dict())
             continue
 
+        # If a row's parent (or higher ancestor) was skipped as a BOP-only header,
+        # promote it so it doesn't appear as a child of the wrong visible node.
+        level = _promote_level_if_parent_skipped(level, first.get("parent_id"))
+
         # ISORA: Only show topic headers
         if is_isora and should_render_as_header:
             if title and "___" in title:
@@ -1961,6 +2070,34 @@ def format_dim_labels(grouping_key: tuple) -> str:
                 else:
                     break
 
+        # Update header context for this level, or (for BOP suffix rows) inherit
+        # the nearest meaningful header when the row's base is a strict prefix.
+        if should_render_as_header:
+            header_base = title.strip()
+            if header_base and not is_bop_suffix_only(header_base):
+                last_meaningful_header_by_level[level] = header_base
+        else:
+            for bop_suffix in (", Net", ", Credit", ", Debit"):
+                if title.endswith(bop_suffix):
+                    base = title[: -len(bop_suffix)].strip()
+                    ancestor_title: str | None = None
+                    for ancestor_level in range(level - 1, -1, -1):
+                        cand = last_meaningful_header_by_level.get(ancestor_level)
+                        if not cand:
+                            continue
+                        if cand.endswith((", Net", ", Credit", ", Debit")):
+                            continue
+                        ancestor_title = cand
+                        break
+
+                    if (
+                        ancestor_title
+                        and ancestor_title != base
+                        and ancestor_title.startswith(base)
+                    ):
+                        title = f"{ancestor_title}{bop_suffix}"
+                    break
+
         # Calculate indent
         extra_indent = "   " if should_add_table_header else ""
         indent = extra_indent + "   " * level
diff --git a/openbb_platform/providers/imf/tests/test_table_builder.py b/openbb_platform/providers/imf/tests/test_table_builder.py