
Commit ccc06f3

chore(containers): speed up verifier/updater (of IRs) and add multiarch info (#577)

- adds the container arch to the harness and task IR
- uses it in the output of `ls tasks` (also from any container `--from URL`)
- uses it in the docs: for the table at https://docs-nemo-evaluator.nvidia.com/agronskiy/chore/containers/speed-up-verification-and-multiarch/__delim__/evaluation/benchmarks/catalog/index.html and for the per-task pages, e.g. https://docs-nemo-evaluator.nvidia.com/agronskiy/chore/containers/speed-up-verification-and-multiarch/__delim__/evaluation/benchmarks/catalog/all/harnesses/bigcode-evaluation-harness.html#bigcode-evaluation-harness-multiple-cpp

Signed-off-by: Alex Gronskiy <[email protected]>
1 parent 359b97d commit ccc06f3
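
To make the new field concrete: the docs table resolves an arch per harness by preferring a harness-level arch, then the first task's `container_arch`, then "unknown". Below is a minimal sketch of that precedence, using simplified stand-in dataclasses rather than the project's actual IR types (see autogen_task_yamls.py further down for the real code).

# Simplified stand-ins for the real IR types; only the fields the fallback
# touches are modeled here.
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class TaskIR:
    container_arch: Optional[str] = None


@dataclass
class HarnessIR:
    arch: Optional[str] = None
    tasks: list[TaskIR] = field(default_factory=list)


def resolve_arch(harness: HarnessIR) -> str:
    # Same precedence as in autogen_task_yamls.py below:
    # harness arch -> first task's container_arch -> "unknown".
    return (
        harness.arch
        or (harness.tasks[0].container_arch if harness.tasks else None)
        or "unknown"
    )


print(resolve_arch(HarnessIR(tasks=[TaskIR(container_arch="arm")])))  # arm
print(resolve_arch(HarnessIR()))  # unknown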

File tree

13 files changed: +888 −187 lines


README.md

Lines changed: 1 addition & 1 deletion
@@ -59,7 +59,7 @@ NeMo Evaluator Launcher provides pre-built evaluation containers for different e
 | **vlmevalkit** | Vision-language model evaluation | [Link](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/eval-factory/containers/vlmevalkit) | `25.11` | AI2D, ChartQA, MMMU, MathVista-MINI, OCRBench, SlideVQA |

 <!-- BEGIN AUTOGENERATION -->
-<!-- mapping toml checksum: sha256:b7fdaa7f01a641970f864c6aab95d7f9e49b883dee8558e8636eb8018a01388e -->
+<!-- mapping toml checksum: sha256:881e6d1de31824c9e77a3e13c0a9ab988d6bab7cc9fab5b298ef1e5b1bdf1af9 -->
 <!--
 | Container | Description | NGC Catalog | Latest Tag | Supported benchmarks |
 |-----------|-------------|-------------|------------| ------------|

packages/nemo-evaluator-launcher/scripts/autogen_task_yamls.py

Lines changed: 22 additions & 2 deletions
@@ -115,6 +115,7 @@ def generate_yaml(self) -> dict:
            "harness": self.task_ir.harness,
            "container": self.task_ir.container,
            "container_digest": self.task_ir.container_digest,
+            "container_arch": getattr(self.task_ir, "container_arch", None),
            "defaults": self.task_ir.defaults,
        }

@@ -190,6 +191,9 @@ def generate_markdown_section(self, harness_id: str) -> list[str]:
        lines.append(str(self.task_ir.container_digest))
        lines.append("```")
        lines.append("")
+        container_arch = getattr(self.task_ir, "container_arch", None) or "unknown"
+        lines.append(f"**Container Arch:** `{container_arch}`")
+        lines.append("")
        if task_type:
            lines.append(f"**Task Type:** `{task_type}`")
            lines.append("")
@@ -522,12 +526,13 @@ generate_benchmarks_table_markdown(

    lines.append("```{list-table}")
    lines.append(":header-rows: 1")
-    lines.append(":widths: 20 25 15 15 25")
+    lines.append(":widths: 18 24 14 12 8 24")
    lines.append("")
    lines.append("* - Container")
    lines.append(" - Description")
    lines.append(" - NGC Catalog")
    lines.append(" - Latest Tag")
+    lines.append(" - Arch")
    lines.append(" - Tasks")

    # Sort harnesses alphabetically for consistent ordering
@@ -579,6 +584,12 @@
        # If no version found, use placeholder as fallback
        latest_tag = version if version else "{{ docker_compose_latest }}"

+        arch = (
+            harness.harness_ir.arch
+            or (harness.tasks[0].container_arch if harness.tasks else None)
+            or "unknown"
+        )
+
        # Escape special characters in markdown (but preserve links)
        # Some harnesses may store description as non-string types (e.g., list).
        if isinstance(description, list):
@@ -591,6 +602,7 @@
        lines.append(f" - {description_display}")
        lines.append(f" - {ngc_link}")
        lines.append(f" - {latest_tag}")
+        lines.append(f" - `{arch}`")
        lines.append(f" - {tasks_display}")

    lines.append("```")
@@ -650,11 +662,12 @@ generate_benchmarks_table_internal_markdown(

    lines.append("```{list-table}")
    lines.append(":header-rows: 1")
-    lines.append(":widths: 20 30 25 25")
+    lines.append(":widths: 18 30 18 8 26")
    lines.append("")
    lines.append("* - Container")
    lines.append(" - Description")
    lines.append(" - Container Ref")
+    lines.append(" - Arch")
    lines.append(" - Tasks")

    sorted_harnesses = sorted(harnesses, key=lambda h: h.harness_name.lower())
@@ -693,10 +706,16 @@
        description_display = description_text.replace("|", "\\|").replace("\n", " ")

        container_ref_display = f"`{container_ref}`" if container_ref else "N/A"
+        arch = (
+            harness.harness_ir.arch
+            or (harness.tasks[0].container_arch if harness.tasks else None)
+            or "unknown"
+        )

        lines.append(f"* - {container_display}")
        lines.append(f" - {description_display}")
        lines.append(f" - {container_ref_display}")
+        lines.append(f" - `{arch}`")
        lines.append(f" - {tasks_display}")

        lines.append("```")
@@ -903,6 +922,7 @@ def main():
            url=None,
            container=container,
            container_digest=container_digest,
+            arch=None,
        )
        harnesses.append(_HarnessAutogen(harness_ir, tasks))

packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/api/functional.py

Lines changed: 2 additions & 1 deletion
@@ -36,7 +36,7 @@ def get_tasks_list() -> list[list[Any]]:
    """Get a list of available tasks from the mapping.

    Returns:
-        list[list[Any]]: Each sublist contains task name, endpoint type, harness, container, description, and type.
+        list[list[Any]]: Each sublist contains task name, endpoint type, harness, container, arch, description, and type.
    """
    mapping = load_tasks_mapping()
    data = [
@@ -45,6 +45,7 @@ def get_tasks_list() -> list[list[Any]]:
            task_data.get("endpoint_type"),
            task_data.get("harness"),
            task_data.get("container"),
+            task_data.get("arch", ""),
            task_data.get("description", ""),
            task_data.get("type", ""),
        ]
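
Assuming the module path implied by this file, a downstream caller could read the new column roughly as follows; this is an illustrative sketch, and the index used for `arch` simply follows the documented column order above.

# Illustrative consumer of get_tasks_list(); the column order (task name,
# endpoint type, harness, container, arch, description, type) follows the
# docstring patched above.
from nemo_evaluator_launcher.api.functional import get_tasks_list

for row in get_tasks_list():
    task_name, arch = row[0], row[4]  # arch is the fifth column
    print(f"{task_name:<40} {arch or 'unknown'}")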

packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/cli/ls_tasks.py

Lines changed: 100 additions & 45 deletions
@@ -95,8 +95,6 @@ def execute(self) -> None:
                if isinstance(endpoint_types, str):
                    endpoint_types = [endpoint_types]

-                task_type = task.defaults.get("config", {}).get("type", "")
-
                data.append(
                    [
                        task.name,  # task
@@ -105,8 +103,8 @@ def execute(self) -> None:
                        else endpoint_types,  # endpoint_type
                        task.harness,  # harness
                        task.container,  # container
+                        getattr(task, "container_arch", "") or "",  # arch
                        task.description,  # description
-                        task_type,  # type
                    ]
                )
        else:
@@ -121,13 +119,17 @@ def execute(self) -> None:
            "endpoint_type",
            "harness",
            "container",
+            "arch",
            "description",
-            "type",
        ]
        supported_benchmarks = []
        for task_data in data:
-            assert len(task_data) == len(headers)
-            supported_benchmarks.append(dict(zip(headers, task_data)))
+            if len(task_data) < len(headers):
+                raise ValueError(
+                    f"Invalid task row shape: expected at least {len(headers)} columns, got {len(task_data)}"
+                )
+            # Backwards/forwards compat: allow extra columns and ignore them.
+            supported_benchmarks.append(dict(zip(headers, task_data[: len(headers)])))

        if self.json:
            print(json.dumps({"tasks": supported_benchmarks}, indent=2))
@@ -140,6 +142,50 @@ def _print_table(self, tasks: list[dict]) -> None:
            print("No tasks found.")
            return

+        def _truncate(s: str, max_len: int) -> str:
+            s = s or ""
+            if max_len <= 0:
+                return ""
+            if len(s) <= max_len:
+                return s
+            if max_len <= 3:
+                return s[:max_len]
+            return s[: max_len - 3] + "..."
+
+        def _infer_arch(container: str, container_tasks: list[dict]) -> str:
+            # Prefer explicit arch from task IRs.
+            for t in container_tasks:
+                a = (t.get("arch") or "").strip()
+                if a:
+                    return a
+
+            # Heuristic fallback: look for common suffixes in tag.
+            c = (container or "").lower()
+            if "arm64" in c or "aarch64" in c:
+                return "arm"
+            if "amd64" in c or "x86_64" in c:
+                return "amd"
+            return "unknown"
+
+        def _infer_registry(container: str) -> str:
+            try:
+                from nemo_evaluator_launcher.common.container_metadata.utils import (
+                    parse_container_image,
+                )
+
+                registry_type, _registry_url, _repo, _ref = parse_container_image(
+                    container
+                )
+                return str(registry_type)
+            except Exception:
+                # Best-effort fallback for unknown formats.
+                c = (container or "").lower()
+                if "nvcr.io/" in c or c.startswith("nvcr.io"):
+                    return "nvcr"
+                if "gitlab" in c:
+                    return "gitlab"
+                return ""
+
        # Group tasks by harness and container
        grouped = defaultdict(lambda: defaultdict(list))
        for task in tasks:
@@ -156,73 +202,82 @@ def _print_table(self, tasks: list[dict]) -> None:
                if j > 0:
                    print()  # Spacing between containers

-                # Prepare task table first to get column widths
-                task_header = "task"
                rows = []
                for task in container_tasks:
-                    task_name = task["task"]
-                    endpoint_type = task["endpoint_type"]
-                    task_type = task.get("type", "")
-                    description = task.get("description", "")
-                    # Format: task_name (endpoint_type, task_type) - first 30 chars...
-                    description_preview = description[:30] if description else ""
-                    if len(description) > 30:
-                        description_preview += "..."
-
-                    # Build the display name
-                    type_part = f"{endpoint_type}"
-                    if task_type:
-                        type_part += f", {task_type}"
-                    display_name = f"{task_name} ({type_part})"
-                    if description_preview:
-                        display_name = f"{display_name} - {description_preview}"
-                    rows.append(display_name)
-
-                # Sort tasks alphabetically for better readability
-                rows.sort()
-
-                # Calculate column width
-                max_task_width = (
-                    max(len(task_header), max(len(str(row)) for row in rows)) + 2
-                )
+                    rows.append(
+                        {
+                            "task": str(task.get("task", "")),
+                            "endpoint": str(task.get("endpoint_type", "")),
+                            "description": str(task.get("description", "")),
+                        }
+                    )
+                rows.sort(key=lambda r: r["task"].lower())

                # Calculate required width for header content
                harness_line = f"harness: {harness}"
                container_line = f"container: {container}"
+                arch_line = f"arch: {_infer_arch(container, container_tasks)}"
+                registry_line = f"registry: {_infer_registry(container)}"
                header_content_width = (
-                    max(len(harness_line), len(container_line)) + 4
+                    max(
+                        len(harness_line),
+                        len(container_line),
+                        len(arch_line),
+                        len(registry_line),
+                    )
+                    + 4
                )  # +4 for "| " and " |"

-                # Use the larger of the two widths
-                table_width = max(max_task_width, header_content_width)
-
                # Limit separator width to prevent overflow on small terminals
                # Use terminal width if available, otherwise cap at 120 characters
                import shutil

                try:
                    terminal_width = shutil.get_terminal_size().columns
-                    separator_width = min(
-                        table_width, terminal_width - 2
-                    )  # -2 for safety margin
+                    separator_width = min(terminal_width - 2, 160)  # -2 safety margin
                except Exception:
                    # Fallback if terminal size can't be determined
-                    separator_width = min(table_width, 120)
+                    separator_width = 120
+
+                separator_width = max(separator_width, min(header_content_width, 160))
+
+                # Table columns (keep compact and stable).
+                col_task = 36
+                col_endpoint = 14
+                sep = " "
+                fixed = col_task + col_endpoint + len(sep) * 2
+                col_desc = max(20, separator_width - fixed)

                # Print combined header with harness and container info - colorized
                # Keys: magenta, Values: cyan (matching logging utils)
                print(bold("=" * separator_width))
                print(f"{magenta('harness:')} {cyan(str(harness))}")
                print(f"{magenta('container:')} {cyan(str(container))}")
+                arch = _infer_arch(container, container_tasks)
+                registry = _infer_registry(container)
+                print(f"{magenta('arch:')} {cyan(str(arch))}")
+                if registry:
+                    print(f"{magenta('registry:')} {cyan(str(registry))}")

                # Print task table header separator
-                print(" " * table_width)
-                print(bold(f"{task_header:<{table_width}}"))
+                print()
+                print(
+                    bold(
+                        f"{'task':<{col_task}}{sep}"
+                        f"{'endpoint':<{col_endpoint}}{sep}"
+                        f"{'description':<{col_desc}}"
+                    )
+                )
                print(bold("-" * separator_width))

                # Print task rows - use grey for task descriptions
-                for row in rows:
-                    print(f"{grey(str(row)):<{table_width}}")
+                for r in rows:
+                    line = (
+                        f"{_truncate(r['task'], col_task):<{col_task}}{sep}"
+                        f"{_truncate(r['endpoint'], col_endpoint):<{col_endpoint}}{sep}"
+                        f"{_truncate(r['description'], col_desc):<{col_desc}}"
+                    )
+                    print(grey(line))

                print(bold("-" * separator_width))
                # Show task count - grey for count text
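
For reference, the tag heuristic that `_infer_arch` above falls back to (used only when no task IR carries an explicit arch) can be restated standalone as follows; the image references in the checks are hypothetical examples, not real tags.

# Standalone restatement of _infer_arch's heuristic fallback branch.
def infer_arch_from_tag(container: str) -> str:
    c = (container or "").lower()
    if "arm64" in c or "aarch64" in c:
        return "arm"
    if "amd64" in c or "x86_64" in c:
        return "amd"
    return "unknown"


# Hypothetical image references, shown only to illustrate the mapping.
assert infer_arch_from_tag("nvcr.io/nvidia/eval-factory/example:25.11-arm64") == "arm"
assert infer_arch_from_tag("nvcr.io/nvidia/eval-factory/example:25.11") == "unknown"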
