Skip to content

Commit 1a22ed9

Browse files
committed
chore(multi): adds multiarch info to ls tasks and IRs
Signed-off-by: Alex Gronskiy <[email protected]>
1 parent 0569b8c commit 1a22ed9

File tree

13 files changed

+521
-118
lines changed

13 files changed

+521
-118
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,7 @@ NeMo Evaluator Launcher provides pre-built evaluation containers for different e
5959
| **vlmevalkit** | Vision-language model evaluation | [Link](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/eval-factory/containers/vlmevalkit) | `25.11` | AI2D, ChartQA, MMMU, MathVista-MINI, OCRBench, SlideVQA |
6060

6161
<!-- BEGIN AUTOGENERATION -->
62-
<!-- mapping toml checksum: sha256:b7fdaa7f01a641970f864c6aab95d7f9e49b883dee8558e8636eb8018a01388e -->
62+
<!-- mapping toml checksum: sha256:881e6d1de31824c9e77a3e13c0a9ab988d6bab7cc9fab5b298ef1e5b1bdf1af9 -->
6363
<!--
6464
| Container | Description | NGC Catalog | Latest Tag | Supported benchmarks |
6565
|-----------|-------------|-------------|------------| ------------|

packages/nemo-evaluator-launcher/scripts/autogen_task_yamls.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ def generate_yaml(self) -> dict:
115115
"harness": self.task_ir.harness,
116116
"container": self.task_ir.container,
117117
"container_digest": self.task_ir.container_digest,
118+
"container_arch": getattr(self.task_ir, "container_arch", None),
118119
"defaults": self.task_ir.defaults,
119120
}
120121

@@ -190,6 +191,9 @@ def generate_markdown_section(self, harness_id: str) -> list[str]:
190191
lines.append(str(self.task_ir.container_digest))
191192
lines.append("```")
192193
lines.append("")
194+
container_arch = getattr(self.task_ir, "container_arch", None) or "unknown"
195+
lines.append(f"**Container Arch:** `{container_arch}`")
196+
lines.append("")
193197
if task_type:
194198
lines.append(f"**Task Type:** `{task_type}`")
195199
lines.append("")
@@ -522,12 +526,13 @@ def generate_benchmarks_table_markdown(
522526

523527
lines.append("```{list-table}")
524528
lines.append(":header-rows: 1")
525-
lines.append(":widths: 20 25 15 15 25")
529+
lines.append(":widths: 18 24 14 12 8 24")
526530
lines.append("")
527531
lines.append("* - Container")
528532
lines.append(" - Description")
529533
lines.append(" - NGC Catalog")
530534
lines.append(" - Latest Tag")
535+
lines.append(" - Arch")
531536
lines.append(" - Tasks")
532537

533538
# Sort harnesses alphabetically for consistent ordering
@@ -579,6 +584,12 @@ def generate_benchmarks_table_markdown(
579584
# If no version found, use placeholder as fallback
580585
latest_tag = version if version else "{{ docker_compose_latest }}"
581586

587+
arch = (
588+
harness.harness_ir.arch
589+
or (harness.tasks[0].container_arch if harness.tasks else None)
590+
or "unknown"
591+
)
592+
582593
# Escape special characters in markdown (but preserve links)
583594
# Some harnesses may store description as non-string types (e.g., list).
584595
if isinstance(description, list):
@@ -591,6 +602,7 @@ def generate_benchmarks_table_markdown(
591602
lines.append(f" - {description_display}")
592603
lines.append(f" - {ngc_link}")
593604
lines.append(f" - {latest_tag}")
605+
lines.append(f" - `{arch}`")
594606
lines.append(f" - {tasks_display}")
595607

596608
lines.append("```")
@@ -650,11 +662,12 @@ def generate_benchmarks_table_internal_markdown(
650662

651663
lines.append("```{list-table}")
652664
lines.append(":header-rows: 1")
653-
lines.append(":widths: 20 30 25 25")
665+
lines.append(":widths: 18 30 18 8 26")
654666
lines.append("")
655667
lines.append("* - Container")
656668
lines.append(" - Description")
657669
lines.append(" - Container Ref")
670+
lines.append(" - Arch")
658671
lines.append(" - Tasks")
659672

660673
sorted_harnesses = sorted(harnesses, key=lambda h: h.harness_name.lower())
@@ -693,10 +706,16 @@ def generate_benchmarks_table_internal_markdown(
693706
description_display = description_text.replace("|", "\\|").replace("\n", " ")
694707

695708
container_ref_display = f"`{container_ref}`" if container_ref else "N/A"
709+
arch = (
710+
harness.harness_ir.arch
711+
or (harness.tasks[0].container_arch if harness.tasks else None)
712+
or "unknown"
713+
)
696714

697715
lines.append(f"* - {container_display}")
698716
lines.append(f" - {description_display}")
699717
lines.append(f" - {container_ref_display}")
718+
lines.append(f" - `{arch}`")
700719
lines.append(f" - {tasks_display}")
701720

702721
lines.append("```")
@@ -903,6 +922,7 @@ def main():
903922
url=None,
904923
container=container,
905924
container_digest=container_digest,
925+
arch=None,
906926
)
907927
harnesses.append(_HarnessAutogen(harness_ir, tasks))
908928

packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/api/functional.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def get_tasks_list() -> list[list[Any]]:
3636
"""Get a list of available tasks from the mapping.
3737
3838
Returns:
39-
list[list[Any]]: Each sublist contains task name, endpoint type, harness, container, description, and type.
39+
list[list[Any]]: Each sublist contains task name, endpoint type, harness, container, arch, description, and type.
4040
"""
4141
mapping = load_tasks_mapping()
4242
data = [
@@ -45,6 +45,7 @@ def get_tasks_list() -> list[list[Any]]:
4545
task_data.get("endpoint_type"),
4646
task_data.get("harness"),
4747
task_data.get("container"),
48+
task_data.get("arch", ""),
4849
task_data.get("description", ""),
4950
task_data.get("type", ""),
5051
]

packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/cli/ls_tasks.py

Lines changed: 93 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,6 @@ def execute(self) -> None:
9595
if isinstance(endpoint_types, str):
9696
endpoint_types = [endpoint_types]
9797

98-
task_type = task.defaults.get("config", {}).get("type", "")
99-
10098
data.append(
10199
[
102100
task.name, # task
@@ -105,8 +103,8 @@ def execute(self) -> None:
105103
else endpoint_types, # endpoint_type
106104
task.harness, # harness
107105
task.container, # container
106+
getattr(task, "container_arch", "") or "", # arch
108107
task.description, # description
109-
task_type, # type
110108
]
111109
)
112110
else:
@@ -121,13 +119,17 @@ def execute(self) -> None:
121119
"endpoint_type",
122120
"harness",
123121
"container",
122+
"arch",
124123
"description",
125-
"type",
126124
]
127125
supported_benchmarks = []
128126
for task_data in data:
129-
assert len(task_data) == len(headers)
130-
supported_benchmarks.append(dict(zip(headers, task_data)))
127+
if len(task_data) < len(headers):
128+
raise ValueError(
129+
f"Invalid task row shape: expected at least {len(headers)} columns, got {len(task_data)}"
130+
)
131+
# Backwards/forwards compat: allow extra columns and ignore them.
132+
supported_benchmarks.append(dict(zip(headers, task_data[: len(headers)])))
131133

132134
if self.json:
133135
print(json.dumps({"tasks": supported_benchmarks}, indent=2))
@@ -140,6 +142,48 @@ def _print_table(self, tasks: list[dict]) -> None:
140142
print("No tasks found.")
141143
return
142144

145+
def _truncate(s: str, max_len: int) -> str:
146+
s = s or ""
147+
if max_len <= 0:
148+
return ""
149+
if len(s) <= max_len:
150+
return s
151+
if max_len <= 3:
152+
return s[:max_len]
153+
return s[: max_len - 3] + "..."
154+
155+
def _infer_arch(container: str, container_tasks: list[dict]) -> str:
156+
# Prefer explicit arch from task IRs.
157+
for t in container_tasks:
158+
a = (t.get("arch") or "").strip()
159+
if a:
160+
return a
161+
162+
# Heuristic fallback: look for common suffixes in tag.
163+
c = (container or "").lower()
164+
if "arm64" in c or "aarch64" in c:
165+
return "arm"
166+
if "amd64" in c or "x86_64" in c:
167+
return "amd"
168+
return "unknown"
169+
170+
def _infer_registry(container: str) -> str:
171+
try:
172+
from nemo_evaluator_launcher.common.container_metadata.utils import (
173+
parse_container_image,
174+
)
175+
176+
registry_type, _registry_url, _repo, _ref = parse_container_image(container)
177+
return str(registry_type)
178+
except Exception:
179+
# Best-effort fallback for unknown formats.
180+
c = (container or "").lower()
181+
if "nvcr.io/" in c or c.startswith("nvcr.io"):
182+
return "nvcr"
183+
if "gitlab" in c:
184+
return "gitlab"
185+
return ""
186+
143187
# Group tasks by harness and container
144188
grouped = defaultdict(lambda: defaultdict(list))
145189
for task in tasks:
@@ -156,73 +200,77 @@ def _print_table(self, tasks: list[dict]) -> None:
156200
if j > 0:
157201
print() # Spacing between containers
158202

159-
# Prepare task table first to get column widths
160-
task_header = "task"
161203
rows = []
162204
for task in container_tasks:
163-
task_name = task["task"]
164-
endpoint_type = task["endpoint_type"]
165-
task_type = task.get("type", "")
166-
description = task.get("description", "")
167-
# Format: task_name (endpoint_type, task_type) - first 30 chars...
168-
description_preview = description[:30] if description else ""
169-
if len(description) > 30:
170-
description_preview += "..."
171-
172-
# Build the display name
173-
type_part = f"{endpoint_type}"
174-
if task_type:
175-
type_part += f", {task_type}"
176-
display_name = f"{task_name} ({type_part})"
177-
if description_preview:
178-
display_name = f"{display_name} - {description_preview}"
179-
rows.append(display_name)
180-
181-
# Sort tasks alphabetically for better readability
182-
rows.sort()
183-
184-
# Calculate column width
185-
max_task_width = (
186-
max(len(task_header), max(len(str(row)) for row in rows)) + 2
187-
)
205+
rows.append(
206+
{
207+
"task": str(task.get("task", "")),
208+
"endpoint": str(task.get("endpoint_type", "")),
209+
"description": str(task.get("description", "")),
210+
}
211+
)
212+
rows.sort(key=lambda r: r["task"].lower())
188213

189214
# Calculate required width for header content
190215
harness_line = f"harness: {harness}"
191216
container_line = f"container: {container}"
217+
arch_line = f"arch: {_infer_arch(container, container_tasks)}"
218+
registry_line = f"registry: {_infer_registry(container)}"
192219
header_content_width = (
193-
max(len(harness_line), len(container_line)) + 4
220+
max(len(harness_line), len(container_line), len(arch_line), len(registry_line))
221+
+ 4
194222
) # +4 for "| " and " |"
195223

196-
# Use the larger of the two widths
197-
table_width = max(max_task_width, header_content_width)
198-
199224
# Limit separator width to prevent overflow on small terminals
200225
# Use terminal width if available, otherwise cap at 120 characters
201226
import shutil
202227

203228
try:
204229
terminal_width = shutil.get_terminal_size().columns
205-
separator_width = min(
206-
table_width, terminal_width - 2
207-
) # -2 for safety margin
230+
separator_width = min(terminal_width - 2, 160) # -2 safety margin
208231
except Exception:
209232
# Fallback if terminal size can't be determined
210-
separator_width = min(table_width, 120)
233+
separator_width = 120
234+
235+
separator_width = max(separator_width, min(header_content_width, 160))
236+
237+
# Table columns (keep compact and stable).
238+
col_task = 36
239+
col_endpoint = 14
240+
sep = " "
241+
fixed = col_task + col_endpoint + len(sep) * 2
242+
col_desc = max(20, separator_width - fixed)
211243

212244
# Print combined header with harness and container info - colorized
213245
# Keys: magenta, Values: cyan (matching logging utils)
214246
print(bold("=" * separator_width))
215247
print(f"{magenta('harness:')} {cyan(str(harness))}")
216248
print(f"{magenta('container:')} {cyan(str(container))}")
249+
arch = _infer_arch(container, container_tasks)
250+
registry = _infer_registry(container)
251+
print(f"{magenta('arch:')} {cyan(str(arch))}")
252+
if registry:
253+
print(f"{magenta('registry:')} {cyan(str(registry))}")
217254

218255
# Print task table header separator
219-
print(" " * table_width)
220-
print(bold(f"{task_header:<{table_width}}"))
256+
print()
257+
print(
258+
bold(
259+
f"{'task':<{col_task}}{sep}"
260+
f"{'endpoint':<{col_endpoint}}{sep}"
261+
f"{'description':<{col_desc}}"
262+
)
263+
)
221264
print(bold("-" * separator_width))
222265

223266
# Print task rows - use grey for task descriptions
224-
for row in rows:
225-
print(f"{grey(str(row)):<{table_width}}")
267+
for r in rows:
268+
line = (
269+
f"{_truncate(r['task'], col_task):<{col_task}}{sep}"
270+
f"{_truncate(r['endpoint'], col_endpoint):<{col_endpoint}}{sep}"
271+
f"{_truncate(r['description'], col_desc):<{col_desc}}"
272+
)
273+
print(grey(line))
226274

227275
print(bold("-" * separator_width))
228276
# Show task count - grey for count text

packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/common/container_metadata/intermediate_repr.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,11 @@ class HarnessIntermediateRepresentation:
5050
url: Optional[str]
5151
container: str
5252
container_digest: Optional[str]
53+
# Architecture label for the container image:
54+
# - "amd": linux/amd64 only
55+
# - "arm": linux/arm64 only
56+
# - "multiarch": includes both amd64 and arm64
57+
arch: Optional[str] = None
5358

5459
def to_dict(self) -> dict[str, Any]:
5560
"""Convert to dictionary representation."""
@@ -60,6 +65,7 @@ def to_dict(self) -> dict[str, Any]:
6065
"url": self.url,
6166
"container": self.container,
6267
"container_digest": self.container_digest,
68+
"arch": self.arch,
6369
}
6470

6571

@@ -73,6 +79,7 @@ class TaskIntermediateRepresentation:
7379
container: str
7480
container_digest: Optional[str]
7581
defaults: dict[str, Any]
82+
container_arch: Optional[str] = None
7683

7784
def to_dict(self) -> dict[str, Any]:
7885
"""Convert to dictionary representation."""
@@ -82,6 +89,7 @@ def to_dict(self) -> dict[str, Any]:
8289
"harness": self.harness,
8390
"container": self.container,
8491
"container_digest": self.container_digest,
92+
"container_arch": self.container_arch,
8593
"defaults": self.defaults,
8694
}
8795

@@ -171,13 +179,17 @@ def _parse_tasks_from_yaml_data(
171179
container_digest = task_dict.get("container_digest") or (
172180
harness_ir.container_digest if harness_ir else None
173181
)
182+
container_arch = task_dict.get("container_arch") or (
183+
harness_ir.arch if harness_ir else None
184+
)
174185

175186
task_ir = TaskIntermediateRepresentation(
176187
name=task_dict["name"],
177188
description=task_dict.get("description", ""),
178189
harness=harness_name,
179190
container=container,
180191
container_digest=container_digest,
192+
container_arch=container_arch,
181193
defaults=task_dict.get("defaults", {}),
182194
)
183195
tasks.append(task_ir)
@@ -218,6 +230,7 @@ def _parse_harnesses_from_yaml_data(
218230
url=harness_dict.get("url"),
219231
container=harness_dict.get("container", ""),
220232
container_digest=harness_dict.get("container_digest"),
233+
arch=harness_dict.get("arch"),
221234
)
222235

223236
# Keep first occurrence if duplicates exist
@@ -250,6 +263,7 @@ def _infer_harnesses_from_tasks(
250263
container_digest=str(task.container_digest)
251264
if task.container_digest
252265
else None,
266+
arch=str(task.container_arch) if task.container_arch else None,
253267
)
254268
return harnesses
255269

0 commit comments

Comments
 (0)