Skip to content

Commit fdb9eb5

Browse files
committed
chore: simplify the multiarch -> single arch resolution on the registry side
Signed-off-by: Alex Gronskiy <[email protected]>
1 parent edd6f57 commit fdb9eb5

File tree

2 files changed

+36
-72
lines changed

2 files changed

+36
-72
lines changed

packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/common/container_metadata/loading.py

Lines changed: 24 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
import json
2020
import os
2121
import pathlib
22-
import platform
2322
import tarfile
2423
import tempfile
2524
from typing import Optional
@@ -479,65 +478,6 @@ def find_file_matching_pattern_in_image_layers(
479478
authenticator.authenticate(repository=repository)
480479
# Don't fail here - authentication may fail but public containers can still be accessed
481480

482-
def _normalize_arch(a: object) -> Optional[str]:
483-
if not a:
484-
return None
485-
s = str(a).lower()
486-
if s in {"amd64", "x86_64"}:
487-
return "amd64"
488-
if s in {"arm64", "aarch64"}:
489-
return "arm64"
490-
return None
491-
492-
def _preferred_arch() -> str:
493-
return _normalize_arch(platform.machine()) or "amd64"
494-
495-
def _pick_platform_manifest_digest(index_manifest: dict) -> Optional[str]:
496-
manifests = index_manifest.get("manifests")
497-
if not isinstance(manifests, list) or not manifests:
498-
return None
499-
500-
pref = _preferred_arch()
501-
# Prefer linux + preferred arch, then linux/amd64, then linux/arm64, then first digest.
502-
best_score = 10_000
503-
best_digest: Optional[str] = None
504-
505-
for m in manifests:
506-
if not isinstance(m, dict):
507-
continue
508-
plat = m.get("platform") or {}
509-
if not isinstance(plat, dict):
510-
continue
511-
os_name = str(plat.get("os") or "").lower()
512-
arch = _normalize_arch(plat.get("architecture"))
513-
digest = m.get("digest")
514-
if not (isinstance(digest, str) and digest.startswith("sha256:")):
515-
continue
516-
if os_name != "linux" or not arch:
517-
continue
518-
519-
score = 100
520-
if arch == pref:
521-
score = 0
522-
elif arch == "amd64":
523-
score = 10
524-
elif arch == "arm64":
525-
score = 20
526-
527-
if score < best_score:
528-
best_score = score
529-
best_digest = digest
530-
531-
if best_digest:
532-
return best_digest
533-
534-
for m in manifests:
535-
if isinstance(m, dict):
536-
d = m.get("digest")
537-
if isinstance(d, str) and d.startswith("sha256:"):
538-
return d
539-
return None
540-
541481
# Get top-level manifest and digest (tag may resolve to multi-arch index).
542482
top_manifest, top_digest = authenticator.get_manifest_and_digest(
543483
repository, reference
@@ -554,13 +494,30 @@ def _pick_platform_manifest_digest(index_manifest: dict) -> Optional[str]:
554494
if isinstance(top_manifest, dict) and isinstance(
555495
top_manifest.get("manifests"), list
556496
):
557-
platform_digest = _pick_platform_manifest_digest(top_manifest)
558-
if platform_digest:
559-
resolved, _ = authenticator.get_manifest_and_digest(
560-
repository, platform_digest
561-
)
562-
if resolved:
563-
manifest = resolved
497+
# Prefer the registry's default platform resolution by requesting a single-image manifest.
498+
single_accept = (
499+
"application/vnd.oci.image.manifest.v1+json, "
500+
"application/vnd.docker.distribution.manifest.v2+json"
501+
)
502+
resolved, _ = authenticator.get_manifest_and_digest(
503+
repository, reference, accept=single_accept
504+
)
505+
if resolved:
506+
manifest = resolved
507+
508+
# If a registry still returns an index (ignoring Accept), fall back to the
509+
# first resolvable sha256 digest entry for layer inspection. This does NOT affect recorded digests.
510+
if isinstance(manifest, dict) and isinstance(manifest.get("manifests"), list):
511+
for m in manifest.get("manifests") or []:
512+
if isinstance(m, dict):
513+
d = m.get("digest")
514+
if isinstance(d, str) and d.startswith("sha256:"):
515+
resolved2, _ = authenticator.get_manifest_and_digest(
516+
repository, d, accept=single_accept
517+
)
518+
if resolved2:
519+
manifest = resolved2
520+
break
564521

565522
# Check cache with digest validation (always validates digest)
566523
# For pattern searches, use pattern-based cache key (not resolved path)

packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/common/container_metadata/registries.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,9 @@ def _read_docker_credentials(registry_url: str) -> Optional[Tuple[str, str]]:
217217
return None
218218

219219

220-
def _retry_without_auth(url: str, stream: bool = False) -> Optional[requests.Response]:
220+
def _retry_without_auth(
221+
url: str, stream: bool = False, accept: Optional[str] = None
222+
) -> Optional[requests.Response]:
221223
"""Retry HTTP request without authentication headers.
222224
223225
Used for accessing public containers that may return 401/403 even for
@@ -231,7 +233,7 @@ def _retry_without_auth(url: str, stream: bool = False) -> Optional[requests.Res
231233
Response object if successful, None otherwise
232234
"""
233235
temp_session = requests.Session()
234-
temp_session.headers.update({"Accept": _DOCKER_MANIFEST_MEDIA_TYPE})
236+
temp_session.headers.update({"Accept": accept or _DOCKER_MANIFEST_MEDIA_TYPE})
235237
response = temp_session.get(url, stream=stream)
236238
if response.status_code == 200:
237239
return response
@@ -254,7 +256,7 @@ def authenticate(self, repository: Optional[str] = None) -> bool:
254256
pass
255257

256258
def get_manifest_and_digest(
257-
self, repository: str, reference: str
259+
self, repository: str, reference: str, accept: Optional[str] = None
258260
) -> Tuple[Optional[Dict], Optional[str]]:
259261
"""Get the manifest and digest for a specific image reference.
260262
@@ -276,7 +278,12 @@ def get_manifest_and_digest(
276278
url = f"https://{self.registry_url}/v2/{repository}/manifests/{reference}"
277279
logger.debug("Fetching manifest", url=url)
278280

279-
response = self.session.get(url)
281+
accept_header = (
282+
accept
283+
or self.session.headers.get("Accept")
284+
or _DOCKER_MANIFEST_MEDIA_TYPE
285+
)
286+
response = self.session.get(url, headers={"Accept": accept_header})
280287

281288
if response.status_code == 200:
282289
manifest = response.json()
@@ -322,7 +329,7 @@ def get_manifest_and_digest(
322329
"Got 401/403, retrying without authentication",
323330
status_code=response.status_code,
324331
)
325-
retry_response = _retry_without_auth(url)
332+
retry_response = _retry_without_auth(url, accept=accept_header)
326333
if retry_response:
327334
manifest = retry_response.json()
328335
headers_dict = dict(retry_response.headers)

0 commit comments

Comments
 (0)