Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions .common-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ trigger-pipeline:
variables:
DIST: rhel9

# Hidden template: sets DIST to rhel10 for any job that extends it.
.dist-rhel10:
  variables:
    DIST: rhel10

.dist-fedora36:
variables:
DIST: fedora36
Expand Down Expand Up @@ -207,6 +211,14 @@ trigger-pipeline:
rules:
- if: $CI_PIPELINE_SOURCE != "schedule" && $CI_COMMIT_TAG == null

.release-rhel10:
  # Perform for each DRIVER_VERSION.
  # Combines the generic release template with the driver-version template.
  extends:
    - .release-generic
    - .driver-versions
  # Only matches when the pipeline is not a scheduled one and no commit tag
  # is set.
  rules:
    - if: '$CI_PIPELINE_SOURCE != "schedule" && $CI_COMMIT_TAG == null'

.release:
# Perform for each DRIVER_VERSION
extends:
Expand Down Expand Up @@ -254,6 +266,15 @@ trigger-pipeline:
OUT_REGISTRY: "${NGC_REGISTRY}"
OUT_IMAGE_NAME: "${NGC_STAGING_REGISTRY}/driver"

# Staging release template for RHEL 10: inherits .release-rhel10 and sets the
# output registry credentials and image name from the NGC_* variables.
.release:staging-rhel10:
  extends:
    - .release-rhel10
  variables:
    OUT_REGISTRY_USER: "${NGC_REGISTRY_USER}"
    OUT_REGISTRY_TOKEN: "${NGC_REGISTRY_TOKEN}"
    OUT_REGISTRY: "${NGC_REGISTRY}"
    OUT_IMAGE_NAME: "${NGC_STAGING_REGISTRY}/driver"

# Define an external release step that pushes an image to an external repository.
.release:external:
extends:
Expand Down Expand Up @@ -322,6 +343,13 @@ release:staging-rhel9:
needs:
- image-rhel9

# Staging release job for the RHEL 10 image; runs after image-rhel10.
# NOTE(review): this job extends .release:staging, while the dedicated
# .release:staging-rhel10 template defined in this file is not referenced
# here -- confirm which template (and therefore which rules) is intended.
release:staging-rhel10:
  extends:
    - .release:staging
    - .dist-rhel10
  needs:
    - image-rhel10

.release:staging-precompiled:
stage: release
variables:
Expand Down
9 changes: 9 additions & 0 deletions .github/workflows/image.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,16 @@ jobs:
- ubuntu24.04
- rhel8
- rhel9
- rhel10
ispr:
- ${{github.event_name == 'pull_request'}}
exclude:
- dist: ubuntu24.04
driver: 535.274.02
- dist: rhel10
driver: 535.274.02
- dist: rhel10
driver: 570.195.03
fail-fast: false
steps:
- uses: actions/checkout@v5
Expand Down Expand Up @@ -128,6 +133,10 @@ jobs:
dist: ubuntu24.04
- flavor: azure-fde
dist: ubuntu22.04
- dist: rhel10
driver: 535
- dist: rhel10
driver: 570
steps:
- uses: actions/checkout@v5
name: Check out code
Expand Down
13 changes: 13 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,14 @@ include:
rules:
- if: $CI_PIPELINE_SOURCE != "schedule"

# Define the image build targets
.image-build-rhel10:
  # Perform for each DRIVER_VERSION.
  # Combines the driver-version template with the generic image-build template.
  extends:
    - .driver-versions
    - .image-build-generic
  # Only matches when the pipeline is not a scheduled one.
  rules:
    - if: '$CI_PIPELINE_SOURCE != "schedule"'

image-ubuntu20.04:
extends:
Expand All @@ -93,6 +101,11 @@ image-rhel9:
- .image-build-rhel9
- .dist-rhel9

# RHEL 10 image build job: the rhel10 build template plus DIST=rhel10.
image-rhel10:
  extends:
    - .image-build-rhel10
    - .dist-rhel10

image-fedora36:
extends:
- .image-build
Expand Down
35 changes: 35 additions & 0 deletions .nvidia-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,11 @@ image-rhel9:
- .image-pull
- .dist-rhel9

# RHEL 10 image job for this pipeline: extends the .image-pull template with
# the rhel10 distribution settings.
image-rhel10:
  extends:
    - .image-pull
    - .dist-rhel10

# The .scan step forms the base of the image scan operation performed before releasing
# images.
.scan-generic:
Expand Down Expand Up @@ -333,6 +338,22 @@ scan-rhel9-arm64:
needs:
- image-rhel9

# Scan job for the RHEL 10 image on the amd64 platform; waits for image-rhel10.
scan-rhel10-amd64:
  extends:
    - .scan
    - .dist-rhel10
    - .platform-amd64
  needs:
    - image-rhel10

# Scan job for the RHEL 10 image on the arm64 platform; waits for image-rhel10.
scan-rhel10-arm64:
  extends:
    - .scan
    - .dist-rhel10
    - .platform-arm64
  needs:
    - image-rhel10

.release:ngc-variables:
variables:
OUT_REGISTRY_USER: "${NGC_REGISTRY_USER}"
Expand Down Expand Up @@ -438,6 +459,20 @@ release:ngc-rhel9.6:
variables:
OUT_DIST: "rhel9.6"

# NGC release job for the rhel10 build, with OUT_DIST set to "rhel10.0".
release:ngc-rhel10.0:
  extends:
    - .release:ngc
    - .dist-rhel10
  variables:
    # Quoted so the dotted value is always read as a string.
    OUT_DIST: "rhel10.0"

# NGC release job for the rhel10 build, with OUT_DIST set to "rhel10.1".
release:ngc-rhel10.1:
  extends:
    - .release:ngc
    - .dist-rhel10
  variables:
    # Quoted so the dotted value is always read as a string.
    OUT_DIST: "rhel10.1"

generate-build-info:
stage: ngc-publish
artifacts:
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ OUT_IMAGE_TAG = $(OUT_IMAGE_VERSION)-$(OUT_DIST)
OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)

##### Public rules #####
DISTRIBUTIONS := ubuntu18.04 ubuntu20.04 ubuntu22.04 ubuntu24.04 signed_ubuntu20.04 signed_ubuntu22.04 signed_ubuntu24.04 rhel8 rhel9 flatcar fedora36 sles15.3 precompiled_rhcos
DISTRIBUTIONS := ubuntu18.04 ubuntu20.04 ubuntu22.04 ubuntu24.04 signed_ubuntu20.04 signed_ubuntu22.04 signed_ubuntu24.04 rhel8 rhel9 rhel10 flatcar fedora36 sles15.3 precompiled_rhcos
RHCOS_VERSIONS := rhcos4.14 rhcos4.15 rhcos4.16 rhcos4.17 rhcos4.18 rhel9.6
PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS))
BASE_FROM := noble jammy focal
Expand Down
1 change: 1 addition & 0 deletions ci/localbuild.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ driver_container_build_rhel()
driver_container_build_simple "rhel7"
driver_container_build_simple "rhel8"
driver_container_build_simple "rhel9"
driver_container_build_simple "rhel10"
}

list_all_containers()
Expand Down
1 change: 1 addition & 0 deletions ci/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ done
build "rhel7" "${CONTAINER_VERSION}-rhel7" "$(mk_short_version rhel7)" ""
build "rhel8" "${CONTAINER_VERSION}-rhel8" "$(mk_short_version rhel8)" ""
build "rhel9" "${CONTAINER_VERSION}-rhel9" "$(mk_short_version rhel9)" ""
build "rhel10" "${CONTAINER_VERSION}-rhel10" "$(mk_short_version rhel10)" ""

# Add rhcos tags
docker pull "${REGISTRY}:${CONTAINER_VERSION}-rhel8"
Expand Down
114 changes: 114 additions & 0 deletions rhel10/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
ARG BASE_IMAGE=nvcr.io/nvidia/cuda:13.0.1-base-ubi10

# Build stage: installs a Go toolchain and, only when DRIVER_TYPE=vgpu,
# compiles the vgpu-util helper into /work for the final stage to copy.
FROM ${BASE_IMAGE} AS build

ARG TARGETARCH
ARG GOLANG_VERSION

# Arg to indicate if driver type is either of passthrough(baremetal) or vgpu
ARG DRIVER_TYPE=passthrough
ENV DRIVER_TYPE=$DRIVER_TYPE

SHELL ["/bin/bash", "-c"]

RUN dnf install -y git wget

# Download the Go binary matching the target architecture for multi-arch
# builds. pipefail makes a failed download fail the step directly instead of
# relying on tar to notice the truncated stream.
RUN set -o pipefail && \
    OS_ARCH=${TARGETARCH/x86_64/amd64} && OS_ARCH=${OS_ARCH/aarch64/arm64} && \
    wget -nv -O - https://go.dev/dl/go${GOLANG_VERSION}.linux-${OS_ARCH}.tar.gz \
    | tar -C /usr/local -xz

# key=value form; the space-separated ENV form is legacy syntax.
ENV PATH=/usr/local/go/bin:$PATH

WORKDIR /work

# vgpu-util is only built for vGPU driver images.
RUN if [ "$DRIVER_TYPE" = "vgpu" ]; then \
        git clone https://github.com/NVIDIA/gpu-driver-container driver && \
        cd driver/vgpu/src && \
        go build -o vgpu-util && \
        mv vgpu-util /work; fi

# Final stage: the driver container image. Installs dependencies through
# install.sh, fetches the driver installer for non-vGPU builds, and starts
# `nvidia-driver init` as the entrypoint.
FROM ${BASE_IMAGE}

ARG TARGETARCH
ENV TARGETARCH=$TARGETARCH

SHELL ["/bin/bash", "-c"]

#ARG BASE_URL=http://us.download.nvidia.com/XFree86/Linux-x86_64
ARG BASE_URL=https://us.download.nvidia.com/tesla
ARG DRIVER_VERSION
ENV DRIVER_VERSION=$DRIVER_VERSION
ARG DRIVER_BRANCH
ENV DRIVER_BRANCH=$DRIVER_BRANCH

# Arg to indicate if driver type is either of passthrough/baremetal or vgpu
ARG DRIVER_TYPE=passthrough
ENV DRIVER_TYPE=$DRIVER_TYPE
ARG VGPU_LICENSE_SERVER_TYPE=NLS
ENV VGPU_LICENSE_SERVER_TYPE=$VGPU_LICENSE_SERVER_TYPE
# Disable the vGPU version compatibility check by default
ARG DISABLE_VGPU_VERSION_CHECK=true
ENV DISABLE_VGPU_VERSION_CHECK=$DISABLE_VGPU_VERSION_CHECK
# Avoid dependency of container-toolkit for driver container
ENV NVIDIA_VISIBLE_DEVICES=void

ADD install.sh /tmp/

# Download the NVIDIA repository GPG key and verify it against a pinned
# SHA-256 checksum before it is trusted.
RUN NVIDIA_GPGKEY_SUM=afbea87d3b979b3788ef34223aeeb323ade481128e2c133723ae99b8a51368bb && \
    OS_ARCH=${TARGETARCH/amd64/x86_64} && OS_ARCH=${OS_ARCH/arm64/sbsa} && \
    curl -fsSL "https://developer.download.nvidia.com/compute/cuda/repos/rhel10/$OS_ARCH/CDF6BA43.pub" | sed '/^Version/d' > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
    echo "$NVIDIA_GPGKEY_SUM /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA" | sha256sum -c --strict -

# NOTE(review): extract-vmlinux is fetched from the kernel's master branch, so
# its content is not pinned -- consider pinning to a tag or commit.
RUN sh /tmp/install.sh depinstall && \
    sh /tmp/install.sh setup_cuda_repo && \
    curl -fsSL -o /usr/local/bin/donkey https://github.com/3XX0/donkey/releases/download/v1.1.0/donkey && \
    curl -fsSL -o /usr/local/bin/extract-vmlinux https://raw.githubusercontent.com/torvalds/linux/master/scripts/extract-vmlinux && \
    chmod +x /usr/local/bin/donkey /usr/local/bin/extract-vmlinux && \
    ln -s /sbin/ldconfig /sbin/ldconfig.real

ADD drivers drivers/

# Fetch the driver installer automatically for passthrough/baremetal types.
# -L (follow redirects) replaces the original lowercase -l (--list-only),
# matching the other curl invocations in this file.
RUN if [ "$DRIVER_TYPE" != "vgpu" ]; then \
        cd drivers && \
        DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && DRIVER_ARCH=${DRIVER_ARCH/arm64/aarch64} && \
        curl -fsSL -O $BASE_URL/$DRIVER_VERSION/NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run && \
        chmod +x NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run; fi

# Fetch the installer, fabricmanager, libnvidia-nscq, libnvsdm, imex packages
RUN sh /tmp/install.sh extrapkgsinstall

COPY nvidia-driver /usr/local/bin
COPY ocp_dtk_entrypoint /usr/local/bin
COPY common.sh /usr/local/bin

# The trailing wildcard matches the vgpu-util binary produced by the build
# stage when DRIVER_TYPE=vgpu.
COPY --from=build /work/vgpu-util* /usr/local/bin

WORKDIR /drivers

# With the default value, a file named "empty" from the build context is
# copied in as the public key.
ARG PUBLIC_KEY=empty
COPY ${PUBLIC_KEY} kernel/pubkey.x509

ARG PRIVATE_KEY
ARG KERNEL_VERSION=latest

LABEL io.k8s.display-name="NVIDIA Driver Container"
LABEL name="NVIDIA Driver Container"
LABEL vendor="NVIDIA"
LABEL version="${DRIVER_VERSION}"
LABEL release="N/A"
LABEL summary="Provision the NVIDIA driver through containers"
LABEL description="See summary"

# Install / upgrade packages here that are required to resolve CVEs.
# Uses dnf (as the build stage does) and clears dnf's cache; the original
# cleaned /var/cache/yum, which is not where dnf stores its metadata.
ARG CVE_UPDATES
RUN if [ -n "${CVE_UPDATES}" ]; then \
        dnf update -y ${CVE_UPDATES} && \
        dnf clean all && \
        rm -rf /var/cache/dnf/*; \
    fi

# Remove cuda repository to avoid GPG errors
RUN rm -f /etc/yum.repos.d/cuda.repo

ENTRYPOINT ["nvidia-driver", "init"]
Loading