Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .devcontainer/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
matx.build.Dockerfile
35 changes: 35 additions & 0 deletions .devcontainer/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# MatX Container Generation and Usage scripts

## Steps for running a MatX container

1. Run the run_matx.sh script, optionally specifying a different repo, image base name, or version tag

`./run_matx.sh # defaults to latest tag in setup.sh`

or

`MATX_VERSION_TAG="12.9.1_ubuntu24.04" ./run_matx.sh`

Note: architecture (`-amd64` or `-arm64`) is automatically added to the tag by the scripts


## Steps for building a new container

1. Make your changes to the container recipe

2. Build the container

`MATX_IMAGE_NAME="someTestName" MATX_VERSION_TAG="someNewTag" ./create_base_container.sh`

At least one of MATX_REPO, MATX_IMAGE_NAME, or MATX_VERSION_TAG must differ from its current value in setup.sh; otherwise the script exits with an error, to avoid accidentally overwriting the working container.

Note: architecture (`-amd64` or `-arm64`) is automatically added to the tag by the scripts

3. Test the container

4. Push the container. Also retag the container as latest and push that too

Exercise left to the reader, to prevent accidentally pushing the latest tag.

5. Modify setup.sh to update the MATX_VERSION_TAG and commit your updates to setup.sh and recipe.py

34 changes: 34 additions & 0 deletions .devcontainer/create_base_container.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash
#
# Build a new MatX base container image from recipe.py.
#
# Usage:
#   MATX_IMAGE_NAME="someTestName" MATX_VERSION_TAG="someNewTag" ./create_base_container.sh
#
# At least one of MATX_REPO, MATX_IMAGE_NAME, or MATX_VERSION_TAG must be
# overridden from the environment so that the resulting image name differs
# from the defaults in setup.sh. The architecture suffix (-amd64 / -arm64) is
# appended to the tag by this script.

# Switch to SCRIPT_DIR directory so that ./setup.sh, recipe.py and the docker
# build context (".") resolve relative to this script, not the caller's cwd.
SCRIPT=$(readlink -f "$0")
SCRIPT_DIR=$(dirname "$SCRIPT")
echo "$SCRIPT starting..."
cd "$SCRIPT_DIR" || { echo "Error: cannot cd to $SCRIPT_DIR" >&2; exit 1; }
source ./setup.sh || { echo "Error: cannot source ./setup.sh" >&2; exit 1; }

current_image="${MATX_REPO}${MATX_IMAGE_NAME}:${MATX_VERSION_TAG}"

# Re-evaluate setup.sh in a subshell with the three overrides removed to learn
# the default image name; building over that name is refused.
default_image=$(
  unset MATX_REPO MATX_IMAGE_NAME MATX_VERSION_TAG
  source ./setup.sh && echo "${MATX_REPO}${MATX_IMAGE_NAME}:${MATX_VERSION_TAG}"
)
if [[ "$current_image" == "$default_image" ]]; then
  echo "Error: Do not run this script without updating the MATX_REPO, MATX_IMAGE_NAME, and/or MATX_VERSION_TAG variables from command line" >&2
  exit 1
fi

# MATX_PLATFORM looks like "linux/amd64"; its last path component selects the
# --cpu-target value passed to hpccm.
TARGETARCH=$(basename "$MATX_PLATFORM")
case "$TARGETARCH" in
  amd64)
    CPU_TARGET=x86_64
    ;;
  arm64)
    CPU_TARGET=aarch64
    ;;
  *)
    echo "Unsupported target architecture" >&2
    exit 1
    ;;
esac

# Generate the Dockerfile first and stop if that fails, so docker never builds
# from an empty or stale matx.build.Dockerfile.
if ! hpccm --recipe recipe.py --cpu-target "$CPU_TARGET" --format docker > matx.build.Dockerfile; then
  echo "Error: hpccm failed to generate matx.build.Dockerfile" >&2
  exit 1
fi

if ! DOCKER_BUILDKIT=1 docker build -f matx.build.Dockerfile --platform "$MATX_PLATFORM" -t "${current_image}-${TARGETARCH}" .; then
  echo "Error: docker build failed for ${current_image}-${TARGETARCH}" >&2
  exit 1
fi

echo "Finished building container ${current_image}-${TARGETARCH}"

13 changes: 9 additions & 4 deletions .devcontainer/dev.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
# Use the base image
FROM ghcr.io/nvidia/matx/production:latest
FROM ghcr.io/nvidia/matx/release:latest

ARG REMOTE_USER
ARG REMOTE_UID
ARG REMOTE_GID

# Install the locales package, enable the en_US.UTF-8 entry in /etc/locale.gen
# and generate it, then export it as the default locale for the image.
RUN apt-get update && apt-get install -y locales && \
sed -i 's/^# *en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen && \
locale-gen
ENV LANG=en_US.UTF-8 \
LANGUAGE=en_US:en \
LC_ALL=en_US.UTF-8

# Create the user
RUN groupadd --gid $REMOTE_GID $REMOTE_USER \
&& useradd --uid $REMOTE_UID --gid $REMOTE_GID -m $REMOTE_USER \
#
# [Optional] Add sudo support. Omit if you don't need to install software after connecting.
&& apt-get update \
&& apt-get install -y sudo \
# [Optional] Add sudo support. Omit if you don't need sudo.
&& echo $REMOTE_USER ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$REMOTE_USER \
&& chmod 0440 /etc/sudoers.d/$REMOTE_USER

Expand Down
19 changes: 12 additions & 7 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
"dockerfile": "./dev.Dockerfile",
"args": {
"REMOTE_USER": "${localEnv:USER}",
"REMOTE_UID": "${localEnv:REMOTE_UID:1000}",
"REMOTE_GID": "${localEnv:REMOTE_GID:1000}"
"REMOTE_UID": "${localEnv:REMOTE_UID:2001}",
"REMOTE_GID": "${localEnv:REMOTE_GID:2001}"
}
},
"privileged": true,
Expand All @@ -36,20 +36,25 @@
// "postCreateCommand": "cat /etc/os-release",
"runArgs": [
"--ipc=host",
"--gpus=all",
"--entrypoint",
"fixuid"
"--gpus=all"
],
// Configure tool-specific properties.
"customizations": {
"vscode": {
"settings": {
"terminal.integrated.profiles.linux": {
"bash": {
"path": "/bin/bash"
}
},
"terminal.integrated.defaultProfile.linux": "bash"
},
"extensions": [
"ms-python.python",
"ms-toolsai.jupyter",
"eamodio.gitlens",
"ms-vscode.cpptools",
"llvm-vs-code-extensions.vscode-clangd",
"ms-vscode.cmake-tools"
"anysphere.cpptools"
]
}
}
Expand Down
116 changes: 116 additions & 0 deletions .devcontainer/recipe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#!/usr/bin/env python

import hpccm
from hpccm.building_blocks import gnu, mlnx_ofed, nvshmem, cmake
from hpccm.primitives import baseimage

# Versions (and one install path) of the third-party components referenced by
# this recipe.
DOXYGEN_VER = "1.14.0"
GDRCOPY_HOME = "/usr/local/gdrcopy"
PYBIND11_VER = "2.7.1"
FFTW_VER = "3.3.10"
OPENBLAS_VER = "0.3.27"
BLIS_VER = "1.0"

# Map the CPU target onto the architecture suffix used in download file names.
# NOTE(review): `cpu_target` is not assigned in this file; presumably hpccm
# provides it from the --cpu-target command-line option -- confirm.
_ARCH_SUFFIX_BY_CPU = {
    'x86_64': 'amd64',
    'aarch64': 'arm64',
}
if cpu_target not in _ARCH_SUFFIX_BY_CPU:
    raise RuntimeError("Unsupported platform")
TARGETARCH = _ARCH_SUFFIX_BY_CPU[cpu_target]

# The image is described as a single hpccm stage on top of the CUDA 12.9.1
# development image for Ubuntu 24.04.
Stage0 = hpccm.Stage()
Stage0 += baseimage(
    image='nvidia/cuda:12.9.1-devel-ubuntu24.04',
    _as='devel',
    _distro="ubuntu24",
)

# Packages taken from the distribution's package manager.
distro_packages = [
    'bison',
    'clang-tidy',
    'curl',
    'flex',
    'ghostscript',
    'git',
    'libjs-mathjax',
    'liblapacke-dev',
    'libopenblas64-openmp-dev',
    'lcov',
    'ninja-build',
    'numactl',
    'python3-pip',
    'python3-dev',
    'python3-venv',
    'sudo',
    'texlive-font-utils',
    'valgrind',
    'vim',
]
Stage0 += packages(ospackages=distro_packages)

# Compiler, CMake 3.30.4 and the Nsight tools come from hpccm building blocks;
# eula=True is passed where the block asks for licence acceptance.
Stage0 += gnu()
Stage0 += cmake(eula=True, version="3.30.4")
Stage0 += nsight_compute(eula=True)
Stage0 += nsight_systems()

# Download the Doxygen source release into /tmp, then configure, build and
# install it with CMake.
doxygen_src = f"doxygen-{DOXYGEN_VER}"
Stage0 += shell(commands=[
    f"cd /tmp && wget https://doxygen.nl/files/{doxygen_src}.src.tar.gz",
    f"tar -zxf {doxygen_src}.src.tar.gz",
    f"cd {doxygen_src} && mkdir build && cd build && cmake .. && make -j && make install",
])

# Record the interpreter version in the build log.
Stage0 += shell(commands=["python3 --version"])

# Note: Configure and build twice. First for float, second for double.
#
# SSE2/AVX2/AVX-512 are x86 instruction sets, so those switches are only passed
# when targeting x86_64; an aarch64 build asks for NEON instead. The x86_64
# command lines are unchanged.
# NOTE(review): `cpu_target` is not assigned in this file; presumably hpccm
# provides it from the --cpu-target command-line option -- confirm.
if cpu_target == 'aarch64':
    fftw_simd_flags = "--enable-neon"
else:
    fftw_simd_flags = "--enable-sse2 --enable-avx2 --enable-avx512"
fftw_configure = f"./configure {fftw_simd_flags} --enable-openmp"

Stage0 += shell(commands=[
    f"cd /tmp && wget https://www.fftw.org/fftw-{FFTW_VER}.tar.gz && tar -xzf fftw-{FFTW_VER}.tar.gz && cd fftw-{FFTW_VER}",
    f"{fftw_configure} --enable-float --enable-shared && make -j && make install",
    f"{fftw_configure} --enable-shared && make -j && make install",
])

# Stage0 += shell(commands=[f"cd /tmp && wget https://github.com/OpenMathLib/OpenBLAS/releases/download/v{OPENBLAS_VER}/OpenBLAS-{OPENBLAS_VER}.tar.gz && tar -zxvf OpenBLAS-{OPENBLAS_VER}.tar.gz && cd OpenBLAS-{OPENBLAS_VER}",
# "make -j && sudo make USE_OPENMP=1 INTERFACE64=1 install"])

# Coveralls: unpack the downloaded tarball straight into /usr/local/bin.
Stage0 += shell(commands=[
    "curl -L https://coveralls.io/coveralls-linux.tar.gz | tar -xz -C /usr/local/bin",
])

# BLIS: fetch the tagged source archive into /tmp, then configure with OpenMP
# threading and the CBLAS interface (-b 64, "auto" configuration), build, install.
blis_tarball = f"blis_{BLIS_VER}.tar.gz"
blis_fetch = " && ".join([
    "cd /tmp",
    f"wget https://github.com/flame/blis/archive/refs/tags/{BLIS_VER}.tar.gz -O {blis_tarball}",
    f"tar -zxvf {blis_tarball}",
    f"cd blis-{BLIS_VER}",
])
blis_build = "./configure --enable-threading=openmp --enable-cblas -b 64 auto && make -j && make install"
Stage0 += shell(commands=[blis_fetch, blis_build])

# Install fixuid
# Creates a "matx" user and group (uid/gid 2000), downloads the fixuid v0.6.0
# binary for the target architecture into /usr/local/bin, makes it setuid root,
# writes /etc/fixuid/config.yml naming that user and group, and grants the matx
# user passwordless sudo.
fixuid_url = f"https://github.com/boxboat/fixuid/releases/download/v0.6.0/fixuid-0.6.0-linux-{TARGETARCH}.tar.gz"
fixuid_setup = [
    'addgroup --gid 2000 matx',
    'adduser --uid 2000 --ingroup matx --home /home/matx --shell /bin/sh --disabled-password --gecos "" matx',
    # Shell variables consumed by the printf further down.
    'USER=matx',
    'GROUP=matx',
    f'cd /tmp && curl -SsL {fixuid_url} | tar -C /usr/local/bin -xzf -',
    'chown root:root /usr/local/bin/fixuid',
    'chmod 4755 /usr/local/bin/fixuid',
    'mkdir -p /etc/fixuid',
    r'printf "user: $USER\ngroup: $GROUP\n" > /etc/fixuid/config.yml',
    '/bin/echo "matx ALL = (root) NOPASSWD: ALL" >> /etc/sudoers',
]
Stage0 += shell(commands=fixuid_setup)

# Create the Python virtual environment and ship the wrapper script next to it.
Stage0 += shell(commands=['python3 -m venv /opt/nvidia/venv'])
Stage0 += copy(src='run_from_venv.sh', dest='/opt/nvidia/run_from_venv.sh')

# Append a hook to the skeleton, root and matx .bashrc files: when no virtual
# environment is active, the shell sources the wrapper with /bin/bash as its
# argument.
venv_hook = '[[ -z "$VIRTUAL_ENV" ]] && source /opt/nvidia/run_from_venv.sh /bin/bash'
bashrc_files = ['/etc/skel/.bashrc', '/root/.bashrc', '/home/matx/.bashrc']
Stage0 += shell(commands=[f"echo '{venv_hook}' >> {rc}" for rc in bashrc_files])

# Python packages installed with pip through the run_from_venv.sh wrapper.
pip_packages = [
    'breathe',
    'cupy-cuda12x',
    'hpccm',
    'numpy',
    'pandas',
    'plotly==5.2.1',
    'pybind11',
    'scipy',
    'sphinx',
    'sphinx_book_theme',
    'sphinx-rtd-theme',
]

# One command line: wrapper, pip invocation, then every package name.
pip_commands = (
    '/opt/nvidia/run_from_venv.sh'
    + ' pip3 --no-cache-dir install --upgrade '
    + " ".join(pip_packages)
)
Stage0 += shell(commands=[pip_commands])

# Emit the finished stage on stdout.
print(Stage0)
11 changes: 11 additions & 0 deletions .devcontainer/run_from_venv.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
# Run a command with the Python virtual environment at /opt/nvidia/venv active.
# Usage: run_from_venv.sh <command> [args...]
set -e

# Activate only when no virtual environment is active yet.
[[ -n "$VIRTUAL_ENV" ]] || {
  echo "Activating venv"
  source "/opt/nvidia/venv/bin/activate"
}

# Hand control over to the requested command; it replaces this shell process.
exec "$@"
69 changes: 69 additions & 0 deletions .devcontainer/run_matx.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/bin/bash
#
# Pull the MatX container image selected by setup.sh and start it with the
# caller's uid/gid and home directory mounted.
#
# Usage:
#   ./run_matx.sh                # interactive bash prompt inside the container
#   ./run_matx.sh <command...>   # run the command inside the container, then exit
#
# Environment: MATX_REPO, MATX_IMAGE_NAME, MATX_VERSION_TAG, MATX_INSTANCE_NAME
# and MATX_PLATFORM (defaults come from setup.sh); MATX_EXTRA_FLAGS may carry
# additional `docker run` flags.

USER_ID=$(id -u)
GROUP_ID=$(id -g)
# USER is not guaranteed to be set in every environment; fall back to id so
# the container name never ends in a bare underscore.
user_name=${USER:-$(id -un)}
home_dir=~

# Switch to SCRIPT_DIR directory so that ./setup.sh is found regardless of the
# caller's working directory.
SCRIPT=$(readlink -f "$0")
SCRIPT_DIR=$(dirname "$SCRIPT")
echo "$SCRIPT starting..."
cd "$SCRIPT_DIR" || { echo "Error: cannot cd to $SCRIPT_DIR" >&2; exit 1; }
source ./setup.sh || { echo "Error: cannot source ./setup.sh" >&2; exit 1; }

TARGETARCH=$(basename "$MATX_PLATFORM")
image="${MATX_REPO}${MATX_IMAGE_NAME}:${MATX_VERSION_TAG}-${TARGETARCH}"
instance="${MATX_INSTANCE_NAME}_${user_name}"

if [[ -z "$1" ]]; then
  echo "Start container instance at bash prompt"
  CMDS="/bin/bash"
else
  # The arguments are joined into one string that is later given to `bash -c`.
  CMDS="$*"
  echo "Run command then exit container"
fi

# Look for an NVIDIA device by name. Matching just "NV" would also hit NVMe /
# "Non-Volatile memory controller" entries and wrongly request --gpus all on a
# GPU-less host.
gpu_flags=()
if lspci | grep -qi 'nvidia'; then
  gpu_flags=(--gpus all)
else
  echo "This system has no GPU, running without --gpus all parameter"
  echo "Creating soft link for libcuda.so.1 for any host-without-GPU code dependency"
  # \$(arch) is escaped on purpose: it is evaluated inside the container.
  CMDS="sudo ln -s /usr/local/cuda/compat/libcuda.so.1 /usr/lib/\$(arch)-linux-gnu/libcuda.so.1 && $CMDS"
fi

echo "Command: $CMDS"

if ! docker pull --platform="$MATX_PLATFORM" "$image"; then
  echo "WARNING - The docker pull for $image with platform $MATX_PLATFORM FAILED"
  echo "You may have an image locally that could be used. This may be stale."
  read -r -p "Do you want to continue? y/n " ret
  if [[ "$ret" == "y" ]]; then
    echo "Continuing..."
  else
    echo "Exiting."
    exit 1
  fi
fi

# MATX_EXTRA_FLAGS is left unquoted on purpose so it can hold several
# space-separated docker flags.
# shellcheck disable=SC2086
docker run --platform="$MATX_PLATFORM" \
  --privileged \
  --cap-add=SYS_ADMIN \
  --cap-add=SYS_PTRACE \
  --security-opt seccomp=unconfined \
  -it --rm \
  $MATX_EXTRA_FLAGS \
  "${gpu_flags[@]}" \
  --name "$instance" \
  --hostname "$instance" \
  --add-host "$instance:127.0.0.1" \
  --network host --shm-size=4096m \
  -u "$USER_ID:$GROUP_ID" \
  -w "$PWD" \
  -v "$home_dir:$home_dir" \
  -v /nfs:/nfs \
  -v /dev/hugepages:/dev/hugepages \
  -v /usr/src:/usr/src \
  -v /lib/modules:/lib/modules \
  --userns=host \
  --ipc=host \
  "$image" fixuid /bin/bash -c "$CMDS"
31 changes: 31 additions & 0 deletions .devcontainer/setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/bin/bash
# Default settings for the MatX container scripts. Values already defined by
# the caller are left untouched.

# ${VAR=default} assigns only when VAR is unset, so a value the caller set
# explicitly -- even an empty one -- is kept.
: "${MATX_VERSION_TAG=12.9.1_ubuntu24.04}"
: "${MATX_REPO=ghcr.io/nvidia/matx/}"
: "${MATX_IMAGE_NAME=release}"
: "${MATX_INSTANCE_NAME=c_matx}"

# The platform is derived from the host CPU when it is unset or empty.
if [[ -z "${MATX_PLATFORM}" ]]; then
  case "$(arch)" in
    x86_64)
      MATX_PLATFORM="linux/amd64"
      ;;
    aarch64)
      MATX_PLATFORM="linux/arm64"
      ;;
    *)
      echo "Unsupported arch type"
      exit 1
      ;;
  esac
fi
6 changes: 3 additions & 3 deletions .github/workflows/build-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ jobs:
url: ${{ steps.deployment.outputs.page_url }}
runs-on: ubuntu-latest
container:
image: ghcr.io/nvidia/matx/build:12.6.2_x86_64_ubuntu22.04-amd64
image: ghcr.io/nvidia/matx/release:12.9.1_ubuntu24.04-amd64
credentials:
username: ${{ secrets.DOCS_TOKEN_NAME }}
password: ${{ secrets.DOCS_TOKEN }}
Expand All @@ -72,8 +72,8 @@ jobs:
run: |
mkdir build
cd build
cmake .. -DMATX_BUILD_DOCS=ON
make
/opt/nvidia/run_from_venv.sh cmake .. -DMATX_BUILD_DOCS=ON
/opt/nvidia/run_from_venv.sh make
- name: Setup Pages
uses: actions/configure-pages@v5
- name: Upload artifact
Expand Down