Skip to content

Commit 2b8e451

Browse files
authored
Merge pull request #917 from dmedovich/feat/vllm-rocm-support
feat: add vLLM ROCm Docker variant for AMD GPUs
2 parents b2fc8f5 + c285e47 commit 2b8e451

2 files changed

Lines changed: 86 additions & 3 deletions

File tree

Dockerfile

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,3 +155,66 @@ COPY --from=builder /app/model-runner /app/model-runner
155155
# final-sglang: the sglang runtime stage plus the model-runner binary.
# The binary is taken from builder-sglang (the build without vLLM).
FROM sglang AS final-sglang
COPY --from=builder-sglang /app/model-runner /app/model-runner
158+
159+
# --- vLLM ROCm: builder stage ---
# Compiles upstream vLLM from source into a wheel under /wheels. The base is
# AMD's pre-built ROCm dev image, which already ships PyTorch ROCm, Triton,
# flash-attention, and the ROCm SDK (see https://hub.docker.com/r/rocm/vllm-dev).
# The checkout is the tagged release matching VLLM_VERSION: no fork, no
# custom wheels.
FROM rocm/vllm-dev:base AS vllm-rocm-builder

# GPU architectures officially supported by vLLM ROCm:
#   gfx90a (MI200), gfx942 (MI300), gfx1100/1101 (RDNA3 7900/7800).
ARG PYTORCH_ROCM_ARCH="gfx90a;gfx942;gfx1100;gfx1101"
ARG VLLM_VERSION=0.19.1
ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}

# Shallow clone of the release tag only; history is not needed for the build.
RUN git clone --branch v${VLLM_VERSION} --depth 1 \
        https://github.com/vllm-project/vllm.git /vllm-src

WORKDIR /vllm-src

# Install the ROCm build requirements, then build the wheel into /wheels,
# where the runtime stage picks it up.
RUN python3 -m pip install --no-cache-dir -r requirements/rocm.txt
RUN python3 setup.py bdist_wheel --dist-dir=/wheels
178+
179+
# --- vLLM ROCm: runtime stage ---
# Mirrors the /opt/vllm-env layout that pkg/inference/backends/vllm/vllm.go
# expects (binary at /opt/vllm-env/bin/vllm, version file at
# /opt/vllm-env/version). Symlinks are used instead of a real venv because
# rocm/vllm-dev:base installs Python dependencies system-wide and recreating
# a venv would break the PyTorch ROCm / Triton ROCm wiring.
#
# Note: unlike the CUDA vllm stage, this image does NOT include llama.cpp.
# The base image is incompatible (different ROCm runtime versions), and the
# rocm vllm image is intended as a vLLM-only artifact.
FROM rocm/vllm-dev:base AS vllm-rocm

# Install the wheel directly from the builder stage through a bind mount.
# A COPY followed by `rm` in a later RUN would keep the wheel in the COPY
# layer forever (deleting in a later layer never shrinks the image). The
# mount is only visible while this RUN executes, so the wheel file itself is
# never written to an image layer. Requires BuildKit.
RUN --mount=type=bind,from=vllm-rocm-builder,source=/wheels,target=/tmp/wheels \
    python3 -m pip install --no-cache-dir /tmp/wheels/*.whl

# Unprivileged runtime user; membership in `video` is for GPU device access.
# NOTE(review): some ROCm setups also require the `render` group — confirm
# whether it exists in the base image and is needed on target hosts.
RUN groupadd --system modelrunner \
    && useradd --system --gid modelrunner -G video \
        --create-home --home-dir /home/modelrunner modelrunner

# Fake the venv layout: symlink the system-wide `vllm` entry point and record
# the installed vLLM version in the version file.
RUN mkdir -p /opt/vllm-env/bin \
    && ln -s "$(command -v vllm)" /opt/vllm-env/bin/vllm \
    && python3 -c "import vllm; print(vllm.__version__)" > /opt/vllm-env/version \
    && chown -R modelrunner:modelrunner /opt/vllm-env

# Socket, model, and application directories owned by the runtime user.
RUN mkdir -p /var/run/model-runner /models /app \
    && chown -R modelrunner:modelrunner /var/run/model-runner /app /models \
    && chmod -R 755 /models

USER modelrunner

ENV MODEL_RUNNER_SOCK=/var/run/model-runner/model-runner.sock \
    MODEL_RUNNER_PORT=12434 \
    HOME=/home/modelrunner \
    MODELS_PATH=/models

LABEL com.docker.desktop.service="model-runner"

# The binary itself is not added in this stage; the final-vllm-rocm stage
# copies it to /app/model-runner.
ENTRYPOINT ["/app/model-runner"]
217+
218+
# final-vllm-rocm: the vllm-rocm runtime stage plus the model-runner binary
# produced by the `builder` stage.
FROM vllm-rocm AS final-vllm-rocm
COPY --from=builder /app/model-runner /app/model-runner

Makefile

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ LLAMA_UPSTREAM_IMAGE ?= $(shell \
99
"$(LLAMA_SERVER_VERSION)" "$(LLAMA_SERVER_VARIANT)")
1010
DOCKER_IMAGE := docker/model-runner:latest
1111
DOCKER_IMAGE_VLLM := docker/model-runner:latest-vllm-cuda
12+
DOCKER_IMAGE_VLLM_ROCM := docker/model-runner:latest-vllm-rocm
1213
DOCKER_IMAGE_SGLANG := docker/model-runner:latest-sglang
1314
DOCKER_IMAGE_MUSA := docker/model-runner:latest-musa
1415
DOCKER_IMAGE_OPENVINO := docker/model-runner:latest-openvino
@@ -43,7 +44,7 @@ DOCKER_BUILD_COMMON_ARGS = \
4344
.PHONY: build build-cli build-dmr build-llamacpp install-cli run clean test integration-tests e2e
4445
.PHONY: validate validate-versions validate-all lint help
4546
.PHONY: docker-build docker-build-multiplatform docker-run docker-run-impl
46-
.PHONY: docker-build-vllm docker-run-vllm docker-build-sglang docker-run-sglang
47+
.PHONY: docker-build-vllm docker-run-vllm docker-build-vllm-rocm docker-run-vllm-rocm docker-build-sglang docker-run-sglang
4748
.PHONY: docker-build-musa docker-run-musa docker-build-openvino docker-run-openvino
4849
.PHONY: test-docker-ce-installation
4950
.PHONY: vllm-metal-build vllm-metal-install vllm-metal-dev vllm-metal-clean
@@ -194,6 +195,23 @@ docker-build-vllm:
194195
docker-run-vllm: docker-build-vllm
195196
@$(MAKE) -s docker-run-impl DOCKER_IMAGE=$(DOCKER_IMAGE_VLLM)
196197

198+
# docker-build-vllm-rocm: build the vLLM Docker image for AMD GPUs (ROCm).
#
# Upstream vLLM is compiled from source on top of rocm/vllm-dev:base. Unlike
# the CUDA variant this is a vLLM-only image (no llama.cpp). The build is
# heavy: expect 30-60 min and a final image of roughly 12-15 GB.
#
# LLAMA_SERVER_VARIANT is not consumed by the Dockerfile stages used here;
# it is set to "rocm" only because that restricts DOCKER_BUILD_PLATFORMS to
# linux/amd64 (vLLM ROCm has no aarch64 support).
docker-build-vllm-rocm:
	@$(MAKE) docker-build \
		LLAMA_SERVER_VARIANT=rocm \
		DOCKER_IMAGE=$(DOCKER_IMAGE_VLLM_ROCM) \
		DOCKER_TARGET=final-vllm-rocm
210+
211+
# docker-run-vllm-rocm: build the ROCm vLLM image (via the prerequisite), then
# start it through docker-run-impl with TCP port access and mounted model
# storage.
# NOTE(review): docker-run-impl is not visible here — confirm it exposes the
# AMD GPU device nodes (e.g. /dev/kfd, /dev/dri) to the container.
docker-run-vllm-rocm: docker-build-vllm-rocm
	@$(MAKE) -s docker-run-impl DOCKER_IMAGE=$(DOCKER_IMAGE_VLLM_ROCM)
214+
197215
# Build SGLang Docker image
198216
docker-build-sglang:
199217
@$(MAKE) docker-build \
@@ -402,8 +420,10 @@ help:
402420
@echo " docker-build - Build Docker image for current platform"
403421
@echo " docker-build-multiplatform - Build Docker image for multiple platforms"
404422
@echo " docker-run - Run in Docker container with TCP port access and mounted model storage"
405-
@echo " docker-build-vllm - Build vLLM Docker image"
406-
@echo " docker-run-vllm - Run vLLM Docker container"
423+
@echo " docker-build-vllm - Build vLLM Docker image (CUDA)"
424+
@echo " docker-run-vllm - Run vLLM Docker container (CUDA)"
425+
@echo " docker-build-vllm-rocm - Build vLLM Docker image (ROCm / AMD GPU, source build)"
426+
@echo " docker-run-vllm-rocm - Run vLLM Docker container (ROCm / AMD GPU)"
407427
@echo " docker-build-sglang - Build SGLang Docker image"
408428
@echo " docker-run-sglang - Run SGLang Docker container"
409429
@echo " docker-build-musa - Build MUSA Docker image"

0 commit comments

Comments
 (0)