diff --git a/.github/workflows/ci_gpu.yml b/.github/workflows/ci_gpu.yml index fe35a69ad3..5185512eb4 100644 --- a/.github/workflows/ci_gpu.yml +++ b/.github/workflows/ci_gpu.yml @@ -17,6 +17,12 @@ concurrency: jobs: ci-gpu: - runs-on: [self-hosted, ci-gpu] + runs-on: + - self-hosted + - ci-gpu + - ${{ matrix.gpu-type }} + strategy: + matrix: + gpu-type: [cuda, rocm] steps: - name: Clean up previous run run: | @@ -41,9 +47,14 @@ jobs: ci-gpu: uses: actions/checkout@v3 with: submodules: recursive - - name: Install dependencies + - name: Install dependencies for CUDA + if: matrix.gpu-type == 'cuda' run: | python ts_scripts/install_dependencies.py --environment=dev --cuda=cu121 + - name: Install dependencies for ROCm + if: matrix.gpu-type == 'rocm' + run: | + python ts_scripts/install_dependencies.py --environment=dev --rocm=rocm6.2 - name: Torchserve Sanity uses: nick-fields/retry@v3 with: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2b209fe57e..1014eee2af 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -67,10 +67,10 @@ If you plan to develop with TorchServe and change some source code, you must ins Use the optional `--rocm` or `--cuda` flag with `install_dependencies.py` for installing accelerator specific dependencies. Possible values are - - rocm: `rocm61`, `rocm60` + - rocm: `rocm6.3`, `rocm6.2`, `rocm6.1`, `rocm6.0` - cuda: `cu111`, `cu102`, `cu101`, `cu92` - For example `python ./ts_scripts/install_dependencies.py --environment=dev --rocm=rocm61` + For example `python ./ts_scripts/install_dependencies.py --environment=dev --rocm=rocm6.2` #### For Windows diff --git a/README.md b/README.md index afe21d8428..d80e7656f2 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ curl http://127.0.0.1:8080/predictions/bert -T input.txt python ./ts_scripts/install_dependencies.py # Include dependencies for accelerator support with the relevant optional flags -python ./ts_scripts/install_dependencies.py --rocm=rocm61 +python ./ts_scripts/install_dependencies.py --rocm=rocm6.2 python ./ts_scripts/install_dependencies.py --cuda=cu121 # Latest release @@ -45,8 +45,8 @@ pip install torchserve-nightly torch-model-archiver-nightly torch-workflow-archi # Install dependencies python ./ts_scripts/install_dependencies.py -# Include depeendencies for accelerator support with the relevant optional flags +# Include dependencies for accelerator support with the relevant optional flags -python ./ts_scripts/install_dependencies.py --rocm=rocm61 +python ./ts_scripts/install_dependencies.py --rocm=rocm6.2 python ./ts_scripts/install_dependencies.py --cuda=cu121 # Latest release @@ -106,7 +106,7 @@ curl -X POST -d '{"model":"meta-llama/Meta-Llama-3-8B-Instruct", "prompt":"Hello Refer to [LLM deployment](docs/llm_deployment.md) for details and other methods.
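The dotted flag values above (`rocm6.2` rather than the old `rocm62`) line up with the renamed requirements files later in this diff (`requirements/torch_rocm6.2.txt` and friends). Below is a minimal sketch of how such a flag can resolve to a requirements file under the file-per-accelerator layout this diff establishes; the helper and the CUDA/CPU fallback file names are illustrative assumptions, not the actual `install_dependencies.py` code:

```python
# Sketch only: map a dotted accelerator flag to a requirements file path.
# The real logic lives in ts_scripts/install_dependencies.py; the CUDA and
# CPU fallback file names below are placeholders, not the repo's actual files.
import os

ROCM_CHOICES = ("rocm6.0", "rocm6.1", "rocm6.2", "rocm6.3")

def requirements_file(rocm=None, cuda=None):
    if rocm:
        if rocm not in ROCM_CHOICES:
            raise ValueError(f"unsupported ROCm version: {rocm}")
        # rocm6.2 -> requirements/torch_rocm6.2.txt (renamed in this diff)
        return os.path.join("requirements", f"torch_{rocm}.txt")
    if cuda:
        return os.path.join("requirements", f"torch_{cuda}.txt")  # placeholder name
    return os.path.join("requirements", "torch_cpu.txt")  # placeholder name

print(requirements_file(rocm="rocm6.2"))  # requirements/torch_rocm6.2.txt
```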
## ⚡ Why TorchServe -* Write once, run anywhere, on-prem, on-cloud, supports inference on CPUs, GPUs, AWS Inf1/Inf2/Trn1, Google Cloud TPUs, [Nvidia MPS](docs/nvidia_mps.md) +* Write once, run anywhere, on-prem, on-cloud, supports inference on CPUs, GPUs, AWS Inf1/Inf2/Trn1, Google Cloud TPUs, [Nvidia MPS](docs/hardware_support/nvidia_mps.md) * [Model Management API](docs/management_api.md): multi model management with optimized worker to model allocation * [Inference API](docs/inference_api.md): REST and gRPC support for batched inference * [TorchServe Workflows](examples/Workflows/README.md): deploy complex DAGs with multiple interdependent models diff --git a/cpp/src/backends/handler/handler_factory.hh b/cpp/src/backends/handler/handler_factory.hh index 52689cecf1..b42e5e31b2 100644 --- a/cpp/src/backends/handler/handler_factory.hh +++ b/cpp/src/backends/handler/handler_factory.hh @@ -18,6 +18,9 @@ class HandlerFactory { const std::string& handler_class_name) { auto it = handlers_.find(handler_class_name); if (it == handlers_.end()) { + // XXX: + // Why not use the default ctor of `std::shared_ptr` directly? + // What are the benefits of using this `std::shared_ptr(nullptr_t)`? return std::shared_ptr<BaseHandler>(nullptr); } else { return it->second(); @@ -25,6 +28,11 @@ class HandlerFactory { }; private: + // XXX: + // 1) What are the benefits of using a function (ctor) pointer as the value + // instead of using a `shared_ptr` instance directly? + // 2) Whenever we want to add a new pair to `handlers_`, we'll have to + // change the definition here. std::map<std::string, std::shared_ptr<BaseHandler> (*)()> handlers_ = { {"TorchScriptHandler", []() -> std::shared_ptr<BaseHandler> { return std::make_shared<TorchScriptHandler>(); diff --git a/cpp/src/backends/handler/torch_scripted_handler.hh b/cpp/src/backends/handler/torch_scripted_handler.hh index 1e7c816a34..edd173c1ab 100644 --- a/cpp/src/backends/handler/torch_scripted_handler.hh +++ b/cpp/src/backends/handler/torch_scripted_handler.hh @@ -4,6 +4,7 @@ namespace torchserve { class TorchScriptHandler : public BaseHandler { + public: std::pair<std::shared_ptr<void>, std::shared_ptr<torch::Device>> LoadModel( std::shared_ptr<LoadModelRequest>& load_model_request) override; }; diff --git a/docker/Dockerfile b/docker/Dockerfile index 94f4a1ba99..1d9246e034 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -37,12 +37,12 @@ ARG BRANCH_NAME ARG REPO_URL=https://github.com/pytorch/serve.git ENV PYTHONUNBUFFERED TRUE -RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \ +RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \ apt-get update && \ apt-get upgrade -y && \ apt-get install software-properties-common -y && \ add-apt-repository -y ppa:deadsnakes/ppa && \ - apt remove python-pip python3-pip && \ + apt remove -y python-pip python3-pip && \ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ ca-certificates \ g++ \ @@ -55,6 +55,13 @@ RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \ git \ && rm -rf /var/lib/apt/lists/* +RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \ + if [ "$USE_ROCM_VERSION" ]; then \ + apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y rocm-dev amd-smi-lib \ + && rm -rf /var/lib/apt/lists/* ; \ + fi + # Make the virtual environment and "activating" it by adding it first to the path.
# From here on the python$PYTHON_VERSION interpreter is used and the packages # are installed in /home/venv which is what we need for the "runtime-image" @@ -67,6 +74,7 @@ RUN python -m pip install -U pip setuptools RUN export USE_CUDA=1 ARG USE_CUDA_VERSION="" +ARG USE_ROCM_VERSION="" COPY ./ serve @@ -90,6 +98,14 @@ RUN \ else \ python ./ts_scripts/install_dependencies.py;\ fi; \ + elif echo "${BASE_IMAGE}" | grep -q "rocm/"; then \ + # Install ROCm version specific binary when ROCm version is specified as a build arg + if [ "$USE_ROCM_VERSION" ]; then \ + python ./ts_scripts/install_dependencies.py --rocm $USE_ROCM_VERSION;\ + # Install the binary with the latest CPU image on a ROCm base image + else \ + python ./ts_scripts/install_dependencies.py; \ + fi; \ # Install the CPU binary else \ python ./ts_scripts/install_dependencies.py; \ @@ -111,13 +127,14 @@ FROM ${BASE_IMAGE} AS production-image # Re-state ARG PYTHON_VERSION to make it active in this build-stage (uses default define at the top) ARG PYTHON_VERSION ENV PYTHONUNBUFFERED TRUE +ARG USE_ROCM_VERSION RUN --mount=type=cache,target=/var/cache/apt \ apt-get update && \ apt-get upgrade -y && \ apt-get install software-properties-common -y && \ add-apt-repository ppa:deadsnakes/ppa -y && \ - apt remove python-pip python3-pip && \ + apt remove -y python-pip python3-pip && \ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ python$PYTHON_VERSION \ python3-distutils \ @@ -130,6 +147,13 @@ RUN --mount=type=cache,target=/var/cache/apt \ && rm -rf /var/lib/apt/lists/* \ && cd /tmp +RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \ + if [ "$USE_ROCM_VERSION" ]; then \ + apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y rocm-dev amd-smi-lib \ + && rm -rf /var/lib/apt/lists/* ; \ + fi + RUN useradd -m model-server \ && mkdir -p /home/model-server/tmp @@ -137,6 +161,11 @@ COPY --chown=model-server --from=compile-image /home/venv /home/venv COPY --from=compile-image /usr/local/bin/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh ENV PATH="/home/venv/bin:$PATH" +RUN \ + if [ "$USE_ROCM_VERSION" ]; then \ + python -m pip install /opt/rocm/share/amd_smi; \ + fi + RUN chmod +x /usr/local/bin/dockerd-entrypoint.sh \ && chown -R model-server /home/model-server @@ -157,13 +186,14 @@ FROM ${BASE_IMAGE} AS ci-image ARG PYTHON_VERSION ARG BRANCH_NAME ENV PYTHONUNBUFFERED TRUE +ARG USE_ROCM_VERSION -RUN --mount=type=cache,target=/var/cache/apt \ +RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \ apt-get update && \ apt-get upgrade -y && \ apt-get install software-properties-common -y && \ add-apt-repository -y ppa:deadsnakes/ppa && \ - apt remove python-pip python3-pip && \ + apt remove -y python-pip python3-pip && \ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ python$PYTHON_VERSION \ python3-distutils \ @@ -183,6 +213,12 @@ RUN --mount=type=cache,target=/var/cache/apt \ && rm -rf /var/lib/apt/lists/* \ && cd /tmp +RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \ + if [ "$USE_ROCM_VERSION" ]; then \ + apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y rocm-dev amd-smi-lib \ + && rm -rf /var/lib/apt/lists/* ; \ + fi COPY --from=compile-image /home/venv /home/venv @@ -190,6 +226,11 @@ ENV PATH="/home/venv/bin:$PATH" RUN python -m pip install --no-cache-dir -r 
https://raw.githubusercontent.com/pytorch/serve/$BRANCH_NAME/requirements/developer.txt +RUN \ + if [ "$USE_ROCM_VERSION" ]; then \ + python -m pip install /opt/rocm/share/amd_smi; \ + fi + RUN mkdir /home/serve ENV TS_RUN_IN_DOCKER True @@ -203,11 +244,12 @@ ARG PYTHON_VERSION ARG BRANCH_NAME ARG BUILD_FROM_SRC ARG LOCAL_CHANGES +ARG USE_ROCM_VERSION ARG BUILD_WITH_IPEX ARG IPEX_VERSION=1.11.0 ARG IPEX_URL=https://software.intel.com/ipex-whl-stable ENV PYTHONUNBUFFERED TRUE -RUN --mount=type=cache,target=/var/cache/apt \ +RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \ apt-get update && \ apt-get upgrade -y && \ apt-get install software-properties-common -y && \ @@ -243,10 +285,16 @@ RUN \ COPY --from=compile-image /home/venv /home/venv ENV PATH="/home/venv/bin:$PATH" + +RUN \ + if [ "$USE_ROCM_VERSION" ]; then \ + python -m pip install /opt/rocm/share/amd_smi; \ + fi + WORKDIR "serve" RUN python -m pip install -U pip setuptools \ && python -m pip install --no-cache-dir -r requirements/developer.txt \ - && python ts_scripts/install_from_src.py \ + && python ts_scripts/install_from_src.py --environment=dev \ && useradd -m model-server \ && mkdir -p /home/model-server/tmp \ && cp docker/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh \ diff --git a/docker/Dockerfile.cpp b/docker/Dockerfile.cpp index 50d19b3322..e42fe3fd05 100644 --- a/docker/Dockerfile.cpp +++ b/docker/Dockerfile.cpp @@ -19,6 +19,7 @@ ARG CMAKE_VERSION=3.26.4 ARG GCC_VERSION=9 ARG BRANCH_NAME="master" ARG USE_CUDA_VERSION="" +ARG USE_ROCM_VERSION="" FROM ${BASE_IMAGE} AS cpp-dev-image ARG BASE_IMAGE @@ -28,6 +29,7 @@ ARG GCC_VERSION ARG BRANCH_NAME ARG REPO_URL=https://github.com/pytorch/serve.git ARG USE_CUDA_VERSION +ARG USE_ROCM_VERSION ARG DEBIAN_FRONTEND=noninteractive ENV PYTHONUNBUFFERED TRUE ENV TZ=Etc/UTC diff --git a/docker/Dockerfile.dev b/docker/Dockerfile.dev index 2f02d84680..0c39e24f48 100644 --- a/docker/Dockerfile.dev +++ b/docker/Dockerfile.dev @@ -19,6 +19,7 @@ ARG BRANCH_NAME=master ARG REPO_URL=https://github.com/pytorch/serve.git ARG MACHINE_TYPE=cpu ARG CUDA_VERSION +ARG ROCM_VERSION ARG BUILD_WITH_IPEX ARG IPEX_VERSION=1.11.0 @@ -62,7 +63,8 @@ RUN update-alternatives --install /usr/bin/python python /usr/bin/python$PYTHON_ FROM compile-image AS dev-image ARG MACHINE_TYPE=cpu ARG CUDA_VERSION -RUN if [ "$MACHINE_TYPE" = "gpu" ]; then export USE_CUDA=1; fi \ +ARG ROCM_VERSION +RUN if [ "$MACHINE_TYPE" = "gpu" ]; then if [ -n "$ROCM_VERSION" ]; then export USE_ROCM=1; else export USE_CUDA=1; fi; fi \ && git clone $REPO_URL \ && cd serve \ && git checkout ${BRANCH_NAME} \ @@ -70,7 +72,7 @@ RUN if [ "$MACHINE_TYPE" = "gpu" ]; then export USE_CUDA=1; fi \ ENV PATH="/home/venv/bin:$PATH" WORKDIR serve RUN python -m pip install -U pip setuptools \ - && if [ -z "$CUDA_VERSION" ]; then python ts_scripts/install_dependencies.py --environment=dev; else python ts_scripts/install_dependencies.py --environment=dev --cuda $CUDA_VERSION; fi \ + && if [ -n "$CUDA_VERSION" ]; then python ts_scripts/install_dependencies.py --environment=dev --cuda $CUDA_VERSION; elif [ -n "$ROCM_VERSION" ]; then python ts_scripts/install_dependencies.py --environment=dev --rocm $ROCM_VERSION; else python ts_scripts/install_dependencies.py --environment=dev; fi \ && if [ "$BUILD_WITH_IPEX" = "true" ]; then python -m pip install --no-cache-dir intel_extension_for_pytorch==${IPEX_VERSION} -f ${IPEX_URL}; fi \ && python ts_scripts/install_from_src.py \ && useradd -m
model-server \ diff --git a/docker/README.md b/docker/README.md index 7ddd9d01a0..36e3ba093e 100644 --- a/docker/README.md +++ b/docker/README.md @@ -48,6 +48,7 @@ Use `build_image.sh` script to build the docker images. The script builds the `p |-bt, --buildtype|Which type of docker image to build. Can be one of : production, dev, ci| |-t, --tag|Tag name for image. If not specified, script uses torchserve default tag names.| |-cv, --cudaversion| Specify to cuda version to use. Supported values `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`, `cu118`. `cu121`, Default `cu121`| +|-rv, --rocmversion| Specify the ROCm version to use. Supported values `rocm6.0`, `rocm6.1`, `rocm6.2`, `rocm6.3`| |-ipex, --build-with-ipex| Specify to build with intel_extension_for_pytorch. If not specified, script builds without intel_extension_for_pytorch.| |-cpp, --build-cpp specify to build TorchServe CPP| |-n, --nightly| Specify to build with TorchServe nightly.| @@ -66,9 +67,9 @@ Creates a docker image with publicly available `torchserve` and `torch-model-arc ./build_image.sh ``` - - To create a GPU based image with cuda 10.2. Options are `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`, `cu118` + - To create a GPU based image with cuda 10.2. Options are `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`, `cu118` for CUDA and `rocm6.0`, `rocm6.1`, `rocm6.2`, `rocm6.3` for ROCm. - - GPU images are built with NVIDIA CUDA base image. If you want to use ONNX, please specify the base image as shown in the next section. + - GPU images are built with either an NVIDIA CUDA base image or an AMD ROCm base image. If you want to use ONNX, please specify the base image as shown in the next section. ```bash ./build_image.sh -g -cv cu117 ``` @@ -136,6 +137,30 @@ Creates a docker image with `torchserve` and `torch-model-archiver` installed fr ./build_image.sh -bt dev -g -cv cu92 ``` +- For creating GPU based image with ROCm version 6.0: + +```bash +./build_image.sh -bt dev -g -rv rocm6.0 +``` + +- For creating GPU based image with ROCm version 6.1: + +```bash +./build_image.sh -bt dev -g -rv rocm6.1 +``` + +- For creating GPU based image with ROCm version 6.2: + +```bash +./build_image.sh -bt dev -g -rv rocm6.2 +``` + +- For creating GPU based image with ROCm version 6.3: + +```bash +./build_image.sh -bt dev -g -rv rocm6.3 +``` + - For creating GPU based image with a different branch: ```bash @@ -152,7 +177,7 @@ Creates a docker image with `torchserve` and `torch-model-archiver` installed fr ./build_image.sh -bt dev -t torchserve-dev:1.0 ``` - - For creating image with Intel® Extension for PyTorch*: + - For creating image with Intel® Extension for PyTorch: ```bash ./build_image.sh -bt dev -ipex -t torchserve-ipex:1.0 ``` @@ -168,7 +193,7 @@ Creates a docker image with `torchserve` and `torch-model-archiver` installed fr ./build_image.sh -bt dev -g [-cv cu121|cu118] -cpp ``` -- For ROCm support (*experimental*), refer to [this documentation](../docs/hardware_support/amd_support.md). +- For more details on ROCm support (*experimental*), refer to [this documentation](../docs/hardware_support/amd_support.md).
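Several image stages in this diff pip-install the AMD SMI Python bindings from `/opt/rocm/share/amd_smi`. A quick way to verify that install inside a ROCm container is a smoke test like the sketch below; the `amdsmi` function names follow the upstream package and should be treated as assumptions to check against your ROCm release:

```python
# Smoke test (sketch): confirm the amdsmi bindings installed from
# /opt/rocm/share/amd_smi can enumerate accelerators inside the container.
import amdsmi

amdsmi.amdsmi_init()
try:
    handles = amdsmi.amdsmi_get_processor_handles()
    print(f"amd-smi bindings see {len(handles)} accelerator(s)")
finally:
    amdsmi.amdsmi_shut_down()
```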
## Start a container with a TorchServe image @@ -194,7 +219,7 @@ For specific versions you can pass in the specific tag to use (ex: pytorch/torch docker run --rm -it -p 127.0.0.1:8080:8080 -p 127.0.0.1:8081:8081 -p 127.0.0.1:8082:8082 -p 127.0.0.1:7070:7070 -p 127.0.0.1:7071:7071 pytorch/torchserve:0.1.1-cpu ``` -#### Start CPU container with Intel® Extension for PyTorch* +#### Start CPU container with Intel® Extension for PyTorch ```bash docker run --rm -it -p 127.0.0.1:8080:8080 -p 127.0.0.1:8081:8081 -p 127.0.0.1:8082:8082 -p 127.0.0.1:7070:7070 -p 127.0.0.1:7071:7071 torchserve-ipex:1.0 @@ -208,6 +233,12 @@ For GPU latest image with gpu devices 1 and 2: docker run --rm -it --gpus '"device=1,2"' -p 127.0.0.1:8080:8080 -p 127.0.0.1:8081:8081 -p 127.0.0.1:8082:8082 -p 127.0.0.1:7070:7070 -p 127.0.0.1:7071:7071 pytorch/torchserve:latest-gpu ``` +For the latest image with ROCm support, using gpu devices 1 and 2: + +```bash +docker run --rm -it --device=/dev/kfd --device=/dev/dri -e HIP_VISIBLE_DEVICES=1,2 -p 127.0.0.1:8080:8080 -p 127.0.0.1:8081:8081 -p 127.0.0.1:8082:8082 -p 127.0.0.1:7070:7070 -p 127.0.0.1:7071:7071 pytorch/torchserve:latest-gpu +``` + For specific versions you can pass in the specific tag to use (ex: `0.1.1-cuda10.1-cudnn7-runtime`): ```bash diff --git a/docker/build_image.sh b/docker/build_image.sh index b5b9f8e87e..5cb071d967 100755 --- a/docker/build_image.sh +++ b/docker/build_image.sh @@ -11,6 +11,7 @@ BASE_IMAGE="ubuntu:20.04" UPDATE_BASE_IMAGE=false USE_CUSTOM_TAG=false CUDA_VERSION="" +ROCM_VERSION="" USE_LOCAL_SERVE_FOLDER=false BUILD_WITH_IPEX=false BUILD_CPP=false @@ -33,6 +34,7 @@ do echo "-bi, --baseimage specify base docker image. Example: nvidia/cuda:11.7.0-cudnn8-runtime-ubuntu20.04 " echo "-bt, --buildtype specify for type of created image. Possible values: production, dev, ci."
echo "-cv, --cudaversion specify to cuda version to use" + echo "-rv, --rocmversion specify to rocm version to use" echo "-t, --tag specify tag name for docker image" echo "-lf, --use-local-serve-folder specify this option for the benchmark image if the current 'serve' folder should be used during automated benchmarks" echo "-ipex, --build-with-ipex specify to build with intel_extension_for_pytorch" @@ -167,6 +169,27 @@ do shift shift ;; + -rv|--rocmversion) + ROCM_VERSION="$2" + if [ "${ROCM_VERSION}" == "rocm6.0" ]; + then + BASE_IMAGE="rocm/dev-ubuntu-22.04:6.0.2" + elif [ "${ROCM_VERSION}" == "rocm6.1" ]; + then + BASE_IMAGE="rocm/dev-ubuntu-22.04:6.1.2" + elif [ "${ROCM_VERSION}" == "rocm6.2" ]; + then + BASE_IMAGE="rocm/dev-ubuntu-22.04:6.2.4" + elif [ "${ROCM_VERSION}" == "rocm6.3" ]; + then + BASE_IMAGE="rocm/dev-ubuntu-22.04:6.3.4" + else + echo "ROCm version not supported" + exit 1 + fi + shift + shift + ;; esac done @@ -218,30 +241,50 @@ then exit 1 fi fi + + if [[ "${MACHINE}" == "gpu" || "${ROCM_VERSION}" != "" ]]; + then + if [ "${ROCM_VERSION}" == "rocm6.0" ]; + then + BASE_IMAGE="rocm/dev-ubuntu-22.04:6.0.2" + elif [ "${ROCM_VERSION}" == "rocm6.1" ]; + then + BASE_IMAGE="rocm/dev-ubuntu-22.04:6.1.2" + elif [ "${ROCM_VERSION}" == "rocm6.2" ]; + then + BASE_IMAGE="rocm/dev-ubuntu-22.04:6.2.4" + elif [ "${ROCM_VERSION}" == "rocm6.3" ]; + then + BASE_IMAGE="rocm/dev-ubuntu-22.04:6.3.4" + else + echo "ROCm version $ROCM_VERSION is not supported for CPP" + exit 1 + fi + fi fi if [ "${BUILD_TYPE}" == "production" ]; then if [ "${MULTI}" == "true" ]; then - DOCKER_BUILDKIT=1 docker buildx build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\ + DOCKER_BUILDKIT=1 docker buildx build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg USE_ROCM_VERSION="${ROCM_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\ --build-arg BUILD_NIGHTLY="${BUILD_NIGHTLY}" --build-arg BRANCH_NAME="${BRANCH_NAME}" --build-arg REPO_URL="${REPO_URL}" --build-arg BUILD_FROM_SRC="${BUILD_FROM_SRC}"\ --build-arg LOCAL_CHANGES="${LOCAL_CHANGES}" -t "${DOCKER_TAG}" --platform "${ARCH}" --target production-image ../ --push else - DOCKER_BUILDKIT=1 docker buildx build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\ + DOCKER_BUILDKIT=1 docker buildx build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg USE_ROCM_VERSION="${ROCM_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\ --build-arg BUILD_NIGHTLY="${BUILD_NIGHTLY}" --build-arg BRANCH_NAME="${BRANCH_NAME}" --build-arg REPO_URL="${REPO_URL}" --build-arg BUILD_FROM_SRC="${BUILD_FROM_SRC}"\ --build-arg LOCAL_CHANGES="${LOCAL_CHANGES}" -t "${DOCKER_TAG}" --target production-image ../ --load fi elif [ "${BUILD_TYPE}" == "ci" ]; then - DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\ + DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg USE_ROCM_VERSION="${ROCM_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\ --build-arg BUILD_NIGHTLY="${BUILD_NIGHTLY}" --build-arg 
BRANCH_NAME="${BRANCH_NAME}" --build-arg REPO_URL="${REPO_URL}" --build-arg BUILD_FROM_SRC="${BUILD_FROM_SRC}"\ --build-arg LOCAL_CHANGES="${LOCAL_CHANGES}" -t "${DOCKER_TAG}" --target ci-image ../ else if [ "${BUILD_CPP}" == "true" ] then - DOCKER_BUILDKIT=1 docker build --file Dockerfile.cpp --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\ + DOCKER_BUILDKIT=1 docker build --file Dockerfile.cpp --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg USE_ROCM_VERSION="${ROCM_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\ --build-arg BRANCH_NAME="${BRANCH_NAME}" --build-arg REPO_URL="${REPO_URL}" -t "${DOCKER_TAG}" --target cpp-dev-image . else - DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\ + DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg USE_ROCM_VERSION="${ROCM_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\ --build-arg BUILD_NIGHTLY="${BUILD_NIGHTLY}" --build-arg BRANCH_NAME="${BRANCH_NAME}" --build-arg REPO_URL="${REPO_URL}" --build-arg BUILD_FROM_SRC="${BUILD_FROM_SRC}" --build-arg LOCAL_CHANGES="${LOCAL_CHANGES}"\ --build-arg BUILD_WITH_IPEX="${BUILD_WITH_IPEX}" -t "${DOCKER_TAG}" --target dev-image ../ fi diff --git a/docs/README.md b/docs/README.md index d00e726070..cf0fa078a2 100644 --- a/docs/README.md +++ b/docs/README.md @@ -62,4 +62,4 @@ TorchServe is a performant, flexible and easy to use tool for serving PyTorch ea * [TorchServe on Kubernetes](https://github.com/pytorch/serve/blob/master/kubernetes/README.md#torchserve-on-kubernetes) - Demonstrates a Torchserve deployment in Kubernetes using Helm Chart supported in both Azure Kubernetes Service and Google Kubernetes service * [mlflow-torchserve](https://github.com/mlflow/mlflow-torchserve) - Deploy mlflow pipeline models into TorchServe * [Kubeflow pipelines](https://github.com/kubeflow/pipelines/tree/master/samples/contrib/pytorch-samples) - Kubeflow pipelines and Google Vertex AI Managed pipelines -* [NVIDIA MPS](nvidia_mps.md) - Use NVIDIA MPS to optimize multi-worker deployment on a single GPU +* [NVIDIA MPS](hardware_support/nvidia_mps.md) - Use NVIDIA MPS to optimize multi-worker deployment on a single GPU diff --git a/docs/getting_started.md b/docs/getting_started.md index 3f9e6176e2..71d1f58df8 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -26,6 +26,12 @@ This project is no longer actively maintained. While existing releases remain av Note: PyTorch 1.9+ will not support cu92 and cu101. So TorchServe only supports cu92 and cu101 up to PyTorch 1.8.1. + - For GPU with ROCm. Options are `rocm6.0`, `rocm6.1`, `rocm6.2`, `rocm6.3` + + ```bash + python ./ts_scripts/install_dependencies.py --rocm=rocm6.2 + ``` + #### For Windows Refer to the documentation [here](./torchserve_on_win_native.md). diff --git a/docs/github_actions.md b/docs/github_actions.md index 62800a23fd..d648578260 100644 --- a/docs/github_actions.md +++ b/docs/github_actions.md @@ -65,10 +65,10 @@ This project is no longer actively maintained. While existing releases remain av - This would create 2 runs. 
One run on `ci-gpu` with CUDA 11.6 and a second run on `ci-gpu` with CUDA 11.7 ``` runs-on: [self-hosted, ci-gpu] - strategy: - fail-fast: false - matrix: - cuda: ["cu116", "cu117"] + strategy: + fail-fast: false + matrix: + cuda: ["cu116", "cu117"] ``` 6. Specify the commands to be executed for the run diff --git a/docs/hardware_support/amd_support.md b/docs/hardware_support/amd_support.md index e231f4cae1..0a2b34fb93 100644 --- a/docs/hardware_support/amd_support.md +++ b/docs/hardware_support/amd_support.md @@ -9,7 +9,7 @@ TorchServe can be run on any combination of operating system and device that is ## Supported Versions of ROCm -The current stable `major.patch` version of ROCm and the previous path version will be supported. For example version `N.2` and `N.1` where `N` is the current major version. +The current stable `major.patch` version of ROCm and the previous patch version will be supported. For example version `N.2` and `N.1` where `N` is the current major version. ## Installation @@ -39,7 +39,7 @@ The current stable `major.patch` version of ROCm and the previous patch version w - install the dependencies needed for ROCm support. ```bash - python ./ts_scripts/install_dependencies.py --rocm=rocm61 + python ./ts_scripts/install_dependencies.py --rocm=rocm6.2 python ./ts_scripts/install_from_src.py ``` - enable amd-smi in the python virtual environment @@ -68,12 +68,12 @@ If you have 8 accelerators but only want TorchServe to see the last four of them **In Development** -`Dockerfile.rocm` provides preliminary ROCm support for TorchServe. +`Dockerfile` and `build_image.sh` provide ROCm support for TorchServe. Building and running `dev-image`: ```bash -docker build --file docker/Dockerfile.rocm --target dev-image -t torch-serve-dev-image-rocm --build-arg USE_ROCM_VERSION=rocm62 --build-arg BUILD_FROM_SRC=true . +docker build --file docker/Dockerfile --target dev-image -t torch-serve-dev-image-rocm --build-arg USE_ROCM_VERSION=rocm6.2 --build-arg BUILD_FROM_SRC=true . docker run -it --rm --device=/dev/kfd --device=/dev/dri torch-serve-dev-image-rocm bash ``` diff --git a/docs/performance_checklist.md b/docs/performance_checklist.md index 32536d50f2..609c939aed 100644 --- a/docs/performance_checklist.md +++ b/docs/performance_checklist.md @@ -10,7 +10,7 @@ This checklist describes some steps that should be completed when diagnosing mod - Check the versions of PyTorch, Nvidia driver, and other components and update to the latest compatible releases. Oftentimes known performance bugs have already been fixed. -- Collect system-level activity logs to understand the overall resource utilizations. It’s useful to know how the model inference pipeline is using the system resources at a high level, as the first step of optimization. Even simple CLI tools such as nvidia-smi and htop would be helpful. +- Collect system-level activity logs to understand the overall resource utilizations. It’s useful to know how the model inference pipeline is using the system resources at a high level, as the first step of optimization. Even simple CLI tools such as nvidia-smi, amd-smi, and htop would be helpful. - Start with a target with the highest impact on performance. It should be obvious from the system activity logs where the biggest bottleneck is – look beyond model inference, as pre/post processing can be expensive and can affect the end-to-end throughput just as much.
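Since the checklist now names both `nvidia-smi` and `amd-smi`, a vendor-neutral way to capture those system-level snapshots during a load test is to shell out to whichever CLI is present, as in the sketch below. The `nvidia-smi` query flags are standard; the `amd-smi monitor` subcommand is an assumption to verify with `amd-smi --help` on your install:

```python
# Sketch: take one GPU-utilization snapshot, whichever vendor CLI is available.
import shutil
import subprocess

def gpu_snapshot() -> str:
    if shutil.which("nvidia-smi"):
        return subprocess.check_output(
            ["nvidia-smi", "--query-gpu=utilization.gpu,memory.used",
             "--format=csv,noheader"],
            text=True,
        )
    if shutil.which("amd-smi"):
        # Subcommand name assumed; check `amd-smi --help` on your install.
        return subprocess.check_output(["amd-smi", "monitor"], text=True)
    return "no GPU CLI found"

print(gpu_snapshot())
```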
diff --git a/docs/performance_guide.md b/docs/performance_guide.md index a342eeaee1..2af4d076a4 100644 --- a/docs/performance_guide.md +++ b/docs/performance_guide.md @@ -85,7 +85,7 @@ While NVIDIA GPUs allow multiple processes to run on CUDA kernels, this comes wi * The execution of the kernels is generally serialized * Each processes creates its own CUDA context which occupies additional GPU memory -To get around these drawbacks, you can utilize the NVIDIA Multi-Process Service (MPS) to increase performance. You can find more information on how to utilize NVIDIA MPS with TorchServe [here](nvidia_mps.md). +To get around these drawbacks, you can utilize the NVIDIA Multi-Process Service (MPS) to increase performance. You can find more information on how to utilize NVIDIA MPS with TorchServe [here](hardware_support/nvidia_mps.md).
NVIDIA DALI
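These doc fixes sit next to a point the rest of this diff relies on: ROCm builds of PyTorch expose the same `torch.cuda` API surface as CUDA builds, so backend detection keys off `torch.version.*` rather than device probing. That is the same check the `base_handler.py` change further down uses to pick ONNX Runtime providers; a standalone sketch:

```python
# Sketch: distinguish a ROCm build of PyTorch from a CUDA build. On ROCm,
# torch.cuda.* works as-is (HIP is mapped onto the CUDA API surface), so
# torch.version.hip / torch.version.cuda is the reliable discriminator.
import torch

if torch.version.hip:
    backend = "rocm"
elif torch.version.cuda:
    backend = "cuda"
else:
    backend = "cpu"

devices = torch.cuda.device_count() if backend != "cpu" else 0
print(f"backend={backend}, visible accelerators={devices}")
```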
diff --git a/docs/use_cases.md b/docs/use_cases.md index cf58baf30a..b0f3457b66 100644 --- a/docs/use_cases.md +++ b/docs/use_cases.md @@ -155,7 +155,7 @@ The example taken here uses scripted mode model however you can also deploy eage - Move MAR file in a new directory name it as `model-store` - Docker - Make sure that MAR file is being copied in volume/directory shared while starting torchserve docker image - torchserve start command in following instruction will automatically detect GPUs and use for loading/serving models. If you want to [limit the GPU usage](https://github.com/pytorch/serve/blob/master/docs/configuration.md#limit-gpu-usage) -then use `nvidia-smi` to determine the number of GPU and corresponding ids. Once you have gpu details, you can add `number_of_gpu` param in config.proerties and use second command as given next instruction. +then use `nvidia-smi` or `amd-smi` to determine the number of GPUs and corresponding ids. Once you have gpu details, you can add `number_of_gpu` param in config.properties and use second command as given next instruction. e.g. number_of_gpu=2 - Start torchserve with all GPUs- `torchserve --start --ncs --model-store `. With restricted GPUs - `torchserve --start --ncs --model-store --ts-config /config.properties` - Docker - For all GPU `docker run --rm -it --gpus all -p 127.0.0.1:8080:8080 -p 127.0.0.1:8081:8081 torchserve:gpu-latest` For GPUs 1 and 2 `docker run --rm -it --gpus '"device=1,2"' -p 8080:8080 -p 8081:8081 pytorch/torchserve:latest-gpu` diff --git a/kubernetes/kserve/Dockerfile.dev b/kubernetes/kserve/Dockerfile.dev index 54af943553..ebfb0f76f0 100644 --- a/kubernetes/kserve/Dockerfile.dev +++ b/kubernetes/kserve/Dockerfile.dev @@ -55,11 +55,12 @@ RUN python -m pip install -U pip setuptools FROM compile-image AS dev-image ARG MACHINE_TYPE=cpu ARG CUDA_VERSION -RUN if [ "$MACHINE_TYPE" = "gpu" ]; then export USE_CUDA=1; fi \ +ARG ROCM_VERSION +RUN if [ "$MACHINE_TYPE" = "gpu" ]; then if [ -n "$ROCM_VERSION" ]; then export USE_ROCM=1; else export USE_CUDA=1; fi; fi \ && git clone https://github.com/pytorch/serve.git \ && cd serve \ && git checkout ${BRANCH_NAME} \ - && if [ -z "$CUDA_VERSION" ]; then python ts_scripts/install_dependencies.py --environment=dev; else python ts_scripts/install_dependencies.py --environment=dev --cuda $CUDA_VERSION; fi \ + && if [ -n "$CUDA_VERSION" ]; then python ts_scripts/install_dependencies.py --environment=dev --cuda $CUDA_VERSION; elif [ -n "$ROCM_VERSION" ]; then python ts_scripts/install_dependencies.py --environment=dev --rocm $ROCM_VERSION; else python ts_scripts/install_dependencies.py --environment=dev; fi \ && python ts_scripts/install_from_src.py \ && python -m pip install captum transformers kserve[storage]>=0.11.0 \ && python -m pip install .
\ diff --git a/requirements/torch_rocm60.txt b/requirements/torch_rocm6.0.txt similarity index 100% rename from requirements/torch_rocm60.txt rename to requirements/torch_rocm6.0.txt diff --git a/requirements/torch_rocm61.txt b/requirements/torch_rocm6.1.txt similarity index 100% rename from requirements/torch_rocm61.txt rename to requirements/torch_rocm6.1.txt diff --git a/requirements/torch_rocm62.txt b/requirements/torch_rocm6.2.txt similarity index 100% rename from requirements/torch_rocm62.txt rename to requirements/torch_rocm6.2.txt diff --git a/requirements/torch_rocm6.3.txt b/requirements/torch_rocm6.3.txt new file mode 100644 index 0000000000..6416aea8aa --- /dev/null +++ b/requirements/torch_rocm6.3.txt @@ -0,0 +1,6 @@ +# PyTorch 2.7 will be the first release with ROCm 6.3 support. +# For now (as of 20250317), nightly builds need to be used. +#--index-url https://download.pytorch.org/whl/rocm6.3 +#torch==2.7.1+rocm6.3; sys_platform == 'linux' +#torchvision==0.22.1+rocm6.3; sys_platform == 'linux' +#torchaudio==2.7.1+rocm6.3; sys_platform == 'linux' diff --git a/ts/torch_handler/base_handler.py b/ts/torch_handler/base_handler.py index 1dad241922..41a3a4588b 100644 --- a/ts/torch_handler/base_handler.py +++ b/ts/torch_handler/base_handler.py @@ -99,11 +99,12 @@ def setup_ort_session(model_pt_path, map_location): - providers = ( - ["CUDAExecutionProvider", "CPUExecutionProvider"] - if map_location == "cuda" - else ["CPUExecutionProvider"] - ) + providers = ["CPUExecutionProvider"] + if map_location == "cuda": + if torch.version.cuda: + providers.append("CUDAExecutionProvider") + elif torch.version.hip: + providers.append("ROCMExecutionProvider") sess_options = ort.SessionOptions() sess_options.intra_op_num_threads = psutil.cpu_count(logical=True) diff --git a/ts_scripts/api_utils.py b/ts_scripts/api_utils.py index 1b719018d6..3f4063fa50 100755 --- a/ts_scripts/api_utils.py +++ b/ts_scripts/api_utils.py @@ -2,6 +2,7 @@ import os import shutil import sys +import time REPO_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..") sys.path.append(REPO_ROOT) @@ -108,10 +109,38 @@ def cleanup_model_store(): os.remove(f) -def move_logs(log_file, artifact_dir): +def move_logs(log_file, artifact_dir, retries=5): + """ + Move log files to artifacts directory. If directory already exists, merge contents. + """ logs_dir = os.path.join("logs") - os.rename(log_file, os.path.join(logs_dir, log_file)) # mv file logs/ - os.rename(logs_dir, os.path.join(artifact_dir, logs_dir)) # mv logs/ dir + if not os.path.exists(logs_dir): + os.makedirs(logs_dir) + + shutil.move(log_file, os.path.join(logs_dir, log_file)) # mv file logs/ + + destination_dir = os.path.join(artifact_dir, logs_dir) + + # Retry is used because the directory might not be ready to be moved.
+ for attempt in range(retries): + try: + if os.path.exists(destination_dir): + # Merge contents if destination directory already exists + for root, dirs, files in os.walk(logs_dir): + for file in files: + shutil.move( + os.path.join(root, file), + os.path.join(destination_dir, file), + ) + shutil.rmtree(logs_dir) # Remove the empty logs directory + else: + shutil.move(logs_dir, destination_dir) # mv logs/ dir + break + except OSError: + if attempt < retries - 1: + time.sleep(2) + else: + raise def trigger_management_tests(): diff --git a/ts_scripts/install_dependencies.py b/ts_scripts/install_dependencies.py index 4d464e03fe..85a6ec0ba8 100644 --- a/ts_scripts/install_dependencies.py +++ b/ts_scripts/install_dependencies.py @@ -385,7 +385,7 @@ def get_brew_version(): parser.add_argument( "--rocm", default=None, - choices=["rocm60", "rocm61", "rocm62"], + choices=["rocm6.0", "rocm6.1", "rocm6.2", "rocm6.3"], help="ROCm version for torch", ) parser.add_argument( diff --git a/ts_scripts/install_from_src.py b/ts_scripts/install_from_src.py index 9c555f9c3d..8f0a8eb9a2 100644 --- a/ts_scripts/install_from_src.py +++ b/ts_scripts/install_from_src.py @@ -26,7 +26,7 @@ def install_from_src(dev=False): parser.add_argument( "--environment", type=str, - default="production", + default="prod", help="options: dev|prod", ) args = parser.parse_args() diff --git a/ts_scripts/print_env_info.py b/ts_scripts/print_env_info.py index 2d3fc059ae..ec7a3a80cb 100644 --- a/ts_scripts/print_env_info.py +++ b/ts_scripts/print_env_info.py @@ -231,6 +231,9 @@ def get_nvidia_driver_cuda_version(): return f"{cuda_major}.{cuda_minor}" +# XXX: +# Even though `torch._C._cuda_getCompiledVersion` is compatible with both CUDA and ROCm/HIP, +# the way of calculating the major & minor version numbers may not be. def get_running_cuda_version(): cuda = torch._C._cuda_getCompiledVersion() cuda_major = cuda // 1000 diff --git a/ts_scripts/spellcheck_conf/wordlist.txt b/ts_scripts/spellcheck_conf/wordlist.txt index 3e055db67d..09da229bf1 100644 --- a/ts_scripts/spellcheck_conf/wordlist.txt +++ b/ts_scripts/spellcheck_conf/wordlist.txt @@ -1301,3 +1301,10 @@ OpenAI openai kv OOM +recurse +submodules +ROCm +rocm +rocmversion +rv +amd
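On the XXX left in `print_env_info.py`: the `major*1000 + minor*10` decoding of `torch._C._cuda_getCompiledVersion()` is CUDA's encoding, and a HIP build may pack its version differently. One hedged way around the ambiguity is to read the human-readable strings torch already exposes, as in this sketch (the example version strings in the comments are illustrative):

```python
# Sketch: report the accelerator runtime torch was compiled against without
# relying on the CUDA-specific integer encoding of _cuda_getCompiledVersion.
import torch

def accelerator_runtime_version() -> str:
    if torch.version.hip:            # e.g. "6.2.41134-..." on ROCm builds
        major, minor = torch.version.hip.split(".")[:2]
        return f"ROCm {major}.{minor}"
    if torch.version.cuda:           # e.g. "12.1" on CUDA builds
        return f"CUDA {torch.version.cuda}"
    return "CPU-only build"

print(accelerator_runtime_version())
```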