Skip to content

Commit 7bb4bf0

Browse files
committed
Add hf-serve DLCs for CPU and GPU
1 parent f7f68e2 commit 7bb4bf0

File tree

6 files changed

+337
-0
lines changed

6 files changed

+337
-0
lines changed
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# hf-serve DLC (CPU): Ubuntu 24.04 base with the system libraries, Google Cloud
# CLI, uv-managed Python 3.12 virtual environment and the `hf-serve` wheel.
FROM ubuntu:24.04
LABEL maintainer="Hugging Face"

SHELL ["/bin/bash", "-c"]

ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \
        curl \
        ca-certificates \
        build-essential \
        git \
        # NOTE: `ffmpeg` and `libmagic-dev` are required for audio related tasks
        # NOTE: `ffmpeg` version is constrained < 8, as it's a `torchcodec` requirement
        # NOTE(review): the leading `7:` looks like the Debian/Ubuntu package epoch
        # rather than the ffmpeg major version; confirm this pin enforces the bound
        ffmpeg=7:* \
        libmagic-dev \
        # NOTE: `espeak-ng` is the backend used by `phonemizer` so adding it here only
        # tentatively as it's most likely "too specific" for models relying on `phonemizer`
        # as e.g. https://huggingface.co/facebook/wav2vec2-lv-60-espeak-cv-ft
        espeak-ng \
    && apt-get autoremove -y \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install the Google Cloud CLI (provides `gsutil`, used by the entrypoint).
# - `pipefail` + `curl -f` make a failed key download abort the build instead of
#   feeding an empty/HTML payload to `gpg`.
# - `gnupg` is installed explicitly since `gpg` is not guaranteed to be present
#   in the base image.
# - `autoremove` and `clean` are separate invocations: `apt-get` only runs the
#   first command it is given.
# - Only the apt lists are removed; `/var/lib/dpkg` is kept so that images built
#   on top of this one can still install packages.
RUN set -o pipefail \
    && apt-get update \
    && apt-get install -y --no-install-recommends gnupg \
    && curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg \
        | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg \
    && echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
        > /etc/apt/sources.list.d/google-cloud-sdk.list \
    && apt-get update \
    && apt-get install -y google-cloud-cli \
    && apt-get autoremove -y \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# NOTE: Inference Endpoints API writes the Hugging Face Hub repository in
# `/repository` hence it should allow any user to read from it
RUN mkdir -p /repository && chmod 755 /repository

# NOTE: GID and UID set to 1001 instead of standard 1000, given that's reserved
# for the default non-root Ubuntu user
RUN groupadd --gid 1001 huggingface \
    && useradd --uid 1001 --gid huggingface --shell /bin/bash --create-home huggingface

# Create the /opt/huggingface directory and set correct owner and permissions,
# given that's the directory formerly used for the Vertex AI DLCs
RUN mkdir -p /opt/huggingface \
    && chown 1001:1001 /opt/huggingface \
    && chmod 755 /opt/huggingface

# Everything below runs (at build time and at runtime) as the non-root user
USER huggingface
WORKDIR /home/huggingface

# Install `uv` and the Rust toolchain into the user's home, then expose both
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
ENV PATH="/home/huggingface/.cargo/bin:/home/huggingface/.local/bin/:$PATH"

RUN uv python install 3.12

# Create the virtual environment and make it the default Python environment
RUN uv venv /home/huggingface/venv --python 3.12
ENV VIRTUAL_ENV=/home/huggingface/venv \
    PATH="/home/huggingface/venv/bin:$PATH"

WORKDIR /home/huggingface/app

COPY hf_serve-0.0.1rc6-py3-none-any.whl .
RUN uv pip install --frozen hf_serve-0.0.1rc6-py3-none-any.whl --extra cpu

RUN uv pip install google-cloud-storage crcmod --upgrade

COPY --chown=huggingface:huggingface entrypoint.sh /home/huggingface/entrypoint.sh
RUN chmod +x /home/huggingface/entrypoint.sh

# `USER huggingface` is already in effect from above, so it is not repeated here
ENTRYPOINT ["/home/huggingface/entrypoint.sh"]
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#!/bin/bash
2+
3+
set -eo pipefail
4+
5+
export CLOUD="google"

# `PORT` starts out as the default (5000). It is only overridden with
# `AIP_HTTP_PORT` when `AIP_MODE` is set (usually set to `PREDICT`); outside of
# that mode `AIP_HTTP_PORT` is ignored.
readonly DEFAULT_PORT=5000
export PORT="$DEFAULT_PORT"
if [[ -n "${AIP_MODE:-}" ]]; then
  PORT="${AIP_HTTP_PORT:-$DEFAULT_PORT}"
fi
15+
16+
#######################################
# Downloads the model artifacts referenced by `AIP_STORAGE_URI` from GCS and
# points `HF_MODEL_DIR` at the local copy. Does nothing unless
# `AIP_STORAGE_URI` starts with `gs://`.
# Globals:   AIP_STORAGE_URI (read), HF_MODEL_DIR (exported on success)
# Arguments: $1 - optional destination directory
#                 (default: /opt/huggingface/model)
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 on success or when there is nothing to download; exits the
#            script with status 1 when `gsutil` is missing or the copy fails
#######################################
download_model_from_gcs() {
  local target_dir="${1:-/opt/huggingface/model}"

  [[ "${AIP_STORAGE_URI:-}" == gs://* ]] || return 0

  echo "INFO: AIP_STORAGE_URI set and starts with 'gs://', proceeding to download from GCS."
  echo "INFO: AIP_STORAGE_URI: $AIP_STORAGE_URI"

  # Drop trailing slashes so that the wildcard below never becomes
  # `gs://bucket/path//*`, which would not match the objects under `path/`
  local object_path="${AIP_STORAGE_URI#gs://}"
  while [[ "$object_path" == */ ]]; do
    object_path="${object_path%/}"
  done
  local source_glob="gs://${object_path}/*"

  mkdir -p "$target_dir"

  if ! command -v gsutil &>/dev/null; then
    echo "ERROR: gsutil command not found. Please install Google Cloud SDK." >&2
    exit 1
  fi

  echo "INFO: Running: gsutil -m cp -e -r \"$source_glob\" \"$target_dir\""
  if ! gsutil -m cp -e -r "$source_glob" "$target_dir"; then
    echo "ERROR: Failed to download model from GCS." >&2
    exit 1
  fi

  echo "INFO: Model downloaded successfully to ${target_dir}."
  echo "INFO: Updating HF_MODEL_DIR to point to the local directory."

  export HF_MODEL_DIR="$target_dir"
}

download_model_from_gcs
39+
40+
# When `HF_MODEL_ID` holds a local directory rather than a Hub ID, hand its
# value over to `HF_MODEL_DIR` (with a user warning) and clear `HF_MODEL_ID`
if [[ -d "${HF_MODEL_ID:-}" ]]; then
  echo "WARNING: HF_MODEL_ID is a path, please use HF_MODEL_DIR for paths instead."
  export HF_MODEL_DIR="${HF_MODEL_ID}"
  unset HF_MODEL_ID
fi

#######################################
# Moves the value of a legacy environment variable into its replacement.
# When the legacy variable is non-empty it is always unset afterwards; the
# replacement is only exported if it is not already non-empty, otherwise a
# warning is printed and the replacement keeps its value.
# Arguments: $1 - legacy variable name, $2 - replacement variable name
#######################################
promote_legacy_env() {
  local legacy_var="$1"
  local current_var="$2"

  [[ -n "${!legacy_var:-}" ]] || return 0

  if [[ -n "${!current_var:-}" ]]; then
    echo "WARNING: ${current_var} is already set to '${!current_var}', keeping its value."
  else
    export "${current_var}=${!legacy_var}"
  fi
  unset "${legacy_var}"
}

# `HF_MODEL_DIR` -> `MODEL_DIR`
promote_legacy_env HF_MODEL_DIR MODEL_DIR

# `HF_DEFAULT_PIPELINE_NAME` -> `CUSTOM_HANDLER_FILE`
promote_legacy_env HF_DEFAULT_PIPELINE_NAME CUSTOM_HANDLER_FILE
67+
68+
#######################################
# Validates `MODEL_DIR` and installs the custom dependencies shipped with it.
# Does nothing when `MODEL_DIR` is unset or empty.
# Globals:   MODEL_DIR (read), CUSTOM_HANDLER_FILE (read)
# Arguments: none
# Outputs:   INFO/WARNING on stdout, ERROR on stderr
# Returns:   0 on success; exits the script with status 1 when `MODEL_DIR`
#            is not a directory
#######################################
prepare_model_dir() {
  [[ -n "${MODEL_DIR:-}" ]] || return 0

  if [[ ! -d "${MODEL_DIR}" ]]; then
    echo "ERROR: Provided MODEL_DIR is not a valid directory" >&2
    exit 1
  fi

  # Nothing else to do unless the model ships a `requirements.txt`
  [[ -f "${MODEL_DIR}/requirements.txt" ]] || return 0

  echo "INFO: Installing custom dependencies from ${MODEL_DIR}/requirements.txt"
  uv pip install --active -r "${MODEL_DIR}/requirements.txt" --no-cache-dir

  # Warn when `requirements.txt` is present but the custom handler is not.
  # `CUSTOM_HANDLER_FILE` may be unset here; in that case there is no file name
  # to look for, so say so instead of printing a warning with an empty name.
  local handler_file="${CUSTOM_HANDLER_FILE:-}"
  if [[ -z "$handler_file" ]]; then
    echo "WARNING: requirements.txt is present, but CUSTOM_HANDLER_FILE is not set, so the custom handler file could not be checked."
  elif [[ ! -f "${MODEL_DIR}/${handler_file}" ]]; then
    echo "WARNING: requirements.txt is present, but ${handler_file} is missing in ${MODEL_DIR}."
    echo "WARNING: If you intend to run custom code, make sure to include ${handler_file}."
  fi
}

prepare_model_dir
87+
88+
# Replace this shell process with `hf-serve`, forwarding every argument the
# entrypoint received
exec hf-serve "$@"
Binary file not shown.
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# hf-serve DLC (GPU): CUDA 12.8.1 devel image on Ubuntu 24.04 with the system
# libraries, Google Cloud CLI, uv-managed Python 3.11 virtual environment and
# the `hf-serve` wheel.
FROM nvidia/cuda:12.8.1-devel-ubuntu24.04
LABEL maintainer="Hugging Face"

SHELL ["/bin/bash", "-c"]

ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \
        curl \
        ca-certificates \
        build-essential \
        git \
        # NOTE: `ffmpeg` and `libmagic-dev` are required for audio related tasks
        # NOTE: `ffmpeg` version is constrained < 8, as it's a `torchcodec` requirement
        # NOTE(review): the leading `7:` looks like the Debian/Ubuntu package epoch
        # rather than the ffmpeg major version; confirm this pin enforces the bound
        ffmpeg=7:* \
        libmagic-dev \
        # NOTE: `espeak-ng` is the backend used by `phonemizer` so adding it here only
        # tentatively as it's most likely "too specific" for models relying on `phonemizer`
        # as e.g. https://huggingface.co/facebook/wav2vec2-lv-60-espeak-cv-ft
        espeak-ng \
    && apt-get autoremove -y \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# NOTE: `torchcodec` and hence the audio-related models as e.g. Wav2Vec, require
# both `libnpp` and `libnvrtc` to be present for it to work seamlessly on CUDA
RUN if ! ldconfig -p | grep -q libnpp; then \
        apt-get update && apt-get install -y --no-install-recommends libnpp-dev; \
    fi && \
    if ! ldconfig -p | grep -q libnvrtc; then \
        apt-get update && apt-get install -y --no-install-recommends cuda-nvrtc-dev; \
    fi && \
    apt-get autoremove -y && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Install the Google Cloud CLI (provides `gsutil`, used by the entrypoint).
# - `pipefail` + `curl -f` make a failed key download abort the build instead of
#   feeding an empty/HTML payload to `gpg`.
# - `gnupg` is installed explicitly since `gpg` is not guaranteed to be present
#   in the base image.
# - `autoremove` and `clean` are separate invocations: `apt-get` only runs the
#   first command it is given.
# - Only the apt lists are removed; `/var/lib/dpkg` is kept so that images built
#   on top of this one can still install packages.
RUN set -o pipefail \
    && apt-get update \
    && apt-get install -y --no-install-recommends gnupg \
    && curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg \
        | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg \
    && echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
        > /etc/apt/sources.list.d/google-cloud-sdk.list \
    && apt-get update \
    && apt-get install -y google-cloud-cli \
    && apt-get autoremove -y \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# NOTE: Inference Endpoints API writes the Hugging Face Hub repository in
# `/repository` hence it should allow any user to read from it
RUN mkdir -p /repository && chmod 755 /repository

# NOTE: GID and UID set to 1001 instead of standard 1000, given that's reserved
# for the default non-root Ubuntu user
RUN groupadd --gid 1001 huggingface \
    && useradd --uid 1001 --gid huggingface --shell /bin/bash --create-home huggingface

# Create the /opt/huggingface directory and set correct owner and permissions,
# given that's the directory formerly used for the Vertex AI DLCs
RUN mkdir -p /opt/huggingface \
    && chown 1001:1001 /opt/huggingface \
    && chmod 755 /opt/huggingface

# Everything below runs (at build time and at runtime) as the non-root user
USER huggingface
WORKDIR /home/huggingface

# Install `uv` and the Rust toolchain into the user's home, then expose both
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
ENV PATH="/home/huggingface/.cargo/bin:/home/huggingface/.local/bin/:$PATH"

RUN uv python install 3.11

# Create the virtual environment and make it the default Python environment
RUN uv venv /home/huggingface/venv --python 3.11
ENV VIRTUAL_ENV=/home/huggingface/venv \
    PATH="/home/huggingface/venv/bin:$PATH"

WORKDIR /home/huggingface/app

COPY hf_serve-0.0.1rc6-py3-none-any.whl .
RUN uv pip install --frozen hf_serve-0.0.1rc6-py3-none-any.whl --extra cuda --extra flash-attn --preview-features extra-build-dependencies

RUN uv pip install google-cloud-storage crcmod --upgrade

COPY --chown=huggingface:huggingface entrypoint.sh /home/huggingface/entrypoint.sh
RUN chmod +x /home/huggingface/entrypoint.sh

# `USER huggingface` is already in effect from above, so it is not repeated here
ENTRYPOINT ["/home/huggingface/entrypoint.sh"]
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
#!/bin/bash
2+
3+
set -eo pipefail
4+
5+
# Abort early when `nvidia-smi` cannot be found on PATH. The message goes to
# stderr, like every other ERROR emitted by this script.
if ! command -v nvidia-smi &>/dev/null; then
  echo "ERROR: nvidia-smi command not found, please use the CPU DLC instead." >&2
  exit 1
fi
9+
10+
export CLOUD="google"

# `PORT` starts out as the default (5000). It is only overridden with
# `AIP_HTTP_PORT` when `AIP_MODE` is set (usually set to `PREDICT`); outside of
# that mode `AIP_HTTP_PORT` is ignored.
readonly DEFAULT_PORT=5000
export PORT="$DEFAULT_PORT"
if [[ -n "${AIP_MODE:-}" ]]; then
  PORT="${AIP_HTTP_PORT:-$DEFAULT_PORT}"
fi
20+
21+
#######################################
# Downloads the model artifacts referenced by `AIP_STORAGE_URI` from GCS and
# points `HF_MODEL_DIR` at the local copy. Does nothing unless
# `AIP_STORAGE_URI` starts with `gs://`.
# Globals:   AIP_STORAGE_URI (read), HF_MODEL_DIR (exported on success)
# Arguments: $1 - optional destination directory
#                 (default: /opt/huggingface/model)
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 on success or when there is nothing to download; exits the
#            script with status 1 when `gsutil` is missing or the copy fails
#######################################
download_model_from_gcs() {
  local target_dir="${1:-/opt/huggingface/model}"

  [[ "${AIP_STORAGE_URI:-}" == gs://* ]] || return 0

  echo "INFO: AIP_STORAGE_URI set and starts with 'gs://', proceeding to download from GCS."
  echo "INFO: AIP_STORAGE_URI: $AIP_STORAGE_URI"

  # Drop trailing slashes so that the wildcard below never becomes
  # `gs://bucket/path//*`, which would not match the objects under `path/`
  local object_path="${AIP_STORAGE_URI#gs://}"
  while [[ "$object_path" == */ ]]; do
    object_path="${object_path%/}"
  done
  local source_glob="gs://${object_path}/*"

  mkdir -p "$target_dir"

  if ! command -v gsutil &>/dev/null; then
    echo "ERROR: gsutil command not found. Please install Google Cloud SDK." >&2
    exit 1
  fi

  echo "INFO: Running: gsutil -m cp -e -r \"$source_glob\" \"$target_dir\""
  if ! gsutil -m cp -e -r "$source_glob" "$target_dir"; then
    echo "ERROR: Failed to download model from GCS." >&2
    exit 1
  fi

  echo "INFO: Model downloaded successfully to ${target_dir}."
  echo "INFO: Updating HF_MODEL_DIR to point to the local directory."

  export HF_MODEL_DIR="$target_dir"
}

download_model_from_gcs
44+
45+
# When `HF_MODEL_ID` holds a local directory rather than a Hub ID, hand its
# value over to `HF_MODEL_DIR` (with a user warning) and clear `HF_MODEL_ID`
if [[ -d "${HF_MODEL_ID:-}" ]]; then
  echo "WARNING: HF_MODEL_ID is a path, please use HF_MODEL_DIR for paths instead."
  export HF_MODEL_DIR="${HF_MODEL_ID}"
  unset HF_MODEL_ID
fi

#######################################
# Moves the value of a legacy environment variable into its replacement.
# When the legacy variable is non-empty it is always unset afterwards; the
# replacement is only exported if it is not already non-empty, otherwise a
# warning is printed and the replacement keeps its value.
# Arguments: $1 - legacy variable name, $2 - replacement variable name
#######################################
promote_legacy_env() {
  local legacy_var="$1"
  local current_var="$2"

  [[ -n "${!legacy_var:-}" ]] || return 0

  if [[ -n "${!current_var:-}" ]]; then
    echo "WARNING: ${current_var} is already set to '${!current_var}', keeping its value."
  else
    export "${current_var}=${!legacy_var}"
  fi
  unset "${legacy_var}"
}

# `HF_MODEL_DIR` -> `MODEL_DIR`
promote_legacy_env HF_MODEL_DIR MODEL_DIR

# `HF_DEFAULT_PIPELINE_NAME` -> `CUSTOM_HANDLER_FILE`
promote_legacy_env HF_DEFAULT_PIPELINE_NAME CUSTOM_HANDLER_FILE
72+
73+
#######################################
# Validates `MODEL_DIR` and installs the custom dependencies shipped with it.
# Does nothing when `MODEL_DIR` is unset or empty.
# Globals:   MODEL_DIR (read), CUSTOM_HANDLER_FILE (read)
# Arguments: none
# Outputs:   INFO/WARNING on stdout, ERROR on stderr
# Returns:   0 on success; exits the script with status 1 when `MODEL_DIR`
#            is not a directory
#######################################
prepare_model_dir() {
  [[ -n "${MODEL_DIR:-}" ]] || return 0

  if [[ ! -d "${MODEL_DIR}" ]]; then
    echo "ERROR: Provided MODEL_DIR is not a valid directory" >&2
    exit 1
  fi

  # Nothing else to do unless the model ships a `requirements.txt`
  [[ -f "${MODEL_DIR}/requirements.txt" ]] || return 0

  echo "INFO: Installing custom dependencies from ${MODEL_DIR}/requirements.txt"
  uv pip install --active -r "${MODEL_DIR}/requirements.txt" --no-cache-dir

  # Warn when `requirements.txt` is present but the custom handler is not.
  # `CUSTOM_HANDLER_FILE` may be unset here; in that case there is no file name
  # to look for, so say so instead of printing a warning with an empty name.
  local handler_file="${CUSTOM_HANDLER_FILE:-}"
  if [[ -z "$handler_file" ]]; then
    echo "WARNING: requirements.txt is present, but CUSTOM_HANDLER_FILE is not set, so the custom handler file could not be checked."
  elif [[ ! -f "${MODEL_DIR}/${handler_file}" ]]; then
    echo "WARNING: requirements.txt is present, but ${handler_file} is missing in ${MODEL_DIR}."
    echo "WARNING: If you intend to run custom code, make sure to include ${handler_file}."
  fi
}

prepare_model_dir
92+
93+
# Replace this shell process with `hf-serve`, forwarding every argument the
# entrypoint received
exec hf-serve "$@"
Binary file not shown.

0 commit comments

Comments
 (0)