Skip to content

Commit 0dfda41

Browse files
authored
ONNX GPU support
1 parent 6f944c9 commit 0dfda41

File tree

10 files changed

+64
-2
lines changed

10 files changed

+64
-2
lines changed

Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ ci-build-images:
133133
@./build/build-image.sh images/tf-serve-gpu tf-serve-gpu
134134
@./build/build-image.sh images/tf-api tf-api
135135
@./build/build-image.sh images/onnx-serve onnx-serve
136+
@./build/build-image.sh images/onnx-serve-gpu onnx-serve-gpu
136137
@./build/build-image.sh images/operator operator
137138
@./build/build-image.sh images/fluentd fluentd
138139
@./build/build-image.sh images/nginx-controller nginx-controller
@@ -154,6 +155,7 @@ ci-push-images:
154155
@./build/push-image.sh tf-serve-gpu
155156
@./build/push-image.sh tf-api
156157
@./build/push-image.sh onnx-serve
158+
@./build/push-image.sh onnx-serve-gpu
157159
@./build/push-image.sh operator
158160
@./build/push-image.sh fluentd
159161
@./build/push-image.sh nginx-controller

cortex.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ export CORTEX_IMAGE_PYTHON_PACKAGER="${CORTEX_IMAGE_PYTHON_PACKAGER:-cortexlabs/
132132
export CORTEX_IMAGE_TF_SERVE_GPU="${CORTEX_IMAGE_TF_SERVE_GPU:-cortexlabs/tf-serve-gpu:$CORTEX_VERSION_STABLE}"
133133
export CORTEX_IMAGE_TF_TRAIN_GPU="${CORTEX_IMAGE_TF_TRAIN_GPU:-cortexlabs/tf-train-gpu:$CORTEX_VERSION_STABLE}"
134134
export CORTEX_IMAGE_ONNX_SERVE="${CORTEX_IMAGE_ONNX_SERVE:-cortexlabs/onnx-serve:$CORTEX_VERSION_STABLE}"
135+
export CORTEX_IMAGE_ONNX_SERVE_GPU="${CORTEX_IMAGE_ONNX_SERVE_GPU:-cortexlabs/onnx-serve-gpu:$CORTEX_VERSION_STABLE}"
135136
export CORTEX_IMAGE_CLUSTER_AUTOSCALER="${CORTEX_IMAGE_CLUSTER_AUTOSCALER:-cortexlabs/cluster-autoscaler:$CORTEX_VERSION_STABLE}"
136137
export CORTEX_IMAGE_NVIDIA="${CORTEX_IMAGE_NVIDIA:-cortexlabs/nvidia:$CORTEX_VERSION_STABLE}"
137138
export CORTEX_IMAGE_METRICS_SERVER="${CORTEX_IMAGE_METRICS_SERVER:-cortexlabs/metrics-server:$CORTEX_VERSION_STABLE}"

dev/registry.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ function create_registry() {
5151
aws ecr create-repository --repository-name=cortexlabs/tf-train-gpu --region=$REGISTRY_REGION || true
5252
aws ecr create-repository --repository-name=cortexlabs/tf-serve-gpu --region=$REGISTRY_REGION || true
5353
aws ecr create-repository --repository-name=cortexlabs/onnx-serve --region=$REGISTRY_REGION || true
54+
aws ecr create-repository --repository-name=cortexlabs/onnx-serve-gpu --region=$REGISTRY_REGION || true
5455
aws ecr create-repository --repository-name=cortexlabs/cluster-autoscaler --region=$REGISTRY_REGION || true
5556
aws ecr create-repository --repository-name=cortexlabs/nvidia --region=$REGISTRY_REGION || true
5657
aws ecr create-repository --repository-name=cortexlabs/metrics-server --region=$REGISTRY_REGION || true
@@ -141,6 +142,7 @@ elif [ "$cmd" = "update" ]; then
141142
build_and_push $ROOT/images/argo-executor argo-executor latest
142143
build_and_push $ROOT/images/tf-serve tf-serve latest
143144
build_and_push $ROOT/images/tf-serve-gpu tf-serve-gpu latest
145+
build_and_push $ROOT/images/onnx-serve-gpu onnx-serve-gpu latest
144146
build_and_push $ROOT/images/python-packager python-packager latest
145147
build_and_push $ROOT/images/cluster-autoscaler cluster-autoscaler latest
146148
build_and_push $ROOT/images/nvidia nvidia latest

docs/cluster/config.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ export CORTEX_IMAGE_TF_API="cortexlabs/tf-api:master"
5454
export CORTEX_IMAGE_TF_TRAIN_GPU="cortexlabs/tf-train-gpu:master"
5555
export CORTEX_IMAGE_TF_SERVE_GPU="cortexlabs/tf-serve-gpu:master"
5656
export CORTEX_IMAGE_ONNX_SERVE="cortexlabs/onnx-serve:master"
57+
export CORTEX_IMAGE_ONNX_SERVE_GPU="cortexlabs/onnx-serve-gpu:master"
5758
export CORTEX_IMAGE_PYTHON_PACKAGER="cortexlabs/python-packager:master"
5859
export CORTEX_IMAGE_CLUSTER_AUTOSCALER="cortexlabs/cluster-autoscaler:master"
5960
export CORTEX_IMAGE_NVIDIA="cortexlabs/nvidia:master"

docs/cluster/development.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ export CORTEX_IMAGE_FLUENTD="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs
6262
export CORTEX_IMAGE_NGINX_BACKEND="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/nginx-backend:latest"
6363
export CORTEX_IMAGE_NGINX_CONTROLLER="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/nginx-controller:latest"
6464
export CORTEX_IMAGE_ONNX_SERVE="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/onnx-serve:latest"
65+
export CORTEX_IMAGE_ONNX_SERVE_GPU="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/onnx-serve-gpu:latest"
6566
export CORTEX_IMAGE_OPERATOR="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/operator:latest"
6667
export CORTEX_IMAGE_SPARK="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/spark:latest"
6768
export CORTEX_IMAGE_SPARK_OPERATOR="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/spark-operator:latest"

images/onnx-serve-gpu/Dockerfile

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
FROM nvidia/cuda:9.1-cudnn7-devel
2+
3+
ARG ONNXRUNTIME_VERSION="0.4.0"
4+
5+
RUN apt-get update -qq && apt-get install -y -q \
6+
python3 \
7+
python3-dev \
8+
python3-pip \
9+
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/* && \
10+
pip3 install --upgrade \
11+
pip \
12+
setuptools \
13+
&& rm -rf /root/.cache/pip*
14+
15+
RUN apt-get update -qq && apt-get install -y -q \
16+
build-essential \
17+
curl \
18+
libfreetype6-dev \
19+
libpng-dev \
20+
libzmq3-dev \
21+
pkg-config \
22+
rsync \
23+
software-properties-common \
24+
unzip \
25+
zlib1g-dev \
26+
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*
27+
28+
29+
ENV PYTHONPATH="/src:${PYTHONPATH}"
30+
31+
COPY pkg/workloads/cortex/lib/requirements.txt /src/cortex/lib/requirements.txt
32+
COPY pkg/workloads/cortex/onnx_serve/requirements.txt /src/cortex/onnx_serve/requirements.txt
33+
RUN pip3 install -r /src/cortex/lib/requirements.txt && \
34+
pip3 install -r /src/cortex/onnx_serve/requirements.txt && \
35+
pip3 install onnxruntime-gpu==${ONNXRUNTIME_VERSION} && \
36+
rm -rf /root/.cache/pip*
37+
38+
COPY pkg/workloads/cortex/consts.py /src/cortex
39+
COPY pkg/workloads/cortex/lib /src/cortex/lib
40+
COPY pkg/workloads/cortex/onnx_serve /src/cortex/onnx_serve
41+
42+
ENTRYPOINT ["/usr/bin/python3", "/src/cortex/onnx_serve/api.py"]

images/onnx-serve/Dockerfile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
FROM ubuntu:16.04
22

3+
ARG ONNXRUNTIME_VERSION="0.4.0"
4+
35
RUN apt-get update -qq && apt-get install -y -q \
46
python3 \
57
python3-dev \
@@ -30,6 +32,7 @@ COPY pkg/workloads/cortex/lib/requirements.txt /src/cortex/lib/requirements.txt
3032
COPY pkg/workloads/cortex/onnx_serve/requirements.txt /src/cortex/onnx_serve/requirements.txt
3133
RUN pip3 install -r /src/cortex/lib/requirements.txt && \
3234
pip3 install -r /src/cortex/onnx_serve/requirements.txt && \
35+
pip3 install onnxruntime==${ONNXRUNTIME_VERSION} && \
3336
rm -rf /root/.cache/pip*
3437

3538
COPY pkg/workloads/cortex/consts.py /src/cortex

pkg/operator/config/config.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ type CortexConfig struct {
5454
TFTrainImageGPU string `json:"tf_train_image_gpu"`
5555
TFServeImageGPU string `json:"tf_serve_image_gpu"`
5656
ONNXServeImage string `json:"onnx_serve_image"`
57+
ONNXServeImageGPU string `json:"onnx_serve_gpu_image"`
5758

5859
TelemetryURL string `json:"telemetry_url"`
5960
EnableTelemetry bool `json:"enable_telemetry"`
@@ -76,6 +77,7 @@ func Init() error {
7677
TFTrainImageGPU: getStr("IMAGE_TF_TRAIN_GPU"),
7778
TFServeImageGPU: getStr("IMAGE_TF_SERVE_GPU"),
7879
ONNXServeImage: getStr("IMAGE_ONNX_SERVE"),
80+
ONNXServeImageGPU: getStr("IMAGE_ONNX_SERVE_GPU"),
7981

8082
TelemetryURL: configreader.MustStringFromEnv("CONST_TELEMETRY_URL", &configreader.StringValidation{Required: false, Default: consts.TelemetryURL}),
8183
EnableTelemetry: getBool("ENABLE_TELEMETRY"),

pkg/operator/workloads/api.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,13 +171,21 @@ func onnxAPISpec(
171171
workloadID string,
172172
desiredReplicas int32,
173173
) *kapps.Deployment {
174+
servingImage := config.Cortex.ONNXServeImage
174175
resourceList := kcore.ResourceList{}
176+
resourceLimitsList := kcore.ResourceList{}
175177
resourceList[kcore.ResourceCPU] = api.Compute.CPU.Quantity
176178

177179
if api.Compute.Mem != nil {
178180
resourceList[kcore.ResourceMemory] = api.Compute.Mem.Quantity
179181
}
180182

183+
if api.Compute.GPU > 0 {
184+
servingImage = config.Cortex.ONNXServeImageGPU
185+
resourceList["nvidia.com/gpu"] = *kresource.NewQuantity(api.Compute.GPU, kresource.DecimalSI)
186+
resourceLimitsList["nvidia.com/gpu"] = *kresource.NewQuantity(api.Compute.GPU, kresource.DecimalSI)
187+
}
188+
181189
return k8s.Deployment(&k8s.DeploymentSpec{
182190
Name: internalAPIName(api.Name, ctx.App.Name),
183191
Replicas: desiredReplicas,
@@ -206,7 +214,7 @@ func onnxAPISpec(
206214
Containers: []kcore.Container{
207215
{
208216
Name: apiContainerName,
209-
Image: config.Cortex.ONNXServeImage,
217+
Image: servingImage,
210218
ImagePullPolicy: "Always",
211219
Args: []string{
212220
"--workload-id=" + workloadID,
@@ -235,6 +243,7 @@ func onnxAPISpec(
235243
},
236244
Resources: kcore.ResourceRequirements{
237245
Requests: resourceList,
246+
Limits: resourceLimitsList,
238247
},
239248
},
240249
},
pkg/workloads/cortex/onnx_serve/requirements.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
flask==1.0.2
22
flask-api==1.1
33
waitress==1.2.1
4-
onnxruntime==0.4.0
54
numpy>=1.15.0

0 commit comments

Comments
 (0)