
ONNX GPU support #232

Merged: 2 commits, Jul 16, 2019

2 changes: 2 additions & 0 deletions Makefile
@@ -133,6 +133,7 @@ ci-build-images:
 	@./build/build-image.sh images/tf-serve-gpu tf-serve-gpu
 	@./build/build-image.sh images/tf-api tf-api
 	@./build/build-image.sh images/onnx-serve onnx-serve
+	@./build/build-image.sh images/onnx-serve-gpu onnx-serve-gpu
 	@./build/build-image.sh images/operator operator
 	@./build/build-image.sh images/fluentd fluentd
 	@./build/build-image.sh images/nginx-controller nginx-controller
@@ -154,6 +155,7 @@ ci-push-images:
 	@./build/push-image.sh tf-serve-gpu
 	@./build/push-image.sh tf-api
 	@./build/push-image.sh onnx-serve
+	@./build/push-image.sh onnx-serve-gpu
 	@./build/push-image.sh operator
 	@./build/push-image.sh fluentd
 	@./build/push-image.sh nginx-controller
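
With both CI targets updated, the full CI image flow picks up the new image automatically. A sketch of running it by hand (assumes make is invoked from the repo root with Docker and registry credentials already configured):

    # Build every image, then push them; onnx-serve-gpu now rides along:
    make ci-build-images
    make ci-push-images
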
1 change: 1 addition & 0 deletions cortex.sh
@@ -132,6 +132,7 @@ export CORTEX_IMAGE_PYTHON_PACKAGER="${CORTEX_IMAGE_PYTHON_PACKAGER:-cortexlabs/
 export CORTEX_IMAGE_TF_SERVE_GPU="${CORTEX_IMAGE_TF_SERVE_GPU:-cortexlabs/tf-serve-gpu:$CORTEX_VERSION_STABLE}"
 export CORTEX_IMAGE_TF_TRAIN_GPU="${CORTEX_IMAGE_TF_TRAIN_GPU:-cortexlabs/tf-train-gpu:$CORTEX_VERSION_STABLE}"
 export CORTEX_IMAGE_ONNX_SERVE="${CORTEX_IMAGE_ONNX_SERVE:-cortexlabs/onnx-serve:$CORTEX_VERSION_STABLE}"
+export CORTEX_IMAGE_ONNX_SERVE_GPU="${CORTEX_IMAGE_ONNX_SERVE_GPU:-cortexlabs/onnx-serve-gpu:$CORTEX_VERSION_STABLE}"
 export CORTEX_IMAGE_CLUSTER_AUTOSCALER="${CORTEX_IMAGE_CLUSTER_AUTOSCALER:-cortexlabs/cluster-autoscaler:$CORTEX_VERSION_STABLE}"
 export CORTEX_IMAGE_NVIDIA="${CORTEX_IMAGE_NVIDIA:-cortexlabs/nvidia:$CORTEX_VERSION_STABLE}"
 export CORTEX_IMAGE_METRICS_SERVER="${CORTEX_IMAGE_METRICS_SERVER:-cortexlabs/metrics-server:$CORTEX_VERSION_STABLE}"
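
Because the export uses the ${VAR:-default} pattern, the GPU serving image can be overridden from the environment before running cortex.sh, falling back to the versioned cortexlabs image otherwise. A minimal sketch (the ECR URL is a placeholder, and `install` is assumed to be the relevant cortex.sh command):

    # Hypothetical override: the default only applies when the variable is unset.
    export CORTEX_IMAGE_ONNX_SERVE_GPU="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/onnx-serve-gpu:latest"
    ./cortex.sh install
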
2 changes: 2 additions & 0 deletions dev/registry.sh
@@ -51,6 +51,7 @@ function create_registry() {
     aws ecr create-repository --repository-name=cortexlabs/tf-train-gpu --region=$REGISTRY_REGION || true
     aws ecr create-repository --repository-name=cortexlabs/tf-serve-gpu --region=$REGISTRY_REGION || true
     aws ecr create-repository --repository-name=cortexlabs/onnx-serve --region=$REGISTRY_REGION || true
+    aws ecr create-repository --repository-name=cortexlabs/onnx-serve-gpu --region=$REGISTRY_REGION || true
     aws ecr create-repository --repository-name=cortexlabs/cluster-autoscaler --region=$REGISTRY_REGION || true
     aws ecr create-repository --repository-name=cortexlabs/nvidia --region=$REGISTRY_REGION || true
     aws ecr create-repository --repository-name=cortexlabs/metrics-server --region=$REGISTRY_REGION || true
@@ -141,6 +142,7 @@ elif [ "$cmd" = "update" ]; then
     build_and_push $ROOT/images/argo-executor argo-executor latest
     build_and_push $ROOT/images/tf-serve tf-serve latest
     build_and_push $ROOT/images/tf-serve-gpu tf-serve-gpu latest
+    build_and_push $ROOT/images/onnx-serve-gpu onnx-serve-gpu latest
     build_and_push $ROOT/images/python-packager python-packager latest
     build_and_push $ROOT/images/cluster-autoscaler cluster-autoscaler latest
     build_and_push $ROOT/images/nvidia nvidia latest
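
The `update` path above is the dev workflow this diff touches; using it is a one-liner (assumes AWS credentials and $REGISTRY_REGION are configured, and that the ECR repositories have been created via create_registry, which the `|| true` guards keep idempotent):

    # Rebuild and push all dev images, now including onnx-serve-gpu:
    ./dev/registry.sh update
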
1 change: 1 addition & 0 deletions docs/cluster/config.md
@@ -54,6 +54,7 @@ export CORTEX_IMAGE_TF_API="cortexlabs/tf-api:master"
 export CORTEX_IMAGE_TF_TRAIN_GPU="cortexlabs/tf-train-gpu:master"
 export CORTEX_IMAGE_TF_SERVE_GPU="cortexlabs/tf-serve-gpu:master"
 export CORTEX_IMAGE_ONNX_SERVE="cortexlabs/onnx-serve:master"
+export CORTEX_IMAGE_ONNX_SERVE_GPU="cortexlabs/onnx-serve-gpu:master"
 export CORTEX_IMAGE_PYTHON_PACKAGER="cortexlabs/python-packager:master"
 export CORTEX_IMAGE_CLUSTER_AUTOSCALER="cortexlabs/cluster-autoscaler:master"
 export CORTEX_IMAGE_NVIDIA="cortexlabs/nvidia:master"
1 change: 1 addition & 0 deletions docs/cluster/development.md
@@ -62,6 +62,7 @@ export CORTEX_IMAGE_FLUENTD="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs
 export CORTEX_IMAGE_NGINX_BACKEND="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/nginx-backend:latest"
 export CORTEX_IMAGE_NGINX_CONTROLLER="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/nginx-controller:latest"
 export CORTEX_IMAGE_ONNX_SERVE="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/onnx-serve:latest"
+export CORTEX_IMAGE_ONNX_SERVE_GPU="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/onnx-serve-gpu:latest"
 export CORTEX_IMAGE_OPERATOR="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/operator:latest"
 export CORTEX_IMAGE_SPARK="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/spark:latest"
 export CORTEX_IMAGE_SPARK_OPERATOR="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/spark-operator:latest"
42 changes: 42 additions & 0 deletions images/onnx-serve-gpu/Dockerfile
@@ -0,0 +1,42 @@
+FROM nvidia/cuda:9.1-cudnn7-devel
+
+ARG ONNXRUNTIME_VERSION="0.4.0"
+
+RUN apt-get update -qq && apt-get install -y -q \
+    python3 \
+    python3-dev \
+    python3-pip \
+    && apt-get clean -qq && rm -rf /var/lib/apt/lists/* && \
+    pip3 install --upgrade \
+    pip \
+    setuptools \
+    && rm -rf /root/.cache/pip*
+
+RUN apt-get update -qq && apt-get install -y -q \
+    build-essential \
+    curl \
+    libfreetype6-dev \
+    libpng-dev \
+    libzmq3-dev \
+    pkg-config \
+    rsync \
+    software-properties-common \
+    unzip \
+    zlib1g-dev \
+    && apt-get clean -qq && rm -rf /var/lib/apt/lists/*
+
+
+ENV PYTHONPATH="/src:${PYTHONPATH}"
+
+COPY pkg/workloads/cortex/lib/requirements.txt /src/cortex/lib/requirements.txt
+COPY pkg/workloads/cortex/onnx_serve/requirements.txt /src/cortex/onnx_serve/requirements.txt
+RUN pip3 install -r /src/cortex/lib/requirements.txt && \
+    pip3 install -r /src/cortex/onnx_serve/requirements.txt && \
+    pip3 install onnxruntime-gpu==${ONNXRUNTIME_VERSION} && \
+    rm -rf /root/.cache/pip*
+
+COPY pkg/workloads/cortex/consts.py /src/cortex
+COPY pkg/workloads/cortex/lib /src/cortex/lib
+COPY pkg/workloads/cortex/onnx_serve /src/cortex/onnx_serve
+
+ENTRYPOINT ["/usr/bin/python3", "/src/cortex/onnx_serve/api.py"]
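
The COPY paths are relative to the repository root, so the build context must be the repo root (which is presumably what build/build-image.sh arranges). A local sketch, with an illustrative tag and smoke test that are not part of this PR:

    # Build from the repo root; the ARG makes the onnxruntime-gpu version overridable.
    docker build . -f images/onnx-serve-gpu/Dockerfile \
        --build-arg ONNXRUNTIME_VERSION=0.4.0 \
        -t cortexlabs/onnx-serve-gpu:latest

    # Check that the GPU build of onnxruntime is active (requires the nvidia
    # Docker runtime; get_device() reports "GPU" for onnxruntime-gpu):
    docker run --runtime=nvidia --rm --entrypoint=python3 \
        cortexlabs/onnx-serve-gpu:latest \
        -c 'import onnxruntime; print(onnxruntime.get_device())'
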
3 changes: 3 additions & 0 deletions images/onnx-serve/Dockerfile
@@ -1,5 +1,7 @@
 FROM ubuntu:16.04
 
+ARG ONNXRUNTIME_VERSION="0.4.0"
+
 RUN apt-get update -qq && apt-get install -y -q \
     python3 \
     python3-dev \
@@ -30,6 +32,7 @@ COPY pkg/workloads/cortex/lib/requirements.txt /src/cortex/lib/requirements.txt
 COPY pkg/workloads/cortex/onnx_serve/requirements.txt /src/cortex/onnx_serve/requirements.txt
 RUN pip3 install -r /src/cortex/lib/requirements.txt && \
     pip3 install -r /src/cortex/onnx_serve/requirements.txt && \
+    pip3 install onnxruntime==${ONNXRUNTIME_VERSION} && \
     rm -rf /root/.cache/pip*
 
 COPY pkg/workloads/cortex/consts.py /src/cortex
2 changes: 2 additions & 0 deletions pkg/operator/config/config.go
@@ -54,6 +54,7 @@ type CortexConfig struct {
     TFTrainImageGPU string `json:"tf_train_image_gpu"`
     TFServeImageGPU string `json:"tf_serve_image_gpu"`
     ONNXServeImage string `json:"onnx_serve_image"`
+    ONNXServeImageGPU string `json:"onnx_serve_gpu_image"`
 
     TelemetryURL string `json:"telemetry_url"`
     EnableTelemetry bool `json:"enable_telemetry"`
@@ -76,6 +77,7 @@ func Init() error {
     TFTrainImageGPU: getStr("IMAGE_TF_TRAIN_GPU"),
     TFServeImageGPU: getStr("IMAGE_TF_SERVE_GPU"),
     ONNXServeImage: getStr("IMAGE_ONNX_SERVE"),
+    ONNXServeImageGPU: getStr("IMAGE_ONNX_SERVE_GPU"),
 
     TelemetryURL: configreader.MustStringFromEnv("CONST_TELEMETRY_URL", &configreader.StringValidation{Required: false, Default: consts.TelemetryURL}),
     EnableTelemetry: getBool("ENABLE_TELEMETRY"),
11 changes: 10 additions & 1 deletion pkg/operator/workloads/api.go
@@ -171,13 +171,21 @@ func onnxAPISpec(
     workloadID string,
     desiredReplicas int32,
 ) *kapps.Deployment {
+    servingImage := config.Cortex.ONNXServeImage
     resourceList := kcore.ResourceList{}
+    resourceLimitsList := kcore.ResourceList{}
     resourceList[kcore.ResourceCPU] = api.Compute.CPU.Quantity
 
     if api.Compute.Mem != nil {
         resourceList[kcore.ResourceMemory] = api.Compute.Mem.Quantity
     }
 
+    if api.Compute.GPU > 0 {
+        servingImage = config.Cortex.ONNXServeImageGPU
+        resourceList["nvidia.com/gpu"] = *kresource.NewQuantity(api.Compute.GPU, kresource.DecimalSI)
+        resourceLimitsList["nvidia.com/gpu"] = *kresource.NewQuantity(api.Compute.GPU, kresource.DecimalSI)
+    }
+
     return k8s.Deployment(&k8s.DeploymentSpec{
         Name: internalAPIName(api.Name, ctx.App.Name),
         Replicas: desiredReplicas,
@@ -206,7 +214,7 @@ Containers: []kcore.Container{
             Containers: []kcore.Container{
                 {
                     Name: apiContainerName,
-                    Image: config.Cortex.ONNXServeImage,
+                    Image: servingImage,
                     ImagePullPolicy: "Always",
                     Args: []string{
                         "--workload-id=" + workloadID,
@@ -235,6 +243,7 @@ },
                     },
                     Resources: kcore.ResourceRequirements{
                         Requests: resourceList,
+                        Limits: resourceLimitsList,
                     },
                 },
             },
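
Worth noting: nvidia.com/gpu is a Kubernetes extended resource, which cannot be overcommitted, so it must be set under limits (and any request must equal the limit). That is why the GPU quantity goes into both resourceList and the new resourceLimitsList while CPU and memory stay request-only. A quick way to inspect the rendered Deployment (the deployment and namespace names are illustrative):

    # Confirm the GPU image and nvidia.com/gpu resources on a deployed ONNX API:
    kubectl -n <namespace> get deployment <api-deployment> \
        -o jsonpath='{.spec.template.spec.containers[0].image}{"\n"}{.spec.template.spec.containers[0].resources}'
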
1 change: 0 additions & 1 deletion pkg/workloads/cortex/onnx_serve/requirements.txt
@@ -1,5 +1,4 @@
 flask==1.0.2
 flask-api==1.1
 waitress==1.2.1
-onnxruntime==0.4.0
 numpy>=1.15.0