diff --git a/Makefile b/Makefile index 057edcb928..4a81c2af75 100644 --- a/Makefile +++ b/Makefile @@ -133,6 +133,7 @@ ci-build-images: @./build/build-image.sh images/tf-serve-gpu tf-serve-gpu @./build/build-image.sh images/tf-api tf-api @./build/build-image.sh images/onnx-serve onnx-serve + @./build/build-image.sh images/onnx-serve-gpu onnx-serve-gpu @./build/build-image.sh images/operator operator @./build/build-image.sh images/fluentd fluentd @./build/build-image.sh images/nginx-controller nginx-controller @@ -154,6 +155,7 @@ ci-push-images: @./build/push-image.sh tf-serve-gpu @./build/push-image.sh tf-api @./build/push-image.sh onnx-serve + @./build/push-image.sh onnx-serve-gpu @./build/push-image.sh operator @./build/push-image.sh fluentd @./build/push-image.sh nginx-controller diff --git a/cortex.sh b/cortex.sh index d682bb1102..1c0b5e3d12 100755 --- a/cortex.sh +++ b/cortex.sh @@ -132,6 +132,7 @@ export CORTEX_IMAGE_PYTHON_PACKAGER="${CORTEX_IMAGE_PYTHON_PACKAGER:-cortexlabs/ export CORTEX_IMAGE_TF_SERVE_GPU="${CORTEX_IMAGE_TF_SERVE_GPU:-cortexlabs/tf-serve-gpu:$CORTEX_VERSION_STABLE}" export CORTEX_IMAGE_TF_TRAIN_GPU="${CORTEX_IMAGE_TF_TRAIN_GPU:-cortexlabs/tf-train-gpu:$CORTEX_VERSION_STABLE}" export CORTEX_IMAGE_ONNX_SERVE="${CORTEX_IMAGE_ONNX_SERVE:-cortexlabs/onnx-serve:$CORTEX_VERSION_STABLE}" +export CORTEX_IMAGE_ONNX_SERVE_GPU="${CORTEX_IMAGE_ONNX_SERVE_GPU:-cortexlabs/onnx-serve-gpu:$CORTEX_VERSION_STABLE}" export CORTEX_IMAGE_CLUSTER_AUTOSCALER="${CORTEX_IMAGE_CLUSTER_AUTOSCALER:-cortexlabs/cluster-autoscaler:$CORTEX_VERSION_STABLE}" export CORTEX_IMAGE_NVIDIA="${CORTEX_IMAGE_NVIDIA:-cortexlabs/nvidia:$CORTEX_VERSION_STABLE}" export CORTEX_IMAGE_METRICS_SERVER="${CORTEX_IMAGE_METRICS_SERVER:-cortexlabs/metrics-server:$CORTEX_VERSION_STABLE}" diff --git a/dev/registry.sh b/dev/registry.sh index b501e5844b..e9efbdd3ce 100755 --- a/dev/registry.sh +++ b/dev/registry.sh @@ -51,6 +51,7 @@ function create_registry() { aws ecr create-repository 
--repository-name=cortexlabs/tf-train-gpu --region=$REGISTRY_REGION || true aws ecr create-repository --repository-name=cortexlabs/tf-serve-gpu --region=$REGISTRY_REGION || true aws ecr create-repository --repository-name=cortexlabs/onnx-serve --region=$REGISTRY_REGION || true + aws ecr create-repository --repository-name=cortexlabs/onnx-serve-gpu --region=$REGISTRY_REGION || true aws ecr create-repository --repository-name=cortexlabs/cluster-autoscaler --region=$REGISTRY_REGION || true aws ecr create-repository --repository-name=cortexlabs/nvidia --region=$REGISTRY_REGION || true aws ecr create-repository --repository-name=cortexlabs/metrics-server --region=$REGISTRY_REGION || true @@ -141,6 +142,7 @@ elif [ "$cmd" = "update" ]; then build_and_push $ROOT/images/argo-executor argo-executor latest build_and_push $ROOT/images/tf-serve tf-serve latest build_and_push $ROOT/images/tf-serve-gpu tf-serve-gpu latest + build_and_push $ROOT/images/onnx-serve-gpu onnx-serve-gpu latest build_and_push $ROOT/images/python-packager python-packager latest build_and_push $ROOT/images/cluster-autoscaler cluster-autoscaler latest build_and_push $ROOT/images/nvidia nvidia latest diff --git a/docs/cluster/config.md b/docs/cluster/config.md index 6dc5ac1358..e4000960e8 100644 --- a/docs/cluster/config.md +++ b/docs/cluster/config.md @@ -54,6 +54,7 @@ export CORTEX_IMAGE_TF_API="cortexlabs/tf-api:master" export CORTEX_IMAGE_TF_TRAIN_GPU="cortexlabs/tf-train-gpu:master" export CORTEX_IMAGE_TF_SERVE_GPU="cortexlabs/tf-serve-gpu:master" export CORTEX_IMAGE_ONNX_SERVE="cortexlabs/onnx-serve:master" +export CORTEX_IMAGE_ONNX_SERVE_GPU="cortexlabs/onnx-serve-gpu:master" export CORTEX_IMAGE_PYTHON_PACKAGER="cortexlabs/python-packager:master" export CORTEX_IMAGE_CLUSTER_AUTOSCALER="cortexlabs/cluster-autoscaler:master" export CORTEX_IMAGE_NVIDIA="cortexlabs/nvidia:master" diff --git a/docs/cluster/development.md b/docs/cluster/development.md index e00f8105f1..0748b193f6 100644 --- 
a/docs/cluster/development.md +++ b/docs/cluster/development.md @@ -62,6 +62,7 @@ export CORTEX_IMAGE_FLUENTD="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs export CORTEX_IMAGE_NGINX_BACKEND="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/nginx-backend:latest" export CORTEX_IMAGE_NGINX_CONTROLLER="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/nginx-controller:latest" export CORTEX_IMAGE_ONNX_SERVE="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/onnx-serve:latest" +export CORTEX_IMAGE_ONNX_SERVE_GPU="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/onnx-serve-gpu:latest" export CORTEX_IMAGE_OPERATOR="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/operator:latest" export CORTEX_IMAGE_SPARK="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/spark:latest" export CORTEX_IMAGE_SPARK_OPERATOR="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/spark-operator:latest" diff --git a/images/onnx-serve-gpu/Dockerfile b/images/onnx-serve-gpu/Dockerfile new file mode 100644 index 0000000000..dfb56ca348 --- /dev/null +++ b/images/onnx-serve-gpu/Dockerfile @@ -0,0 +1,42 @@ +FROM nvidia/cuda:9.1-cudnn7-devel + +ARG ONNXRUNTIME_VERSION="0.4.0" + +RUN apt-get update -qq && apt-get install -y -q \ + python3 \ + python3-dev \ + python3-pip \ + && apt-get clean -qq && rm -rf /var/lib/apt/lists/* && \ + pip3 install --upgrade \ + pip \ + setuptools \ + && rm -rf /root/.cache/pip* + +RUN apt-get update -qq && apt-get install -y -q \ + build-essential \ + curl \ + libfreetype6-dev \ + libpng-dev \ + libzmq3-dev \ + pkg-config \ + rsync \ + software-properties-common \ + unzip \ + zlib1g-dev \ + && apt-get clean -qq && rm -rf /var/lib/apt/lists/* + + +ENV PYTHONPATH="/src:${PYTHONPATH}" + +COPY pkg/workloads/cortex/lib/requirements.txt /src/cortex/lib/requirements.txt +COPY pkg/workloads/cortex/onnx_serve/requirements.txt /src/cortex/onnx_serve/requirements.txt +RUN pip3 install -r /src/cortex/lib/requirements.txt && \ + pip3 install -r 
/src/cortex/onnx_serve/requirements.txt && \ + pip3 install onnxruntime-gpu==${ONNXRUNTIME_VERSION} && \ + rm -rf /root/.cache/pip* + +COPY pkg/workloads/cortex/consts.py /src/cortex +COPY pkg/workloads/cortex/lib /src/cortex/lib +COPY pkg/workloads/cortex/onnx_serve /src/cortex/onnx_serve + +ENTRYPOINT ["/usr/bin/python3", "/src/cortex/onnx_serve/api.py"] diff --git a/images/onnx-serve/Dockerfile b/images/onnx-serve/Dockerfile index ea66059698..8210cf3c65 100644 --- a/images/onnx-serve/Dockerfile +++ b/images/onnx-serve/Dockerfile @@ -1,5 +1,7 @@ FROM ubuntu:16.04 +ARG ONNXRUNTIME_VERSION="0.4.0" + RUN apt-get update -qq && apt-get install -y -q \ python3 \ python3-dev \ @@ -30,6 +32,7 @@ COPY pkg/workloads/cortex/lib/requirements.txt /src/cortex/lib/requirements.txt COPY pkg/workloads/cortex/onnx_serve/requirements.txt /src/cortex/onnx_serve/requirements.txt RUN pip3 install -r /src/cortex/lib/requirements.txt && \ pip3 install -r /src/cortex/onnx_serve/requirements.txt && \ + pip3 install onnxruntime==${ONNXRUNTIME_VERSION} && \ rm -rf /root/.cache/pip* COPY pkg/workloads/cortex/consts.py /src/cortex diff --git a/pkg/operator/config/config.go b/pkg/operator/config/config.go index c6bcc64a7a..6339b48195 100644 --- a/pkg/operator/config/config.go +++ b/pkg/operator/config/config.go @@ -54,6 +54,7 @@ type CortexConfig struct { TFTrainImageGPU string `json:"tf_train_image_gpu"` TFServeImageGPU string `json:"tf_serve_image_gpu"` ONNXServeImage string `json:"onnx_serve_image"` + ONNXServeImageGPU string `json:"onnx_serve_image_gpu"` TelemetryURL string `json:"telemetry_url"` EnableTelemetry bool `json:"enable_telemetry"` @@ -76,6 +77,7 @@ func Init() error { TFTrainImageGPU: getStr("IMAGE_TF_TRAIN_GPU"), TFServeImageGPU: getStr("IMAGE_TF_SERVE_GPU"), ONNXServeImage: getStr("IMAGE_ONNX_SERVE"), + ONNXServeImageGPU: getStr("IMAGE_ONNX_SERVE_GPU"), TelemetryURL: configreader.MustStringFromEnv("CONST_TELEMETRY_URL", &configreader.StringValidation{Required: false, Default: 
consts.TelemetryURL}), EnableTelemetry: getBool("ENABLE_TELEMETRY"), diff --git a/pkg/operator/workloads/api.go b/pkg/operator/workloads/api.go index fe7f1d5606..fd8fd5aca0 100644 --- a/pkg/operator/workloads/api.go +++ b/pkg/operator/workloads/api.go @@ -171,13 +171,21 @@ func onnxAPISpec( workloadID string, desiredReplicas int32, ) *kapps.Deployment { + servingImage := config.Cortex.ONNXServeImage resourceList := kcore.ResourceList{} + resourceLimitsList := kcore.ResourceList{} resourceList[kcore.ResourceCPU] = api.Compute.CPU.Quantity if api.Compute.Mem != nil { resourceList[kcore.ResourceMemory] = api.Compute.Mem.Quantity } + if api.Compute.GPU > 0 { + servingImage = config.Cortex.ONNXServeImageGPU + resourceList["nvidia.com/gpu"] = *kresource.NewQuantity(api.Compute.GPU, kresource.DecimalSI) + resourceLimitsList["nvidia.com/gpu"] = *kresource.NewQuantity(api.Compute.GPU, kresource.DecimalSI) + } + return k8s.Deployment(&k8s.DeploymentSpec{ Name: internalAPIName(api.Name, ctx.App.Name), Replicas: desiredReplicas, @@ -206,7 +214,7 @@ func onnxAPISpec( Containers: []kcore.Container{ { Name: apiContainerName, - Image: config.Cortex.ONNXServeImage, + Image: servingImage, ImagePullPolicy: "Always", Args: []string{ "--workload-id=" + workloadID, @@ -235,6 +243,7 @@ func onnxAPISpec( }, Resources: kcore.ResourceRequirements{ Requests: resourceList, + Limits: resourceLimitsList, }, }, }, diff --git a/pkg/workloads/cortex/onnx_serve/requirements.txt b/pkg/workloads/cortex/onnx_serve/requirements.txt index 4712d29649..bdd76b18f2 100644 --- a/pkg/workloads/cortex/onnx_serve/requirements.txt +++ b/pkg/workloads/cortex/onnx_serve/requirements.txt @@ -1,5 +1,4 @@ flask==1.0.2 flask-api==1.1 waitress==1.2.1 -onnxruntime==0.4.0 numpy>=1.15.0