Add onnx support #182


Merged: 28 commits into master from onnx-support on Jul 5, 2019
Changes from all commits · 28 commits
de5fa6e
Add onnx support
vishalbollu Jun 26, 2019
48d1562
Add model examples to examples directory
vishalbollu Jun 26, 2019
0da0a8f
Rename to request handler
vishalbollu Jul 2, 2019
9898415
Merge branch 'master' into onnx-support
vishalbollu Jul 2, 2019
aca81f7
Optional pre/post
vishalbollu Jul 3, 2019
6405a4a
Merge branch 'master' into onnx-support
vishalbollu Jul 3, 2019
c07f3ce
Optional pre and post processing
vishalbollu Jul 4, 2019
e8b12f4
Remove unnecessary config
vishalbollu Jul 4, 2019
05e3b1a
Fix linting
vishalbollu Jul 4, 2019
143e27c
Remove unnecessary logs
vishalbollu Jul 4, 2019
b2838e3
Move models to cortex-examples bucket
vishalbollu Jul 4, 2019
3ef4eb8
Add docs and respond to PR comments
vishalbollu Jul 4, 2019
52be6ac
Merge branch 'master' into onnx-support
vishalbollu Jul 4, 2019
b9261a1
Fix predictions
vishalbollu Jul 4, 2019
905c2d4
Add request handler docs round 1
vishalbollu Jul 4, 2019
c7e3b37
Remove commented code
vishalbollu Jul 4, 2019
3892a23
Cleanup examples and docs round 2
vishalbollu Jul 5, 2019
37adbf7
Rename payload to sample
vishalbollu Jul 5, 2019
9fb94a7
Remove trailing whitespace
vishalbollu Jul 5, 2019
5811ab0
Breakout example models into their own directories
vishalbollu Jul 5, 2019
d4d614d
Refactor request handlers
vishalbollu Jul 5, 2019
857da8d
Fix linting issue with predict.go
vishalbollu Jul 5, 2019
1bbf6e1
Restructure examples folder
vishalbollu Jul 5, 2019
b3b5ffc
ModelType to ModelFormat
vishalbollu Jul 5, 2019
c89005c
Merge branch 'master' into onnx-support
vishalbollu Jul 5, 2019
3e9a265
Tweak example post_inference handler doc
vishalbollu Jul 5, 2019
7231e42
Push down error wrapping
vishalbollu Jul 5, 2019
c2af7aa
Tweak comment in pre_inference request-handlers doc
vishalbollu Jul 5, 2019
2 changes: 2 additions & 0 deletions Makefile
@@ -131,6 +131,7 @@ ci-build-images:
@./build/build-image.sh images/tf-serve tf-serve
@./build/build-image.sh images/tf-serve-gpu tf-serve-gpu
@./build/build-image.sh images/tf-api tf-api
@./build/build-image.sh images/onnx-serve onnx-serve
@./build/build-image.sh images/operator operator
@./build/build-image.sh images/fluentd fluentd
@./build/build-image.sh images/nginx-controller nginx-controller
@@ -151,6 +152,7 @@ ci-push-images:
@./build/push-image.sh tf-serve
@./build/push-image.sh tf-serve-gpu
@./build/push-image.sh tf-api
@./build/push-image.sh onnx-serve
@./build/push-image.sh operator
@./build/push-image.sh fluentd
@./build/push-image.sh nginx-controller
4 changes: 2 additions & 2 deletions README.md
@@ -35,11 +35,11 @@ Cortex is actively maintained by Cortex Labs. We're a venture-backed team of inf
```python
# handler.py

def preprocess(payload):
def pre_inference(sample, metadata):
    # Python code


def postprocess(prediction):
def post_inference(prediction, metadata):
    # Python code
```

43 changes: 33 additions & 10 deletions cli/cmd/predict.go
@@ -22,6 +22,7 @@ import (
"net/http"
"strings"

"github.com/cortexlabs/yaml"
"github.com/spf13/cobra"

"github.com/cortexlabs/cortex/pkg/lib/cast"
@@ -43,11 +44,11 @@ func init() {
}

type PredictResponse struct {
ResourceID string `json:"resource_id"`
Predictions []Prediction `json:"predictions"`
ResourceID string `json:"resource_id"`
Predictions []interface{} `json:"predictions"`
}

type Prediction struct {
type DetailedPrediction struct {
Prediction interface{} `json:"prediction"`
PredictionReversed interface{} `json:"prediction_reversed"`
TransformedSample interface{} `json:"transformed_sample"`
@@ -97,9 +98,10 @@ var predictCmd = &cobra.Command{
}

apiID := predictResponse.ResourceID
api := resourcesRes.APIStatuses[apiID]
apiStatus := resourcesRes.APIStatuses[apiID]
api := resourcesRes.Context.APIs[apiName]

apiStart := libtime.LocalTimestampHuman(api.Start)
apiStart := libtime.LocalTimestampHuman(apiStatus.Start)
fmt.Println("\n" + apiName + " was last updated on " + apiStart + "\n")

if len(predictResponse.Predictions) == 1 {
@@ -109,8 +111,8 @@
}

for _, prediction := range predictResponse.Predictions {
if prediction.Prediction == nil {
prettyResp, err := json.Pretty(prediction.Response)
if !yaml.StartsWithEscapedAtSymbol(api.Model) {
prettyResp, err := json.Pretty(prediction)
if err != nil {
errors.Exit(err)
}
@@ -119,9 +121,30 @@
continue
}

value := prediction.Prediction
if prediction.PredictionReversed != nil {
value = prediction.PredictionReversed
predictionBytes, err := json.Marshal(prediction)
if err != nil {
errors.Exit(err)
}

var detailedPrediction DetailedPrediction
err = json.DecodeWithNumber(predictionBytes, &detailedPrediction)
if err != nil {
errors.Exit(err, "prediction response")
}

if detailedPrediction.Prediction == nil {
prettyResp, err := json.Pretty(detailedPrediction.Response)
if err != nil {
errors.Exit(err)
}

fmt.Println(prettyResp)
continue
}

value := detailedPrediction.Prediction
if detailedPrediction.PredictionReversed != nil {
value = detailedPrediction.PredictionReversed
}

if cast.IsFloatType(value) {
2 changes: 2 additions & 0 deletions cortex.sh
@@ -131,6 +131,7 @@ export CORTEX_IMAGE_TF_API="${CORTEX_IMAGE_TF_API:-cortexlabs/tf-api:$CORTEX_VER
export CORTEX_IMAGE_PYTHON_PACKAGER="${CORTEX_IMAGE_PYTHON_PACKAGER:-cortexlabs/python-packager:$CORTEX_VERSION_STABLE}"
export CORTEX_IMAGE_TF_SERVE_GPU="${CORTEX_IMAGE_TF_SERVE_GPU:-cortexlabs/tf-serve-gpu:$CORTEX_VERSION_STABLE}"
export CORTEX_IMAGE_TF_TRAIN_GPU="${CORTEX_IMAGE_TF_TRAIN_GPU:-cortexlabs/tf-train-gpu:$CORTEX_VERSION_STABLE}"
export CORTEX_IMAGE_ONNX_SERVE="${CORTEX_IMAGE_ONNX_SERVE:-cortexlabs/onnx-serve:$CORTEX_VERSION_STABLE}"
export CORTEX_IMAGE_CLUSTER_AUTOSCALER="${CORTEX_IMAGE_CLUSTER_AUTOSCALER:-cortexlabs/cluster-autoscaler:$CORTEX_VERSION_STABLE}"
export CORTEX_IMAGE_NVIDIA="${CORTEX_IMAGE_NVIDIA:-cortexlabs/nvidia:$CORTEX_VERSION_STABLE}"
export CORTEX_IMAGE_METRICS_SERVER="${CORTEX_IMAGE_METRICS_SERVER:-cortexlabs/metrics-server:$CORTEX_VERSION_STABLE}"
@@ -188,6 +189,7 @@ function install_cortex() {
-e CORTEX_IMAGE_PYTHON_PACKAGER=$CORTEX_IMAGE_PYTHON_PACKAGER \
-e CORTEX_IMAGE_TF_SERVE_GPU=$CORTEX_IMAGE_TF_SERVE_GPU \
-e CORTEX_IMAGE_TF_TRAIN_GPU=$CORTEX_IMAGE_TF_TRAIN_GPU \
-e CORTEX_IMAGE_ONNX_SERVE=$CORTEX_IMAGE_ONNX_SERVE \
-e CORTEX_IMAGE_CLUSTER_AUTOSCALER=$CORTEX_IMAGE_CLUSTER_AUTOSCALER \
-e CORTEX_IMAGE_NVIDIA=$CORTEX_IMAGE_NVIDIA \
-e CORTEX_IMAGE_METRICS_SERVER=$CORTEX_IMAGE_METRICS_SERVER \
9 changes: 5 additions & 4 deletions dev/registry.sh
@@ -50,6 +50,7 @@ function create_registry() {
aws ecr create-repository --repository-name=cortexlabs/python-packager --region=$REGISTRY_REGION || true
aws ecr create-repository --repository-name=cortexlabs/tf-train-gpu --region=$REGISTRY_REGION || true
aws ecr create-repository --repository-name=cortexlabs/tf-serve-gpu --region=$REGISTRY_REGION || true
aws ecr create-repository --repository-name=cortexlabs/onnx-serve --region=$REGISTRY_REGION || true
aws ecr create-repository --repository-name=cortexlabs/cluster-autoscaler --region=$REGISTRY_REGION || true
aws ecr create-repository --repository-name=cortexlabs/nvidia --region=$REGISTRY_REGION || true
aws ecr create-repository --repository-name=cortexlabs/metrics-server --region=$REGISTRY_REGION || true
@@ -130,7 +131,9 @@ elif [ "$cmd" = "update" ]; then

cache_builder $ROOT/images/spark-operator spark-operator
build_and_push $ROOT/images/spark-operator spark-operator latest

build_and_push $ROOT/images/spark spark latest
build_and_push $ROOT/images/tf-train tf-train latest
build_and_push $ROOT/images/tf-train-gpu tf-train-gpu latest
build_and_push $ROOT/images/nginx-controller nginx-controller latest
build_and_push $ROOT/images/nginx-backend nginx-backend latest
build_and_push $ROOT/images/fluentd fluentd latest
@@ -144,10 +147,8 @@ elif [ "$cmd" = "update" ]; then
build_and_push $ROOT/images/metrics-server metrics-server latest
fi

build_and_push $ROOT/images/spark spark latest
build_and_push $ROOT/images/tf-train tf-train latest
build_and_push $ROOT/images/tf-train-gpu tf-train-gpu latest
build_and_push $ROOT/images/tf-api tf-api latest
build_and_push $ROOT/images/onnx-serve onnx-serve latest

cleanup
fi
9 changes: 9 additions & 0 deletions docs/apis/apis.md
@@ -8,6 +8,8 @@ Serve models at scale and use them to build smarter applications.
- kind: api
  name: <string>  # API name (required)
  model: <string>  # path to a zipped model dir (e.g. s3://my-bucket/model.zip)
  model_format: <string>  # model format, must be "tensorflow" or "onnx"
  request_handler: <string>  # path to the request handler implementation file, relative to the cortex root
  compute:
    min_replicas: <int>  # minimum number of replicas (default: 1)
    max_replicas: <int>  # maximum number of replicas (default: 100)
@@ -26,12 +28,19 @@ See [packaging models](packaging-models.md) for how to create the zipped model.
- kind: api
  name: my-api
  model: s3://my-bucket/my-model.zip
  request_handler: inference.py
  compute:
    min_replicas: 5
    max_replicas: 20
    cpu: "1"
```

## Custom Request Handlers

Request handlers decouple the interface of an API endpoint from its model. A `pre_inference` request handler modifies request payloads before they are sent to the model, and a `post_inference` request handler modifies model predictions on the server before they are sent to the client.

See [request handlers](request-handlers.md) for a detailed guide.

## Integration

APIs can be integrated into other applications or services via their JSON endpoints. The endpoint for any API has the following format: `{apis_endpoint}/{deployment_name}/{api_name}`.
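
For instance, the iris deployment in this PR exposes a `tensorflow` API that could be queried along these lines (a sketch — the endpoint placeholder and the `samples` request wrapper are assumptions based on the iris example, not guaranteed by this diff):

```text
$ curl -k -X POST -H "Content-Type: application/json" \
    -d '{ "samples": [ { "sepal_length": 5.2, "sepal_width": 3.6, "petal_length": 1.4, "petal_width": 0.3 } ] }' \
    https://{apis_endpoint}/iris/tensorflow
```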
48 changes: 45 additions & 3 deletions docs/apis/packaging-models.md
@@ -2,7 +2,7 @@

## TensorFlow

Zip the exported estimator output in your checkpoint directory, e.g.
Zip the exported estimator output in your checkpoint directory:

```text
$ ls export/estimator
@@ -11,16 +11,58 @@ saved_model.pb variables/
$ zip -r model.zip export/estimator
```

Upload the zipped file to Amazon S3, e.g.
Upload the zipped file to Amazon S3:

```text
$ aws s3 cp model.zip s3://my-bucket/model.zip
```

Specify `model` in an API, e.g.
Reference your `model` in an API:

```yaml
- kind: api
  name: my-api
  model_format: tensorflow
  model: s3://my-bucket/model.zip
```

## ONNX

Export your trained model to the ONNX format. For example, here is an sklearn model being exported to ONNX:

```python
import onnxmltools
import onnxconverter_common
import sklearn.linear_model

...

logreg_model = sklearn.linear_model.LogisticRegression(solver="lbfgs", multi_class="multinomial")

# Train the model
logreg_model.fit(X_train, y_train)

# Convert to ONNX model format
onnx_model = onnxmltools.convert_sklearn(
    logreg_model, initial_types=[("input", onnxconverter_common.data_types.FloatTensorType([1, 4]))]
)
with open("model.onnx", "wb") as f:
    f.write(onnx_model.SerializeToString())
```

Here are examples of converting models from some of the common ML frameworks to ONNX (a generic PyTorch sketch follows the list):

* [PyTorch](https://github.com/cortexlabs/cortex/blob/master/examples/iris/pytorch/model.py)
* [Sklearn](https://github.com/cortexlabs/cortex/blob/master/examples/iris/sklearn/model.py)
* [XGBoost](https://github.com/cortexlabs/cortex/blob/master/examples/iris/xgboost/model.py)
* [Keras](https://github.com/cortexlabs/cortex/blob/master/examples/iris/keras/model.py)
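
As a sketch of what the PyTorch path involves (the model below is a made-up stand-in, not the one in the linked example), `torch.onnx.export` traces the model with a dummy input of the expected shape:

```python
import torch

# A hypothetical trained model with four input features (e.g. iris measurements)
model = torch.nn.Sequential(
    torch.nn.Linear(4, 16),
    torch.nn.ReLU(),
    torch.nn.Linear(16, 3),
)

# Trace the model with a dummy input and write model.onnx to disk
dummy_input = torch.randn(1, 4)
torch.onnx.export(model, dummy_input, "model.onnx")
```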

Upload your trained model in ONNX format to Amazon S3:

```text
$ aws s3 cp model.onnx s3://my-bucket/model.onnx
```

Reference your `model` in an API:

```yaml
- kind: api
  name: my-api
  model_format: onnx
  model: s3://my-bucket/model.onnx
```
85 changes: 85 additions & 0 deletions docs/apis/request-handlers.md
@@ -0,0 +1,85 @@
# Request Handlers

Request handlers are Python files that can contain a `pre_inference` function and a `post_inference` function. Both functions are optional.

## Implementation

```python
def pre_inference(sample, metadata):
    """Prepare a sample before it is passed into the model.

    Args:
        sample: A sample from the request payload.

        metadata: Describes the expected shape and type of inputs to the model.
            If API model_format is tensorflow: map<string, SignatureDef>
            https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/meta_graph.proto
            If API model_format is onnx: list<onnxruntime.NodeArg>
            https://microsoft.github.io/onnxruntime/api_summary.html#onnxruntime.NodeArg

    Returns:
        A dictionary containing model input names as keys and python lists or numpy arrays as values. If the model only has a single input, then a python list or numpy array can be returned.
    """
    pass


def post_inference(prediction, metadata):
    """Modify a prediction from the model before responding to the request.

    Args:
        prediction: The output of the model.

        metadata: Describes the output shape and type of outputs from the model.
            If API model_format is tensorflow: map<string, SignatureDef>
            https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/meta_graph.proto
            If API model_format is onnx: list<onnxruntime.NodeArg>
            https://microsoft.github.io/onnxruntime/api_summary.html#onnxruntime.NodeArg

    Returns:
        A python dictionary or list.
    """
```
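
To make the ONNX flavor of `metadata` concrete, a model's inputs can be inspected directly with onnxruntime — a sketch assuming a local `model.onnx`; this inspection snippet is not part of the PR:

```python
import onnxruntime

# The request handler's metadata argument corresponds to these NodeArg objects
session = onnxruntime.InferenceSession("model.onnx")
for node in session.get_inputs():
    print(node.name, node.shape, node.type)  # e.g. input [1, 4] tensor(float)
```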

## Example

```python
import numpy as np

iris_labels = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"]


def pre_inference(sample, metadata):
    # Convert a dictionary of features to a flattened list in the order expected by the model
    return {
        metadata[0].name: [
            sample["sepal_length"],
            sample["sepal_width"],
            sample["petal_length"],
            sample["petal_width"],
        ]
    }


def post_inference(prediction, metadata):
    # Update the model prediction to include the index and the label of the predicted class
    probabilities = prediction[0][0]
    predicted_class_id = int(np.argmax(probabilities))
    return {
        "class_label": iris_labels[predicted_class_id],
        "class_index": predicted_class_id,
        "probabilities": probabilities,
    }
```
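
With this handler, a single iris sample would flow through roughly as follows (the values, including the probabilities, are illustrative):

```text
sample (request)  = {"sepal_length": 5.2, "sepal_width": 3.6, "petal_length": 1.4, "petal_width": 0.3}
response          = {"class_label": "Iris-setosa", "class_index": 0, "probabilities": [0.97, 0.02, 0.01]}
```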

## Pre-installed Packages

The following packages have been pre-installed and can be used in your implementations:

```text
boto3==1.9.78
msgpack==0.6.1
numpy>=1.13.3,<2
requirements-parser==0.2.0
packaging==19.0.0
```

You can install additional PyPI packages and import your own Python packages. See [Python Packages](../pipelines/python-packages.md) for more details.
1 change: 1 addition & 0 deletions docs/cluster/config.md
@@ -53,6 +53,7 @@ export CORTEX_IMAGE_TF_TRAIN="cortexlabs/tf-train:master"
export CORTEX_IMAGE_TF_API="cortexlabs/tf-api:master"
export CORTEX_IMAGE_TF_TRAIN_GPU="cortexlabs/tf-train-gpu:master"
export CORTEX_IMAGE_TF_SERVE_GPU="cortexlabs/tf-serve-gpu:master"
export CORTEX_IMAGE_ONNX_SERVE="cortexlabs/onnx-serve:master"
export CORTEX_IMAGE_PYTHON_PACKAGER="cortexlabs/python-packager:master"
export CORTEX_IMAGE_CLUSTER_AUTOSCALER="cortexlabs/cluster-autoscaler:master"
export CORTEX_IMAGE_NVIDIA="cortexlabs/nvidia:master"
1 change: 1 addition & 0 deletions docs/cluster/development.md
@@ -61,6 +61,7 @@ export CORTEX_IMAGE_ARGO_EXECUTOR="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cort
export CORTEX_IMAGE_FLUENTD="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/fluentd:latest"
export CORTEX_IMAGE_NGINX_BACKEND="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/nginx-backend:latest"
export CORTEX_IMAGE_NGINX_CONTROLLER="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/nginx-controller:latest"
export CORTEX_IMAGE_ONNX_SERVE="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/onnx-serve:latest"
export CORTEX_IMAGE_OPERATOR="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/operator:latest"
export CORTEX_IMAGE_SPARK="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/spark:latest"
export CORTEX_IMAGE_SPARK_OPERATOR="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/spark-operator:latest"
29 changes: 27 additions & 2 deletions examples/iris/cortex.yaml
@@ -2,5 +2,30 @@
  name: iris

- kind: api
  name: iris-type
  model: s3://cortex-examples/iris-tensorflow.zip
  name: tensorflow
  model_format: tensorflow
  model: s3://cortex-examples/iris/tensorflow.zip

- kind: api
  name: pytorch
  model_format: onnx
  request_handler: pytorch/request_handler.py
  model: s3://cortex-examples/iris/pytorch.onnx

- kind: api
  name: xgboost
  model_format: onnx
  request_handler: xgboost/request_handler.py
  model: s3://cortex-examples/iris/xgboost.onnx

- kind: api
  name: sklearn
  model_format: onnx
  request_handler: sklearn/request_handler.py
  model: s3://cortex-examples/iris/sklearn.onnx

- kind: api
  name: keras
  model_format: onnx
  request_handler: keras/request_handler.py
  model: s3://cortex-examples/iris/keras.onnx
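
Assuming the standard workflow, these APIs would be deployed and queried from the example directory roughly like this (a sketch; the `predict` argument order and the `irises.json` sample file are assumptions based on the iris example):

```text
$ cortex deploy
$ cortex predict sklearn irises.json
```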