Add onnx support #182


Merged: 28 commits into master from onnx-support on Jul 5, 2019
Changes from all commits · 28 commits
de5fa6e
Add onnx support
vishalbollu Jun 26, 2019
48d1562
Add model examples to examples directory
vishalbollu Jun 26, 2019
0da0a8f
Rename to request handler
vishalbollu Jul 2, 2019
9898415
Merge branch 'master' into onnx-support
vishalbollu Jul 2, 2019
aca81f7
Optional pre/post
vishalbollu Jul 3, 2019
6405a4a
Merge branch 'master' into onnx-support
vishalbollu Jul 3, 2019
c07f3ce
Optional pre and post processing
vishalbollu Jul 4, 2019
e8b12f4
Remove unnecessary config
vishalbollu Jul 4, 2019
05e3b1a
Fix linting
vishalbollu Jul 4, 2019
143e27c
Remove unnecessary logs
vishalbollu Jul 4, 2019
b2838e3
Move models to cortex-examples bucket
vishalbollu Jul 4, 2019
3ef4eb8
Add docs and respond to PR comments
vishalbollu Jul 4, 2019
52be6ac
Merge branch 'master' into onnx-support
vishalbollu Jul 4, 2019
b9261a1
Fix predictions
vishalbollu Jul 4, 2019
905c2d4
Add request handler docs round 1
vishalbollu Jul 4, 2019
c7e3b37
Remove commented code
vishalbollu Jul 4, 2019
3892a23
Cleanup examples and docs round 2
vishalbollu Jul 5, 2019
37adbf7
Rename payload to sample
vishalbollu Jul 5, 2019
9fb94a7
Remove trailing whitespace
vishalbollu Jul 5, 2019
5811ab0
Breakout example models into their own directories
vishalbollu Jul 5, 2019
d4d614d
Refactor request handlers
vishalbollu Jul 5, 2019
857da8d
Fix linting issue with predict.go
vishalbollu Jul 5, 2019
1bbf6e1
Restructure examples folder
vishalbollu Jul 5, 2019
b3b5ffc
ModelType to ModelFormat
vishalbollu Jul 5, 2019
c89005c
Merge branch 'master' into onnx-support
vishalbollu Jul 5, 2019
3e9a265
Tweak example post_inference handler doc
vishalbollu Jul 5, 2019
7231e42
Push down error wrapping
vishalbollu Jul 5, 2019
c2af7aa
Tweak comment in pre_inference request-handlers doc
vishalbollu Jul 5, 2019
2 changes: 2 additions & 0 deletions Makefile
@@ -131,6 +131,7 @@ ci-build-images:
@./build/build-image.sh images/tf-serve tf-serve
@./build/build-image.sh images/tf-serve-gpu tf-serve-gpu
@./build/build-image.sh images/tf-api tf-api
@./build/build-image.sh images/onnx-serve onnx-serve
@./build/build-image.sh images/operator operator
@./build/build-image.sh images/fluentd fluentd
@./build/build-image.sh images/nginx-controller nginx-controller
@@ -151,6 +152,7 @@ ci-push-images:
@./build/push-image.sh tf-serve
@./build/push-image.sh tf-serve-gpu
@./build/push-image.sh tf-api
@./build/push-image.sh onnx-serve
@./build/push-image.sh operator
@./build/push-image.sh fluentd
@./build/push-image.sh nginx-controller
4 changes: 2 additions & 2 deletions README.md
@@ -35,11 +35,11 @@ Cortex is actively maintained by Cortex Labs. We're a venture-backed team of inf
```python
# handler.py

def preprocess(payload):
def pre_inference(sample, metadata):
    # Python code


def postprocess(prediction):
def post_inference(prediction, metadata):
    # Python code
```

43 changes: 33 additions & 10 deletions cli/cmd/predict.go
@@ -22,6 +22,7 @@ import (
"net/http"
"strings"

"github.com/cortexlabs/yaml"
"github.com/spf13/cobra"

"github.com/cortexlabs/cortex/pkg/lib/cast"
@@ -43,11 +44,11 @@ func init() {
}

type PredictResponse struct {
ResourceID string `json:"resource_id"`
Predictions []Prediction `json:"predictions"`
ResourceID string `json:"resource_id"`
Predictions []interface{} `json:"predictions"`
}

type Prediction struct {
type DetailedPrediction struct {
Prediction interface{} `json:"prediction"`
PredictionReversed interface{} `json:"prediction_reversed"`
TransformedSample interface{} `json:"transformed_sample"`
@@ -97,9 +98,10 @@ var predictCmd = &cobra.Command{
}

apiID := predictResponse.ResourceID
api := resourcesRes.APIStatuses[apiID]
apiStatus := resourcesRes.APIStatuses[apiID]
api := resourcesRes.Context.APIs[apiName]

apiStart := libtime.LocalTimestampHuman(api.Start)
apiStart := libtime.LocalTimestampHuman(apiStatus.Start)
fmt.Println("\n" + apiName + " was last updated on " + apiStart + "\n")

if len(predictResponse.Predictions) == 1 {
@@ -109,8 +111,8 @@
}

for _, prediction := range predictResponse.Predictions {
if prediction.Prediction == nil {
prettyResp, err := json.Pretty(prediction.Response)
if !yaml.StartsWithEscapedAtSymbol(api.Model) {
prettyResp, err := json.Pretty(prediction)
if err != nil {
errors.Exit(err)
}
@@ -119,9 +121,30 @@
continue
}

value := prediction.Prediction
if prediction.PredictionReversed != nil {
value = prediction.PredictionReversed
predictionBytes, err := json.Marshal(prediction)
if err != nil {
errors.Exit(err)
}

var detailedPrediction DetailedPrediction
err = json.DecodeWithNumber(predictionBytes, &detailedPrediction)
if err != nil {
errors.Exit(err, "prediction response")
}

if detailedPrediction.Prediction == nil {
prettyResp, err := json.Pretty(detailedPrediction.Response)
if err != nil {
errors.Exit(err)
}

fmt.Println(prettyResp)
continue
}

value := detailedPrediction.Prediction
if detailedPrediction.PredictionReversed != nil {
value = detailedPrediction.PredictionReversed
}

if cast.IsFloatType(value) {
2 changes: 2 additions & 0 deletions cortex.sh
@@ -131,6 +131,7 @@ export CORTEX_IMAGE_TF_API="${CORTEX_IMAGE_TF_API:-cortexlabs/tf-api:$CORTEX_VER
export CORTEX_IMAGE_PYTHON_PACKAGER="${CORTEX_IMAGE_PYTHON_PACKAGER:-cortexlabs/python-packager:$CORTEX_VERSION_STABLE}"
export CORTEX_IMAGE_TF_SERVE_GPU="${CORTEX_IMAGE_TF_SERVE_GPU:-cortexlabs/tf-serve-gpu:$CORTEX_VERSION_STABLE}"
export CORTEX_IMAGE_TF_TRAIN_GPU="${CORTEX_IMAGE_TF_TRAIN_GPU:-cortexlabs/tf-train-gpu:$CORTEX_VERSION_STABLE}"
export CORTEX_IMAGE_ONNX_SERVE="${CORTEX_IMAGE_ONNX_SERVE:-cortexlabs/onnx-serve:$CORTEX_VERSION_STABLE}"
export CORTEX_IMAGE_CLUSTER_AUTOSCALER="${CORTEX_IMAGE_CLUSTER_AUTOSCALER:-cortexlabs/cluster-autoscaler:$CORTEX_VERSION_STABLE}"
export CORTEX_IMAGE_NVIDIA="${CORTEX_IMAGE_NVIDIA:-cortexlabs/nvidia:$CORTEX_VERSION_STABLE}"
export CORTEX_IMAGE_METRICS_SERVER="${CORTEX_IMAGE_METRICS_SERVER:-cortexlabs/metrics-server:$CORTEX_VERSION_STABLE}"
@@ -188,6 +189,7 @@ function install_cortex() {
-e CORTEX_IMAGE_PYTHON_PACKAGER=$CORTEX_IMAGE_PYTHON_PACKAGER \
-e CORTEX_IMAGE_TF_SERVE_GPU=$CORTEX_IMAGE_TF_SERVE_GPU \
-e CORTEX_IMAGE_TF_TRAIN_GPU=$CORTEX_IMAGE_TF_TRAIN_GPU \
-e CORTEX_IMAGE_ONNX_SERVE=$CORTEX_IMAGE_ONNX_SERVE \
-e CORTEX_IMAGE_CLUSTER_AUTOSCALER=$CORTEX_IMAGE_CLUSTER_AUTOSCALER \
-e CORTEX_IMAGE_NVIDIA=$CORTEX_IMAGE_NVIDIA \
-e CORTEX_IMAGE_METRICS_SERVER=$CORTEX_IMAGE_METRICS_SERVER \
9 changes: 5 additions & 4 deletions dev/registry.sh
@@ -50,6 +50,7 @@ function create_registry() {
aws ecr create-repository --repository-name=cortexlabs/python-packager --region=$REGISTRY_REGION || true
aws ecr create-repository --repository-name=cortexlabs/tf-train-gpu --region=$REGISTRY_REGION || true
aws ecr create-repository --repository-name=cortexlabs/tf-serve-gpu --region=$REGISTRY_REGION || true
aws ecr create-repository --repository-name=cortexlabs/onnx-serve --region=$REGISTRY_REGION || true
aws ecr create-repository --repository-name=cortexlabs/cluster-autoscaler --region=$REGISTRY_REGION || true
aws ecr create-repository --repository-name=cortexlabs/nvidia --region=$REGISTRY_REGION || true
aws ecr create-repository --repository-name=cortexlabs/metrics-server --region=$REGISTRY_REGION || true
@@ -130,7 +131,9 @@ elif [ "$cmd" = "update" ]; then

cache_builder $ROOT/images/spark-operator spark-operator
build_and_push $ROOT/images/spark-operator spark-operator latest

build_and_push $ROOT/images/spark spark latest
build_and_push $ROOT/images/tf-train tf-train latest
build_and_push $ROOT/images/tf-train-gpu tf-train-gpu latest
build_and_push $ROOT/images/nginx-controller nginx-controller latest
build_and_push $ROOT/images/nginx-backend nginx-backend latest
build_and_push $ROOT/images/fluentd fluentd latest
@@ -144,10 +147,8 @@ elif [ "$cmd" = "update" ]; then
build_and_push $ROOT/images/metrics-server metrics-server latest
fi

build_and_push $ROOT/images/spark spark latest
build_and_push $ROOT/images/tf-train tf-train latest
build_and_push $ROOT/images/tf-train-gpu tf-train-gpu latest
build_and_push $ROOT/images/tf-api tf-api latest
build_and_push $ROOT/images/onnx-serve onnx-serve latest

cleanup
fi
9 changes: 9 additions & 0 deletions docs/apis/apis.md
@@ -8,6 +8,8 @@ Serve models at scale and use them to build smarter applications.
- kind: api
  name: <string>  # API name (required)
  model: <string>  # path to a zipped model dir (e.g. s3://my-bucket/model.zip)
  model_format: <string>  # model format, must be "tensorflow" or "onnx"
  request_handler: <string>  # path to the request handler implementation file, relative to the cortex root
  compute:
    min_replicas: <int>  # minimum number of replicas (default: 1)
    max_replicas: <int>  # maximum number of replicas (default: 100)
@@ -26,12 +28,19 @@ See [packaging models](packaging-models.md) for how to create the zipped model.
- kind: api
  name: my-api
  model: s3://my-bucket/my-model.zip
  request_handler: inference.py
  compute:
    min_replicas: 5
    max_replicas: 20
    cpu: "1"
```

## Custom Request Handlers

Request handlers decouple the interface of an API endpoint from its model. A `pre_inference` request handler modifies request payloads before they are sent to the model, and a `post_inference` request handler modifies model predictions on the server before they are sent to the client.

See [request handlers](request-handlers.md) for a detailed guide.

## Integration

APIs can be integrated into other applications or services via their JSON endpoints. The endpoint for any API has the following format: `{apis_endpoint}/{deployment_name}/{api_name}`.
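
For instance, the iris deployment in this PR exposes a `tensorflow` API that could be queried along these lines (a sketch — the endpoint placeholder and the `samples` request wrapper are assumptions based on the iris example, not guaranteed by this diff):

```text
$ curl -k -X POST -H "Content-Type: application/json" \
    -d '{ "samples": [ { "sepal_length": 5.2, "sepal_width": 3.6, "petal_length": 1.4, "petal_width": 0.3 } ] }' \
    https://{apis_endpoint}/iris/tensorflow
```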
48 changes: 45 additions & 3 deletions docs/apis/packaging-models.md
@@ -2,7 +2,7 @@

## TensorFlow

Zip the exported estimator output in your checkpoint directory, e.g.
Zip the exported estimator output in your checkpoint directory:

```text
$ ls export/estimator
@@ -11,16 +11,58 @@ saved_model.pb variables/
$ zip -r model.zip export/estimator
```

Upload the zipped file to Amazon S3, e.g.
Upload the zipped file to Amazon S3:

```text
$ aws s3 cp model.zip s3://my-bucket/model.zip
```

Specify `model` in an API, e.g.
Reference your `model` in an API:

```yaml
- kind: api
  name: my-api
  model_format: tensorflow
  model: s3://my-bucket/model.zip
```

## ONNX

Export your trained model to the ONNX format. For example, here is an sklearn model being exported to ONNX:

```python
import onnxmltools
import onnxconverter_common
import sklearn.linear_model

...

logreg_model = sklearn.linear_model.LogisticRegression(solver="lbfgs", multi_class="multinomial")

# Train the model
logreg_model.fit(X_train, y_train)

# Convert to ONNX model format
onnx_model = onnxmltools.convert_sklearn(
    logreg_model, initial_types=[("input", onnxconverter_common.data_types.FloatTensorType([1, 4]))]
)
with open("model.onnx", "wb") as f:
    f.write(onnx_model.SerializeToString())
```

Here are examples of converting models from some of the common ML frameworks to ONNX (a generic PyTorch sketch follows the list):

* [PyTorch](https://github.com/cortexlabs/cortex/blob/master/examples/iris/pytorch/model.py)
* [Sklearn](https://github.com/cortexlabs/cortex/blob/master/examples/iris/sklearn/model.py)
* [XGBoost](https://github.com/cortexlabs/cortex/blob/master/examples/iris/xgboost/model.py)
* [Keras](https://github.com/cortexlabs/cortex/blob/master/examples/iris/keras/model.py)
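
As a sketch of what the PyTorch path involves (the model below is a made-up stand-in, not the one in the linked example), `torch.onnx.export` traces the model with a dummy input of the expected shape:

```python
import torch

# A hypothetical trained model with four input features (e.g. iris measurements)
model = torch.nn.Sequential(
    torch.nn.Linear(4, 16),
    torch.nn.ReLU(),
    torch.nn.Linear(16, 3),
)

# Trace the model with a dummy input and write model.onnx to disk
dummy_input = torch.randn(1, 4)
torch.onnx.export(model, dummy_input, "model.onnx")
```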

Upload your trained model in ONNX format to Amazon S3:

```text
$ aws s3 cp model.onnx s3://my-bucket/model.onnx
```

Reference your `model` in an API:

```yaml
- kind: api
  name: my-api
  model_format: onnx
  model: s3://my-bucket/model.onnx
```
85 changes: 85 additions & 0 deletions docs/apis/request-handlers.md
@@ -0,0 +1,85 @@
# Request Handlers

Request handlers are Python files that can contain a `pre_inference` function and a `post_inference` function. Both functions are optional.

## Implementation

```python
def pre_inference(sample, metadata):
    """Prepare a sample before it is passed into the model.

    Args:
        sample: A sample from the request payload.

        metadata: Describes the expected shape and type of inputs to the model.
            If API model_format is tensorflow: map<string, SignatureDef>
            https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/meta_graph.proto
            If API model_format is onnx: list<onnxruntime.NodeArg>
            https://microsoft.github.io/onnxruntime/api_summary.html#onnxruntime.NodeArg

    Returns:
        A dictionary containing model input names as keys and python lists or numpy arrays as values. If the model only has a single input, then a python list or numpy array can be returned.
    """
    pass


def post_inference(prediction, metadata):
    """Modify a prediction from the model before responding to the request.

    Args:
        prediction: The output of the model.

        metadata: Describes the output shape and type of outputs from the model.
            If API model_format is tensorflow: map<string, SignatureDef>
            https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/meta_graph.proto
            If API model_format is onnx: list<onnxruntime.NodeArg>
            https://microsoft.github.io/onnxruntime/api_summary.html#onnxruntime.NodeArg

    Returns:
        A python dictionary or list.
    """
```
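
To make the ONNX flavor of `metadata` concrete, a model's inputs can be inspected directly with onnxruntime — a sketch assuming a local `model.onnx`; this inspection snippet is not part of the PR:

```python
import onnxruntime

# The request handler's metadata argument corresponds to these NodeArg objects
session = onnxruntime.InferenceSession("model.onnx")
for node in session.get_inputs():
    print(node.name, node.shape, node.type)  # e.g. input [1, 4] tensor(float)
```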

## Example

```python
import numpy as np

iris_labels = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"]


def pre_inference(sample, metadata):
    # Convert a dictionary of features to a flattened list in the order expected by the model
    return {
        metadata[0].name: [
            sample["sepal_length"],
            sample["sepal_width"],
            sample["petal_length"],
            sample["petal_width"],
        ]
    }


def post_inference(prediction, metadata):
    # Update the model prediction to include the index and the label of the predicted class
    probabilities = prediction[0][0]
    predicted_class_id = int(np.argmax(probabilities))
    return {
        "class_label": iris_labels[predicted_class_id],
        "class_index": predicted_class_id,
        "probabilities": probabilities,
    }
```
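
With this handler, a single iris sample would flow through roughly as follows (the values, including the probabilities, are illustrative):

```text
sample (request)  = {"sepal_length": 5.2, "sepal_width": 3.6, "petal_length": 1.4, "petal_width": 0.3}
response          = {"class_label": "Iris-setosa", "class_index": 0, "probabilities": [0.97, 0.02, 0.01]}
```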

## Pre-installed Packages

The following packages have been pre-installed and can be used in your implementations:

```text
boto3==1.9.78
msgpack==0.6.1
numpy>=1.13.3,<2
requirements-parser==0.2.0
packaging==19.0.0
```

You can install additional PyPI packages and import your own Python packages. See [Python Packages](../pipelines/python-packages.md) for more details.
1 change: 1 addition & 0 deletions docs/cluster/config.md
@@ -53,6 +53,7 @@ export CORTEX_IMAGE_TF_TRAIN="cortexlabs/tf-train:master"
export CORTEX_IMAGE_TF_API="cortexlabs/tf-api:master"
export CORTEX_IMAGE_TF_TRAIN_GPU="cortexlabs/tf-train-gpu:master"
export CORTEX_IMAGE_TF_SERVE_GPU="cortexlabs/tf-serve-gpu:master"
export CORTEX_IMAGE_ONNX_SERVE="cortexlabs/onnx-serve:master"
export CORTEX_IMAGE_PYTHON_PACKAGER="cortexlabs/python-packager:master"
export CORTEX_IMAGE_CLUSTER_AUTOSCALER="cortexlabs/cluster-autoscaler:master"
export CORTEX_IMAGE_NVIDIA="cortexlabs/nvidia:master"
1 change: 1 addition & 0 deletions docs/cluster/development.md
@@ -61,6 +61,7 @@ export CORTEX_IMAGE_ARGO_EXECUTOR="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cort
export CORTEX_IMAGE_FLUENTD="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/fluentd:latest"
export CORTEX_IMAGE_NGINX_BACKEND="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/nginx-backend:latest"
export CORTEX_IMAGE_NGINX_CONTROLLER="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/nginx-controller:latest"
export CORTEX_IMAGE_ONNX_SERVE="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/onnx-serve:latest"
export CORTEX_IMAGE_OPERATOR="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/operator:latest"
export CORTEX_IMAGE_SPARK="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/spark:latest"
export CORTEX_IMAGE_SPARK_OPERATOR="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/spark-operator:latest"
29 changes: 27 additions & 2 deletions examples/iris/cortex.yaml
@@ -2,5 +2,30 @@
  name: iris

- kind: api
  name: iris-type
  model: s3://cortex-examples/iris-tensorflow.zip
  name: tensorflow
  model_format: tensorflow
  model: s3://cortex-examples/iris/tensorflow.zip

- kind: api
  name: pytorch
  model_format: onnx
  request_handler: pytorch/request_handler.py
  model: s3://cortex-examples/iris/pytorch.onnx

- kind: api
  name: xgboost
  model_format: onnx
  request_handler: xgboost/request_handler.py
  model: s3://cortex-examples/iris/xgboost.onnx

- kind: api
  name: sklearn
  model_format: onnx
  request_handler: sklearn/request_handler.py
  model: s3://cortex-examples/iris/sklearn.onnx

- kind: api
  name: keras
  model_format: onnx
  request_handler: keras/request_handler.py
  model: s3://cortex-examples/iris/keras.onnx
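
Assuming the standard workflow, these APIs would be deployed and queried from the example directory roughly like this (a sketch; the `predict` argument order and the `irises.json` sample file are assumptions based on the iris example):

```text
$ cortex deploy
$ cortex predict sklearn irises.json
```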