Add the movie-ratings example.

Files added: the app definition, a TensorFlow model implementation
(create_estimator returns a DNNRegressor fed by two embedding columns keyed
"user_id_indexed" and "movie_id_indexed"), a sample payload holding two
user_id/movie_id pairs, and the API, environment, model and
aggregate/transformed-column resource definitions.

NOTE(review): line breaks in the hunks below match each hunk's declared line
count. Leading indentation inside the YAML files was reconstructed (e.g.
"schema" under "data", "args" under "inputs") -- confirm against the blobs
named in the index lines before applying.

diff --git a/examples/movie-ratings/app.yaml b/examples/movie-ratings/app.yaml
new file mode 100644
index 0000000000..e9dda0b34b
--- /dev/null
+++ b/examples/movie-ratings/app.yaml
@@ -0,0 +1,2 @@
+- kind: app
+  name: movie-ratings
diff --git a/examples/movie-ratings/implementations/models/basic_embedding.py b/examples/movie-ratings/implementations/models/basic_embedding.py
new file mode 100644
index 0000000000..d2e64c43d5
--- /dev/null
+++ b/examples/movie-ratings/implementations/models/basic_embedding.py
@@ -0,0 +1,27 @@
+import tensorflow as tf
+
+
+def create_estimator(run_config, model_config):
+    user_id_index = model_config["aggregates"]["user_id_index"]
+    movie_id_index = model_config["aggregates"]["movie_id_index"]
+
+    feature_columns = [
+        tf.feature_column.embedding_column(
+            tf.feature_column.categorical_column_with_identity(
+                "user_id_indexed", len(user_id_index)
+            ),
+            model_config["hparams"]["embedding_size"],
+        ),
+        tf.feature_column.embedding_column(
+            tf.feature_column.categorical_column_with_identity(
+                "movie_id_indexed", len(movie_id_index)
+            ),
+            model_config["hparams"]["embedding_size"],
+        ),
+    ]
+
+    return tf.estimator.DNNRegressor(
+        feature_columns=feature_columns,
+        hidden_units=model_config["hparams"]["hidden_units"],
+        config=run_config,
+    )
diff --git a/examples/movie-ratings/movies.json b/examples/movie-ratings/movies.json
new file mode 100644
index 0000000000..a4cfd58bab
--- /dev/null
+++ b/examples/movie-ratings/movies.json
@@ -0,0 +1,12 @@
+{
+  "samples": [
+    {
+      "user_id": "71",
+      "movie_id": "91529"
+    },
+    {
+      "user_id": "71",
+      "movie_id": "174055"
+    }
+  ]
+}
diff --git a/examples/movie-ratings/resources/apis.yaml b/examples/movie-ratings/resources/apis.yaml
new file mode 100644
index 0000000000..187089009a
--- /dev/null
+++ b/examples/movie-ratings/resources/apis.yaml
@@ -0,0 +1,5 @@
+- kind: api
+  name: ratings
+  model_name: basic_embedding
+  compute:
+    replicas: 1
diff --git a/examples/movie-ratings/resources/environments.yaml b/examples/movie-ratings/resources/environments.yaml
new file mode 100644
index 0000000000..a30f3baef9
--- /dev/null
+++ b/examples/movie-ratings/resources/environments.yaml
@@ -0,0 +1,24 @@
+- kind: environment
+  name: dev
+  data:
+    type: csv
+    path: s3a://cortex-examples/movie-ratings.csv
+    csv_config:
+      header: true
+    schema: ['user_id','movie_id','rating','timestamp']
+
+- kind: raw_column
+  name: user_id
+  type: STRING_COLUMN
+
+- kind: raw_column
+  name: movie_id
+  type: STRING_COLUMN
+
+- kind: raw_column
+  name: rating
+  type: FLOAT_COLUMN
+
+- kind: raw_column
+  name: timestamp
+  type: INT_COLUMN
diff --git a/examples/movie-ratings/resources/models.yaml b/examples/movie-ratings/resources/models.yaml
new file mode 100644
index 0000000000..8f5e0ef5c8
--- /dev/null
+++ b/examples/movie-ratings/resources/models.yaml
@@ -0,0 +1,15 @@
+- kind: model
+  name: basic_embedding
+  type: regression
+  target_column: rating
+  feature_columns: [user_id_indexed, movie_id_indexed]
+  aggregates: [user_id_index, movie_id_index]
+  hparams:
+    embedding_size: 10
+    hidden_units: [128]
+  data_partition_ratio:
+    training: 0.8
+    evaluation: 0.2
+  training:
+    batch_size: 50
+    num_steps: 8000
diff --git a/examples/movie-ratings/resources/transformed_columns.yaml b/examples/movie-ratings/resources/transformed_columns.yaml
new file mode 100644
index 0000000000..2a93022ffd
--- /dev/null
+++ b/examples/movie-ratings/resources/transformed_columns.yaml
@@ -0,0 +1,31 @@
+- kind: aggregate
+  name: user_id_index
+  aggregator: cortex.index_string
+  inputs:
+    columns:
+      col: user_id
+
+- kind: transformed_column
+  name: user_id_indexed
+  transformer: cortex.index_string
+  inputs:
+    columns:
+      text: user_id
+    args:
+      index: user_id_index
+
+- kind: aggregate
+  name: movie_id_index
+  aggregator: cortex.index_string
+  inputs:
+    columns:
+      col: movie_id
+
+- kind: transformed_column
+  name: movie_id_indexed
+  transformer: cortex.index_string
+  inputs:
+    columns:
+      text: movie_id
+    args:
+      index: movie_id_index