Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/mnist/implementations/models/dnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
def create_estimator(run_config, model_config):
feature_columns = [
tf.feature_column.numeric_column(
"image_pixels", shape=model_config["hparams"]["input_shape"]
model_config["input"], shape=model_config["hparams"]["input_shape"]
)
]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,8 @@
import math


def transform_python(sample, args):
image = sample["image"]

decoded = base64.b64decode(image)
def transform_python(input):
decoded = base64.b64decode(input)
decoded_image = np.asarray(Image.open(BytesIO(decoded)), dtype=np.uint8)

# reimplementing tf.image.per_image_standardization
Expand Down
6 changes: 3 additions & 3 deletions examples/mnist/resources/apis.yaml
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
- kind: api
name: dnn-classifier
model_name: dnn
model: @dnn
compute:
replicas: 1

- kind: api
name: conv-classifier
model_name: conv
model: @conv
compute:
replicas: 1

- kind: api
name: t2t-classifier
model_name: t2t
model: @t2t
compute:
replicas: 1
25 changes: 25 additions & 0 deletions examples/mnist/resources/data.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
- kind: environment
name: dev
data:
type: csv
path: s3a://cortex-examples/mnist.csv
csv_config:
header: true
schema: [@image, @label]

- kind: raw_column
name: image
type: STRING_COLUMN
required: true

- kind: raw_column
name: label
type: INT_COLUMN
required: true
min: 0
max: 9

- kind: transformed_column
name: image_pixels
transformer_path: implementations/transformers/decode_and_normalize.py
input: @image
10 changes: 0 additions & 10 deletions examples/mnist/resources/environments.yaml

This file was deleted.

27 changes: 10 additions & 17 deletions examples/mnist/resources/models.yaml
Original file line number Diff line number Diff line change
@@ -1,26 +1,21 @@
- kind: model
name: dnn
path: implementations/models/dnn.py
type: classification
target_column: label
feature_columns:
- image_pixels
estimator_path: implementations/models/dnn.py
target_column: @label
input: @image_pixels
hparams:
learning_rate: 0.01
input_shape: [784]
output_shape: [10]
learning_rate: 0.01
hidden_units: [100, 200]
data_partition_ratio:
training: 0.7
evaluation: 0.3

- kind: model
name: conv
path: implementations/models/custom.py
type: classification
target_column: label
feature_columns:
- image_pixels
estimator_path: implementations/models/custom.py
target_column: @label
input: @image_pixels
hparams:
layer_type: conv
learning_rate: 0.01
Expand All @@ -38,11 +33,9 @@

- kind: model
name: t2t
path: implementations/models/t2t.py
type: classification
target_column: label
feature_columns:
- image_pixels
estimator_path: implementations/models/t2t.py
target_column: @label
input: @image_pixels
prediction_key: outputs
hparams:
input_shape: [28, 28, 1]
Expand Down
11 changes: 0 additions & 11 deletions examples/mnist/resources/raw_columns.yaml

This file was deleted.

6 changes: 0 additions & 6 deletions examples/mnist/resources/transformed_columns.yaml

This file was deleted.

22 changes: 7 additions & 15 deletions examples/movie-ratings/implementations/models/basic_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,18 @@


def create_estimator(run_config, model_config):
user_id_index = model_config["aggregates"]["user_id_index"]
movie_id_index = model_config["aggregates"]["movie_id_index"]

feature_columns = [
tf.feature_column.embedding_column(
tf.feature_column.categorical_column_with_identity(
"user_id_indexed", len(user_id_index)
),
model_config["hparams"]["embedding_size"],
),
tf.feature_column.embedding_column(
embedding_feature_columns = []
for feature_col_data in model_config["input"]["embedding_columns"]:
embedding_col = tf.feature_column.embedding_column(
tf.feature_column.categorical_column_with_identity(
"movie_id_indexed", len(movie_id_index)
feature_col_data["col"], len(feature_col_data["vocab"]["index"])
),
model_config["hparams"]["embedding_size"],
),
]
)
embedding_feature_columns.append(embedding_col)

return tf.estimator.DNNRegressor(
feature_columns=feature_columns,
feature_columns=embedding_feature_columns,
hidden_units=model_config["hparams"]["hidden_units"],
config=run_config,
)
2 changes: 1 addition & 1 deletion examples/movie-ratings/resources/apis.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
- kind: api
name: ratings
model_name: basic_embedding
model: @basic_embedding
compute:
replicas: 1
44 changes: 44 additions & 0 deletions examples/movie-ratings/resources/data.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
- kind: environment
name: dev
data:
type: csv
path: s3a://cortex-examples/movie-ratings.csv
csv_config:
header: true
schema: [@user_id, @movie_id, @rating, @timestamp]

- kind: raw_column
name: user_id
type: STRING_COLUMN

- kind: raw_column
name: movie_id
type: STRING_COLUMN

- kind: raw_column
name: rating
type: FLOAT_COLUMN

- kind: aggregate
name: user_id_index
aggregator: cortex.index_string
input: @user_id

- kind: transformed_column
name: user_id_indexed
transformer: cortex.index_string
input:
col: @user_id
indexes: @user_id_index

- kind: aggregate
name: movie_id_index
aggregator: cortex.index_string
input: @movie_id

- kind: transformed_column
name: movie_id_indexed
transformer: cortex.index_string
input:
col: @movie_id
indexes: @movie_id_index
20 changes: 0 additions & 20 deletions examples/movie-ratings/resources/environments.yaml

This file was deleted.

16 changes: 10 additions & 6 deletions examples/movie-ratings/resources/models.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
- kind: model
name: basic_embedding
type: regression
target_column: rating
feature_columns: [user_id_indexed, movie_id_indexed]
aggregates: [user_id_index, movie_id_index]
estimator_path: implementations/models/basic_embedding.py
target_column: @rating
input:
embedding_columns:
- col: @user_id_indexed
vocab: @user_id_index
- col: @movie_id_indexed
vocab: @movie_id_index
hparams:
embedding_size: 10
hidden_units: [128]
embedding_size: 20
hidden_units: [10, 10]
data_partition_ratio:
training: 0.8
evaluation: 0.2
Expand Down
31 changes: 0 additions & 31 deletions examples/movie-ratings/resources/transformed_columns.yaml

This file was deleted.

4 changes: 2 additions & 2 deletions examples/reviews/implementations/aggregators/max_length.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
def aggregate_spark(data, columns, args):
def aggregate_spark(data, input):
from pyspark.ml.feature import RegexTokenizer
import pyspark.sql.functions as F
from pyspark.sql.types import IntegerType

regexTokenizer = RegexTokenizer(inputCol=columns["col"], outputCol="token_list", pattern="\\W")
regexTokenizer = RegexTokenizer(inputCol=input, outputCol="token_list", pattern="\\W")
regexTokenized = regexTokenizer.transform(data)

max_review_length_row = (
Expand Down
6 changes: 3 additions & 3 deletions examples/reviews/implementations/aggregators/vocab.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
def aggregate_spark(data, columns, args):
def aggregate_spark(data, input):
import pyspark.sql.functions as F
from pyspark.ml.feature import RegexTokenizer

regexTokenizer = RegexTokenizer(inputCol=columns["col"], outputCol="token_list", pattern="\\W")
regexTokenizer = RegexTokenizer(inputCol=input["col"], outputCol="token_list", pattern="\\W")
regexTokenized = regexTokenizer.transform(data)

vocab_rows = (
regexTokenized.select(F.explode(F.col("token_list")).alias("word"))
.groupBy("word")
.count()
.orderBy(F.col("count").desc())
.limit(args["vocab_size"])
.limit(input["vocab_size"])
.select("word")
.collect()
)
Expand Down
2 changes: 1 addition & 1 deletion examples/reviews/implementations/models/sentiment_dnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

def create_estimator(run_config, model_config):
hparams = model_config["hparams"]
vocab_size = len(model_config["aggregates"]["reviews_vocab"])
vocab_size = len(model_config["input"]["vocab"])

def model_fn(features, labels, mode, params):
embedding_input = features["embedding_input"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


def create_estimator(run_config, model_config):
vocab_size = len(model_config["aggregates"]["reviews_vocab"])
vocab_size = len(model_config["input"]["vocab"])
feature_column = tf.feature_column.categorical_column_with_identity(
"embedding_input", vocab_size
)
Expand Down
4 changes: 2 additions & 2 deletions examples/reviews/implementations/models/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def create_estimator(run_config, model_config):
hparams = trainer_lib.create_hparams("transformer_base_single_gpu")

# SentimentIMDBCortex subclasses SentimentIMDB
problem = SentimentIMDBCortex(list(model_config["aggregates"]["reviews_vocab"]))
problem = SentimentIMDBCortex(list(model_config["input"]["vocab"]))
hparams.problem = problem
hparams.problem_hparams = problem.get_hparams(hparams)

Expand All @@ -39,7 +39,7 @@ def create_estimator(run_config, model_config):


def transform_tensorflow(features, labels, model_config):
max_length = model_config["aggregates"]["max_review_length"]
max_length = model_config["input"]["max_review_length"]

features["inputs"] = tf.expand_dims(tf.reshape(features["embedding_input"], [max_length]), -1)
features["targets"] = tf.expand_dims(tf.expand_dims(labels, -1), -1)
Expand Down
Loading