Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
214e1cc
ResourceConfigField -> ResourceField
1vn May 5, 2019
719ed13
go transformer changes
1vn May 6, 2019
b26eccc
Merge branch 'master' into spec-optional
1vn May 7, 2019
5c835c2
comment out error stack printing
1vn May 7, 2019
b54ffd3
format
1vn May 7, 2019
162bc86
simplify fetch_metadata
1vn May 7, 2019
b8db138
clean up
1vn May 8, 2019
a35f089
fix transform spark bug and training_columns
1vn May 8, 2019
fbf9f56
format
1vn May 8, 2019
78e0095
fix url
1vn May 8, 2019
09992cc
unfix url
1vn May 8, 2019
aa9e900
Merge branch 'spec-optional' of github.com:cortexlabs/cortex into spe…
1vn May 8, 2019
0f6954d
merge master
1vn May 9, 2019
36c5fcf
address some comments
1vn May 10, 2019
6a3a9bd
check 5 samples
1vn May 10, 2019
c5f760d
remove skip validation
1vn May 13, 2019
0bd6b73
remove outdated check
1vn May 13, 2019
081b9b3
add AlphaNumericDashDotUnderscoreEmpty
1vn May 13, 2019
7fedb6f
address some comments - lowercase resourceFields and replace error
1vn May 13, 2019
9445688
cache anon specs
1vn May 13, 2019
14e45ff
fix autogen
1vn May 13, 2019
03a22bc
progress
1vn May 14, 2019
c4a04bb
lazy load metadata
1vn May 15, 2019
cd38e52
clean up spark
1vn May 15, 2019
909a001
format and lint
1vn May 15, 2019
8c93f4c
don't skip cast
1vn May 16, 2019
106fd58
default to None
1vn May 16, 2019
28fcdc8
fix context
1vn May 16, 2019
c5504ac
format
1vn May 16, 2019
5271c3f
fix tests
1vn May 16, 2019
19a69bb
fix test
1vn May 16, 2019
ad9d8b3
address some python comments
1vn May 16, 2019
b6d4337
format
1vn May 16, 2019
181da9a
add test, remove camel case
1vn May 17, 2019
487cf40
Merge branch 'master' into spec-optional
1vn May 17, 2019
0f7c363
clean up type checking logic, formatting
1vn May 17, 2019
076c020
remove more camel case
1vn May 17, 2019
486c026
fix more camel case
1vn May 17, 2019
79657c4
address some comments
1vn May 17, 2019
471e98f
refactor metadata
1vn May 18, 2019
2d5602c
use raw_dataset key
1vn May 18, 2019
8c7fa9b
address some comments
1vn May 21, 2019
e493684
move type inference to validate_transformer
1vn May 21, 2019
c1551c0
validate inferred types from transform spark and python
1vn May 21, 2019
8abc9dc
pass type downstream
1vn May 21, 2019
b36218e
add comment about transform_python
1vn May 21, 2019
5a7bdd3
remove unused
1vn May 21, 2019
7583e39
Merge branch 'master' into spec-optional
1vn May 21, 2019
2aa5a99
address comments
1vn May 21, 2019
95d98e1
format
1vn May 21, 2019
694b08d
wrap more code in try
1vn May 21, 2019
5b5f0b6
move type check before value check
1vn May 21, 2019
b2fd9d7
address comments
1vn May 21, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 1 addition & 10 deletions examples/fraud/resources/weight_column.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,9 @@
columns:
col: class

- kind: transformer
name: weight
inputs:
columns:
col: INT_COLUMN
args:
class_distribution: {INT: FLOAT}
output_type: FLOAT_COLUMN

- kind: transformed_column
name: weight_column
transformer: weight
transformer_path: implementations/transformers/weight.py
inputs:
columns:
col: class
Expand Down
2 changes: 1 addition & 1 deletion examples/mnist/resources/transformed_columns.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
- kind: transformed_column
name: image_pixels
transformer: decode_and_normalize
transformer_path: implementations/transformers/decode_and_normalize.py
inputs:
columns:
image: image
6 changes: 0 additions & 6 deletions examples/mnist/resources/transformers.yaml

This file was deleted.

9 changes: 1 addition & 8 deletions examples/reviews/resources/max_length.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,6 @@
- kind: aggregator
name: max_length
inputs:
columns:
col: STRING_COLUMN
output_type: INT

- kind: aggregate
name: max_review_length
aggregator: max_length
aggregator_path: implementations/aggregators/max_length.py
inputs:
columns:
col: review
12 changes: 1 addition & 11 deletions examples/reviews/resources/tokenized_columns.yaml
Original file line number Diff line number Diff line change
@@ -1,16 +1,6 @@
- kind: transformer
name: tokenize_string_to_int
output_type: INT_LIST_COLUMN
inputs:
columns:
col: STRING_COLUMN
args:
max_len: INT
vocab: {STRING: INT}

- kind: transformed_column
name: embedding_input
transformer: tokenize_string_to_int
transformer_path: implementations/transformers/tokenize_string_to_int.py
inputs:
columns:
col: review
Expand Down
33 changes: 18 additions & 15 deletions pkg/consts/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,21 +36,24 @@ var (
RequirementsTxt = "requirements.txt"
PackageDir = "packages"

AppsDir = "apps"
DataDir = "data"
RawDataDir = "data_raw"
TrainingDataDir = "data_training"
AggregatorsDir = "aggregators"
AggregatesDir = "aggregates"
TransformersDir = "transformers"
ModelImplsDir = "model_implementations"
PythonPackagesDir = "python_packages"
ModelsDir = "models"
ConstantsDir = "constants"
ContextsDir = "contexts"
ResourceStatusesDir = "resource_statuses"
WorkloadSpecsDir = "workload_specs"
LogPrefixesDir = "log_prefixes"
AppsDir = "apps"
APIsDir = "apis"
DataDir = "data"
RawDataDir = "data_raw"
TrainingDataDir = "data_training"
AggregatorsDir = "aggregators"
AggregatesDir = "aggregates"
TransformersDir = "transformers"
ModelImplsDir = "model_implementations"
PythonPackagesDir = "python_packages"
ModelsDir = "models"
ConstantsDir = "constants"
ContextsDir = "contexts"
ResourceStatusesDir = "resource_statuses"
WorkloadSpecsDir = "workload_specs"
LogPrefixesDir = "log_prefixes"
RawColumnsDir = "raw_columns"
TransformedColumnsDir = "transformed_columns"

TelemetryURL = "https://telemetry.cortexlabs.dev"
)
5 changes: 3 additions & 2 deletions pkg/operator/api/context/aggregators.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ type Aggregators map[string]*Aggregator
type Aggregator struct {
*userconfig.Aggregator
*ResourceFields
Namespace *string `json:"namespace"`
ImplKey string `json:"impl_key"`
Namespace *string `json:"namespace"`
ImplKey string `json:"impl_key"`
SkipValidation bool `json:"skip_validation"`
}

func (aggregators Aggregators) OneByID(id string) *Aggregator {
Expand Down
6 changes: 6 additions & 0 deletions pkg/operator/api/context/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ type Resource interface {
GetID() string
GetIDWithTags() string
GetResourceFields() *ResourceFields
GetMetadataKey() string
}

type ComputedResource interface {
Expand All @@ -72,6 +73,7 @@ type ResourceFields struct {
ID string `json:"id"`
IDWithTags string `json:"id_with_tags"`
ResourceType resource.Type `json:"resource_type"`
MetadataKey string `json:"metadata_key"`
}

type ComputedResourceFields struct {
Expand All @@ -91,6 +93,10 @@ func (r *ResourceFields) GetResourceFields() *ResourceFields {
return r
}

// GetMetadataKey returns the MetadataKey stored on the resource fields.
func (r *ResourceFields) GetMetadataKey() string {
return r.MetadataKey
}

// GetWorkloadID returns the WorkloadID stored on the computed resource fields.
func (r *ComputedResourceFields) GetWorkloadID() string {
return r.WorkloadID
}
Expand Down
9 changes: 4 additions & 5 deletions pkg/operator/api/context/models.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,11 @@ type Model struct {
}

type TrainingDataset struct {
userconfig.ResourceConfigFields
userconfig.ResourceFields
*ComputedResourceFields
ModelName string `json:"model_name"`
TrainKey string `json:"train_key"`
EvalKey string `json:"eval_key"`
MetadataKey string `json:"metadata_key"`
ModelName string `json:"model_name"`
TrainKey string `json:"train_key"`
EvalKey string `json:"eval_key"`
}

func (trainingDataset *TrainingDataset) GetResourceType() resource.Type {
Expand Down
2 changes: 1 addition & 1 deletion pkg/operator/api/context/python_packages.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (
type PythonPackages map[string]*PythonPackage

type PythonPackage struct {
userconfig.ResourceConfigFields
userconfig.ResourceFields
*ComputedResourceFields
SrcKey string `json:"src_key"`
PackageKey string `json:"package_key"`
Expand Down
5 changes: 3 additions & 2 deletions pkg/operator/api/context/transformers.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ type Transformers map[string]*Transformer
type Transformer struct {
*userconfig.Transformer
*ResourceFields
Namespace *string `json:"namespace"`
ImplKey string `json:"impl_key"`
Namespace *string `json:"namespace"`
ImplKey string `json:"impl_key"`
SkipValidation bool `json:"skip_validation"`
}

func (transformers Transformers) OneByID(id string) *Transformer {
Expand Down
18 changes: 11 additions & 7 deletions pkg/operator/api/userconfig/aggregates.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,12 @@ import (
type Aggregates []*Aggregate

type Aggregate struct {
ResourceConfigFields
Aggregator string `json:"aggregator" yaml:"aggregator"`
Inputs *Inputs `json:"inputs" yaml:"inputs"`
Compute *SparkCompute `json:"compute" yaml:"compute"`
Tags Tags `json:"tags" yaml:"tags"`
ResourceFields
Aggregator string `json:"aggregator" yaml:"aggregator"`
AggregatorPath *string `json:"aggregator_path" yaml:"aggregator_path"`
Inputs *Inputs `json:"inputs" yaml:"inputs"`
Compute *SparkCompute `json:"compute" yaml:"compute"`
Tags Tags `json:"tags" yaml:"tags"`
}

var aggregateValidation = &configreader.StructValidation{
Expand All @@ -45,10 +46,13 @@ var aggregateValidation = &configreader.StructValidation{
{
StructField: "Aggregator",
StringValidation: &configreader.StringValidation{
Required: true,
AlphaNumericDashDotUnderscore: true,
AllowEmpty: true,
},
},
{
StructField: "AggregatorPath",
StringPtrValidation: &configreader.StringPtrValidation{},
},
inputValuesFieldValidation,
sparkComputeFieldValidation("Compute"),
tagsFieldValidation,
Expand Down
2 changes: 1 addition & 1 deletion pkg/operator/api/userconfig/aggregators.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (
type Aggregators []*Aggregator

type Aggregator struct {
ResourceConfigFields
ResourceFields
Inputs *Inputs `json:"inputs" yaml:"inputs"`
OutputType interface{} `json:"output_type" yaml:"output_type"`
Path string `json:"path" yaml:"path"`
Expand Down
2 changes: 1 addition & 1 deletion pkg/operator/api/userconfig/apis.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (
type APIs []*API

type API struct {
ResourceConfigFields
ResourceFields
ModelName string `json:"model_name" yaml:"model_name"`
Compute *APICompute `json:"compute" yaml:"compute"`
Tags Tags `json:"tags" yaml:"tags"`
Expand Down
40 changes: 34 additions & 6 deletions pkg/operator/api/userconfig/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,19 +200,47 @@ func (config *Config) Validate(envName string) error {
}
}

// Check local aggregators exist
// Check local aggregators exist or a path to one is defined
aggregatorNames := config.Aggregators.Names()
for _, aggregate := range config.Aggregates {
if !strings.Contains(aggregate.Aggregator, ".") && !slices.HasString(aggregatorNames, aggregate.Aggregator) {
return errors.Wrap(ErrorUndefinedResource(aggregate.Aggregator, resource.AggregatorType), Identify(aggregate), AggregatorKey)
if aggregate.AggregatorPath == nil && aggregate.Aggregator == "" {
return ErrorMissingAggregator(aggregate)
}

if aggregate.AggregatorPath != nil && aggregate.Aggregator != "" {
return ErrorMultipleAggregatorSpecified(aggregate)
}

switch {
case aggregate.AggregatorPath != nil:
continue
case aggregate.Aggregator != "":
if !strings.Contains(aggregate.Aggregator, ".") &&
!slices.HasString(aggregatorNames, aggregate.Aggregator) {
return errors.Wrap(ErrorUndefinedResource(aggregate.Aggregator, resource.AggregatorType), Identify(aggregate), AggregatorKey)
}
}
}

// Check local transformers exist
// Check local transformers exist or a path to one is defined
transformerNames := config.Transformers.Names()
for _, transformedColumn := range config.TransformedColumns {
if !strings.Contains(transformedColumn.Transformer, ".") && !slices.HasString(transformerNames, transformedColumn.Transformer) {
return errors.Wrap(ErrorUndefinedResource(transformedColumn.Transformer, resource.TransformerType), Identify(transformedColumn), TransformerKey)
if transformedColumn.TransformerPath == nil && transformedColumn.Transformer == "" {
return ErrorMissingTransformer(transformedColumn)
}

if transformedColumn.TransformerPath != nil && transformedColumn.Transformer != "" {
return ErrorMultipleTransformerSpecified(transformedColumn)
}

switch {
case transformedColumn.TransformerPath != nil:
continue
case transformedColumn.Transformer != "":
if !strings.Contains(transformedColumn.Transformer, ".") &&
!slices.HasString(transformerNames, transformedColumn.Transformer) {
return errors.Wrap(ErrorUndefinedResource(transformedColumn.Transformer, resource.TransformerType), Identify(transformedColumn), TransformerKey)
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/operator/api/userconfig/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import (
type Constants []*Constant

type Constant struct {
ResourceConfigFields
ResourceFields
Type interface{} `json:"type" yaml:"type"`
Value interface{} `json:"value" yaml:"value"`
Tags Tags `json:"tags" yaml:"tags"`
Expand Down
2 changes: 1 addition & 1 deletion pkg/operator/api/userconfig/embed.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (
type Embeds []*Embed

type Embed struct {
ResourceConfigFields
ResourceFields
Template string `json:"template" yaml:"template"`
Args map[string]interface{} `json:"args" yaml:"args"`
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/operator/api/userconfig/environments.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import (
type Environments []*Environment

type Environment struct {
ResourceConfigFields
ResourceFields
LogLevel *LogLevel `json:"log_level" yaml:"log_level"`
Limit *Limit `json:"limit" yaml:"limit"`
Data Data `json:"-" yaml:"-"`
Expand Down
39 changes: 38 additions & 1 deletion pkg/operator/api/userconfig/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ const (
ErrK8sQuantityMustBeInt
ErrRegressionTargetType
ErrClassificationTargetType
ErrMissingAggregator
ErrMissingTransformer
ErrMultipleAggregatorSpecified
ErrMultipleTransformerSpecified
)

var errorKinds = []string{
Expand Down Expand Up @@ -90,9 +94,13 @@ var errorKinds = []string{
"err_k8s_quantity_must_be_int",
"err_regression_target_type",
"err_classification_target_type",
"err_missing_aggregator",
"err_missing_transformer",
"err_multiple_aggregator_specified",
"err_multiple_transformer_specified",
}

var _ = [1]int{}[int(ErrClassificationTargetType)-(len(errorKinds)-1)] // Ensure list length matches
var _ = [1]int{}[int(ErrMultipleTransformerSpecified)-(len(errorKinds)-1)] // Ensure list length matches

func (t ErrorKind) String() string {
return errorKinds[t]
Expand Down Expand Up @@ -376,9 +384,38 @@ func ErrorRegressionTargetType() error {
message: "regression models can only predict float target values",
}
}

// ErrorClassificationTargetType returns an Error of kind
// ErrClassificationTargetType carrying a fixed message about the target
// values a classification model can predict.
func ErrorClassificationTargetType() error {
	const msg = "classification models can only predict integer target values (i.e. {0, 1, ..., num_classes-1})"
	return Error{Kind: ErrClassificationTargetType, message: msg}
}

// ErrorMissingAggregator returns an Error of kind ErrMissingAggregator whose
// message names the aggregate and states that either "aggregator" or
// "aggregator_path" is expected.
func ErrorMissingAggregator(aggregate *Aggregate) error {
	msg := fmt.Sprintf("missing aggregator for aggregate \"%s\", expecting either \"aggregator\" or \"aggregator_path\"", aggregate.Name)
	return Error{Kind: ErrMissingAggregator, message: msg}
}

// ErrorMissingTransformer returns an Error of kind ErrMissingTransformer whose
// message names the transformed column and states that either "transformer"
// or "transformer_path" is expected.
func ErrorMissingTransformer(transformedColumn *TransformedColumn) error {
	msg := fmt.Sprintf("missing transformer for transformed_column \"%s\", expecting either \"transformer\" or \"transformer_path\"", transformedColumn.Name)
	return Error{Kind: ErrMissingTransformer, message: msg}
}

// ErrorMultipleAggregatorSpecified returns an Error of kind
// ErrMultipleAggregatorSpecified whose message names the aggregate and asks
// for only one of "aggregator" and "aggregator_path" to be specified.
func ErrorMultipleAggregatorSpecified(aggregate *Aggregate) error {
	msg := fmt.Sprintf("aggregate \"%s\" specified both \"aggregator\" and \"aggregator_path\", please specify only one", aggregate.Name)
	return Error{Kind: ErrMultipleAggregatorSpecified, message: msg}
}

// ErrorMultipleTransformerSpecified returns an Error of kind
// ErrMultipleTransformerSpecified whose message names the transformed column
// and asks for only one of "transformer" and "transformer_path" to be
// specified.
func ErrorMultipleTransformerSpecified(transformedColumn *TransformedColumn) error {
	msg := fmt.Sprintf("transformed_column \"%s\" specified both \"transformer\" and \"transformer_path\", please specify only one", transformedColumn.Name)
	return Error{Kind: ErrMultipleTransformerSpecified, message: msg}
}
2 changes: 1 addition & 1 deletion pkg/operator/api/userconfig/models.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import (
type Models []*Model

type Model struct {
ResourceConfigFields
ResourceFields
Type ModelType `json:"type" yaml:"type"`
Path string `json:"path" yaml:"path"`
TargetColumn string `json:"target_column" yaml:"target_column"`
Expand Down
Loading