NOTE(review): this patch was recovered from a whitespace-collapsed copy. Line breaks and blank
context lines follow the hunk line counts. Leading indentation is reconstructed, and column
alignment is restored only where the whole construct is visible in the patch; confirm both
against the target tree before applying. The "index" blob ids are carried over unchanged.
Relative to the collapsed copy, the duplicate "s" import alias is dropped in favour of the
existing "libstr" alias, doc comments are added to the new NodeGroup methods, and the new-file
hunk offsets in cluster_config.go are adjusted accordingly.

diff --git a/cli/cmd/cluster.go b/cli/cmd/cluster.go
index f89ba386f3..154404df7f 100644
--- a/cli/cmd/cluster.go
+++ b/cli/cmd/cluster.go
@@ -390,7 +390,7 @@ var _clusterConfigureCmd = &cobra.Command{
 		confirmConfigureClusterConfig(configureChanges, oldClusterConfig, *newClusterConfig, _flagClusterDisallowPrompt)
 
 		out, exitCode, err := runManagerWithClusterConfig("/root/install.sh --configure", newClusterConfig, awsClient, nil, nil, []string{
-			"CORTEX_NODEGROUP_NAMES_TO_SCALE=" + strings.Join(configureChanges.NodeGroupsToScale, " "),          // NodeGroupsToScale contain the cluster config node-group names
+			"CORTEX_NODEGROUP_NAMES_TO_UPDATE=" + strings.Join(configureChanges.NodeGroupsToUpdate, " "),        // NodeGroupsToUpdate contain the cluster config node-group names
 			"CORTEX_NODEGROUP_NAMES_TO_ADD=" + strings.Join(configureChanges.NodeGroupsToAdd, " "),              // NodeGroupsToAdd contain the cluster config node-group names
 			"CORTEX_EKS_NODEGROUP_NAMES_TO_REMOVE=" + strings.Join(configureChanges.EKSNodeGroupsToRemove, " "), // EKSNodeGroupsToRemove contain the EKS node-group names
 		})
diff --git a/cli/cmd/lib_cluster_config.go b/cli/cmd/lib_cluster_config.go
index 5c596ad320..dc45592936 100644
--- a/cli/cmd/lib_cluster_config.go
+++ b/cli/cmd/lib_cluster_config.go
@@ -290,16 +290,10 @@ func confirmConfigureClusterConfig(configureChanges clusterconfig.ConfigureChang
 		fmt.Printf("○ %s will be updated\n", fieldToUpdate)
 	}
 
-	for _, ngName := range configureChanges.NodeGroupsToScale {
+	for _, ngName := range configureChanges.NodeGroupsToUpdate {
 		ngOld := oldCc.GetNodeGroupByName(ngName)
-		ngScaled := newCc.GetNodeGroupByName(ngName)
-		if ngOld.MinInstances != ngScaled.MinInstances && ngOld.MaxInstances != ngScaled.MaxInstances {
-			fmt.Printf("○ nodegroup %s will update %s from %d to %d and %s from %d to %d\n", ngName, clusterconfig.MinInstancesKey, ngOld.MinInstances, ngScaled.MinInstances, clusterconfig.MaxInstancesKey, ngOld.MaxInstances, ngScaled.MaxInstances)
-		} else if ngOld.MinInstances == ngScaled.MinInstances && ngOld.MaxInstances != ngScaled.MaxInstances {
-			fmt.Printf("○ nodegroup %s will update %s from %d to %d\n", ngName, clusterconfig.MaxInstancesKey, ngOld.MaxInstances, ngScaled.MaxInstances)
-		} else if ngOld.MinInstances != ngScaled.MinInstances && ngOld.MaxInstances == ngScaled.MaxInstances {
-			fmt.Printf("○ nodegroup %s will update %s from %d to %d\n", ngName, clusterconfig.MinInstancesKey, ngOld.MinInstances, ngScaled.MinInstances)
-		}
+		ngNew := newCc.GetNodeGroupByName(ngName)
+		fmt.Printf("○ %s\n", ngNew.UpdatePlan(ngOld))
 	}
 
 	for _, ngName := range configureChanges.NodeGroupsToAdd {
diff --git a/manager/install.sh b/manager/install.sh
index 0f1ce822fc..99a89cde54 100755
--- a/manager/install.sh
+++ b/manager/install.sh
@@ -287,7 +287,7 @@ function restart_controller_manager() {
 }
 
 function resize_nodegroups() {
-  if [ -z "$CORTEX_NODEGROUP_NAMES_TO_SCALE" ]; then
+  if [ -z "$CORTEX_NODEGROUP_NAMES_TO_UPDATE" ]; then
     return
   fi
 
@@ -295,7 +295,7 @@ function resize_nodegroups() {
   eks_ng_len=$(cat nodegroups.json | jq -r length)
   cfg_ng_len=$(cat $CORTEX_CLUSTER_CONFIG_FILE | yq -r .node_groups | yq -r length)
 
-  for cfg_ng_name in $CORTEX_NODEGROUP_NAMES_TO_SCALE; do
+  for cfg_ng_name in $CORTEX_NODEGROUP_NAMES_TO_UPDATE; do
     has_ng="false"
     for eks_idx in $(seq 0 $(($eks_ng_len-1))); do
       stack_ng=$(cat nodegroups.json | jq -r .[$eks_idx].Name)
diff --git a/pkg/types/clusterconfig/cluster_config.go b/pkg/types/clusterconfig/cluster_config.go
index 874082bb11..e4e0b57b6d 100644
--- a/pkg/types/clusterconfig/cluster_config.go
+++ b/pkg/types/clusterconfig/cluster_config.go
@@ -165,6 +165,30 @@ type NodeGroup struct {
 	SpotConfig *SpotConfig `json:"spot_config" yaml:"spot_config"`
 }
 
+// HasChanged reports whether any of the supported updatable fields of a nodegroup
+// (MinInstances, MaxInstances, Priority) differ between ng and old.
+func (ng *NodeGroup) HasChanged(old *NodeGroup) bool {
+	return ng.MaxInstances != old.MaxInstances || ng.MinInstances != old.MinInstances || ng.Priority != old.Priority
+}
+
+// UpdatePlan returns a one-line description of which of MinInstances, MaxInstances and
+// Priority differ between old and ng; each change is rendered as "key old->new".
+func (ng *NodeGroup) UpdatePlan(old *NodeGroup) string {
+	var changes []string
+
+	if old.MinInstances != ng.MinInstances {
+		changes = append(changes, fmt.Sprintf("%s %d->%d", MinInstancesKey, old.MinInstances, ng.MinInstances))
+	}
+	if old.MaxInstances != ng.MaxInstances {
+		changes = append(changes, fmt.Sprintf("%s %d->%d", MaxInstancesKey, old.MaxInstances, ng.MaxInstances))
+	}
+	if old.Priority != ng.Priority {
+		changes = append(changes, fmt.Sprintf("%s %d->%d", PriorityKey, old.Priority, ng.Priority))
+	}
+
+	return fmt.Sprintf("nodegroup %s will be updated with the following changes: %s", ng.Name, libstr.StrsAnd(changes))
+}
+
 type SpotConfig struct {
 	InstanceDistribution []string `json:"instance_distribution" yaml:"instance_distribution"`
 	OnDemandBaseCapacity *int64 `json:"on_demand_base_capacity" yaml:"on_demand_base_capacity"`
@@ -207,13 +231,13 @@ type AccessConfig struct {
 type ConfigureChanges struct {
 	NodeGroupsToAdd       []string
 	NodeGroupsToRemove    []string
-	NodeGroupsToScale     []string
+	NodeGroupsToUpdate    []string
 	EKSNodeGroupsToRemove []string // EKS node group names of (NodeGroupsToRemove ∩ Cortex-converted EKS node groups) ∪ (Cortex-converted EKS node groups - the new cluster config's nodegroups)
 	FieldsToUpdate        []string
 }
 
 func (c *ConfigureChanges) HasChanges() bool {
-	return len(c.NodeGroupsToAdd)+len(c.NodeGroupsToRemove)+len(c.NodeGroupsToScale)+len(c.EKSNodeGroupsToRemove)+len(c.FieldsToUpdate) != 0
+	return len(c.NodeGroupsToAdd)+len(c.NodeGroupsToRemove)+len(c.NodeGroupsToUpdate)+len(c.EKSNodeGroupsToRemove)+len(c.FieldsToUpdate) != 0
 }
 
 // GetGhostEKSNodeGroups returns the set difference between EKSNodeGroupsToRemove and the EKS-converted NodeGroupsToRemove
@@ -1087,8 +1111,10 @@ func (cc *Config) validateSharedNodeGroupsDiff(oldConfig Config) error {
 
 		newNgCopy.MinInstances = 0
 		newNgCopy.MaxInstances = 0
+		newNgCopy.Priority = 0
 		oldNgCopy.MinInstances = 0
 		oldNgCopy.MaxInstances = 0
+		oldNgCopy.Priority = 0
 
 		newHash, err := newNgCopy.Hash()
 		if err != nil {
@@ -1200,17 +1226,17 @@ func (cc *Config) ValidateOnConfigure(awsClient *aws.Client, k8sClient *k8s.Clie
 	}
 
 	sharedNgsFromNewConfig, sharedNgsFromOldConfig := cc.getCommonNodeGroups(oldConfig)
-	ngNamesToBeScaled := []*NodeGroup{}
+	ngsToBeUpdated := []*NodeGroup{}
 	for i := range sharedNgsFromNewConfig {
-		if sharedNgsFromNewConfig[i].MinInstances != sharedNgsFromOldConfig[i].MinInstances || sharedNgsFromNewConfig[i].MaxInstances != sharedNgsFromOldConfig[i].MaxInstances {
-			ngNamesToBeScaled = append(ngNamesToBeScaled, sharedNgsFromNewConfig[i])
+		if sharedNgsFromNewConfig[i].HasChanged(sharedNgsFromOldConfig[i]) {
+			ngsToBeUpdated = append(ngsToBeUpdated, sharedNgsFromNewConfig[i])
 		}
 	}
 
 	return ConfigureChanges{
 		NodeGroupsToAdd:       GetNodeGroupNames(ngsToBeAdded),
 		NodeGroupsToRemove:    GetNodeGroupNames(ngsToBeRemoved),
-		NodeGroupsToScale:     GetNodeGroupNames(ngNamesToBeScaled),
+		NodeGroupsToUpdate:    GetNodeGroupNames(ngsToBeUpdated),
 		EKSNodeGroupsToRemove: getStaleEksNodeGroups(cc.ClusterName, eksNodeGroupStacks, cc.NodeGroups, ngsToBeRemoved),
 		FieldsToUpdate:        fieldsToUpdate,
 	}, nil
diff --git a/pkg/types/clusterconfig/config_key.go b/pkg/types/clusterconfig/config_key.go
index d277e8af17..cd5474bb53 100644
--- a/pkg/types/clusterconfig/config_key.go
+++ b/pkg/types/clusterconfig/config_key.go
@@ -29,6 +29,7 @@ const (
 	AcceleratorsPerInstanceKey = "accelerators_per_instance"
 	MinInstancesKey = "min_instances"
 	MaxInstancesKey = "max_instances"
+	PriorityKey = "priority"
 	SpotKey = "spot"
 	SpotConfigKey = "spot_config"
 	InstanceDistributionKey = "instance_distribution"