Skip to content

Commit c3b38d8

Browse files
authored
Adding retries for gRPC calls (#1248)
1 parent 2e9d676 commit c3b38d8

19 files changed

Lines changed: 2532 additions & 7 deletions

File tree

Gopkg.lock

Lines changed: 27 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Gopkg.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,3 +115,7 @@ required = [
115115
[[prune.project]]
116116
name = "k8s.io/gengo"
117117
unused-packages = false
118+
119+
[[constraint]]
120+
name = "github.com/grpc-ecosystem/go-grpc-middleware"
121+
version = "1.2.0"

pkg/controller.v1alpha3/consts/const.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
package consts
22

3-
import "github.com/kubeflow/katib/pkg/util/v1alpha3/env"
3+
import (
4+
"github.com/kubeflow/katib/pkg/util/v1alpha3/env"
5+
"time"
6+
)
47

58
const (
69
// ConfigExperimentSuggestionName is the config name of the
@@ -35,6 +38,11 @@ const (
3538
// which is used to run healthz check using grpc probe.
3639
DefaultGRPCService = "manager.v1alpha3.Suggestion"
3740

41+
// DefaultGRPCRetryAttempts is the the maximum number of retries for gRPC calls
42+
DefaultGRPCRetryAttempts = 10
43+
// DefaultGRPCRetryPeriod is a fixed period of time between gRPC call retries
44+
DefaultGRPCRetryPeriod = 3 * time.Second
45+
3846
// DefaultKatibNamespaceEnvName is the default env name of katib namespace
3947
DefaultKatibNamespaceEnvName = "KATIB_CORE_NAMESPACE"
4048
// DefaultKatibComposerEnvName is the default env name of katib suggestion composer

pkg/controller.v1alpha3/suggestion/suggestionclient/suggestionclient.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ import (
55
"fmt"
66
"time"
77

8+
"github.com/kubeflow/katib/pkg/controller.v1alpha3/consts"
9+
10+
grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry"
811
"google.golang.org/grpc"
912
"google.golang.org/grpc/codes"
1013
"google.golang.org/grpc/status"
@@ -105,7 +108,15 @@ func (g *General) SyncAssignments(
105108
func (g *General) ValidateAlgorithmSettings(instance *suggestionsv1alpha3.Suggestion, e *experimentsv1alpha3.Experiment) error {
106109
logger := log.WithValues("Suggestion", types.NamespacedName{Name: instance.GetName(), Namespace: instance.GetNamespace()})
107110
endpoint := util.GetAlgorithmEndpoint(instance)
108-
conn, err := grpc.Dial(endpoint, grpc.WithInsecure())
111+
112+
callOpts := []grpc_retry.CallOption{
113+
grpc_retry.WithBackoff(grpc_retry.BackoffLinear(consts.DefaultGRPCRetryPeriod)),
114+
grpc_retry.WithMax(consts.DefaultGRPCRetryAttempts),
115+
}
116+
conn, err := grpc.Dial(endpoint, grpc.WithInsecure(),
117+
grpc.WithStreamInterceptor(grpc_retry.StreamClientInterceptor(callOpts...)),
118+
grpc.WithUnaryInterceptor(grpc_retry.UnaryClientInterceptor(callOpts...)),
119+
)
109120
if err != nil {
110121
return err
111122
}
@@ -118,6 +129,7 @@ func (g *General) ValidateAlgorithmSettings(instance *suggestionsv1alpha3.Sugges
118129
request := &suggestionapi.ValidateAlgorithmSettingsRequest{
119130
Experiment: g.ConvertExperiment(e),
120131
}
132+
121133
// See https://github.com/grpc/grpc-go/issues/2636
122134
// See https://github.com/grpc/grpc-go/pull/2503
123135
_, err = client.ValidateAlgorithmSettings(ctx, request, grpc.WaitForReady(true))

pkg/controller.v1beta1/consts/const.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
package consts
22

3-
import "github.com/kubeflow/katib/pkg/util/v1beta1/env"
3+
import (
4+
"github.com/kubeflow/katib/pkg/util/v1beta1/env"
5+
"time"
6+
)
47

58
const (
69
// ConfigExperimentSuggestionName is the config name of the
@@ -35,6 +38,11 @@ const (
3538
// which is used to run healthz check using grpc probe.
3639
DefaultGRPCService = "manager.v1beta1.Suggestion"
3740

41+
// DefaultGRPCRetryAttempts is the the maximum number of retries for gRPC calls
42+
DefaultGRPCRetryAttempts = 10
43+
// DefaultGRPCRetryPeriod is a fixed period of time between gRPC call retries
44+
DefaultGRPCRetryPeriod = 3 * time.Second
45+
3846
// DefaultKatibNamespaceEnvName is the default env name of katib namespace
3947
DefaultKatibNamespaceEnvName = "KATIB_CORE_NAMESPACE"
4048
// DefaultKatibComposerEnvName is the default env name of katib suggestion composer

pkg/controller.v1beta1/suggestion/suggestionclient/suggestionclient.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import (
55
"fmt"
66
"time"
77

8+
grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry"
9+
810
"google.golang.org/grpc"
911
"google.golang.org/grpc/codes"
1012
"google.golang.org/grpc/status"
@@ -110,7 +112,15 @@ func (g *General) SyncAssignments(
110112
func (g *General) ValidateAlgorithmSettings(instance *suggestionsv1beta1.Suggestion, e *experimentsv1beta1.Experiment) error {
111113
logger := log.WithValues("Suggestion", types.NamespacedName{Name: instance.GetName(), Namespace: instance.GetNamespace()})
112114
endpoint := util.GetAlgorithmEndpoint(instance)
113-
conn, err := grpc.Dial(endpoint, grpc.WithInsecure())
115+
116+
callOpts := []grpc_retry.CallOption{
117+
grpc_retry.WithBackoff(grpc_retry.BackoffLinear(consts.DefaultGRPCRetryPeriod)),
118+
grpc_retry.WithMax(consts.DefaultGRPCRetryAttempts),
119+
}
120+
conn, err := grpc.Dial(endpoint, grpc.WithInsecure(),
121+
grpc.WithStreamInterceptor(grpc_retry.StreamClientInterceptor(callOpts...)),
122+
grpc.WithUnaryInterceptor(grpc_retry.UnaryClientInterceptor(callOpts...)),
123+
)
114124
if err != nil {
115125
return err
116126
}

0 commit comments

Comments
 (0)