Skip to content

Commit 5740632

Browse files
authored
feat: handle MC force delete (#192)
1 parent bb74f0a commit 5740632

File tree

13 files changed

+681
-4
lines changed

13 files changed

+681
-4
lines changed

charts/hub-net-controller-manager/templates/deployment.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ spec:
2828
- --leader-election-namespace={{ .Values.leaderElectionNamespace }}
2929
- --v={{ .Values.logVerbosity }}
3030
- --add_dir_header
31+
- --force-delete-wait-time={{ .Values.forceDeleteWaitTime }}
3132
ports:
3233
- name: metrics
3334
containerPort: 8080

charts/hub-net-controller-manager/templates/rbac.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,14 @@ rules:
129129
- get
130130
- patch
131131
- update
132+
- apiGroups:
133+
- cluster.kubernetes-fleet.io
134+
resources:
135+
- memberclusters
136+
verbs:
137+
- get
138+
- list
139+
- watch
132140
---
133141
kind: ClusterRoleBinding
134142
apiVersion: rbac.authorization.k8s.io/v1

charts/hub-net-controller-manager/values.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,15 @@ replicaCount: 1
66

77
image:
88
repository: ghcr.io/azure/fleet-networking/hub-net-controller-manager
9-
pullPolicy: IfNotPresent
9+
pullPolicy: Always
1010
# Overrides the image tag whose default is the chart appVersion.
1111
tag: "v0.1.0"
1212

1313
logVerbosity: 2
1414

1515
leaderElectionNamespace: fleet-system
1616
fleetSystemNamespace: fleet-system
17+
forceDeleteWaitTime: 2m0s
1718

1819
resources:
1920
limits:

cmd/hub-net-controller-manager/main.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,13 @@ import (
2626
"sigs.k8s.io/controller-runtime/pkg/webhook"
2727

2828
//+kubebuilder:scaffold:imports
29+
clusterv1beta1 "go.goms.io/fleet/apis/cluster/v1beta1"
2930

3031
fleetnetv1alpha1 "go.goms.io/fleet-networking/api/v1alpha1"
3132
"go.goms.io/fleet-networking/pkg/controllers/hub/endpointsliceexport"
3233
"go.goms.io/fleet-networking/pkg/controllers/hub/internalserviceexport"
3334
"go.goms.io/fleet-networking/pkg/controllers/hub/internalserviceimport"
35+
"go.goms.io/fleet-networking/pkg/controllers/hub/membercluster"
3436
"go.goms.io/fleet-networking/pkg/controllers/hub/serviceimport"
3537
)
3638

@@ -47,11 +49,14 @@ var (
4749
internalServiceExportRetryInterval = flag.Duration("internalserviceexport-retry-interval", 2*time.Second,
4850
"The wait time for the internalserviceexport controller to requeue the request and to wait for the"+
4951
"ServiceImport controller to resolve the service Spec")
52+
53+
forceDeleteWaitTime = flag.Duration("force-delete-wait-time", 15*time.Minute, "The duration the fleet hub agent waits before trying to force delete a member cluster.")
5054
)
5155

5256
func init() {
5357
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
5458
utilruntime.Must(fleetnetv1alpha1.AddToScheme(scheme))
59+
utilruntime.Must(clusterv1beta1.AddToScheme(scheme))
5560
klog.InitFlags(nil)
5661
//+kubebuilder:scaffold:scheme
5762
}
@@ -140,6 +145,16 @@ func main() {
140145
exitWithErrorFunc()
141146
}
142147

148+
klog.V(1).InfoS("Start to setup MemberCluster controller")
149+
if err := (&membercluster.Reconciler{
150+
Client: mgr.GetClient(),
151+
Recorder: mgr.GetEventRecorderFor(membercluster.ControllerName),
152+
ForceDeleteWaitTime: *forceDeleteWaitTime,
153+
}).SetupWithManager(mgr); err != nil {
154+
klog.ErrorS(err, "Unable to create MemberCluster controller")
155+
exitWithErrorFunc()
156+
}
157+
143158
klog.V(1).InfoS("Starting ServiceExportImport controller manager")
144159
if err := mgr.Start(ctx); err != nil {
145160
klog.ErrorS(err, "Problem running manager")

examples/getting-started/artifacts/hub-rbac.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ rules:
2626
- patch
2727
- apiGroups:
2828
- networking.fleet.azure.com
29+
- cluster.kubernetes-fleet.io
2930
resources: ["*"]
3031
verbs: ["*"]
3132
---
@@ -70,6 +71,7 @@ rules:
7071
- patch
7172
- apiGroups:
7273
- networking.fleet.azure.com
74+
- cluster.kubernetes-fleet.io
7375
resources: ["*"]
7476
verbs: ["*"]
7577
---

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ require (
1919
require (
2020
github.com/stretchr/testify v1.9.0
2121
go.goms.io/fleet v0.10.5
22+
golang.org/x/sync v0.7.0
2223
)
2324

2425
require (

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,8 @@ golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbht
120120
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
121121
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
122122
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
123+
golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
124+
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
123125
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
124126
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
125127
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=

pkg/common/hubconfig/hubconfig.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ const (
3434

3535
// Naming pattern of member cluster namespace in hub cluster, should be the same as envValue as defined in
3636
// https://github.com/Azure/fleet/blob/main/pkg/utils/common.go
37-
hubNamespaceNameFormat = "fleet-member-%s"
37+
HubNamespaceNameFormat = "fleet-member-%s"
3838
)
3939

4040
// PrepareHubConfig return the config holding attributes for a Kubernetes client to request hub cluster.
@@ -115,5 +115,5 @@ func FetchMemberClusterNamespace() (string, error) {
115115
klog.ErrorS(err, "Member cluster name cannot be empty")
116116
return "", err
117117
}
118-
return fmt.Sprintf(hubNamespaceNameFormat, mcName), nil
118+
return fmt.Sprintf(HubNamespaceNameFormat, mcName), nil
119119
}

pkg/common/hubconfig/hubconfig_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ func TestFetchMemberClusterNamespace(t *testing.T) {
161161
name: "environment variable is present",
162162
envKey: "MEMBER_CLUSTER_NAME",
163163
envValue: memberCluster,
164-
want: fmt.Sprintf(hubNamespaceNameFormat, memberCluster),
164+
want: fmt.Sprintf(HubNamespaceNameFormat, memberCluster),
165165
wantErr: false,
166166
},
167167
{
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
/*
2+
Copyright (c) Microsoft Corporation.
3+
Licensed under the MIT license.
4+
*/
5+
6+
// Package membercluster features the MemberCluster controller, which watches
7+
// update/delete events on MemberCluster objects and, once a deletion has been pending for the force
8+
// delete wait time, removes finalizers from the EndpointSliceImports in the fleet member cluster namespace.
9+
package membercluster
10+
11+
import (
12+
"context"
13+
"fmt"
14+
"time"
15+
16+
"golang.org/x/sync/errgroup"
17+
"k8s.io/apimachinery/pkg/api/errors"
18+
"k8s.io/client-go/tools/record"
19+
"k8s.io/klog/v2"
20+
ctrl "sigs.k8s.io/controller-runtime"
21+
"sigs.k8s.io/controller-runtime/pkg/client"
22+
"sigs.k8s.io/controller-runtime/pkg/event"
23+
"sigs.k8s.io/controller-runtime/pkg/predicate"
24+
25+
clusterv1beta1 "go.goms.io/fleet/apis/cluster/v1beta1"
26+
"go.goms.io/fleet/pkg/utils/controller"
27+
28+
fleetnetv1alpha1 "go.goms.io/fleet-networking/api/v1alpha1"
29+
"go.goms.io/fleet-networking/pkg/common/hubconfig"
30+
)
31+
32+
const (
33+
ControllerName = "membercluster-controller"
34+
)
35+
36+
// Reconciler reconciles a MemberCluster object.
37+
type Reconciler struct {
38+
client.Client
39+
Recorder record.EventRecorder
40+
// the wait time in minutes before we need to force delete a member cluster.
41+
ForceDeleteWaitTime time.Duration
42+
}
43+
44+
// Reconcile watches the deletion of the member cluster and removes finalizers on fleet networking resources in the
45+
// member cluster namespace.
46+
func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
47+
mcObjRef := klog.KRef(req.Namespace, req.Name)
48+
startTime := time.Now()
49+
klog.V(2).InfoS("Reconciliation starts", "memberCluster", mcObjRef)
50+
defer func() {
51+
latency := time.Since(startTime).Milliseconds()
52+
klog.V(2).InfoS("Reconciliation ends", "memberCluster", mcObjRef, "latency", latency)
53+
}()
54+
var mc clusterv1beta1.MemberCluster
55+
if err := r.Client.Get(ctx, req.NamespacedName, &mc); err != nil {
56+
if errors.IsNotFound(err) {
57+
klog.V(4).InfoS("Ignoring NotFound memberCluster", "memberCluster", mcObjRef)
58+
return ctrl.Result{}, nil
59+
}
60+
klog.ErrorS(err, "Failed to get memberCluster", "memberCluster", mcObjRef)
61+
return ctrl.Result{}, err
62+
}
63+
if mc.DeletionTimestamp.IsZero() {
64+
klog.ErrorS(controller.NewUnexpectedBehaviorError(fmt.Errorf("member cluster %s is not being deleted",
65+
mc.Name)), "The member cluster should have deletionTimeStamp set to a non-zero/non-nil value")
66+
return ctrl.Result{}, nil // no need to retry.
67+
}
68+
69+
// Handle deleting member cluster, removes finalizers on all the resources in the cluster namespace
70+
// after member cluster force delete wait time.
71+
if !mc.DeletionTimestamp.IsZero() && time.Since(mc.DeletionTimestamp.Time) >= r.ForceDeleteWaitTime {
72+
klog.V(2).InfoS("The member cluster deletion is stuck removing the "+
73+
"finalizers from all the resources in member cluster namespace", "memberCluster", mcObjRef)
74+
return r.removeFinalizer(ctx, mc)
75+
}
76+
// we need to only wait for force delete wait time, if the update/delete member cluster event takes
77+
// longer to be reconciled we need to account for that time.
78+
return ctrl.Result{RequeueAfter: r.ForceDeleteWaitTime - time.Since(mc.DeletionTimestamp.Time)}, nil
79+
}
80+
81+
// removeFinalizer removes finalizers on the resources in the member cluster namespace.
82+
// For EndpointSliceExport, InternalServiceImport & InternalServiceExport resources, the finalizers should be
83+
// removed by other hub networking controllers when leaving. So this MemberCluster controller only handles
84+
// EndpointSliceImports here.
85+
func (r *Reconciler) removeFinalizer(ctx context.Context, mc clusterv1beta1.MemberCluster) (ctrl.Result, error) {
86+
// Remove finalizer for EndpointSliceImport resources in the cluster namespace.
87+
mcObjRef := klog.KRef(mc.Namespace, mc.Name)
88+
mcNamespace := fmt.Sprintf(hubconfig.HubNamespaceNameFormat, mc.Name)
89+
var endpointSliceImportList fleetnetv1alpha1.EndpointSliceImportList
90+
if err := r.Client.List(ctx, &endpointSliceImportList, client.InNamespace(mcNamespace)); err != nil {
91+
klog.ErrorS(err, "Failed to list endpointSliceImports", "memberCluster", mcObjRef)
92+
return ctrl.Result{}, err
93+
}
94+
errs, ctx := errgroup.WithContext(ctx)
95+
for i := range endpointSliceImportList.Items {
96+
esi := &endpointSliceImportList.Items[i]
97+
errs.Go(func() error {
98+
esiObjRef := klog.KRef(esi.Namespace, esi.Name)
99+
esi.SetFinalizers(nil)
100+
if err := r.Client.Update(ctx, esi); err != nil {
101+
klog.ErrorS(err, "Failed to remove finalizers for endpointSliceImport",
102+
"memberCluster", mcObjRef, "endpointSliceImport", esiObjRef)
103+
return err
104+
}
105+
klog.V(2).InfoS("Removed finalizers for endpointSliceImport",
106+
"memberCluster", mcObjRef, "endpointSliceImport", esiObjRef)
107+
return nil
108+
})
109+
}
110+
return ctrl.Result{}, errs.Wait()
111+
}
112+
113+
// SetupWithManager sets up the controller with the Manager.
114+
func (r *Reconciler) SetupWithManager(mgr ctrl.Manager) error {
115+
customPredicate := predicate.Funcs{
116+
CreateFunc: func(e event.CreateEvent) bool {
117+
// Ignore creation events.
118+
return false
119+
},
120+
DeleteFunc: func(e event.DeleteEvent) bool {
121+
// trigger reconcile on delete event just in case update event is missed.
122+
return true
123+
},
124+
UpdateFunc: func(e event.UpdateEvent) bool {
125+
// If new object is being deleted, trigger reconcile.
126+
return !e.ObjectNew.GetDeletionTimestamp().IsZero()
127+
},
128+
}
129+
// Watch for changes to primary resource MemberCluster
130+
return ctrl.NewControllerManagedBy(mgr).
131+
For(&clusterv1beta1.MemberCluster{}).
132+
WithEventFilter(customPredicate).
133+
Complete(r)
134+
}

0 commit comments

Comments
 (0)