
Commit 1b4f7cf

Merge pull request #4752 from cyclinder/ipam/disable_gc_ternamating
IPAM fix: ENV EnableGCStatelessTerminatingPod(Not)ReadyNode=false does not work
2 parents 7a9370d + 40e179a commit 1b4f7cf
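
The change gates IP GC for stateless terminating Pods on the readiness of their Node. Below is a minimal, standalone sketch of that gate for orientation only; the function and parameter names are invented here, while the two flag names are the ones that appear in the diff further down.

package main

import "fmt"

// shouldHandleStatelessTerminatingPod mirrors the gate described above:
// the IP of a stateless terminating Pod may only be reclaimed or traced
// when the flag matching the Node's readiness is enabled.
func shouldHandleStatelessTerminatingPod(nodeReady, gcOnReadyNode, gcOnNotReadyNode bool) bool {
	if nodeReady && !gcOnReadyNode {
		// EnableGCStatelessTerminatingPodOnReadyNode=false: keep the IP
		return false
	}
	if !nodeReady && !gcOnNotReadyNode {
		// EnableGCStatelessTerminatingPodOnNotReadyNode=false: keep the IP
		return false
	}
	return true
}

func main() {
	// Node is Ready but GC on ready nodes is disabled: the IP must not be touched.
	fmt.Println(shouldHandleStatelessTerminatingPod(true, false, true)) // false
}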

File tree: 2 files changed, +118 -45 lines changed


pkg/gcmanager/pod_cache.go

Lines changed: 51 additions & 25 deletions
@@ -17,7 +17,6 @@ import (
 	"github.com/spidernet-io/spiderpool/pkg/constant"
 	"github.com/spidernet-io/spiderpool/pkg/lock"
 	"github.com/spidernet-io/spiderpool/pkg/logutils"
-	"github.com/spidernet-io/spiderpool/pkg/nodemanager"
 	"github.com/spidernet-io/spiderpool/pkg/types"
 )
 
@@ -125,7 +124,9 @@ func (p *PodDatabase) ApplyPodEntry(podEntry *PodEntry) error {
 	return nil
 }
 
-// buildPodEntry will build PodEntry with the given args, it serves for Pod Informer event hooks
+// buildPodEntry will build PodEntry with the given args, it serves for Pod Informer event hooks and scanAll
+// for Pod Informer event hooks, if the podEntry is nil, we don't tracing it
+// for scanAll, if the podEntry is nil, we will not GC it's IP
 func (s *SpiderGC) buildPodEntry(oldPod, currentPod *corev1.Pod, deleted bool) (*PodEntry, error) {
 	if currentPod == nil {
 		return nil, fmt.Errorf("currentPod must be specified")
@@ -136,29 +137,25 @@ func (s *SpiderGC) buildPodEntry(oldPod, currentPod *corev1.Pod, deleted bool) (
 		return nil, nil
 	}
 
-	ownerRef := metav1.GetControllerOf(currentPod)
 	ctx := context.TODO()
-
-	// check StatefulSet pod, we will trace it if its controller StatefulSet object was deleted or decreased its replicas and the pod index was out of the replicas.
-	if s.gcConfig.EnableStatefulSet && ownerRef != nil &&
-		ownerRef.APIVersion == appsv1.SchemeGroupVersion.String() && ownerRef.Kind == constant.KindStatefulSet {
-		isValidStsPod, err := s.stsMgr.IsValidStatefulSetPod(ctx, currentPod.Namespace, currentPod.Name, ownerRef.Kind)
-		if nil != err {
+	// check StatefulSet pod, we will trace it if its controller StatefulSet object was deleted or decreased
+	// its replicas and the pod index was out of the replicas.
+	if s.gcConfig.EnableStatefulSet {
+		isValidStsPod, err := s.isValidStatefulSetPod(ctx, currentPod)
+		if err != nil {
 			return nil, err
 		}
 
-		// StatefulSet pod restarted, no need to trace it.
 		if isValidStsPod {
 			logger.Sugar().Debugf("the StatefulSet pod '%s/%s' just restarts, keep its IPs", currentPod.Namespace, currentPod.Name)
 			return nil, nil
 		}
 	}
 
 	// check kubevirt vm pod, we will trace it if its controller is no longer exist
-	if s.gcConfig.EnableKubevirtStaticIP && ownerRef != nil &&
-		ownerRef.APIVersion == kubevirtv1.SchemeGroupVersion.String() && ownerRef.Kind == constant.KindKubevirtVMI {
-		isValidVMPod, err := s.kubevirtMgr.IsValidVMPod(logutils.IntoContext(ctx, logger), currentPod.Namespace, ownerRef.Kind, ownerRef.Name)
-		if nil != err {
+	if s.gcConfig.EnableKubevirtStaticIP {
+		isValidVMPod, err := s.isValidKubevirtVMIPod(ctx, currentPod)
+		if err != nil {
 			return nil, err
 		}
 
@@ -227,18 +224,12 @@ func (s *SpiderGC) buildPodEntry(oldPod, currentPod *corev1.Pod, deleted bool) (
 
 	if isBuildTerminatingPodEntry {
 		// check terminating Pod corresponding Node status
-		node, err := s.nodeMgr.GetNodeByName(ctx, currentPod.Spec.NodeName, constant.UseCache)
-		if nil != err {
-			return nil, fmt.Errorf("failed to get terminating Pod '%s/%s' corredponing Node '%s', error: %v", currentPod.Namespace, currentPod.Name, currentPod.Spec.NodeName, err)
-		}
-		// disable for gc terminating pod with Node Ready
-		if nodemanager.IsNodeReady(node) && !s.gcConfig.EnableGCStatelessTerminatingPodOnReadyNode {
-			logger.Sugar().Debugf("IP GC already turn off 'EnableGCForTerminatingPodWithNodeReady' configuration, disacrd tracing pod '%s/%s'", currentPod.Namespace, currentPod.Name)
-			return nil, nil
+		enabled, err := s.isShouldGCOrTraceStatelessTerminatingPodOnNode(ctx, currentPod)
+		if err != nil {
+			return nil, err
 		}
-		// disable for gc terminating pod with Node NotReady
-		if !nodemanager.IsNodeReady(node) && !s.gcConfig.EnableGCStatelessTerminatingPodOnNotReadyNode {
-			logger.Sugar().Debugf("IP GC already turn off 'EnableGCForTerminatingPodWithNodeNotReady' configuration, disacrd tracing pod '%s/%s'", currentPod.Namespace, currentPod.Name)
+
+		if !enabled {
 			return nil, nil
 		}
 
@@ -331,3 +322,38 @@ func (s *SpiderGC) computeSucceededOrFailedPodTerminatingTime(podYaml *corev1.Po
 	terminatingStopTime = terminatingStartTime.Add(gracefulTime)
 	return
 }
+
+func (s *SpiderGC) isValidStatefulSetPod(ctx context.Context, currentPod *corev1.Pod) (isValidStsPod bool, err error) {
+	ownerRef := metav1.GetControllerOf(currentPod)
+	// check StatefulSet pod, we will trace it if its controller StatefulSet object was deleted or decreased its replicas and the pod index was out of the replicas.
+	if ownerRef != nil &&
+		ownerRef.APIVersion == appsv1.SchemeGroupVersion.String() && ownerRef.Kind == constant.KindStatefulSet {
+		isValidStsPod, err := s.stsMgr.IsValidStatefulSetPod(ctx, currentPod.Namespace, currentPod.Name, ownerRef.Kind)
+		if err != nil {
+			return false, err
+		}
+
+		// StatefulSet pod restarted, no need to trace it.
+		if isValidStsPod {
+			return true, nil
+		}
+	}
+	return false, nil
+}
+
+func (s *SpiderGC) isValidKubevirtVMIPod(ctx context.Context, currentPod *corev1.Pod) (isKubevirtVMIPod bool, err error) {
+	ownerRef := metav1.GetControllerOf(currentPod)
+	// check StatefulSet pod, we will trace it if its controller StatefulSet object was deleted or decreased its replicas and the pod index was out of the replicas.
+	if s.gcConfig.EnableKubevirtStaticIP && ownerRef != nil &&
+		ownerRef.APIVersion == kubevirtv1.SchemeGroupVersion.String() && ownerRef.Kind == constant.KindKubevirtVMI {
+		isValidVMPod, err := s.kubevirtMgr.IsValidVMPod(logutils.IntoContext(ctx, logger), currentPod.Namespace, ownerRef.Kind, ownerRef.Name)
+		if err != nil {
+			return false, err
+		}
+
+		if isValidVMPod {
+			return true, nil
+		}
+	}
+	return false, nil
+}
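
Both helpers extracted into pod_cache.go above hinge on the Pod's controller owner reference. The following self-contained snippet (illustrative only, not Spiderpool code) shows the same metav1.GetControllerOf check against an apps/v1 StatefulSet; the literal "StatefulSet" is assumed here to match constant.KindStatefulSet from the diff.

package main

import (
	"fmt"

	appsv1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// isControlledByStatefulSet reproduces the owner-reference test used by the
// new isValidStatefulSetPod helper: the Pod's controller must be an
// apps/v1 StatefulSet.
func isControlledByStatefulSet(pod *corev1.Pod) bool {
	ownerRef := metav1.GetControllerOf(pod)
	return ownerRef != nil &&
		ownerRef.APIVersion == appsv1.SchemeGroupVersion.String() &&
		ownerRef.Kind == "StatefulSet"
}

func main() {
	isController := true
	// A made-up Pod owned by a StatefulSet named "web".
	pod := &corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "web-0",
			Namespace: "default",
			OwnerReferences: []metav1.OwnerReference{{
				APIVersion: "apps/v1",
				Kind:       "StatefulSet",
				Name:       "web",
				Controller: &isController,
			}},
		},
	}
	fmt.Println(isControlledByStatefulSet(pod)) // true
}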

pkg/gcmanager/scanAll_IPPool.go

Lines changed: 67 additions & 20 deletions
@@ -16,6 +16,7 @@ import (
 	"github.com/spidernet-io/spiderpool/pkg/constant"
 	spiderpoolv2beta1 "github.com/spidernet-io/spiderpool/pkg/k8s/apis/spiderpool.spidernet.io/v2beta1"
 	"github.com/spidernet-io/spiderpool/pkg/logutils"
+	"github.com/spidernet-io/spiderpool/pkg/nodemanager"
 	"github.com/spidernet-io/spiderpool/pkg/podmanager"
 	"github.com/spidernet-io/spiderpool/pkg/types"
 	"github.com/spidernet-io/spiderpool/pkg/utils/convert"
@@ -124,6 +125,7 @@ func (s *SpiderGC) executeScanAll(ctx context.Context) {
 			flagPodStatusShouldGCIP := false
 			flagTracePodEntry := false
 			flagStaticIPPod := false
+			shouldGcstatelessTerminatingPod := false
 			endpoint, endpointErr := s.wepMgr.GetEndpointByName(ctx, podNS, podName, constant.UseCache)
 			podYaml, podErr := s.podMgr.GetPodByName(ctx, podNS, podName, constant.UseCache)
 
@@ -170,6 +172,15 @@ func (s *SpiderGC) executeScanAll(ctx context.Context) {
 				continue
 			}
 
+			// check should handle podIP via corresponding Node status and global gc flag
+			if !flagStaticIPPod {
+				shouldGcstatelessTerminatingPod, err = s.isShouldGCOrTraceStatelessTerminatingPodOnNode(ctx, podYaml)
+				if err != nil {
+					scanAllLogger.Sugar().Errorf("failed to check pod %s/%s should trace, ignore handle IP %s, error: %v", podNS, podName, poolIP, err)
+					continue
+				}
+			}
+
 			// check the pod status
 			switch {
 			case podYaml.Status.Phase == corev1.PodSucceeded || podYaml.Status.Phase == corev1.PodFailed:
@@ -193,8 +204,10 @@ func (s *SpiderGC) executeScanAll(ctx context.Context) {
 						flagPodStatusShouldGCIP = true
 					}
 				} else {
-					wrappedLog.Sugar().Infof("pod %s/%s is not a static Pod. the IPPool.Status.AllocatedIPs %s in IPPool %s should be reclaimed. ", podNS, podName, poolIP, pool.Name)
-					flagPodStatusShouldGCIP = true
+					if podYaml.DeletionTimestamp != nil {
+						wrappedLog.Sugar().Infof("Pod %s/%s has been deleting. compare the graceful deletion period if it is over and handle the IP %s in IPPool %s", podNS, podName, poolIP, pool.Name)
+						flagPodStatusShouldGCIP, flagTracePodEntry = s.shouldTraceOrReclaimIPInDeletionTimeStampPod(scanAllLogger, podYaml, shouldGcstatelessTerminatingPod)
+					}
 				}
 			case podYaml.Status.Phase == corev1.PodPending:
 				// PodPending means the pod has been accepted by the system, but one or more of the containers
@@ -203,24 +216,7 @@ func (s *SpiderGC) executeScanAll(ctx context.Context) {
 				scanAllLogger.Sugar().Debugf("The Pod %s/%s status is %s , and the IP %s should not be reclaimed", podNS, podName, podYaml.Status.Phase, poolIP)
 				flagPodStatusShouldGCIP = false
 			case podYaml.DeletionTimestamp != nil:
-				podTracingGracefulTime := (time.Duration(*podYaml.DeletionGracePeriodSeconds) + time.Duration(s.gcConfig.AdditionalGraceDelay)) * time.Second
-				podTracingStopTime := podYaml.DeletionTimestamp.Time.Add(podTracingGracefulTime)
-				if time.Now().UTC().After(podTracingStopTime) {
-					scanAllLogger.Sugar().Infof("the graceful deletion period of pod '%s/%s' is over, try to reclaim the IP %s in the IPPool %s.", podNS, podName, poolIP, pool.Name)
-					flagPodStatusShouldGCIP = true
-				} else {
-					wrappedLog := scanAllLogger.With(zap.String("gc-reason", "The graceful deletion period of kubernetes Pod has not yet ended"))
-					if len(podYaml.Status.PodIPs) != 0 {
-						wrappedLog.Sugar().Infof("pod %s/%s still holds the IP address %v. try to track it through trace GC.", podNS, podName, podYaml.Status.PodIPs)
-						flagPodStatusShouldGCIP = false
-						// The graceful deletion period of kubernetes Pod has not yet ended, and the Pod's already has an IP address. Let trace_worker track and recycle the IP in time.
-						// In addition, avoid that all trace data is blank when the controller is just started.
-						flagTracePodEntry = true
-					} else {
-						wrappedLog.Sugar().Infof("pod %s/%s IP has been reclaimed, try to reclaim the IP %s in IPPool %s", podNS, podName, poolIP, pool.Name)
-						flagPodStatusShouldGCIP = true
-					}
-				}
+				flagPodStatusShouldGCIP, flagTracePodEntry = s.shouldTraceOrReclaimIPInDeletionTimeStampPod(scanAllLogger, podYaml, shouldGcstatelessTerminatingPod)
 			default:
 				wrappedLog := scanAllLogger.With(zap.String("gc-reason", fmt.Sprintf("The current state of the Pod %s/%s is: %v", podNS, podName, podYaml.Status.Phase)))
 				if len(podYaml.Status.PodIPs) != 0 {
@@ -427,3 +423,54 @@ func (s *SpiderGC) isValidStatefulsetOrKubevirt(ctx context.Context, logger *zap
 
 	return false, nil
 }
+
+func (s *SpiderGC) isShouldGCOrTraceStatelessTerminatingPodOnNode(ctx context.Context, pod *corev1.Pod) (bool, error) {
+	// check terminating Pod corresponding Node status
+	node, err := s.nodeMgr.GetNodeByName(ctx, pod.Spec.NodeName, constant.UseCache)
+	if err != nil {
+		return false, fmt.Errorf("failed to get terminating Pod '%s/%s' corredponing Node '%s', error: %v", pod.Namespace, pod.Name, pod.Spec.NodeName, err)
+	}
+
+	// disable for gc terminating pod with Node Ready
+	if nodemanager.IsNodeReady(node) && !s.gcConfig.EnableGCStatelessTerminatingPodOnReadyNode {
+		logger.Sugar().Debugf("IP GC already turn off 'EnableGCForTerminatingPodWithNodeReady' configuration, disacrd tracing pod '%s/%s'", pod.Namespace, pod.Name)
+		return false, nil
+	}
+	// disable for gc terminating pod with Node NotReady
+	if !nodemanager.IsNodeReady(node) && !s.gcConfig.EnableGCStatelessTerminatingPodOnNotReadyNode {
+		logger.Sugar().Debugf("IP GC already turn off 'EnableGCForTerminatingPodWithNodeNotReady' configuration, disacrd tracing pod '%s/%s'", pod.Namespace, pod.Name)
+		return false, nil
+	}
+
+	return true, nil
+}
+
+// shouldTraceOrReclaimIPInDeletionTimeStampPod check the deletion timestamp of the pod
+// If the deletion timestamp of the pod is over, try to reclaim the IP
+// If the deletion timestamp of the pod is not over and the pod still holds an IP, try to track the IP
+// or the pod has no IP, try to reclaim the IP
+func (s *SpiderGC) shouldTraceOrReclaimIPInDeletionTimeStampPod(scanAllLogger *zap.Logger, pod *corev1.Pod, shouldGcOrTraceStatelessTerminatingPod bool) (bool, bool) {
+	flagPodStatusShouldGCIP, flagTracePodEntry := false, false
+
+	podTracingGracefulTime := (time.Duration(*pod.DeletionGracePeriodSeconds) + time.Duration(s.gcConfig.AdditionalGraceDelay)) * time.Second
+	podTracingStopTime := pod.DeletionTimestamp.Time.Add(podTracingGracefulTime)
+	if time.Now().UTC().After(podTracingStopTime) {
+		scanAllLogger.Sugar().Infof("the graceful deletion period of pod '%s/%s' is over, try to reclaim the IP %s ", pod.Namespace, pod.Name, &pod.Status.PodIPs)
+		if shouldGcOrTraceStatelessTerminatingPod {
+			flagPodStatusShouldGCIP = true
+		}
+		return flagPodStatusShouldGCIP, flagTracePodEntry
+	}
+	wrappedLog := scanAllLogger.With(zap.String("gc-reason", "The graceful deletion period of kubernetes Pod has not yet ended"))
+	if len(pod.Status.PodIPs) != 0 {
+		wrappedLog.Sugar().Infof("pod %s/%s still holds the IP address %v. try to track it through trace GC.", pod.Namespace, pod.Name, pod.Status.PodIPs)
+		// The graceful deletion period of kubernetes Pod has not yet ended, and the Pod's already has an IP address. Let trace_worker track and recycle the IP in time.
+		// In addition, avoid that all trace data is blank when the controller is just started.
+		flagTracePodEntry = true
+	} else {
+		wrappedLog.Sugar().Infof("pod %s/%s IP has been reclaimed, try to reclaim the IP %s", pod.Namespace, pod.Name, pod.Status.PodIPs)
+		flagPodStatusShouldGCIP = true
+	}
+
+	return flagPodStatusShouldGCIP, flagTracePodEntry
+}
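
The new shouldTraceOrReclaimIPInDeletionTimeStampPod helper centralizes the grace-period arithmetic that executeScanAll previously inlined. The rough standalone sketch below uses invented names and assumes, as the diff does, that the cutoff is DeletionTimestamp plus DeletionGracePeriodSeconds plus AdditionalGraceDelay, and that reclaiming after the cutoff remains subject to the node-readiness gate.

package main

import (
	"fmt"
	"time"
)

// decideForTerminatingPod sketches the branching of
// shouldTraceOrReclaimIPInDeletionTimeStampPod: once the deletion timestamp plus
// the graceful period (plus the additional delay) has passed, the IP may be
// reclaimed if the node-readiness gate allows it; before that, a Pod that still
// holds an IP is handed to the trace worker, and a Pod without an IP is reclaimed.
func decideForTerminatingPod(deletionTime time.Time, graceSeconds, additionalDelaySeconds int64,
	holdsIP, nodeGateAllowsGC bool, now time.Time) (gcIP, trace bool) {

	stop := deletionTime.Add(time.Duration(graceSeconds+additionalDelaySeconds) * time.Second)
	if now.After(stop) {
		// the graceful deletion period is over: reclaim only if the gate allows it
		return nodeGateAllowsGC, false
	}
	if holdsIP {
		// still inside the grace period and the Pod still owns an IP: trace it
		return false, true
	}
	// inside the grace period but the Pod reports no IP: reclaim it
	return true, false
}

func main() {
	deletedAt := time.Now().UTC().Add(-90 * time.Second)
	gcIP, trace := decideForTerminatingPod(deletedAt, 30, 5, false, true, time.Now().UTC())
	fmt.Println(gcIP, trace) // true false: the 35s window expired long ago
}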
