Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 51 additions & 25 deletions pkg/gcmanager/pod_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ import (
"github.com/spidernet-io/spiderpool/pkg/constant"
"github.com/spidernet-io/spiderpool/pkg/lock"
"github.com/spidernet-io/spiderpool/pkg/logutils"
"github.com/spidernet-io/spiderpool/pkg/nodemanager"
"github.com/spidernet-io/spiderpool/pkg/types"
)

Expand Down Expand Up @@ -125,7 +124,9 @@ func (p *PodDatabase) ApplyPodEntry(podEntry *PodEntry) error {
return nil
}

// buildPodEntry will build PodEntry with the given args, it serves for Pod Informer event hooks
// buildPodEntry will build PodEntry with the given args, it serves for Pod Informer event hooks and scanAll
// for Pod Informer event hooks, if the podEntry is nil, we will not trace it
// for scanAll, if the podEntry is nil, we will not GC its IP
func (s *SpiderGC) buildPodEntry(oldPod, currentPod *corev1.Pod, deleted bool) (*PodEntry, error) {
if currentPod == nil {
return nil, fmt.Errorf("currentPod must be specified")
Expand All @@ -136,29 +137,25 @@ func (s *SpiderGC) buildPodEntry(oldPod, currentPod *corev1.Pod, deleted bool) (
return nil, nil
}

ownerRef := metav1.GetControllerOf(currentPod)
ctx := context.TODO()

// check StatefulSet pod, we will trace it if its controller StatefulSet object was deleted or decreased its replicas and the pod index was out of the replicas.
if s.gcConfig.EnableStatefulSet && ownerRef != nil &&
ownerRef.APIVersion == appsv1.SchemeGroupVersion.String() && ownerRef.Kind == constant.KindStatefulSet {
isValidStsPod, err := s.stsMgr.IsValidStatefulSetPod(ctx, currentPod.Namespace, currentPod.Name, ownerRef.Kind)
if nil != err {
// check StatefulSet pod, we will trace it if its controller StatefulSet object was deleted or decreased
// its replicas and the pod index was out of the replicas.
if s.gcConfig.EnableStatefulSet {
isValidStsPod, err := s.isValidStatefulSetPod(ctx, currentPod)
if err != nil {
return nil, err
}

// StatefulSet pod restarted, no need to trace it.
if isValidStsPod {
logger.Sugar().Debugf("the StatefulSet pod '%s/%s' just restarts, keep its IPs", currentPod.Namespace, currentPod.Name)
return nil, nil
}
}

// check kubevirt vm pod, we will trace it if its controller is no longer exist
if s.gcConfig.EnableKubevirtStaticIP && ownerRef != nil &&
ownerRef.APIVersion == kubevirtv1.SchemeGroupVersion.String() && ownerRef.Kind == constant.KindKubevirtVMI {
isValidVMPod, err := s.kubevirtMgr.IsValidVMPod(logutils.IntoContext(ctx, logger), currentPod.Namespace, ownerRef.Kind, ownerRef.Name)
if nil != err {
if s.gcConfig.EnableKubevirtStaticIP {
isValidVMPod, err := s.isValidKubevirtVMIPod(ctx, currentPod)
if err != nil {
return nil, err
}

Expand Down Expand Up @@ -227,18 +224,12 @@ func (s *SpiderGC) buildPodEntry(oldPod, currentPod *corev1.Pod, deleted bool) (

if isBuildTerminatingPodEntry {
// check terminating Pod corresponding Node status
node, err := s.nodeMgr.GetNodeByName(ctx, currentPod.Spec.NodeName, constant.UseCache)
if nil != err {
return nil, fmt.Errorf("failed to get terminating Pod '%s/%s' corredponing Node '%s', error: %v", currentPod.Namespace, currentPod.Name, currentPod.Spec.NodeName, err)
}
// disable for gc terminating pod with Node Ready
if nodemanager.IsNodeReady(node) && !s.gcConfig.EnableGCStatelessTerminatingPodOnReadyNode {
logger.Sugar().Debugf("IP GC already turn off 'EnableGCForTerminatingPodWithNodeReady' configuration, disacrd tracing pod '%s/%s'", currentPod.Namespace, currentPod.Name)
return nil, nil
enabled, err := s.isShouldGCOrTraceStatelessTerminatingPodOnNode(ctx, currentPod)
if err != nil {
return nil, err
}
// disable for gc terminating pod with Node NotReady
if !nodemanager.IsNodeReady(node) && !s.gcConfig.EnableGCStatelessTerminatingPodOnNotReadyNode {
logger.Sugar().Debugf("IP GC already turn off 'EnableGCForTerminatingPodWithNodeNotReady' configuration, disacrd tracing pod '%s/%s'", currentPod.Namespace, currentPod.Name)

if !enabled {
return nil, nil
}

Expand Down Expand Up @@ -331,3 +322,38 @@ func (s *SpiderGC) computeSucceededOrFailedPodTerminatingTime(podYaml *corev1.Po
terminatingStopTime = terminatingStartTime.Add(gracefulTime)
return
}

// isValidStatefulSetPod reports whether the given pod is still a valid
// StatefulSet replica. A StatefulSet pod is traced (its IPs GC'd) only when
// its controller StatefulSet was deleted, or the replicas were scaled down
// and the pod ordinal fell outside the replica range; in those cases this
// returns false. A pod not controlled by a StatefulSet also returns false.
func (s *SpiderGC) isValidStatefulSetPod(ctx context.Context, currentPod *corev1.Pod) (bool, error) {
	ownerRef := metav1.GetControllerOf(currentPod)
	// Only pods controlled by a StatefulSet are relevant here.
	if ownerRef == nil ||
		ownerRef.APIVersion != appsv1.SchemeGroupVersion.String() || ownerRef.Kind != constant.KindStatefulSet {
		return false, nil
	}

	isValid, err := s.stsMgr.IsValidStatefulSetPod(ctx, currentPod.Namespace, currentPod.Name, ownerRef.Kind)
	if err != nil {
		return false, err
	}
	// A valid StatefulSet pod is just restarting, so its IPs must be kept.
	return isValid, nil
}

// isValidKubevirtVMIPod reports whether the given pod is a kubevirt VM
// launcher pod whose controller VirtualMachineInstance still exists. A VM pod
// is traced (its IPs GC'd) only when its controller VMI no longer exists; a
// valid VM pod keeps its static IPs. A pod not controlled by a kubevirt VMI
// returns false.
func (s *SpiderGC) isValidKubevirtVMIPod(ctx context.Context, currentPod *corev1.Pod) (bool, error) {
	ownerRef := metav1.GetControllerOf(currentPod)
	// Only pods controlled by a kubevirt VirtualMachineInstance are relevant,
	// and only when the kubevirt static IP feature is enabled.
	// NOTE(review): the caller in buildPodEntry already gates on
	// EnableKubevirtStaticIP; this inner check is kept as a defensive guard.
	if !s.gcConfig.EnableKubevirtStaticIP || ownerRef == nil ||
		ownerRef.APIVersion != kubevirtv1.SchemeGroupVersion.String() || ownerRef.Kind != constant.KindKubevirtVMI {
		return false, nil
	}

	isValid, err := s.kubevirtMgr.IsValidVMPod(logutils.IntoContext(ctx, logger), currentPod.Namespace, ownerRef.Kind, ownerRef.Name)
	if err != nil {
		return false, err
	}
	return isValid, nil
}
87 changes: 67 additions & 20 deletions pkg/gcmanager/scanAll_IPPool.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/spidernet-io/spiderpool/pkg/constant"
spiderpoolv2beta1 "github.com/spidernet-io/spiderpool/pkg/k8s/apis/spiderpool.spidernet.io/v2beta1"
"github.com/spidernet-io/spiderpool/pkg/logutils"
"github.com/spidernet-io/spiderpool/pkg/nodemanager"
"github.com/spidernet-io/spiderpool/pkg/podmanager"
"github.com/spidernet-io/spiderpool/pkg/types"
"github.com/spidernet-io/spiderpool/pkg/utils/convert"
Expand Down Expand Up @@ -124,6 +125,7 @@ func (s *SpiderGC) executeScanAll(ctx context.Context) {
flagPodStatusShouldGCIP := false
flagTracePodEntry := false
flagStaticIPPod := false
shouldGcstatelessTerminatingPod := false
endpoint, endpointErr := s.wepMgr.GetEndpointByName(ctx, podNS, podName, constant.UseCache)
podYaml, podErr := s.podMgr.GetPodByName(ctx, podNS, podName, constant.UseCache)

Expand Down Expand Up @@ -170,6 +172,15 @@ func (s *SpiderGC) executeScanAll(ctx context.Context) {
continue
}

// check should handle podIP via corresponding Node status and global gc flag
if !flagStaticIPPod {
shouldGcstatelessTerminatingPod, err = s.isShouldGCOrTraceStatelessTerminatingPodOnNode(ctx, podYaml)
if err != nil {
scanAllLogger.Sugar().Errorf("failed to check pod %s/%s should trace, ignore handle IP %s, error: %v", podNS, podName, poolIP, err)
continue
}
}

// check the pod status
switch {
case podYaml.Status.Phase == corev1.PodSucceeded || podYaml.Status.Phase == corev1.PodFailed:
Expand All @@ -193,8 +204,10 @@ func (s *SpiderGC) executeScanAll(ctx context.Context) {
flagPodStatusShouldGCIP = true
}
} else {
wrappedLog.Sugar().Infof("pod %s/%s is not a static Pod. the IPPool.Status.AllocatedIPs %s in IPPool %s should be reclaimed. ", podNS, podName, poolIP, pool.Name)
flagPodStatusShouldGCIP = true
if podYaml.DeletionTimestamp != nil {
wrappedLog.Sugar().Infof("Pod %s/%s has been deleting. compare the graceful deletion period if it is over and handle the IP %s in IPPool %s", podNS, podName, poolIP, pool.Name)
flagPodStatusShouldGCIP, flagTracePodEntry = s.shouldTraceOrReclaimIPInDeletionTimeStampPod(scanAllLogger, podYaml, shouldGcstatelessTerminatingPod)
}
}
case podYaml.Status.Phase == corev1.PodPending:
// PodPending means the pod has been accepted by the system, but one or more of the containers
Expand All @@ -203,24 +216,7 @@ func (s *SpiderGC) executeScanAll(ctx context.Context) {
scanAllLogger.Sugar().Debugf("The Pod %s/%s status is %s , and the IP %s should not be reclaimed", podNS, podName, podYaml.Status.Phase, poolIP)
flagPodStatusShouldGCIP = false
case podYaml.DeletionTimestamp != nil:
podTracingGracefulTime := (time.Duration(*podYaml.DeletionGracePeriodSeconds) + time.Duration(s.gcConfig.AdditionalGraceDelay)) * time.Second
podTracingStopTime := podYaml.DeletionTimestamp.Time.Add(podTracingGracefulTime)
if time.Now().UTC().After(podTracingStopTime) {
scanAllLogger.Sugar().Infof("the graceful deletion period of pod '%s/%s' is over, try to reclaim the IP %s in the IPPool %s.", podNS, podName, poolIP, pool.Name)
flagPodStatusShouldGCIP = true
} else {
wrappedLog := scanAllLogger.With(zap.String("gc-reason", "The graceful deletion period of kubernetes Pod has not yet ended"))
if len(podYaml.Status.PodIPs) != 0 {
wrappedLog.Sugar().Infof("pod %s/%s still holds the IP address %v. try to track it through trace GC.", podNS, podName, podYaml.Status.PodIPs)
flagPodStatusShouldGCIP = false
// The graceful deletion period of kubernetes Pod has not yet ended, and the Pod's already has an IP address. Let trace_worker track and recycle the IP in time.
// In addition, avoid that all trace data is blank when the controller is just started.
flagTracePodEntry = true
} else {
wrappedLog.Sugar().Infof("pod %s/%s IP has been reclaimed, try to reclaim the IP %s in IPPool %s", podNS, podName, poolIP, pool.Name)
flagPodStatusShouldGCIP = true
}
}
flagPodStatusShouldGCIP, flagTracePodEntry = s.shouldTraceOrReclaimIPInDeletionTimeStampPod(scanAllLogger, podYaml, shouldGcstatelessTerminatingPod)
default:
wrappedLog := scanAllLogger.With(zap.String("gc-reason", fmt.Sprintf("The current state of the Pod %s/%s is: %v", podNS, podName, podYaml.Status.Phase)))
if len(podYaml.Status.PodIPs) != 0 {
Expand Down Expand Up @@ -427,3 +423,54 @@ func (s *SpiderGC) isValidStatefulsetOrKubevirt(ctx context.Context, logger *zap

return false, nil
}

// isShouldGCOrTraceStatelessTerminatingPodOnNode checks the Node that the
// terminating stateless pod runs on, and reports whether the pod's IP should
// be GC'd or traced according to the node readiness and the corresponding GC
// configuration switches. It returns an error when the node cannot be fetched.
func (s *SpiderGC) isShouldGCOrTraceStatelessTerminatingPodOnNode(ctx context.Context, pod *corev1.Pod) (bool, error) {
	// check terminating Pod corresponding Node status
	node, err := s.nodeMgr.GetNodeByName(ctx, pod.Spec.NodeName, constant.UseCache)
	if err != nil {
		return false, fmt.Errorf("failed to get terminating Pod '%s/%s' corresponding Node '%s', error: %v", pod.Namespace, pod.Name, pod.Spec.NodeName, err)
	}

	// evaluate readiness once instead of per branch
	nodeReady := nodemanager.IsNodeReady(node)

	// GC for stateless terminating pods on a Ready node is disabled
	if nodeReady && !s.gcConfig.EnableGCStatelessTerminatingPodOnReadyNode {
		logger.Sugar().Debugf("IP GC already turned off 'EnableGCStatelessTerminatingPodOnReadyNode' configuration, discard tracing pod '%s/%s'", pod.Namespace, pod.Name)
		return false, nil
	}
	// GC for stateless terminating pods on a NotReady node is disabled
	if !nodeReady && !s.gcConfig.EnableGCStatelessTerminatingPodOnNotReadyNode {
		logger.Sugar().Debugf("IP GC already turned off 'EnableGCStatelessTerminatingPodOnNotReadyNode' configuration, discard tracing pod '%s/%s'", pod.Namespace, pod.Name)
		return false, nil
	}

	return true, nil
}

// shouldTraceOrReclaimIPInDeletionTimeStampPod decides how to handle the IP
// of a pod that carries a DeletionTimestamp.
// It returns (shouldGCIP, shouldTrace):
//   - graceful deletion period over: reclaim the IP, but only when the
//     node-level switch shouldGcOrTraceStatelessTerminatingPod allows it.
//   - period not over and the pod still holds IPs: let the trace GC track it.
//   - period not over and the pod holds no IPs: reclaim the IP.
func (s *SpiderGC) shouldTraceOrReclaimIPInDeletionTimeStampPod(scanAllLogger *zap.Logger, pod *corev1.Pod, shouldGcOrTraceStatelessTerminatingPod bool) (bool, bool) {
	// DeletionGracePeriodSeconds is normally set by the API server together
	// with DeletionTimestamp, but guard against a nil pointer defensively.
	var graceSeconds int64
	if pod.DeletionGracePeriodSeconds != nil {
		graceSeconds = *pod.DeletionGracePeriodSeconds
	}
	podTracingGracefulTime := (time.Duration(graceSeconds) + time.Duration(s.gcConfig.AdditionalGraceDelay)) * time.Second
	podTracingStopTime := pod.DeletionTimestamp.Time.Add(podTracingGracefulTime)

	if time.Now().UTC().After(podTracingStopTime) {
		// fix: log the slice value with %v, not its address (&pod.Status.PodIPs)
		scanAllLogger.Sugar().Infof("the graceful deletion period of pod '%s/%s' is over, try to reclaim the IP %v", pod.Namespace, pod.Name, pod.Status.PodIPs)
		return shouldGcOrTraceStatelessTerminatingPod, false
	}

	wrappedLog := scanAllLogger.With(zap.String("gc-reason", "The graceful deletion period of kubernetes Pod has not yet ended"))
	if len(pod.Status.PodIPs) != 0 {
		wrappedLog.Sugar().Infof("pod %s/%s still holds the IP address %v. try to track it through trace GC.", pod.Namespace, pod.Name, pod.Status.PodIPs)
		// The graceful deletion period of the kubernetes Pod has not yet ended,
		// and the Pod already has an IP address. Let trace_worker track and
		// recycle the IP in time. In addition, avoid that all trace data is
		// blank when the controller is just started.
		return false, true
	}

	wrappedLog.Sugar().Infof("pod %s/%s IP has been reclaimed, try to reclaim the IP %v", pod.Namespace, pod.Name, pod.Status.PodIPs)
	return true, false
}
Loading