Skip to content
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions hcloud/cloud_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
"github.com/syself/hetzner-cloud-controller-manager/internal/annotation"
"github.com/syself/hetzner-cloud-controller-manager/internal/credentials"
"github.com/syself/hetzner-cloud-controller-manager/internal/hcops"
robotclient "github.com/syself/hetzner-cloud-controller-manager/internal/robot/client"
hrobot "github.com/syself/hrobot-go"
"github.com/syself/hrobot-go/models"
corev1 "k8s.io/api/core/v1"
Expand All @@ -44,7 +45,7 @@ type testEnv struct {
Server *httptest.Server
Mux *http.ServeMux
Client *hcloud.Client
RobotClient hrobot.RobotClient
RobotClient robotclient.Client
}

func (env *testEnv) Teardown() {
Expand All @@ -70,7 +71,7 @@ func newTestEnv() testEnv {
Server: server,
Mux: mux,
Client: client,
RobotClient: robotClient,
RobotClient: robotclient.New(robotClient),
}
}

Expand Down
123 changes: 123 additions & 0 deletions hcloud/instances_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (

"github.com/hetznercloud/hcloud-go/v2/hcloud"
"github.com/hetznercloud/hcloud-go/v2/hcloud/schema"
"github.com/syself/hetzner-cloud-controller-manager/internal/robot/client/cache"
"github.com/syself/hrobot-go/models"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -173,6 +174,128 @@ func TestInstances_InstanceExists(t *testing.T) {
}
}

func TestInstances_InstanceExistsRobotServerCreatedAfterCacheFill(t *testing.T) {
env := newTestEnv()
defer env.Teardown()

resetEnv := Setenv(t,
"ROBOT_USER_NAME", "user",
"ROBOT_PASSWORD", "pass",
"CACHE_TIMEOUT", "1h",
)
defer resetEnv()

// servers backs the Robot list response and is mutated during the test.
servers := make([]models.Server, 0, 2)
servers = append(servers, models.Server{
ServerIP: "123.123.123.123",
ServerIPv6Net: "2a01:f48:111:4221::",
ServerNumber: 321,
Name: "bm-existing",
})
env.Mux.HandleFunc("/robot/server", func(w http.ResponseWriter, _ *http.Request) {
responses := make([]models.ServerResponse, 0, len(servers))
for _, server := range servers {
responses = append(responses, models.ServerResponse{Server: server})
}
json.NewEncoder(w).Encode(responses)
})

robotClient, err := cache.NewCachedRobotClient(t.TempDir(), env.Server.Client(), env.Server.URL+"/robot")
if err != nil {
t.Fatalf("Unexpected error creating cached robot client: %v", err)
}

instances := newInstances(env.Client, robotClient, AddressFamilyIPv4, 0)

// Warm the cache while bm-new does not exist yet.
exists, err := instances.InstanceExists(context.TODO(), &corev1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "bm-existing"},
})
if err != nil {
t.Fatalf("Unexpected error warming cache: %v", err)
}
if !exists {
t.Fatal("Expected bm-existing to exist")
}

servers = append(servers, models.Server{
ServerIP: "123.123.123.124",
ServerIPv6Net: "2a01:f48:111:4222::",
ServerNumber: 322,
Name: "bm-new",
})

exists, err = instances.InstanceExists(context.TODO(), &corev1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "bm-new"},
})
if err != nil {
t.Fatalf("Unexpected error for bm-new: %v", err)
}
if !exists {
t.Fatal("Expected bm-new to exist after it was created")
}
}

func TestInstances_InstanceExistsRobotServerRepeatedMissingNameSkipsForceRefresh(t *testing.T) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand this test

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added this comment:

	// If a node name is not in the cache, then only on the time the cache should be refreshed. A
	// second time (during the time of CACHE_TIMEOUT), the unknown node name should not trigger a
	// cache refresh again.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you just check twice the same thing. How does that fit to the description? I still don't understand it.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@janiskemper I updated the test and added coments: 4cfba46

env := newTestEnv()
defer env.Teardown()

resetEnv := Setenv(t,
"ROBOT_USER_NAME", "user",
"ROBOT_PASSWORD", "pass",
"CACHE_TIMEOUT", "1h",
)
defer resetEnv()

robotServerListCalls := 0
env.Mux.HandleFunc("/robot/server", func(w http.ResponseWriter, _ *http.Request) {
robotServerListCalls++
json.NewEncoder(w).Encode([]models.ServerResponse{
{
Server: models.Server{
ServerIP: "123.123.123.123",
ServerIPv6Net: "2a01:f48:111:4221::",
ServerNumber: 321,
Name: "bm-existing",
},
},
})
})

robotClient, err := cache.NewCachedRobotClient(t.TempDir(), env.Server.Client(), env.Server.URL+"/robot")
if err != nil {
t.Fatalf("Unexpected error creating cached robot client: %v", err)
}

instances := newInstances(env.Client, robotClient, AddressFamilyIPv4, 0)
node := &corev1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "bm-missing"},
}

exists, err := instances.InstanceExists(context.TODO(), node)
if err != nil {
t.Fatalf("Unexpected error on first miss: %v", err)
}
if exists {
t.Fatal("Expected bm-missing to be absent on first lookup")
}
if robotServerListCalls != 2 {
t.Fatalf("Expected 2 Robot list calls after first miss, got %d", robotServerListCalls)
}

exists, err = instances.InstanceExists(context.TODO(), node)
if err != nil {
t.Fatalf("Unexpected error on second miss: %v", err)
}
if exists {
t.Fatal("Expected bm-missing to be absent on second lookup")
}
if robotServerListCalls != 2 {
t.Fatalf("Expected repeated miss to skip force refresh, got %d Robot list calls", robotServerListCalls)
}
}

func TestInstances_InstanceShutdown(t *testing.T) {
env := newTestEnv()
defer env.Teardown()
Expand Down
22 changes: 20 additions & 2 deletions hcloud/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,29 @@ func getRobotServerByName(c robotclient.Client, node *corev1.Node) (server *mode

for i, s := range serverList {
if s.Name == node.Name {
server = &serverList[i]
return &serverList[i], nil
}
}

return server, nil
// CAPH changes the Robot server name during provisioning. This means the cache of the
// server-name-to-server-ID mapping could be outdated. Force one uncached Robot list reload for
// that name to handle the rename. The Robot client suppresses repeated forced refreshes for the
// same missing name until the normal Robot list cache timeout has elapsed.
serverList, err = c.ServerGetListForceRefresh(string(node.Name))
if err != nil {
hcops.HandleRateLimitExceededError(err, node)
return nil, fmt.Errorf("%s: force refresh after cache miss: %w", op, err)
}

for i, s := range serverList {
if s.Name == node.Name {
// Server was found after cache refresh
return &serverList[i], nil
}
}

// No server found.
return nil, nil
}

func getRobotServerByID(c robotclient.Client, id int, node *corev1.Node) (s *models.Server, e error) {
Expand Down
4 changes: 4 additions & 0 deletions internal/mocks/robot.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ func (m *RobotClient) ServerGetList() ([]models.Server, error) {
return getRobotServers(args, 0), args.Error(1)
}

func (m *RobotClient) ServerGetListForceRefresh(_ string) ([]models.Server, error) {
return m.ServerGetList()
}

func (m *RobotClient) SetCredentials(_, _ string) error {
args := m.Called()
return args.Error(3)
Expand Down
26 changes: 26 additions & 0 deletions internal/robot/client/adapter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package client

import (
hrobot "github.com/syself/hrobot-go"
"github.com/syself/hrobot-go/models"
)

var _ Client = &adapter{}

type adapter struct {
hrobot.RobotClient
}

// New wraps a plain Robot client so it satisfies the local Client interface.
func New(robotClient hrobot.RobotClient) Client {
if robotClient == nil {
return nil
}
return &adapter{RobotClient: robotClient}
}

// ServerGetListForceRefresh falls back to the plain list call because the
// uncached behavior only exists in the cache-backed client implementation.
func (a *adapter) ServerGetListForceRefresh(_ string) ([]models.Server, error) {
return a.ServerGetList()
}
77 changes: 73 additions & 4 deletions internal/robot/client/cache/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const (
robotUserNameENVVar = "ROBOT_USER_NAME"
robotPasswordENVVar = "ROBOT_PASSWORD"
cacheTimeoutENVVar = "CACHE_TIMEOUT"
defaultCacheTimeout = 5 * time.Minute
)

var _ robotclient.Client = &cacheRobotClient{}
Expand All @@ -27,10 +28,16 @@ type cacheRobotClient struct {
timeout time.Duration

lastUpdate time.Time
now func() time.Time

// cache
l []models.Server
m map[int]*models.Server

// forcedRefreshServerNames stores when a node name last triggered a forced Robot server list
// refresh. While that timestamp is still within the cache timeout window, repeated lookups for
// the same missing server name skip the extra uncached Robot API call.
forcedRefreshServerNames map[string]time.Time
}

// NewCachedRobotClient creates a new robot client with caching enabled.
Expand All @@ -51,7 +58,7 @@ func NewCachedRobotClient(rootDir string, httpClient *http.Client, baseURL strin
}

if cacheTimeout == 0 {
cacheTimeout = 5 * time.Minute
cacheTimeout = defaultCacheTimeout
}

credentialsDir := credentials.GetDirectory(rootDir)
Expand Down Expand Up @@ -85,6 +92,7 @@ func NewCachedRobotClient(rootDir string, httpClient *http.Client, baseURL strin
handler := &cacheRobotClient{}
handler.timeout = cacheTimeout
handler.robotClient = c
handler.now = time.Now
return handler, nil
}

Expand All @@ -105,7 +113,7 @@ func (c *cacheRobotClient) ServerGet(id int) (*models.Server, error) {
}

// set time of last update
c.lastUpdate = time.Now()
c.lastUpdate = c.currentTime()
}

server, found := c.m[id]
Expand Down Expand Up @@ -134,30 +142,91 @@ func (c *cacheRobotClient) ServerGetList() ([]models.Server, error) {
}

// set time of last update
c.lastUpdate = time.Now()
c.lastUpdate = c.currentTime()
}

return c.l, nil
}

// ServerGetListForceRefresh invalidates the current cache and reloads the list
// from Robot unless nodeName already triggered a forced refresh within the
// current timeout window.
func (c *cacheRobotClient) ServerGetListForceRefresh(nodeName string) ([]models.Server, error) {
if nodeName != "" && c.nodeHasAlreadyForcedRefresh(nodeName) {
return c.ServerGetList()
}

c.m = nil

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a comment that you do this in order to delete the cache and to call the actual API?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

part of 57538d0

list, err := c.ServerGetList()
if err != nil {
return nil, err
}
if nodeName != "" {
c.nodeTriggeredForcedRefresh(nodeName)
}
return list, nil
}

// nodeHasAlreadyForcedRefresh reports whether nodeName already triggered a
// forced refresh within the current cache timeout window.
func (c *cacheRobotClient) nodeHasAlreadyForcedRefresh(nodeName string) bool {
if c.forcedRefreshServerNames == nil {
return false
}

forcedAt, found := c.forcedRefreshServerNames[nodeName]
if !found {
return false
}
if c.currentTime().After(forcedAt.Add(c.forceRefreshTimeout())) {
delete(c.forcedRefreshServerNames, nodeName)
return false
}
return true
}

// nodeTriggeredForcedRefresh records that nodeName already triggered a forced
// refresh.
func (c *cacheRobotClient) nodeTriggeredForcedRefresh(nodeName string) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you inline this function? I find it confusing that it is not. AFAIK it's only used in one place.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

part of 57538d0

if c.forcedRefreshServerNames == nil {
c.forcedRefreshServerNames = make(map[string]time.Time)
}
c.forcedRefreshServerNames[nodeName] = c.currentTime()
}

func (c *cacheRobotClient) shouldSync() bool {
// map is nil means we have no cached value yet
if c.m == nil {
c.m = make(map[int]*models.Server)
return true
}
if time.Now().After(c.lastUpdate.Add(c.timeout)) {
if c.currentTime().After(c.lastUpdate.Add(c.timeout)) {
return true
}
return false
}

func (c *cacheRobotClient) currentTime() time.Time {
if c.now == nil {
return time.Now()
}
return c.now()
}

func (c *cacheRobotClient) forceRefreshTimeout() time.Duration {
if c.timeout == 0 {
return defaultCacheTimeout
}
return c.timeout
}

func (c *cacheRobotClient) SetCredentials(username, password string) error {
err := c.robotClient.SetCredentials(username, password)
if err != nil {
return err
}
// The credentials have been updated, so we need to invalidate the cache.
c.m = nil
c.forcedRefreshServerNames = nil
return nil
}
Loading
Loading