Skip to content

Commit 3742073

Browse files
committed
fix(robot): refresh stale cache on bare-metal name lookup miss
1 parent 0dfac3c commit 3742073

File tree

7 files changed

+133
-38
lines changed

7 files changed

+133
-38
lines changed

hcloud/cloud_test.go

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import (
3434
"github.com/syself/hetzner-cloud-controller-manager/internal/annotation"
3535
"github.com/syself/hetzner-cloud-controller-manager/internal/credentials"
3636
"github.com/syself/hetzner-cloud-controller-manager/internal/hcops"
37+
robotclient "github.com/syself/hetzner-cloud-controller-manager/internal/robot/client"
3738
hrobot "github.com/syself/hrobot-go"
3839
"github.com/syself/hrobot-go/models"
3940
corev1 "k8s.io/api/core/v1"
@@ -44,7 +45,15 @@ type testEnv struct {
4445
Server *httptest.Server
4546
Mux *http.ServeMux
4647
Client *hcloud.Client
47-
RobotClient hrobot.RobotClient
48+
RobotClient robotclient.Client
49+
}
50+
51+
type testRobotClient struct {
52+
hrobot.RobotClient
53+
}
54+
55+
func (c testRobotClient) ServerGetListForceRefresh() ([]models.Server, error) {
56+
return c.ServerGetList()
4857
}
4958

5059
func (env *testEnv) Teardown() {
@@ -70,7 +79,7 @@ func newTestEnv() testEnv {
7079
Server: server,
7180
Mux: mux,
7281
Client: client,
73-
RobotClient: robotClient,
82+
RobotClient: testRobotClient{RobotClient: robotClient},
7483
}
7584
}
7685

hcloud/instances.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,12 @@ type instances struct {
4949
var errServerNotFound = fmt.Errorf("server not found")
5050

5151
func newInstances(client *hcloud.Client, robotClient robotclient.Client, addressFamily addressFamily, networkID int64) *instances {
52-
return &instances{client, robotClient, addressFamily, networkID}
52+
return &instances{
53+
client: client,
54+
robotClient: robotClient,
55+
addressFamily: addressFamily,
56+
networkID: networkID,
57+
}
5358
}
5459

5560
// lookupServer attempts to locate the corresponding hcloud.Server or models.Server (robot server) for a given v1.Node.

hcloud/instances_test.go

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626

2727
"github.com/hetznercloud/hcloud-go/v2/hcloud"
2828
"github.com/hetznercloud/hcloud-go/v2/hcloud/schema"
29+
"github.com/syself/hetzner-cloud-controller-manager/internal/robot/client/cache"
2930
"github.com/syself/hrobot-go/models"
3031
corev1 "k8s.io/api/core/v1"
3132
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -173,6 +174,69 @@ func TestInstances_InstanceExists(t *testing.T) {
173174
}
174175
}
175176

177+
func TestInstances_InstanceExistsRobotServerCreatedAfterCacheFill(t *testing.T) {
178+
env := newTestEnv()
179+
defer env.Teardown()
180+
181+
resetEnv := Setenv(t,
182+
"ROBOT_USER_NAME", "user",
183+
"ROBOT_PASSWORD", "pass",
184+
"CACHE_TIMEOUT", "1h",
185+
)
186+
defer resetEnv()
187+
188+
// servers backs the Robot list response and is mutated during the test.
189+
servers := make([]models.Server, 0, 2)
190+
servers = append(servers, models.Server{
191+
ServerIP: "123.123.123.123",
192+
ServerIPv6Net: "2a01:f48:111:4221::",
193+
ServerNumber: 321,
194+
Name: "bm-existing",
195+
})
196+
env.Mux.HandleFunc("/robot/server", func(w http.ResponseWriter, _ *http.Request) {
197+
responses := make([]models.ServerResponse, 0, len(servers))
198+
for _, server := range servers {
199+
responses = append(responses, models.ServerResponse{Server: server})
200+
}
201+
json.NewEncoder(w).Encode(responses)
202+
})
203+
204+
robotClient, err := cache.NewCachedRobotClient(t.TempDir(), env.Server.Client(), env.Server.URL+"/robot")
205+
if err != nil {
206+
t.Fatalf("Unexpected error creating cached robot client: %v", err)
207+
}
208+
209+
instances := newInstances(env.Client, robotClient, AddressFamilyIPv4, 0)
210+
211+
// Warm the cache while bm-new does not exist yet.
212+
exists, err := instances.InstanceExists(context.TODO(), &corev1.Node{
213+
ObjectMeta: metav1.ObjectMeta{Name: "bm-existing"},
214+
})
215+
if err != nil {
216+
t.Fatalf("Unexpected error warming cache: %v", err)
217+
}
218+
if !exists {
219+
t.Fatal("Expected bm-existing to exist")
220+
}
221+
222+
servers = append(servers, models.Server{
223+
ServerIP: "123.123.123.124",
224+
ServerIPv6Net: "2a01:f48:111:4222::",
225+
ServerNumber: 322,
226+
Name: "bm-new",
227+
})
228+
229+
exists, err = instances.InstanceExists(context.TODO(), &corev1.Node{
230+
ObjectMeta: metav1.ObjectMeta{Name: "bm-new"},
231+
})
232+
if err != nil {
233+
t.Fatalf("Unexpected error for bm-new: %v", err)
234+
}
235+
if !exists {
236+
t.Fatal("Expected bm-new to exist after it was created")
237+
}
238+
}
239+
176240
func TestInstances_InstanceShutdown(t *testing.T) {
177241
env := newTestEnv()
178242
defer env.Teardown()

hcloud/util.go

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,13 +71,18 @@ func getRobotServerByName(c robotclient.Client, node *corev1.Node) (server *mode
7171
return nil, fmt.Errorf("%s: %w", op, err)
7272
}
7373

74-
for i, s := range serverList {
75-
if s.Name == node.Name {
76-
server = &serverList[i]
77-
}
74+
server = findRobotServerByName(serverList, string(node.Name))
75+
if server != nil {
76+
return server, nil
7877
}
7978

80-
return server, nil
79+
serverList, err = c.ServerGetListForceRefresh()
80+
if err != nil {
81+
hcops.HandleRateLimitExceededError(err, node)
82+
return nil, fmt.Errorf("%s: force refresh after cache miss: %w", op, err)
83+
}
84+
85+
return findRobotServerByName(serverList, string(node.Name)), nil
8186
}
8287

8388
func getRobotServerByID(c robotclient.Client, id int, node *corev1.Node) (s *models.Server, e error) {
@@ -116,6 +121,15 @@ func getRobotServerByID(c robotclient.Client, id int, node *corev1.Node) (s *mod
116121
return server, nil
117122
}
118123

124+
func findRobotServerByName(serverList []models.Server, name string) *models.Server {
125+
for i, s := range serverList {
126+
if s.Name == name {
127+
return &serverList[i]
128+
}
129+
}
130+
return nil
131+
}
132+
119133
func isHCloudServerByName(name string) bool {
120134
return !strings.HasPrefix(name, hostNamePrefixRobot)
121135
}

internal/mocks/robot.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ func (m *RobotClient) ServerGetList() ([]models.Server, error) {
1414
return getRobotServers(args, 0), args.Error(1)
1515
}
1616

17+
func (m *RobotClient) ServerGetListForceRefresh() ([]models.Server, error) {
18+
return m.ServerGetList()
19+
}
20+
1721
func (m *RobotClient) SetCredentials(_, _ string) error {
1822
args := m.Called()
1923
return args.Error(3)

internal/robot/client/cache/client.go

Lines changed: 28 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -90,22 +90,9 @@ func NewCachedRobotClient(rootDir string, httpClient *http.Client, baseURL strin
9090

9191
func (c *cacheRobotClient) ServerGet(id int) (*models.Server, error) {
9292
if c.shouldSync() {
93-
list, err := c.robotClient.ServerGetList()
94-
if err != nil {
93+
if _, err := c.sync(); err != nil {
9594
return nil, err
9695
}
97-
98-
// populate list
99-
c.l = list
100-
101-
// remove all entries from map and populate it freshly
102-
c.m = make(map[int]*models.Server)
103-
for i, server := range list {
104-
c.m[server.ServerNumber] = &list[i]
105-
}
106-
107-
// set time of last update
108-
c.lastUpdate = time.Now()
10996
}
11097

11198
server, found := c.m[id]
@@ -119,27 +106,17 @@ func (c *cacheRobotClient) ServerGet(id int) (*models.Server, error) {
119106

120107
func (c *cacheRobotClient) ServerGetList() ([]models.Server, error) {
121108
if c.shouldSync() {
122-
list, err := c.robotClient.ServerGetList()
123-
if err != nil {
124-
return list, err
125-
}
126-
127-
// populate list
128-
c.l = list
129-
130-
// remove all entries from map and populate it freshly
131-
c.m = make(map[int]*models.Server)
132-
for i, server := range list {
133-
c.m[server.ServerNumber] = &list[i]
134-
}
135-
136-
// set time of last update
137-
c.lastUpdate = time.Now()
109+
return c.sync()
138110
}
139111

140112
return c.l, nil
141113
}
142114

115+
// ServerGetListForceRefresh bypasses the timeout check and reloads the cache from Robot.
116+
func (c *cacheRobotClient) ServerGetListForceRefresh() ([]models.Server, error) {
117+
return c.sync()
118+
}
119+
143120
func (c *cacheRobotClient) shouldSync() bool {
144121
// map is nil means we have no cached value yet
145122
if c.m == nil {
@@ -161,3 +138,24 @@ func (c *cacheRobotClient) SetCredentials(username, password string) error {
161138
c.m = nil
162139
return nil
163140
}
141+
142+
func (c *cacheRobotClient) sync() ([]models.Server, error) {
143+
list, err := c.robotClient.ServerGetList()
144+
if err != nil {
145+
return list, err
146+
}
147+
148+
// populate list
149+
c.l = list
150+
151+
// remove all entries from map and repopulate it from the current list
152+
c.m = make(map[int]*models.Server)
153+
for i, server := range list {
154+
c.m[server.ServerNumber] = &list[i]
155+
}
156+
157+
// set time of last update
158+
c.lastUpdate = time.Now()
159+
160+
return c.l, nil
161+
}

internal/robot/client/interface.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,6 @@ import "github.com/syself/hrobot-go/models"
55
type Client interface {
66
ServerGet(id int) (*models.Server, error)
77
ServerGetList() ([]models.Server, error)
8+
ServerGetListForceRefresh() ([]models.Server, error)
89
SetCredentials(username, password string) error
910
}

0 commit comments

Comments
 (0)