Skip to content

Commit

Permalink
fix(routes): deleting wrong routes when other server has same private…
Browse files Browse the repository at this point in the history
… IP (#472)

In case another server in the project has the same Private IP (in
another network) as one of our cluster nodes, hccm would happily
delete&recreate the route on every route reconciliation.

This fixes the bug by only adding the Private IPs in the correct network
to the AllServersCache ByPrivateIP lookup map.

Fixes #470

Co-authored-by: Jonas Lammler <ljonas@riseup.net>
  • Loading branch information
apricote and jooola authored Jul 18, 2023
1 parent 5a066a1 commit 5461038
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 3 deletions.
11 changes: 8 additions & 3 deletions hcloud/routes.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,14 @@ func newRoutes(client *hcloud.Client, networkID int64) (*routes, error) {
}

return &routes{
client: client,
network: networkObj,
serverCache: &hcops.AllServersCache{LoadFunc: client.Server.All},
client: client,
network: networkObj,
serverCache: &hcops.AllServersCache{
// client.Server.All will load ALL the servers in the project, even those
// that are not part of the Kubernetes cluster.
LoadFunc: client.Server.All,
Network: networkObj,
},
}, nil
}

Expand Down
14 changes: 14 additions & 0 deletions internal/hcops/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import (
"sync"
"time"

"k8s.io/klog/v2"

"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/metrics"
"github.com/hetznercloud/hcloud-go/v2/hcloud"
)
Expand All @@ -21,6 +23,9 @@ type AllServersCache struct {
LoadTimeout time.Duration
MaxAge time.Duration

// If set, only IPs in this network will be considered for [ByPrivateIP]
Network *hcloud.Network

lastRefresh time.Time
byPrivIP map[string]*hcloud.Server
byName map[string]*hcloud.Server
Expand Down Expand Up @@ -102,9 +107,18 @@ func (c *AllServersCache) getCache(getSrv func() (*hcloud.Server, bool)) (*hclou
for _, srv := range srvs {
// Index servers by the IPs of their private networks
for _, n := range srv.PrivateNet {
if c.Network != nil {
if n.Network.ID != c.Network.ID {
// Only consider private IPs in the right network
continue
}
}
if n.IP == nil {
continue
}
if _, ok := c.byPrivIP[n.IP.String()]; ok {
klog.Warningf("Overriding AllServersCache entry for private ip %s with server %s\n", n.IP.String(), srv.Name)
}
c.byPrivIP[n.IP.String()] = srv
}

Expand Down
46 changes: 46 additions & 0 deletions internal/hcops/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,52 @@ func TestAllServersCache_ClientError(t *testing.T) {
runAllServersCacheTests(t, "Not found", tmpl, cacheOps)
}

func TestAllServersCache_DuplicatePrivateIP(t *testing.T) {
// Regression test for /~https://github.com/hetznercloud/hcloud-cloud-controller-manager/issues/470

network := &hcloud.Network{
ID: 12345,
Name: "cluster-network",
}
srv := &hcloud.Server{
ID: 101010,
Name: "cluster-node",
PrivateNet: []hcloud.ServerPrivateNet{
{
IP: net.ParseIP("10.0.0.4"),
Network: network,
},
},
}
srvInvalid := &hcloud.Server{
ID: 101012,
Name: "invalid-node",
PrivateNet: []hcloud.ServerPrivateNet{
{
IP: net.ParseIP("10.0.0.4"),
Network: &hcloud.Network{
ID: 54321,
Name: "invalid-network",
},
},
},
}

cacheOps := newAllServersCacheOps(t, srv)
tmpl := allServersCacheTestCase{
SetUp: func(t *testing.T, tt *allServersCacheTestCase) {
tt.Cache.Network = network

tt.ServerClient.
On("All", mock.Anything).
Return([]*hcloud.Server{srv, srvInvalid}, nil)
},
Expected: srv,
}

runAllServersCacheTests(t, "DuplicatePrivateIP", tmpl, cacheOps)
}

type allServersCacheOp func(c *hcops.AllServersCache) (*hcloud.Server, error)

func newAllServersCacheOps(t *testing.T, srv *hcloud.Server) map[string]allServersCacheOp {
Expand Down

0 comments on commit 5461038

Please sign in to comment.