From e3c9b70e536e412b7bcf568e6de961451f5b5654 Mon Sep 17 00:00:00 2001 From: Cyrill Troxler Date: Tue, 19 Mar 2024 17:20:21 +0100 Subject: [PATCH] fix: deletion priority to avoid deleting too many machines This introduces an additional deletePriority between betterDelete and mustDelete to avoid a race where multiple machines might be deleted at the same time. --- .../machineset/machineset_delete_policy.go | 13 +++++---- .../machineset_delete_policy_test.go | 29 +++++++++++++++++++ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/internal/controllers/machineset/machineset_delete_policy.go b/internal/controllers/machineset/machineset_delete_policy.go index 280ff7ea43e3..4e029659f27f 100644 --- a/internal/controllers/machineset/machineset_delete_policy.go +++ b/internal/controllers/machineset/machineset_delete_policy.go @@ -35,6 +35,7 @@ type ( const ( mustDelete deletePriority = 100.0 + shouldDelete deletePriority = 75.0 betterDelete deletePriority = 50.0 couldDelete deletePriority = 20.0 mustNotDelete deletePriority = 0.0 @@ -48,10 +49,10 @@ func oldestDeletePriority(machine *clusterv1.Machine) deletePriority { return mustDelete } if _, ok := machine.ObjectMeta.Annotations[clusterv1.DeleteMachineAnnotation]; ok { - return mustDelete + return shouldDelete } if !isMachineHealthy(machine) { - return mustDelete + return betterDelete } if machine.ObjectMeta.CreationTimestamp.Time.IsZero() { return mustNotDelete @@ -60,7 +61,7 @@ func oldestDeletePriority(machine *clusterv1.Machine) deletePriority { if d.Seconds() < 0 { return mustNotDelete } - return deletePriority(float64(mustDelete) * (1.0 - math.Exp(-d.Seconds()/secondsPerTenDays))) + return deletePriority(float64(betterDelete) * (1.0 - math.Exp(-d.Seconds()/secondsPerTenDays))) } func newestDeletePriority(machine *clusterv1.Machine) deletePriority { @@ -68,12 +69,12 @@ func newestDeletePriority(machine *clusterv1.Machine) deletePriority { return mustDelete } if _, ok := machine.ObjectMeta.Annotations[clusterv1.DeleteMachineAnnotation]; ok { - return mustDelete + return shouldDelete } if !isMachineHealthy(machine) { - return mustDelete + return betterDelete } - return mustDelete - oldestDeletePriority(machine) + return betterDelete - oldestDeletePriority(machine) } func randomDeletePolicy(machine *clusterv1.Machine) deletePriority { diff --git a/internal/controllers/machineset/machineset_delete_policy_test.go b/internal/controllers/machineset/machineset_delete_policy_test.go index eb550a218bfc..cf15c65ef9b7 100644 --- a/internal/controllers/machineset/machineset_delete_policy_test.go +++ b/internal/controllers/machineset/machineset_delete_policy_test.go @@ -407,6 +407,18 @@ func TestMachineOldestDelete(t *testing.T) { ObjectMeta: metav1.ObjectMeta{CreationTimestamp: metav1.NewTime(currentTime.Time.AddDate(0, 0, -10))}, Status: clusterv1.MachineStatus{FailureReason: &statusError, NodeRef: nodeRef}, } + mustDeleteMachine := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "b", DeletionTimestamp: ¤tTime}, + Status: clusterv1.MachineStatus{NodeRef: nodeRef}, + } + unhealthyMachineA := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "a", CreationTimestamp: metav1.NewTime(currentTime.Time.AddDate(0, 0, -10))}, + Status: clusterv1.MachineStatus{FailureReason: &statusError, NodeRef: nodeRef}, + } + unhealthyMachineZ := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "z", CreationTimestamp: metav1.NewTime(currentTime.Time.AddDate(0, 0, -10))}, + Status: clusterv1.MachineStatus{FailureReason: &statusError, NodeRef: nodeRef}, + } deleteMachineWithoutNodeRef := &clusterv1.Machine{ ObjectMeta: metav1.ObjectMeta{CreationTimestamp: metav1.NewTime(currentTime.Time.AddDate(0, 0, -10))}, } @@ -513,6 +525,23 @@ func TestMachineOldestDelete(t *testing.T) { }, expect: []*clusterv1.Machine{nodeHealthyConditionUnknownMachine}, }, + // these two cases ensures the mustDeleteMachine is always picked regardless of the machine names. + { + desc: "func=oldestDeletePriority, diff=1 (unhealthyMachineA)", + diff: 1, + machines: []*clusterv1.Machine{ + empty, secondNewest, oldest, secondOldest, newest, mustDeleteMachine, unhealthyMachineA, + }, + expect: []*clusterv1.Machine{mustDeleteMachine}, + }, + { + desc: "func=oldestDeletePriority, diff=1 (unhealthyMachineZ)", + diff: 1, + machines: []*clusterv1.Machine{ + empty, secondNewest, oldest, secondOldest, newest, mustDeleteMachine, unhealthyMachineZ, + }, + expect: []*clusterv1.Machine{mustDeleteMachine}, + }, } for _, test := range tests {