From f725c94ec8ca693695552a0c9fa5b8e023192b01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Cr=C3=A9gut?= Date: Thu, 26 Sep 2024 11:51:18 +0200 Subject: [PATCH] Fix non terminating Data/DataTemplate deletion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some Metal3Data and Metal3DataTemplates were not destroyed in rare circumstances due to missing reconcile events caused by two distinct problems. DataTemplate deletion has a watch over DataClaims but deletion can only be performed when Data are completely removed (finalizer executed) because Data resource requires the template. We keep the facts that Data and DataClaims are still existing. * When DataClaims exist, we do not remove the finalizer and wait for reconcile events from the watch. * If we have neither DataClaims or Data, we can safely remove the finalizer. Deletion is complete. * Otherwise we have to make a busy wait on Data deletion as there is no more claim to generate event but we still need to wait until the Data finalizers have been executed. Actively wait for DataClaim deletion: If Metal3Data is deleted before the Metal3DataClaim, we must actively wait for the claim deletion, as no other reconciliation event will be triggered. Fixes: #1994 Co-authored-by: Pierre Crégut Co-authored-by: Thomas Morin Signed-off-by: Pierre Crégut --- baremetal/metal3datatemplate_manager.go | 33 +++++++++---------- baremetal/metal3datatemplate_manager_test.go | 4 +-- ...zz_generated.metal3datatemplate_manager.go | 9 ++--- controllers/metal3data_controller.go | 11 ++++--- controllers/metal3datatemplate_controller.go | 15 +++++---- .../metal3datatemplate_controller_test.go | 18 +++++----- 6 files changed, 48 insertions(+), 42 deletions(-) diff --git a/baremetal/metal3datatemplate_manager.go b/baremetal/metal3datatemplate_manager.go index 564c28cba9..0c4dcc3e21 100644 --- a/baremetal/metal3datatemplate_manager.go +++ b/baremetal/metal3datatemplate_manager.go @@ -40,8 +40,8 @@ type DataTemplateManagerInterface interface { UnsetFinalizer() SetClusterOwnerRef(*clusterv1.Cluster) error // UpdateDatas handles the Metal3DataClaims and creates or deletes Metal3Data accordingly. - // It returns the number of current allocations. - UpdateDatas(context.Context) (int, error) + // It returns if there are still Data object and undeleted DataClaims objects. + UpdateDatas(context.Context) (bool, bool, error) } // DataTemplateManager is responsible for performing machine reconciliation. @@ -135,12 +135,7 @@ func (m *DataTemplateManager) getIndexes(ctx context.Context) (map[int]string, e continue } - // Get the claim Name, if unset use empty string, to still record the - // index being used, to avoid conflicts - claimName := "" - if dataObject.Spec.Claim.Name != "" { - claimName = dataObject.Spec.Claim.Name - } + claimName := dataObject.Spec.Claim.Name m.DataTemplate.Status.Indexes[claimName] = dataObject.Spec.Index indexes[dataObject.Spec.Index] = claimName } @@ -170,12 +165,13 @@ func (m *DataTemplateManager) updateStatusTimestamp() { } // UpdateDatas handles the Metal3DataClaims and creates or deletes Metal3Data accordingly. -// It returns the number of current allocations. -func (m *DataTemplateManager) UpdateDatas(ctx context.Context) (int, error) { +// It returns if there are still Data object and undeleted DataClaims objects. +func (m *DataTemplateManager) UpdateDatas(ctx context.Context) (bool, bool, error) { indexes, err := m.getIndexes(ctx) if err != nil { - return 0, err + return false, false, err } + hasData := len(indexes) > 0 // get list of Metal3DataClaim objects dataClaimObjects := infrav1.Metal3DataClaimList{} @@ -186,9 +182,10 @@ func (m *DataTemplateManager) UpdateDatas(ctx context.Context) (int, error) { err = m.client.List(ctx, &dataClaimObjects, opts) if err != nil { - return 0, err + return false, false, err } + hasClaims := false // Iterate over the Metal3Data objects to find all indexes and objects for _, dataClaim := range dataClaimObjects.Items { dataClaim := dataClaim @@ -196,18 +193,20 @@ func (m *DataTemplateManager) UpdateDatas(ctx context.Context) (int, error) { if dataClaim.Spec.Template.Name != m.DataTemplate.Name { continue } - - if dataClaim.Status.RenderedData != nil && dataClaim.DeletionTimestamp.IsZero() { - continue + if dataClaim.DeletionTimestamp.IsZero() { + hasClaims = true + if dataClaim.Status.RenderedData != nil { + continue + } } indexes, err = m.updateData(ctx, &dataClaim, indexes) if err != nil { - return 0, err + return false, false, err } } m.updateStatusTimestamp() - return len(indexes), nil + return hasData, hasClaims, nil } func (m *DataTemplateManager) updateData(ctx context.Context, diff --git a/baremetal/metal3datatemplate_manager_test.go b/baremetal/metal3datatemplate_manager_test.go index 38979c1e92..3a06596a9b 100644 --- a/baremetal/metal3datatemplate_manager_test.go +++ b/baremetal/metal3datatemplate_manager_test.go @@ -256,7 +256,7 @@ var _ = Describe("Metal3DataTemplate manager", func() { ) Expect(err).NotTo(HaveOccurred()) - nbIndexes, err := templateMgr.UpdateDatas(context.TODO()) + hasData, _, err := templateMgr.UpdateDatas(context.TODO()) if tc.expectRequeue || tc.expectError { Expect(err).To(HaveOccurred()) if tc.expectRequeue { @@ -267,7 +267,7 @@ var _ = Describe("Metal3DataTemplate manager", func() { } else { Expect(err).NotTo(HaveOccurred()) } - Expect(nbIndexes).To(Equal(tc.expectedNbIndexes)) + Expect(hasData).To(Equal(tc.expectedNbIndexes > 0)) Expect(tc.template.Status.LastUpdated.IsZero()).To(BeFalse()) Expect(tc.template.Status.Indexes).To(Equal(tc.expectedIndexes)) diff --git a/baremetal/mocks/zz_generated.metal3datatemplate_manager.go b/baremetal/mocks/zz_generated.metal3datatemplate_manager.go index a7a1fbf1ed..158f1395b2 100644 --- a/baremetal/mocks/zz_generated.metal3datatemplate_manager.go +++ b/baremetal/mocks/zz_generated.metal3datatemplate_manager.go @@ -92,12 +92,13 @@ func (mr *MockDataTemplateManagerInterfaceMockRecorder) UnsetFinalizer() *gomock } // UpdateDatas mocks base method. -func (m *MockDataTemplateManagerInterface) UpdateDatas(arg0 context.Context) (int, error) { +func (m *MockDataTemplateManagerInterface) UpdateDatas(arg0 context.Context) (bool, bool, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "UpdateDatas", arg0) - ret0, _ := ret[0].(int) - ret1, _ := ret[1].(error) - return ret0, ret1 + ret0, _ := ret[0].(bool) + ret1, _ := ret[1].(bool) + ret2, _ := ret[2].(error) + return ret0, ret1, ret2 } // UpdateDatas indicates an expected call of UpdateDatas. diff --git a/controllers/metal3data_controller.go b/controllers/metal3data_controller.go index 077e89bdc4..b915121844 100644 --- a/controllers/metal3data_controller.go +++ b/controllers/metal3data_controller.go @@ -113,11 +113,14 @@ func (r *Metal3DataReconciler) Reconcile(ctx context.Context, req ctrl.Request) if !metal3Data.ObjectMeta.DeletionTimestamp.IsZero() { // Check if the Metal3DataClaim is gone. We cannot clean up until it is. err := r.Client.Get(ctx, types.NamespacedName{Name: metal3Data.Spec.Claim.Name, Namespace: metal3Data.Spec.Claim.Namespace}, &infrav1.Metal3DataClaim{}) - if err != nil && apierrors.IsNotFound(err) { - return r.reconcileDelete(ctx, metadataMgr) + if err != nil { + if apierrors.IsNotFound(err) { + return r.reconcileDelete(ctx, metadataMgr) + } + return ctrl.Result{}, err } - // We were unable to determine if the Metal3DataClaim is gone - return ctrl.Result{}, err + // Requeue until Metal3DataClaim is gone. + return ctrl.Result{Requeue: true, RequeueAfter: requeueAfter}, nil } // Handle non-deleted machines diff --git a/controllers/metal3datatemplate_controller.go b/controllers/metal3datatemplate_controller.go index d23a6fed64..3b06457d93 100644 --- a/controllers/metal3datatemplate_controller.go +++ b/controllers/metal3datatemplate_controller.go @@ -140,7 +140,7 @@ func (r *Metal3DataTemplateReconciler) reconcileNormal(ctx context.Context, // If the Metal3DataTemplate doesn't have finalizer, add it. dataTemplateMgr.SetFinalizer() - _, err := dataTemplateMgr.UpdateDatas(ctx) + _, _, err := dataTemplateMgr.UpdateDatas(ctx) if err != nil { return checkReconcileError(err, "Failed to recreate the status") } @@ -150,17 +150,20 @@ func (r *Metal3DataTemplateReconciler) reconcileNormal(ctx context.Context, func (r *Metal3DataTemplateReconciler) reconcileDelete(ctx context.Context, dataTemplateMgr baremetal.DataTemplateManagerInterface, ) (ctrl.Result, error) { - allocationsCount, err := dataTemplateMgr.UpdateDatas(ctx) + hasData, hasClaims, err := dataTemplateMgr.UpdateDatas(ctx) if err != nil { return checkReconcileError(err, "Failed to recreate the status") } - if allocationsCount == 0 { - // metal3datatemplate is marked for deletion and ready to be deleted, - // so remove the finalizer. - dataTemplateMgr.UnsetFinalizer() + if hasClaims { + return ctrl.Result{}, nil + } + if hasData { + return ctrl.Result{Requeue: true, RequeueAfter: requeueAfter}, nil } + dataTemplateMgr.UnsetFinalizer() + return ctrl.Result{}, nil } diff --git a/controllers/metal3datatemplate_controller_test.go b/controllers/metal3datatemplate_controller_test.go index b89d3258ec..7c6496aec7 100644 --- a/controllers/metal3datatemplate_controller_test.go +++ b/controllers/metal3datatemplate_controller_test.go @@ -83,9 +83,9 @@ var _ = Describe("Metal3DataTemplate manager", func() { } } if tc.m3dt != nil && !tc.m3dt.DeletionTimestamp.IsZero() && tc.reconcileDeleteError { - m.EXPECT().UpdateDatas(context.Background()).Return(0, errors.New("")) + m.EXPECT().UpdateDatas(context.Background()).Return(false, false, errors.New("")) } else if tc.m3dt != nil && !tc.m3dt.DeletionTimestamp.IsZero() { - m.EXPECT().UpdateDatas(context.Background()).Return(0, nil) + m.EXPECT().UpdateDatas(context.Background()).Return(false, false, nil) m.EXPECT().UnsetFinalizer() } @@ -93,9 +93,9 @@ var _ = Describe("Metal3DataTemplate manager", func() { tc.reconcileNormal { m.EXPECT().SetFinalizer() if tc.reconcileNormalError { - m.EXPECT().UpdateDatas(context.Background()).Return(0, errors.New("")) + m.EXPECT().UpdateDatas(context.Background()).Return(false, false, errors.New("")) } else { - m.EXPECT().UpdateDatas(context.Background()).Return(1, nil) + m.EXPECT().UpdateDatas(context.Background()).Return(true, true, nil) } } @@ -232,9 +232,9 @@ var _ = Describe("Metal3DataTemplate manager", func() { m.EXPECT().SetFinalizer() if !tc.UpdateError { - m.EXPECT().UpdateDatas(context.TODO()).Return(1, nil) + m.EXPECT().UpdateDatas(context.TODO()).Return(true, true, nil) } else { - m.EXPECT().UpdateDatas(context.TODO()).Return(0, errors.New("")) + m.EXPECT().UpdateDatas(context.TODO()).Return(false, false, errors.New("")) } res, err := r.reconcileNormal(context.TODO(), m) @@ -284,12 +284,12 @@ var _ = Describe("Metal3DataTemplate manager", func() { m := baremetal_mocks.NewMockDataTemplateManagerInterface(gomockCtrl) if !tc.DeleteError && tc.DeleteReady { - m.EXPECT().UpdateDatas(context.TODO()).Return(0, nil) + m.EXPECT().UpdateDatas(context.TODO()).Return(false, false, nil) m.EXPECT().UnsetFinalizer() } else if !tc.DeleteError { - m.EXPECT().UpdateDatas(context.TODO()).Return(1, nil) + m.EXPECT().UpdateDatas(context.TODO()).Return(true, true, nil) } else { - m.EXPECT().UpdateDatas(context.TODO()).Return(0, errors.New("")) + m.EXPECT().UpdateDatas(context.TODO()).Return(false, false, errors.New("")) } res, err := r.reconcileDelete(context.TODO(), m)