From d754ad5df262f1c5054c0582c29d9bf377452f04 Mon Sep 17 00:00:00 2001 From: Christian Schlotter Date: Thu, 25 Apr 2024 14:21:01 +0200 Subject: [PATCH 1/3] clusterctl: always run crd migration if possible to reduce conversion webhook usage If objects are still stored in the old version, every time an object is requested the kube-apiserver will call the conversion webhook. Instead of lots of conversion webhook calls we once do the migration if possible. Also increases the timeout used for the migrations: Before running CRD migration clusterctl usually upgrades cert-manager which may lead to updating Certificates and rolling out new certificates to our controllers. There is a chance that this can take up to 90s in which conversion, validating or mutatingwebhooks may be unavailable. Because the migration gets run more aggresively during upgrades this also increases the relevant timeouts. --- .../client/cluster/crd_migration.go | 28 +++++++++++++++---- .../client/cluster/crd_migration_test.go | 28 +++++++++++++++++-- 2 files changed, 48 insertions(+), 8 deletions(-) diff --git a/cmd/clusterctl/client/cluster/crd_migration.go b/cmd/clusterctl/client/cluster/crd_migration.go index 17870b0017b9..6f59fecb0a42 100644 --- a/cmd/clusterctl/client/cluster/crd_migration.go +++ b/cmd/clusterctl/client/cluster/crd_migration.go @@ -29,6 +29,7 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/rand" "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/apimachinery/pkg/util/wait" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/cluster-api/cmd/clusterctl/internal/scheme" @@ -115,9 +116,9 @@ func (m *crdMigrator) run(ctx context.Context, newCRD *apiextensionsv1.CustomRes } currentStatusStoredVersions := sets.Set[string]{}.Insert(currentCRD.Status.StoredVersions...) - // If the new CRD still contains all current stored versions, nothing to do - // as no previous storage version will be dropped. - if servedVersions.HasAll(currentStatusStoredVersions.UnsortedList()...) { + // If the old CRD only contains its current storageVersion as storedVersion, + // nothing to do as all objects are already on the current storageVersion. + if currentStatusStoredVersions.Len() == 1 && currentCRD.Status.StoredVersions[0] == currentStorageVersion { log.V(2).Info("CRD migration check passed", "name", newCRD.Name) return false, nil } @@ -157,7 +158,7 @@ func (m *crdMigrator) migrateResourcesForCRD(ctx context.Context, crd *apiextens var i int for { - if err := retryWithExponentialBackoff(ctx, newReadBackoff(), func(ctx context.Context) error { + if err := retryWithExponentialBackoff(ctx, newCRDMigrationBackoff(), func(ctx context.Context) error { return m.Client.List(ctx, list, client.Continue(list.GetContinue())) }); err != nil { return errors.Wrapf(err, "failed to list %q", list.GetKind()) @@ -167,7 +168,7 @@ func (m *crdMigrator) migrateResourcesForCRD(ctx context.Context, crd *apiextens obj := list.Items[i] log.V(5).Info("Migrating", logf.UnstructuredToValues(obj)...) - if err := retryWithExponentialBackoff(ctx, newWriteBackoff(), func(ctx context.Context) error { + if err := retryWithExponentialBackoff(ctx, newCRDMigrationBackoff(), func(ctx context.Context) error { return handleMigrateErr(m.Client.Update(ctx, &obj)) }); err != nil { return errors.Wrapf(err, "failed to migrate %s/%s", obj.GetNamespace(), obj.GetName()) @@ -230,3 +231,20 @@ func storageVersionForCRD(crd *apiextensionsv1.CustomResourceDefinition) (string } return "", errors.Errorf("could not find storage version for CRD %q", crd.Name) } + +// newCRDMigrationBackoff creates a new API Machinery backoff parameter set suitable for use with crd migration operations. +// Clusterctl upgrades cert-manager right before doing CRD migration. This may lead to rollout of new certificates. +// The time between new certificate creation + injection into objects (CRD, Webhooks) and the new secrets getting propagated +// to the controller can be 60-90s, because the kubelet only periodically syncs secret contents to pods. +// During this timespan conversion, validating- or mutationwebhooks may be unavailable and cause a failure. +func newCRDMigrationBackoff() wait.Backoff { + // Return a exponential backoff configuration which returns durations for a total time of ~1m30s. + // Example: 0, .25s, .6s, 1.2, 2.1s, 3.4s, 5.5s, 8s, 12s, 19s, 28s, 43s, 64s, 97s + // Jitter is added as a random fraction of the duration multiplied by the jitter factor. + return wait.Backoff{ + Duration: 500 * time.Millisecond, + Factor: 1.5, + Steps: 9, + Jitter: 0.1, + } +} diff --git a/cmd/clusterctl/client/cluster/crd_migration_test.go b/cmd/clusterctl/client/cluster/crd_migration_test.go index 950e5150b875..9cc977fde7d2 100644 --- a/cmd/clusterctl/client/cluster/crd_migration_test.go +++ b/cmd/clusterctl/client/cluster/crd_migration_test.go @@ -111,6 +111,28 @@ func Test_CRDMigrator(t *testing.T) { }, wantIsMigrated: false, }, + { + name: "No-op if new CRD adds a new versions and unserves the old", + currentCRD: &apiextensionsv1.CustomResourceDefinition{ + ObjectMeta: metav1.ObjectMeta{Name: "foo"}, + Spec: apiextensionsv1.CustomResourceDefinitionSpec{ + Versions: []apiextensionsv1.CustomResourceDefinitionVersion{ + {Name: "v1alpha1", Storage: true, Served: true}, + }, + }, + Status: apiextensionsv1.CustomResourceDefinitionStatus{StoredVersions: []string{"v1alpha1"}}, + }, + newCRD: &apiextensionsv1.CustomResourceDefinition{ + ObjectMeta: metav1.ObjectMeta{Name: "foo"}, + Spec: apiextensionsv1.CustomResourceDefinitionSpec{ + Versions: []apiextensionsv1.CustomResourceDefinitionVersion{ + {Name: "v1beta1", Storage: true, Served: true}, // v1beta1 is being added + {Name: "v1alpha1", Served: false}, // v1alpha1 still exists but is not served anymore + }, + }, + }, + wantIsMigrated: false, + }, { name: "Fails if new CRD drops current storage version", currentCRD: &apiextensionsv1.CustomResourceDefinition{ @@ -185,7 +207,7 @@ func Test_CRDMigrator(t *testing.T) { Names: apiextensionsv1.CustomResourceDefinitionNames{Kind: "Foo", ListKind: "FooList"}, Versions: []apiextensionsv1.CustomResourceDefinitionVersion{ {Name: "v1", Storage: true, Served: true}, // v1 is being added - {Name: "v1beta1", Served: true}, // v1beta1 still there (required for migration) + {Name: "v1beta1", Served: true}, // v1beta1 still there // v1alpha1 is being dropped }, }, @@ -246,8 +268,8 @@ func Test_CRDMigrator(t *testing.T) { Names: apiextensionsv1.CustomResourceDefinitionNames{Kind: "Foo", ListKind: "FooList"}, Versions: []apiextensionsv1.CustomResourceDefinitionVersion{ {Name: "v1", Storage: true, Served: true}, // v1 is being added - {Name: "v1beta1", Served: true}, // v1beta1 still there (required for migration) - {Name: "v1alpha1", Served: false}, // v1alpha1 is no longer being served (required for migration) + {Name: "v1beta1", Served: true}, // v1beta1 still there (required for future migration) + {Name: "v1alpha1", Served: false}, // v1alpha1 is no longer being served }, }, }, From 4a7acb09ebf1d0950a4baed1fd102b26448bdc12 Mon Sep 17 00:00:00 2001 From: Christian Schlotter Date: Fri, 26 Apr 2024 11:22:59 +0200 Subject: [PATCH 2/3] review fixes --- cmd/clusterctl/client/cluster/crd_migration.go | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/cmd/clusterctl/client/cluster/crd_migration.go b/cmd/clusterctl/client/cluster/crd_migration.go index 6f59fecb0a42..b55a9263073e 100644 --- a/cmd/clusterctl/client/cluster/crd_migration.go +++ b/cmd/clusterctl/client/cluster/crd_migration.go @@ -84,12 +84,8 @@ func (m *crdMigrator) run(ctx context.Context, newCRD *apiextensionsv1.CustomRes // Gets the list of version supported by the new CRD newVersions := sets.Set[string]{} - servedVersions := sets.Set[string]{} for _, version := range newCRD.Spec.Versions { newVersions.Insert(version.Name) - if version.Served { - servedVersions.Insert(version.Name) - } } // Get the current CRD. @@ -130,9 +126,8 @@ func (m *crdMigrator) run(ctx context.Context, newCRD *apiextensionsv1.CustomRes // This way we can make sure that all CR objects are now stored in the current storage version. // Alternatively, we would have to figure out which objects are stored in which version but this information is not // exposed by the apiserver. - storedVersionsToDelete := currentStatusStoredVersions.Difference(servedVersions) - storedVersionsToPreserve := currentStatusStoredVersions.Intersection(servedVersions) - log.Info("CR migration required", "kind", newCRD.Spec.Names.Kind, "storedVersionsToDelete", strings.Join(sets.List(storedVersionsToDelete), ","), "storedVersionsToPreserve", strings.Join(sets.List(storedVersionsToPreserve), ",")) + storedVersionsToDelete := currentStatusStoredVersions.Delete(currentStorageVersion) + log.Info("CR migration required", "kind", newCRD.Spec.Names.Kind, "storedVersionsToDelete", strings.Join(sets.List(storedVersionsToDelete), ","), "storedVersionToPreserve", currentStorageVersion) if err := m.migrateResourcesForCRD(ctx, currentCRD, currentStorageVersion); err != nil { return false, err @@ -239,12 +234,12 @@ func storageVersionForCRD(crd *apiextensionsv1.CustomResourceDefinition) (string // During this timespan conversion, validating- or mutationwebhooks may be unavailable and cause a failure. func newCRDMigrationBackoff() wait.Backoff { // Return a exponential backoff configuration which returns durations for a total time of ~1m30s. - // Example: 0, .25s, .6s, 1.2, 2.1s, 3.4s, 5.5s, 8s, 12s, 19s, 28s, 43s, 64s, 97s + // Example: 0, .25s, .6s, 1.1s, 1.8s, 2.7s, 4s, 6s, 9s, 12s, 17s, 25s, 35s, 49s, 69s, 97s, 135s // Jitter is added as a random fraction of the duration multiplied by the jitter factor. return wait.Backoff{ - Duration: 500 * time.Millisecond, - Factor: 1.5, - Steps: 9, + Duration: 250 * time.Millisecond, + Factor: 1.4, + Steps: 17, Jitter: 0.1, } } From b46a303451e9acd9bf59f634ad468631ba6b2d0c Mon Sep 17 00:00:00 2001 From: Christian Schlotter Date: Tue, 30 Apr 2024 17:02:38 +0200 Subject: [PATCH 3/3] review fixes --- .../client/cluster/crd_migration.go | 10 +- .../client/cluster/crd_migration_test.go | 93 +------------------ 2 files changed, 10 insertions(+), 93 deletions(-) diff --git a/cmd/clusterctl/client/cluster/crd_migration.go b/cmd/clusterctl/client/cluster/crd_migration.go index b55a9263073e..98098f911461 100644 --- a/cmd/clusterctl/client/cluster/crd_migration.go +++ b/cmd/clusterctl/client/cluster/crd_migration.go @@ -114,18 +114,18 @@ func (m *crdMigrator) run(ctx context.Context, newCRD *apiextensionsv1.CustomRes currentStatusStoredVersions := sets.Set[string]{}.Insert(currentCRD.Status.StoredVersions...) // If the old CRD only contains its current storageVersion as storedVersion, // nothing to do as all objects are already on the current storageVersion. + // Note: We want to migrate objects to new storage versions as soon as possible + // to prevent unnecessary conversion webhook calls. if currentStatusStoredVersions.Len() == 1 && currentCRD.Status.StoredVersions[0] == currentStorageVersion { log.V(2).Info("CRD migration check passed", "name", newCRD.Name) return false, nil } - // Otherwise a version that has been used as storage version will be dropped, so it is necessary to migrate all the - // objects and drop the storage version from the current CRD status before installing the new CRD. - // Ref https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definition-versioning/#writing-reading-and-updating-versioned-customresourcedefinition-objects // Note: We are simply migrating all CR objects independent of the version in which they are actually stored in etcd. // This way we can make sure that all CR objects are now stored in the current storage version. // Alternatively, we would have to figure out which objects are stored in which version but this information is not // exposed by the apiserver. + // Ref https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definition-versioning/#writing-reading-and-updating-versioned-customresourcedefinition-objects storedVersionsToDelete := currentStatusStoredVersions.Delete(currentStorageVersion) log.Info("CR migration required", "kind", newCRD.Spec.Names.Kind, "storedVersionsToDelete", strings.Join(sets.List(storedVersionsToDelete), ","), "storedVersionToPreserve", currentStorageVersion) @@ -231,9 +231,9 @@ func storageVersionForCRD(crd *apiextensionsv1.CustomResourceDefinition) (string // Clusterctl upgrades cert-manager right before doing CRD migration. This may lead to rollout of new certificates. // The time between new certificate creation + injection into objects (CRD, Webhooks) and the new secrets getting propagated // to the controller can be 60-90s, because the kubelet only periodically syncs secret contents to pods. -// During this timespan conversion, validating- or mutationwebhooks may be unavailable and cause a failure. +// During this timespan conversion, validating- or mutating-webhooks may be unavailable and cause a failure. func newCRDMigrationBackoff() wait.Backoff { - // Return a exponential backoff configuration which returns durations for a total time of ~1m30s. + // Return a exponential backoff configuration which returns durations for a total time of ~1m30s + some buffer. // Example: 0, .25s, .6s, 1.1s, 1.8s, 2.7s, 4s, 6s, 9s, 12s, 17s, 25s, 35s, 49s, 69s, 97s, 135s // Jitter is added as a random fraction of the duration multiplied by the jitter factor. return wait.Backoff{ diff --git a/cmd/clusterctl/client/cluster/crd_migration_test.go b/cmd/clusterctl/client/cluster/crd_migration_test.go index 9cc977fde7d2..95efc6c31488 100644 --- a/cmd/clusterctl/client/cluster/crd_migration_test.go +++ b/cmd/clusterctl/client/cluster/crd_migration_test.go @@ -69,7 +69,7 @@ func Test_CRDMigrator(t *testing.T) { wantIsMigrated: false, }, { - name: "No-op if new CRD supports same versions", + name: "No-op if new CRD uses the same storage version", currentCRD: &apiextensionsv1.CustomResourceDefinition{ ObjectMeta: metav1.ObjectMeta{Name: "foo"}, Spec: apiextensionsv1.CustomResourceDefinitionSpec{ @@ -90,7 +90,7 @@ func Test_CRDMigrator(t *testing.T) { wantIsMigrated: false, }, { - name: "No-op if new CRD adds a new versions", + name: "No-op if new CRD adds a new versions and stored versions is only the old storage version", currentCRD: &apiextensionsv1.CustomResourceDefinition{ ObjectMeta: metav1.ObjectMeta{Name: "foo"}, Spec: apiextensionsv1.CustomResourceDefinitionSpec{ @@ -104,30 +104,8 @@ func Test_CRDMigrator(t *testing.T) { ObjectMeta: metav1.ObjectMeta{Name: "foo"}, Spec: apiextensionsv1.CustomResourceDefinitionSpec{ Versions: []apiextensionsv1.CustomResourceDefinitionVersion{ - {Name: "v1beta1", Storage: true, Served: true}, // v1beta1 is being added - {Name: "v1alpha1", Served: true}, // v1alpha1 still exists - }, - }, - }, - wantIsMigrated: false, - }, - { - name: "No-op if new CRD adds a new versions and unserves the old", - currentCRD: &apiextensionsv1.CustomResourceDefinition{ - ObjectMeta: metav1.ObjectMeta{Name: "foo"}, - Spec: apiextensionsv1.CustomResourceDefinitionSpec{ - Versions: []apiextensionsv1.CustomResourceDefinitionVersion{ - {Name: "v1alpha1", Storage: true, Served: true}, - }, - }, - Status: apiextensionsv1.CustomResourceDefinitionStatus{StoredVersions: []string{"v1alpha1"}}, - }, - newCRD: &apiextensionsv1.CustomResourceDefinition{ - ObjectMeta: metav1.ObjectMeta{Name: "foo"}, - Spec: apiextensionsv1.CustomResourceDefinitionSpec{ - Versions: []apiextensionsv1.CustomResourceDefinitionVersion{ - {Name: "v1beta1", Storage: true, Served: true}, // v1beta1 is being added - {Name: "v1alpha1", Served: false}, // v1alpha1 still exists but is not served anymore + {Name: "v1beta1", Storage: true, Served: false}, // v1beta1 is being added + {Name: "v1alpha1", Served: true}, // v1alpha1 still exists }, }, }, @@ -155,7 +133,7 @@ func Test_CRDMigrator(t *testing.T) { wantErr: true, }, { - name: "Migrate CRs if their storage version is removed from the CRD", + name: "Migrate CRs if there are stored versions is not only the current storage version", CRs: []unstructured.Unstructured{ { Object: map[string]interface{}{ @@ -215,67 +193,6 @@ func Test_CRDMigrator(t *testing.T) { wantStoredVersions: []string{"v1beta1"}, // v1alpha1 should be dropped from current CRD's stored versions wantIsMigrated: true, }, - { - name: "Migrate the CR if their storage version is no longer served by the CRD", - CRs: []unstructured.Unstructured{ - { - Object: map[string]interface{}{ - "apiVersion": "foo/v1beta1", - "kind": "Foo", - "metadata": map[string]interface{}{ - "name": "cr1", - "namespace": metav1.NamespaceDefault, - }, - }, - }, - { - Object: map[string]interface{}{ - "apiVersion": "foo/v1beta1", - "kind": "Foo", - "metadata": map[string]interface{}{ - "name": "cr2", - "namespace": metav1.NamespaceDefault, - }, - }, - }, - { - Object: map[string]interface{}{ - "apiVersion": "foo/v1beta1", - "kind": "Foo", - "metadata": map[string]interface{}{ - "name": "cr3", - "namespace": metav1.NamespaceDefault, - }, - }, - }, - }, - currentCRD: &apiextensionsv1.CustomResourceDefinition{ - ObjectMeta: metav1.ObjectMeta{Name: "foo"}, - Spec: apiextensionsv1.CustomResourceDefinitionSpec{ - Group: "foo", - Names: apiextensionsv1.CustomResourceDefinitionNames{Kind: "Foo", ListKind: "FooList"}, - Versions: []apiextensionsv1.CustomResourceDefinitionVersion{ - {Name: "v1beta1", Storage: true, Served: true}, - {Name: "v1alpha1", Served: true}, - }, - }, - Status: apiextensionsv1.CustomResourceDefinitionStatus{StoredVersions: []string{"v1beta1", "v1alpha1"}}, - }, - newCRD: &apiextensionsv1.CustomResourceDefinition{ - ObjectMeta: metav1.ObjectMeta{Name: "foo"}, - Spec: apiextensionsv1.CustomResourceDefinitionSpec{ - Group: "foo", - Names: apiextensionsv1.CustomResourceDefinitionNames{Kind: "Foo", ListKind: "FooList"}, - Versions: []apiextensionsv1.CustomResourceDefinitionVersion{ - {Name: "v1", Storage: true, Served: true}, // v1 is being added - {Name: "v1beta1", Served: true}, // v1beta1 still there (required for future migration) - {Name: "v1alpha1", Served: false}, // v1alpha1 is no longer being served - }, - }, - }, - wantStoredVersions: []string{"v1beta1"}, // v1alpha1 should be dropped from current CRD's stored versions - wantIsMigrated: true, - }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) {