Skip to content

Commit

Permalink
serial:47674: fix mcp anticipated updates
Browse files Browse the repository at this point in the history
With the latest selinux updates, NROP will no longer require to reboot
the nodes on CR updates unless a node group annottaion is enabled. The
new selinux policy is default starting 4.18. However this wasn't
backported to older versions so in this commit we adapt this change
based on the existance of the annotation and the openshift version.
This commit fixes the expected bahavior per each case and adds missing
waiting loops for MCP updating conndition.

The test passed with default config on 4.18 and on older versions.

Signed-off-by: Shereen Haj <shajmakh@redhat.com>
  • Loading branch information
shajmakh committed Feb 25, 2025
1 parent 1048379 commit 52cb65e
Showing 1 changed file with 42 additions and 13 deletions.
55 changes: 42 additions & 13 deletions test/e2e/serial/tests/configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ import (

"github.com/google/go-cmp/cmp"
depnodes "github.com/k8stopologyawareschedwg/deployer/pkg/clientutil/nodes"
"github.com/k8stopologyawareschedwg/deployer/pkg/deployer/platform"
nrtv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2"

configv1 "github.com/openshift/api/config/v1"
Expand All @@ -53,6 +54,7 @@ import (

nropv1 "github.com/openshift-kni/numaresources-operator/api/v1"
"github.com/openshift-kni/numaresources-operator/api/v1/helper/namespacedname"
inthelper "github.com/openshift-kni/numaresources-operator/internal/api/annotations/helper"
nropmcp "github.com/openshift-kni/numaresources-operator/internal/machineconfigpools"
intnrt "github.com/openshift-kni/numaresources-operator/internal/noderesourcetopology"
intobjs "github.com/openshift-kni/numaresources-operator/internal/objects"
Expand All @@ -66,6 +68,7 @@ import (
rteconfig "github.com/openshift-kni/numaresources-operator/rte/pkg/config"
e2eclient "github.com/openshift-kni/numaresources-operator/test/internal/clients"
"github.com/openshift-kni/numaresources-operator/test/internal/configuration"
"github.com/openshift-kni/numaresources-operator/test/internal/deploy"
e2efixture "github.com/openshift-kni/numaresources-operator/test/internal/fixture"
"github.com/openshift-kni/numaresources-operator/test/internal/images"
e2enrt "github.com/openshift-kni/numaresources-operator/test/internal/noderesourcetopologies"
Expand Down Expand Up @@ -164,6 +167,7 @@ var _ = Describe("[serial][disruptive] numaresources configuration management",
if len(initialMcps) > 1 {
e2efixture.Skip(fxt, "the test supports single node group")
}
customPolicySupportEnabled := isCustomPolicySupportEnabled(nroOperObj)

initialMcp := initialMcps[0]
initialMcpInfo := mcpInfo{
Expand Down Expand Up @@ -204,14 +208,18 @@ var _ = Describe("[serial][disruptive] numaresources configuration management",
ForMachineConfigPoolDeleted(context.TODO(), mcp)
Expect(err).ToNot(HaveOccurred())

if !customPolicySupportEnabled {
Expect(deploy.WaitForMCPsCondition(fxt.Client, context.TODO(), []*machineconfigv1.MachineConfigPool{initialMcpInfo.mcpObj}, machineconfigv1.MachineConfigPoolUpdating)).To(Succeed())
waitForMcpUpdate(fxt.Client, context.TODO(), []mcpInfo{initialMcpInfo}, MachineConfig)
}
}()

//so far 0 machine count for mcp-test -> no nodes -> no updates status -> empty status
var updatedNewMcp *machineconfigv1.MachineConfigPool
Expect(fxt.Client.Get(context.TODO(), client.ObjectKeyFromObject(mcp), updatedNewMcp)).To(Succeed())
var updatedNewMcp machineconfigv1.MachineConfigPool
Expect(fxt.Client.Get(context.TODO(), client.ObjectKeyFromObject(mcp), &updatedNewMcp)).To(Succeed())

newMcpInfo := mcpInfo{
mcpObj: updatedNewMcp,
mcpObj: &updatedNewMcp,
initialConfig: updatedNewMcp.Status.Configuration.Name,
sampleNode: *targetedNode,
}
Expand All @@ -230,7 +238,8 @@ var _ = Describe("[serial][disruptive] numaresources configuration management",
err = unlabelFunc()
Expect(err).ToNot(HaveOccurred())

//this will trigger node reboot as the NROP settings will be reapplied to the unlabelled node, so new node is added under the old mcp hence the MachineCount update type
Expect(deploy.WaitForMCPsCondition(fxt.Client, context.TODO(), []*machineconfigv1.MachineConfigPool{initialMcpInfo.mcpObj}, machineconfigv1.MachineConfigPoolUpdating)).To(Succeed())
// node reboot will be applied to the target node because it will belong now back to worker mcp which has specific KC/PP applied; NROP setting here will not affect here because the worker already has worker label; this is a sign we have an issue in NROP: one node group can init RTE on nodes from different MCPs! see bug from one customer back then
waitForMcpUpdate(fxt.Client, context.TODO(), []mcpInfo{initialMcpInfo}, MachineCount)
}()

Expand All @@ -242,9 +251,8 @@ var _ = Describe("[serial][disruptive] numaresources configuration management",
err := fxt.Client.Get(context.TODO(), client.ObjectKeyFromObject(initialNroOperObj), nroOperObj)
g.Expect(err).ToNot(HaveOccurred())

for i := range nroOperObj.Spec.NodeGroups {
nroOperObj.Spec.NodeGroups[i].MachineConfigPoolSelector.MatchLabels = mcp.Labels
}
// as filtered at the beginning of the test, it supports single node group
nroOperObj.Spec.NodeGroups[0].MachineConfigPoolSelector.MatchLabels = mcp.Labels
err = fxt.Client.Update(context.TODO(), nroOperObj)
g.Expect(err).ToNot(HaveOccurred())
}).WithTimeout(10 * time.Minute).WithPolling(30 * time.Second).Should(Succeed())
Expand All @@ -267,14 +275,20 @@ var _ = Describe("[serial][disruptive] numaresources configuration management",
g.Expect(err).ToNot(HaveOccurred())
}).WithTimeout(10 * time.Minute).WithPolling(30 * time.Second).Should(Succeed())

By("waiting for mcps to start updating")
// this will trigger mcp update only for the initial mcps because the mcp-test nodes are still labeled
// with the old labels, so worker mcp will switch back to the NROP mc
waitForMcpUpdate(fxt.Client, context.TODO(), []mcpInfo{initialMcpInfo}, MachineConfig)
if customPolicySupportEnabled {
// this will trigger mcp update only for the initial mcps because the mcp-test nodes are still labeled
// with the old labels, so worker mcp will switch back to the NROP mc
By(fmt.Sprintf("waiting for mcp %q to start updating", initialMcpInfo.mcpObj.Name))
Expect(deploy.WaitForMCPsCondition(fxt.Client, context.TODO(), []*machineconfigv1.MachineConfigPool{initialMcpInfo.mcpObj}, machineconfigv1.MachineConfigPoolUpdating)).To(Succeed())
By(fmt.Sprintf("waiting for mcp %q to complete update", initialMcpInfo.mcpObj.Name))
waitForMcpUpdate(fxt.Client, context.TODO(), []mcpInfo{initialMcpInfo}, MachineConfig)
}
}() //end of defer

By("waiting for the mcps to update")
// on old mcp because the ds will no longer include the worker node that is not labeled with mcp-test, so returning to MC without NROP settings
By(fmt.Sprintf("waiting for mcp %q to start updating", initialMcpInfo.mcpObj.Name))
Expect(deploy.WaitForMCPsCondition(fxt.Client, context.TODO(), []*machineconfigv1.MachineConfigPool{initialMcpInfo.mcpObj}, machineconfigv1.MachineConfigPoolUpdating)).To(Succeed())
By(fmt.Sprintf("waiting for mcp %q to complete update", initialMcpInfo.mcpObj.Name))
waitForMcpUpdate(fxt.Client, context.TODO(), []mcpInfo{initialMcpInfo}, MachineConfig)

By(fmt.Sprintf("Verify RTE daemonsets have the updated node selector matching to the new mcp %q", mcp.Name))
Expand Down Expand Up @@ -1130,7 +1144,6 @@ var _ = Describe("[serial][disruptive] numaresources configuration management",
Context("[ngpoolname] node group with PoolName support", Label("ngpoolname"), Label("feature:ngpoolname"), func() {
initialOperObj := &nropv1.NUMAResourcesOperator{}
nroKey := objects.NROObjectKey()

It("[tier2] should not allow configuring PoolName and MCP selector on same node group", Label("tier2"), func(ctx context.Context) {
Expect(fxt.Client.Get(ctx, nroKey, initialOperObj)).To(Succeed(), "cannot get %q in the cluster", nroKey.String())

Expand Down Expand Up @@ -1510,3 +1523,19 @@ func getLabelRoleWorker() string {
func getLabelRoleMCPTest() string {
return fmt.Sprintf("%s/%s", depnodes.LabelRole, roleMCPTest)
}

func isCustomPolicySupportEnabled(nro *nropv1.NUMAResourcesOperator) bool {
GinkgoHelper()

const minCustomSupportingVString = "4.18"
minCustomSupportingVersion, err := platform.ParseVersion(minCustomSupportingVString)
Expect(err).NotTo(HaveOccurred(), "failed to parse version string %q", minCustomSupportingVString)

customSupportAvailable, err := configuration.PlatVersion.AtLeast(minCustomSupportingVersion)
Expect(err).NotTo(HaveOccurred(), "failed to compare versions: %v vs %v", configuration.PlatVersion, minCustomSupportingVersion)

if !customSupportAvailable { // < 4.18
return true
}
return inthelper.IsCustomPolicyEnabled(nro)
}

0 comments on commit 52cb65e

Please sign in to comment.