From 5997a0180f13725ad1f6769e8263c426b15d2d96 Mon Sep 17 00:00:00 2001 From: Rob Leidle Date: Fri, 15 Jun 2018 16:04:29 -0700 Subject: [PATCH] Improve machine/controller.Reconcile to intelligently block deleting a node associated with the controller machine. This will enable external machine controllers to delete any machine/node, including masters. (#350) --- cloud/google/config/configtemplate.go | 4 + .../google/provider-components.yaml.template | 4 + .../vsphere/provider-components.yaml.template | 5 + pkg/controller/config/configuration.go | 3 - pkg/controller/machine/controller.go | 37 +++++++- pkg/controller/machine/node_test.go | 2 +- pkg/controller/machine/reconcile_test.go | 92 ++++++++++++++++--- 7 files changed, 125 insertions(+), 22 deletions(-) diff --git a/cloud/google/config/configtemplate.go b/cloud/google/config/configtemplate.go index f971d09714a7..001cea7a4e02 100644 --- a/cloud/google/config/configtemplate.go +++ b/cloud/google/config/configtemplate.go @@ -146,6 +146,10 @@ spec: env: - name: GOOGLE_APPLICATION_CREDENTIALS value: /etc/credentials/service-account.json + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName command: - "./gce-controller" args: diff --git a/clusterctl/examples/google/provider-components.yaml.template b/clusterctl/examples/google/provider-components.yaml.template index 666022c20c54..4eb8df9acb85 100644 --- a/clusterctl/examples/google/provider-components.yaml.template +++ b/clusterctl/examples/google/provider-components.yaml.template @@ -61,6 +61,10 @@ spec: env: - name: GOOGLE_APPLICATION_CREDENTIALS value: /etc/credentials/service-account.json + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName command: - "./gce-controller" args: diff --git a/clusterctl/examples/vsphere/provider-components.yaml.template b/clusterctl/examples/vsphere/provider-components.yaml.template index c8e3f8cf3eba..d2e483219091 100644 --- a/clusterctl/examples/vsphere/provider-components.yaml.template +++ b/clusterctl/examples/vsphere/provider-components.yaml.template @@ -61,6 +61,11 @@ spec: subPath: vsphere_tmp.pub - name: named-machines mountPath: /etc/named-machines + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName command: - "./vsphere-machine-controller" args: diff --git a/pkg/controller/config/configuration.go b/pkg/controller/config/configuration.go index 6e236e469533..dc4bef508e13 100644 --- a/pkg/controller/config/configuration.go +++ b/pkg/controller/config/configuration.go @@ -56,7 +56,6 @@ type LeaderElectionConfiguration struct { type Configuration struct { Kubeconfig string - InCluster bool WorkerCount int leaderElectionConfig *LeaderElectionConfiguration } @@ -71,7 +70,6 @@ const ( ) var ControllerConfig = Configuration{ - InCluster: true, WorkerCount: 5, // Default 5 worker. leaderElectionConfig: &LeaderElectionConfiguration{ LeaderElect: false, @@ -88,7 +86,6 @@ func GetLeaderElectionConfig() *LeaderElectionConfiguration { func (c *Configuration) AddFlags(fs *pflag.FlagSet) { fs.StringVar(&c.Kubeconfig, "kubeconfig", c.Kubeconfig, "Path to kubeconfig file with authorization and master location information.") - fs.BoolVar(&c.InCluster, "incluster", c.InCluster, "Controller will be running inside the cluster.") fs.IntVar(&c.WorkerCount, "workers", c.WorkerCount, "The number of workers for controller.") AddLeaderElectionFlags(c.leaderElectionConfig, fs) diff --git a/pkg/controller/machine/controller.go b/pkg/controller/machine/controller.go index 5e7859723e74..5172ab84fa91 100644 --- a/pkg/controller/machine/controller.go +++ b/pkg/controller/machine/controller.go @@ -18,21 +18,22 @@ package machine import ( "errors" + "os" "github.com/golang/glog" "github.com/kubernetes-incubator/apiserver-builder/pkg/builders" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" - clusterv1 "sigs.k8s.io/cluster-api/pkg/apis/cluster/v1alpha1" "sigs.k8s.io/cluster-api/pkg/client/clientset_generated/clientset" listers "sigs.k8s.io/cluster-api/pkg/client/listers_generated/cluster/v1alpha1" - cfg "sigs.k8s.io/cluster-api/pkg/controller/config" "sigs.k8s.io/cluster-api/pkg/controller/sharedinformers" "sigs.k8s.io/cluster-api/util" ) +const NodeNameEnvVar = "NODE_NAME" + // +controller:group=cluster,version=v1alpha1,kind=Machine,resource=machines type MachineControllerImpl struct { builders.DefaultControllerFns @@ -46,6 +47,9 @@ type MachineControllerImpl struct { clientSet clientset.Interface linkedNodes map[string]bool cachedReadiness map[string]bool + + // nodeName is the name of the node on which the machine controller is running, if not present, it is loaded from NODE_NAME. + nodeName string } // Init initializes the controller and is called by the generated code @@ -54,6 +58,12 @@ func (c *MachineControllerImpl) Init(arguments sharedinformers.ControllerInitArg // Use the lister for indexing machines labels c.lister = arguments.GetSharedInformers().Factory.Cluster().V1alpha1().Machines().Lister() + if c.nodeName == "" { + c.nodeName = os.Getenv(NodeNameEnvVar) + if c.nodeName == "" { + glog.Warningf("environment variable %v is not set, this controller will not protect against deleting its own machine", NodeNameEnvVar) + } + } clientset, err := clientset.NewForConfig(arguments.GetRestConfig()) if err != nil { glog.Fatalf("error creating machine client: %v", err) @@ -84,9 +94,8 @@ func (c *MachineControllerImpl) Reconcile(machine *clusterv1.Machine) error { glog.Infof("reconciling machine object %v causes a no-op as there is no finalizer.", name) return nil } - // Master should not be deleted as part of reconcilation. - if cfg.ControllerConfig.InCluster && util.IsMaster(machine) { - glog.Infof("skipping reconciling master machine object %v", name) + if !c.isDeleteAllowed(machine) { + glog.Infof("Skipping reconciling of machine object %v", name) return nil } glog.Infof("reconciling machine object %v triggers delete.", name) @@ -172,3 +181,21 @@ func (c *MachineControllerImpl) getCluster(machine *clusterv1.Machine) (*cluster return nil, errors.New("multiple clusters defined") } } + +func (c *MachineControllerImpl) isDeleteAllowed(machine *clusterv1.Machine) bool { + if c.nodeName == "" || machine.Status.NodeRef == nil { + return true + } + if machine.Status.NodeRef.Name != c.nodeName { + return true + } + node, err := c.kubernetesClientSet.CoreV1().Nodes().Get(c.nodeName, metav1.GetOptions{}) + if err != nil { + glog.Infof("unable to determine if controller's node is associated with machine '%v', error getting node named '%v': %v", machine.Name, c.nodeName, err) + return true + } + // When the UID of the machine's node reference and this controller's actual node match then then the request is to + // delete the machine this machine-controller is running on. Return false to not allow machine controller to delete its + // own machine. + return node.UID != machine.Status.NodeRef.UID +} diff --git a/pkg/controller/machine/node_test.go b/pkg/controller/machine/node_test.go index 5aa614931057..6e5d56fbcc17 100644 --- a/pkg/controller/machine/node_test.go +++ b/pkg/controller/machine/node_test.go @@ -113,7 +113,7 @@ func TestReconcileNode(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - m := getMachine("foo", false, false, false) + m := getMachine("foo", nil, false, false) if test.nodeRefName != "" { m.Status.NodeRef = &corev1.ObjectReference{ Kind: "Node", diff --git a/pkg/controller/machine/reconcile_test.go b/pkg/controller/machine/reconcile_test.go index 1473586e3997..d340e325e036 100644 --- a/pkg/controller/machine/reconcile_test.go +++ b/pkg/controller/machine/reconcile_test.go @@ -20,10 +20,12 @@ import ( "testing" "time" + "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + corefake "k8s.io/client-go/kubernetes/fake" core "k8s.io/client-go/testing" - clustercommon "sigs.k8s.io/cluster-api/pkg/apis/cluster/common" "sigs.k8s.io/cluster-api/pkg/apis/cluster/v1alpha1" "sigs.k8s.io/cluster-api/pkg/apis/cluster/v1alpha1/testutil" "sigs.k8s.io/cluster-api/pkg/client/clientset_generated/clientset/fake" @@ -37,13 +39,13 @@ func TestMachineSetControllerReconcileHandler(t *testing.T) { instanceExists bool isDeleting bool withFinalizer bool - isMaster bool ignoreDeleteCallCount bool expectFinalizerRemoved bool numExpectedCreateCalls int64 numExpectedDeleteCalls int64 numExpectedUpdateCalls int64 numExpectedExistsCalls int64 + nodeRef *v1.ObjectReference }{ { name: "Create machine", @@ -93,18 +95,29 @@ func TestMachineSetControllerReconcileHandler(t *testing.T) { withFinalizer: false, }, { - name: "Delete machine, skip master", - objExists: true, - instanceExists: true, - isDeleting: true, - withFinalizer: true, - isMaster: true, + name: "Delete machine, controller on different node with same name, but different UID", + objExists: true, + instanceExists: true, + isDeleting: true, + withFinalizer: true, + numExpectedDeleteCalls: 1, + expectFinalizerRemoved: true, + nodeRef: &v1.ObjectReference{Name: "controller-node-name", UID: "another-uid"}, + }, + { + name: "Delete machine, controller on the same node", + objExists: true, + instanceExists: true, + isDeleting: true, + withFinalizer: true, + expectFinalizerRemoved: false, + nodeRef: &v1.ObjectReference{Name: "controller-node-name", UID: "controller-node-uid"}, }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - machineToTest := getMachine("bar", test.isDeleting, test.withFinalizer, test.isMaster) + machineToTest := getMachine("bar", test.nodeRef, test.isDeleting, test.withFinalizer) knownObjects := []runtime.Object{} if test.objExists { knownObjects = append(knownObjects, machineToTest) @@ -116,6 +129,14 @@ func TestMachineSetControllerReconcileHandler(t *testing.T) { machineUpdated = true return false, nil, nil }) + fakeKubernetesClient := corefake.NewSimpleClientset() + node := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "controller-node-name", + UID: "controller-node-uid", + }, + } + fakeKubernetesClient.CoreV1().Nodes().Create(&node) // When creating a new object, it should invoke the reconcile method. cluster := testutil.GetVanillaCluster() @@ -130,6 +151,8 @@ func TestMachineSetControllerReconcileHandler(t *testing.T) { target := &MachineControllerImpl{} target.actuator = actuator target.clientSet = fakeClient + target.kubernetesClientSet = fakeKubernetesClient + target.nodeName = "controller-node-name" var err error err = target.Reconcile(machineToTest) @@ -159,7 +182,50 @@ func TestMachineSetControllerReconcileHandler(t *testing.T) { } } -func getMachine(name string, isDeleting, hasFinalizer, isMaster bool) *v1alpha1.Machine { +func TestIsDeleteAllowed(t *testing.T) { + testCases := []struct { + name string + setControllerNodeName bool + nodeRef *v1.ObjectReference + expectedResult bool + }{ + {"empty controller node name should return true", false, nil, true}, + {"nil machine.Status.NodeRef should return true", true, nil, true}, + {"different node name should return true", true, &v1.ObjectReference{Name: "another-node", UID: "another-uid"}, true}, + {"same node name and different UID should return true", true, &v1.ObjectReference{Name: "controller-node-name", UID: "another-uid"}, true}, + {"same node name and same UID should return false", true, &v1.ObjectReference{Name: "controller-node-name", UID: "controller-node-uid"}, false}, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + controller, machine := createIsDeleteAllowedTestFixtures(t, tc.setControllerNodeName, "controller-node-name", "controller-node-uid", tc.nodeRef) + result := controller.isDeleteAllowed(machine) + if result != tc.expectedResult { + t.Errorf("result mismatch: got '%v', want '%v'", result, tc.expectedResult) + } + }) + } +} + +func createIsDeleteAllowedTestFixtures(t *testing.T, setControllerNodeNameVariable bool, controllerNodeName string, controllerNodeUid string, nodeRef *v1.ObjectReference) (*MachineControllerImpl, *v1alpha1.Machine) { + controller := &MachineControllerImpl{} + fakeKubernetesClient := corefake.NewSimpleClientset() + node := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: controllerNodeName, + UID: types.UID(controllerNodeUid), + }, + } + fakeKubernetesClient.CoreV1().Nodes().Create(&node) + controller.kubernetesClientSet = fakeKubernetesClient + controller.kubernetesClientSet = fakeKubernetesClient + if setControllerNodeNameVariable { + controller.nodeName = controllerNodeName + } + machine := getMachine("bar", nodeRef, true, false) + return controller, machine +} + +func getMachine(name string, nodeRef *v1.ObjectReference, isDeleting, hasFinalizer bool) *v1alpha1.Machine { m := &v1alpha1.Machine{ TypeMeta: metav1.TypeMeta{ Kind: "Machine", @@ -169,6 +235,9 @@ func getMachine(name string, isDeleting, hasFinalizer, isMaster bool) *v1alpha1. Name: name, Namespace: metav1.NamespaceDefault, }, + Status: v1alpha1.MachineStatus{ + NodeRef: nodeRef, + }, } if isDeleting { now := metav1.NewTime(time.Now()) @@ -177,9 +246,6 @@ func getMachine(name string, isDeleting, hasFinalizer, isMaster bool) *v1alpha1. if hasFinalizer { m.ObjectMeta.SetFinalizers([]string{v1alpha1.MachineFinalizer}) } - if isMaster { - m.Spec.Roles = []clustercommon.MachineRole{clustercommon.MasterRole} - } return m }