Skip to content

Commit

Permalink
fix: taint master node in leave host test
Browse files Browse the repository at this point in the history
Signed-off-by: BruceAko <chongzhi@hust.edu.cn>
  • Loading branch information
BruceAko committed Oct 16, 2024
1 parent 8704cd0 commit 927e11d
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 6 deletions.
1 change: 1 addition & 0 deletions scheduler/service/service_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -789,6 +789,7 @@ func (v *V2) DeleteHost(ctx context.Context, req *schedulerv2.DeleteHostRequest)

// Leave peers in host.
host.LeavePeers()
v.resource.HostManager().Delete(req.GetHostId())
return nil
}

Expand Down
4 changes: 4 additions & 0 deletions scheduler/service/service_v2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1011,6 +1011,8 @@ func TestServiceV2_DeleteHost(t *testing.T) {
gomock.InOrder(
mr.HostManager().Return(hostManager).Times(1),
mh.Load(gomock.Any()).Return(host, true).Times(1),
mr.HostManager().Return(hostManager).Times(1),
mh.Delete(gomock.Any()).Times(1),
)
},
expect: func(t *testing.T, peer *resource.Peer, err error) {
Expand All @@ -1026,6 +1028,8 @@ func TestServiceV2_DeleteHost(t *testing.T) {
gomock.InOrder(
mr.HostManager().Return(hostManager).Times(1),
mh.Load(gomock.Any()).Return(host, true).Times(1),
mr.HostManager().Return(hostManager).Times(1),
mh.Delete(gomock.Any()).Times(1),
)
},
expect: func(t *testing.T, peer *resource.Peer, err error) {
Expand Down
42 changes: 36 additions & 6 deletions test/e2e/v2/leave_host_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,46 +34,76 @@ import (
// "Clients Leaving" e2e suite: compares the host count reported by the
// scheduler with the configured seed-client + client replica count, before and
// after the client pod on the master node is deleted. The "normally" context
// deletes the pod with a grace period; the "abnormally" context force-deletes it.
//
// NOTE(review): this listing appears to show removed and added diff lines
// together (e.g. `out, err :=` is declared twice in one scope and there are
// back-to-back sleep/expect pairs with different expectations) — confirm the
// final statements against the actual file before relying on it.
var _ = Describe("Clients Leaving", func() {
Context("normally", func() {
It("number of hosts should be ok", Label("host", "leave"), func() {
// create scheduler gRPC client on ":8002" using insecure transport credentials
grpcCredentials := insecure.NewCredentials()
schedulerClient, err := schedulerclient.GetV2ByAddr(context.Background(), ":8002", grpc.WithTransportCredentials(grpcCredentials))
Expect(err).NotTo(HaveOccurred())

// expected host count is the sum of seed client and client replicas
hostCount := util.Servers[util.SeedClientServerName].Replicas + util.Servers[util.ClientServerName].Replicas
time.Sleep(10 * time.Minute)
Expect(getHostCountFromScheduler(schedulerClient)).To(Equal(hostCount))

// get client pod name in master node
podName, err := util.GetClientPodNameInMaster()
Expect(err).NotTo(HaveOccurred())

out, err := util.KubeCtlCommand("-n", util.DragonflyNamespace, "delete", "pod", podName).CombinedOutput()
// taint master node with master:NoSchedule
// (presumably so the deleted client pod is not rescheduled onto it — TODO confirm)
out, err := util.KubeCtlCommand("-n", util.DragonflyNamespace, "taint", "nodes", "kind-control-plane", "master:NoSchedule").CombinedOutput()
fmt.Println(string(out))
Expect(err).NotTo(HaveOccurred())

// delete client pod in master with a 30s grace period, client will leave normally
out, err = util.KubeCtlCommand("-n", util.DragonflyNamespace, "delete", "pod", podName, "--grace-period=30").CombinedOutput()
fmt.Println(string(out))
Expect(err).NotTo(HaveOccurred())

// wait for the client to leave gracefully
time.Sleep(1 * time.Minute)
Expect(getHostCountFromScheduler(schedulerClient)).To(Equal(hostCount))
time.Sleep(30 * time.Second)
Expect(getHostCountFromScheduler(schedulerClient)).To(Equal(hostCount - 1))

// remove taint in master node (the trailing "-" on the taint spec removes it)
out, err = util.KubeCtlCommand("-n", util.DragonflyNamespace, "taint", "nodes", "kind-control-plane", "master:NoSchedule-").CombinedOutput()
fmt.Println(string(out))
Expect(err).NotTo(HaveOccurred())
})
})

Context("abnormally", func() {
It("number of hosts should be ok", Label("host", "leave"), func() {
// create scheduler gRPC client on ":8002" using insecure transport credentials
grpcCredentials := insecure.NewCredentials()
schedulerClient, err := schedulerclient.GetV2ByAddr(context.Background(), ":8002", grpc.WithTransportCredentials(grpcCredentials))
Expect(err).NotTo(HaveOccurred())

// expected host count is the sum of seed client and client replicas
hostCount := util.Servers[util.SeedClientServerName].Replicas + util.Servers[util.ClientServerName].Replicas
time.Sleep(30 * time.Second)
Expect(getHostCountFromScheduler(schedulerClient)).To(Equal(hostCount))

// get client pod name in master node
podName, err := util.GetClientPodNameInMaster()
Expect(err).NotTo(HaveOccurred())

out, err := util.KubeCtlCommand("-n", util.DragonflyNamespace, "delete", "pod", podName, "--force", "--grace-period=0").CombinedOutput()
// taint master node with master:NoSchedule
// (presumably so the deleted client pod is not rescheduled onto it — TODO confirm)
out, err := util.KubeCtlCommand("-n", util.DragonflyNamespace, "taint", "nodes", "kind-control-plane", "master:NoSchedule").CombinedOutput()
fmt.Println(string(out))
Expect(err).NotTo(HaveOccurred())

// force delete client pod in master (--force, zero grace period), client will leave abnormally
out, err = util.KubeCtlCommand("-n", util.DragonflyNamespace, "delete", "pod", podName, "--force", "--grace-period=0").CombinedOutput()
fmt.Println(string(out))
Expect(err).NotTo(HaveOccurred())

// wait for host gc
// NOTE(review): the wait presumably has to cover the scheduler's host GC interval — verify against the chart config
time.Sleep(6 * time.Minute)
Expect(getHostCountFromScheduler(schedulerClient)).To(Equal(hostCount))
time.Sleep(2 * time.Minute)
Expect(getHostCountFromScheduler(schedulerClient)).To(Equal(hostCount - 1))

// remove taint in master node (the trailing "-" on the taint spec removes it)
out, err = util.KubeCtlCommand("-n", util.DragonflyNamespace, "taint", "nodes", "kind-control-plane", "master:NoSchedule-").CombinedOutput()
fmt.Println(string(out))
Expect(err).NotTo(HaveOccurred())
time.Sleep(30 * time.Second)
})
})
})
Expand Down
3 changes: 3 additions & 0 deletions test/testdata/charts/config-v2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ scheduler:
enableHost: true
config:
verbose: true
scheduler:
gc:
hostGCInterval: 2m

seedClient:
enable: true
Expand Down

0 comments on commit 927e11d

Please sign in to comment.