From 394f68d05408bf32c786081d38b9fadfdec059a4 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Thu, 15 Aug 2024 15:58:01 +0200 Subject: [PATCH] test/e2e: on test failures dump server stack trace To debug #22246 Signed-off-by: Paul Holzinger --- test/e2e/common_test.go | 29 ++++++++++++++++++++++++++-- test/e2e/libpod_suite_remote_test.go | 6 +++++- test/e2e/libpod_suite_test.go | 3 +++ 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/test/e2e/common_test.go b/test/e2e/common_test.go index 61fed90b64..9a0a107dd1 100644 --- a/test/e2e/common_test.go +++ b/test/e2e/common_test.go @@ -720,11 +720,36 @@ func (p *PodmanTestIntegration) Cleanup() { // first stop everything, rm -fa is unreliable // https://github.com/containers/podman/issues/18180 stop := p.Podman([]string{"stop", "--all", "-t", "0"}) - stop.WaitWithDefaultTimeout() + Eventually(stop, DefaultWaitTimeout).Should(Exit(), func() string { + p.stopRemoteService(syscall.SIGABRT) + + // Note eventually does not kill the command as such the command is leaked forever without killing it + // Also let's use SIGABRT to create a go stack trace so in case there is a deadlock we see it. + stop.Signal(syscall.SIGABRT) + // Give some time to let the command print the output so it is not printed much later + // in the log at the wrong place. + time.Sleep(1 * time.Second) + + // As the output is logged by default there is no need to dump it here. + return fmt.Sprintf("command timed out after %ds: %v", + DefaultWaitTimeout, stop.Command.Args) + }) // Remove all pods... podrm := p.Podman([]string{"pod", "rm", "-fa", "-t", "0"}) - podrm.WaitWithDefaultTimeout() + Eventually(podrm, DefaultWaitTimeout).Should(Exit(), func() string { + p.stopRemoteService(syscall.SIGABRT) + + // Note eventually does not kill the command as such the command is leaked forever without killing it + // Also let's use SIGABRT to create a go stack trace so in case there is a deadlock we see it. 
+ podrm.Signal(syscall.SIGABRT) + // Give some time to let the command print the output so it is not printed much later + // in the log at the wrong place. + time.Sleep(1 * time.Second) + // As the output is logged by default there is no need to dump it here. + return fmt.Sprintf("command timed out after %ds: %v", + DefaultWaitTimeout, podrm.Command.Args) + }) // ...and containers rmall := p.Podman([]string{"rm", "-fa", "-t", "0"}) diff --git a/test/e2e/libpod_suite_remote_test.go b/test/e2e/libpod_suite_remote_test.go index ceaa5851ea..301665d795 100644 --- a/test/e2e/libpod_suite_remote_test.go +++ b/test/e2e/libpod_suite_remote_test.go @@ -103,7 +103,11 @@ func (p *PodmanTestIntegration) StartRemoteService() { } func (p *PodmanTestIntegration) StopRemoteService() { - if err := p.RemoteSession.Signal(syscall.SIGTERM); err != nil { + p.stopRemoteService(syscall.SIGTERM) +} + +func (p *PodmanTestIntegration) stopRemoteService(signal syscall.Signal) { + if err := p.RemoteSession.Signal(signal); err != nil { GinkgoWriter.Printf("unable to clean up service %d, %v\n", p.RemoteSession.Pid, err) } if _, err := p.RemoteSession.Wait(); err != nil { diff --git a/test/e2e/libpod_suite_test.go b/test/e2e/libpod_suite_test.go index 3b11b3952b..ea066dd5d9 100644 --- a/test/e2e/libpod_suite_test.go +++ b/test/e2e/libpod_suite_test.go @@ -5,6 +5,7 @@ package integration import ( "os" "path/filepath" + "syscall" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -74,6 +75,8 @@ func (p *PodmanTestIntegration) RestoreArtifact(image string) error { func (p *PodmanTestIntegration) StopRemoteService() {} +func (p *PodmanTestIntegration) stopRemoteService(signal syscall.Signal) {} + // We don't support running API service when local func (p *PodmanTestIntegration) StartRemoteService() { }