From 6bc52c9c5ecbde519ef160c4a26bc4d79ff6c355 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Thu, 25 May 2023 11:44:34 +0200 Subject: [PATCH] pkg/rootless: correctly handle proxy signals on reexec There are quite a lot of places in podman were we have some signal handlers, most notably libpod/shutdown/handler.go. However when we rexec we do not want any of that and just send all signals we get down to the child obviously. So before we install our signal handler we must first reset all others with signal.Reset(). Also while at it fix a problem were the joinUserAndMountNS() code path would not forward signals at all. This code path is used when you have running containers but the pause process was killed. Fixes #16091 Given that signal handlers run in different goroutines parallel it would explain why it flakes sometimes in CI. However to my understanding this flake can only happen when the pause process is dead before we run the podman command. So the question still is what kills the pause process? Signed-off-by: Paul Holzinger --- pkg/rootless/rootless_linux.go | 24 ++++++------ test/system/032-sig-proxy.bats | 49 +----------------------- test/system/550-pause-process.bats | 61 ++++++++++++++++++++++++++++++ test/system/helpers.sig-proxy.bash | 50 ++++++++++++++++++++++++ 4 files changed, 126 insertions(+), 58 deletions(-) create mode 100644 test/system/helpers.sig-proxy.bash diff --git a/pkg/rootless/rootless_linux.go b/pkg/rootless/rootless_linux.go index 1937a1330d..323daec723 100644 --- a/pkg/rootless/rootless_linux.go +++ b/pkg/rootless/rootless_linux.go @@ -184,12 +184,7 @@ func joinUserAndMountNS(pid uint, pausePid string) (bool, int, error) { return false, -1, fmt.Errorf("cannot re-exec process to join the existing user namespace") } - ret := C.reexec_in_user_namespace_wait(pidC, 0) - if ret < 0 { - return false, -1, errors.New("waiting for the re-exec process") - } - - return true, int(ret), nil + return waitAndProxySignalsToChild(pidC) } // GetConfiguredMappings returns the additional IDs configured for the current user. @@ -395,7 +390,6 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ boo if ret < 0 { return false, -1, errors.New("waiting for the re-exec process") } - return true, 0, nil } @@ -418,6 +412,10 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ boo return false, -1, errors.New("setting up the process") } + return waitAndProxySignalsToChild(pidC) +} + +func waitAndProxySignalsToChild(pid C.int) (bool, int, error) { signals := []os.Signal{} for sig := 0; sig < numSig; sig++ { if sig == int(unix.SIGTSTP) { @@ -426,24 +424,28 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ boo signals = append(signals, unix.Signal(sig)) } + // Disable all existing signal handlers, from now forward everything to the child and let + // it deal with it. All we do is to wait and propagate the exit code from the child to our parent. + gosignal.Reset() c := make(chan os.Signal, len(signals)) gosignal.Notify(c, signals...) - defer gosignal.Reset() go func() { for s := range c { if s == unix.SIGCHLD || s == unix.SIGPIPE { continue } - if err := unix.Kill(int(pidC), s.(unix.Signal)); err != nil { + if err := unix.Kill(int(pid), s.(unix.Signal)); err != nil { if err != unix.ESRCH { - logrus.Errorf("Failed to propagate signal to child process %d: %v", int(pidC), err) + logrus.Errorf("Failed to propagate signal to child process %d: %v", int(pid), err) } } } }() - ret := C.reexec_in_user_namespace_wait(pidC, 0) + ret := C.reexec_in_user_namespace_wait(pid, 0) + // child exited reset our signal proxy handler + gosignal.Reset() if ret < 0 { return false, -1, errors.New("waiting for the re-exec process") } diff --git a/test/system/032-sig-proxy.bats b/test/system/032-sig-proxy.bats index 422f0cae6b..fca2d44d17 100644 --- a/test/system/032-sig-proxy.bats +++ b/test/system/032-sig-proxy.bats @@ -1,54 +1,9 @@ #!/usr/bin/env bats load helpers +load helpers.sig-proxy -# Command to run in each of the tests. -SLEEPLOOP='trap "echo BYE;exit 0" INT;echo READY;while :;do sleep 0.1;done' - -# Main test code: wait for container to exist and be ready, send it a -# signal, wait for container to acknowledge and exit. -function _test_sigproxy() { - local cname=$1 - local kidpid=$2 - - # Wait for container to appear - local timeout=10 - while :;do - sleep 0.5 - run_podman '?' container exists $cname - if [[ $status -eq 0 ]]; then - break - fi - timeout=$((timeout - 1)) - if [[ $timeout -eq 0 ]]; then - run_podman ps -a - die "Timed out waiting for container $cname to start" - fi - done - - # Now that container exists, wait for it to declare itself READY - wait_for_ready $cname - - # Signal, and wait for container to exit - kill -INT $kidpid - timeout=20 - while :;do - sleep 0.5 - run_podman logs $cname - if [[ "$output" =~ BYE ]]; then - break - fi - timeout=$((timeout - 1)) - if [[ $timeout -eq 0 ]]; then - run_podman ps -a - die "Timed out waiting for BYE from container" - fi - done - - run_podman rm -f -t0 $cname -} - -# Each of the tests below does some setup, then invokes the above helper. +# Each of the tests below does some setup, then invokes the helper from helpers.sig-proxy.bash. @test "podman sigproxy test: run" { # We're forced to use $PODMAN because run_podman cannot be backgrounded diff --git a/test/system/550-pause-process.bats b/test/system/550-pause-process.bats index 733ad81a50..282494b630 100644 --- a/test/system/550-pause-process.bats +++ b/test/system/550-pause-process.bats @@ -4,6 +4,7 @@ # load helpers +load helpers.sig-proxy function _check_pause_process() { pause_pid= @@ -77,3 +78,63 @@ function _check_pause_process() { assert "$output" == "$tmpdir_userns" \ "podman with tmpdir2 should use the same userns created using a tmpdir" } + +# /~https://github.com/containers/podman/issues/16091 +@test "rootless reexec with sig-proxy" { + skip_if_not_rootless "pause process is only used as rootless" + skip_if_remote "system migrate not supported via remote" + + # Use podman system migrate to stop the currently running pause process + run_podman system migrate + + # We're forced to use $PODMAN because run_podman cannot be backgrounded + $PODMAN run -i --name c_run $IMAGE sh -c "$SLEEPLOOP" & + local kidpid=$! + + _test_sigproxy c_run $kidpid + + # our container exits 0 so podman should too + wait $kidpid || die "podman run exited $? instead of zero" +} + + +@test "rootless reexec with sig-proxy when rejoining userns from container" { + skip_if_not_rootless "pause process is only used as rootless" + skip_if_remote "unshare not supported via remote" + + # System tests can execute in contexts without XDG; in those, we have to + # skip the pause-pid-file checks. + if [[ -z "$XDG_RUNTIME_DIR" ]]; then + skip "\$XDG_RUNTIME_DIR not defined" + fi + local pause_pid_file="$XDG_RUNTIME_DIR/libpod/tmp/pause.pid" + + # First let's run a container in the background to keep the userns active + local cname1=c1_$(random_string) + run_podman run -d --name $cname1 $IMAGE top + + run_podman unshare readlink /proc/self/ns/user + userns="$output" + + # check for pause pid and then kill it + _check_pause_process + kill -9 $pause_pid + + # Now again directly start podman run and make sure it can forward signals + # We're forced to use $PODMAN because run_podman cannot be backgrounded + local cname2=c2_$(random_string) + $PODMAN run -i --name $cname2 $IMAGE sh -c "$SLEEPLOOP" & + local kidpid=$! + + _test_sigproxy $cname2 $kidpid + + # our container exits 0 so podman should too + wait $kidpid || die "podman run exited $? instead of zero" + + # Check that podman joined the same userns as it tries to use the one + # from the running podman process in the background. + run_podman unshare readlink /proc/self/ns/user + assert "$output" == "$userns" "userns before/after kill is the same" + + run_podman rm -f -t0 $cname1 +} diff --git a/test/system/helpers.sig-proxy.bash b/test/system/helpers.sig-proxy.bash new file mode 100644 index 0000000000..f5e9784a8c --- /dev/null +++ b/test/system/helpers.sig-proxy.bash @@ -0,0 +1,50 @@ +# -*- bash -*- +# +# BATS helpers for sig-proxy functionality +# + +# Command to run in each of the tests. +SLEEPLOOP='trap "echo BYE;exit 0" INT;echo READY;while :;do sleep 0.1;done' + +# Main test code: wait for container to exist and be ready, send it a +# signal, wait for container to acknowledge and exit. +function _test_sigproxy() { + local cname=$1 + local kidpid=$2 + + # Wait for container to appear + local timeout=10 + while :;do + sleep 0.5 + run_podman '?' container exists $cname + if [[ $status -eq 0 ]]; then + break + fi + timeout=$((timeout - 1)) + if [[ $timeout -eq 0 ]]; then + run_podman ps -a + die "Timed out waiting for container $cname to start" + fi + done + + # Now that container exists, wait for it to declare itself READY + wait_for_ready $cname + + # Signal, and wait for container to exit + kill -INT $kidpid + timeout=20 + while :;do + sleep 0.5 + run_podman logs $cname + if [[ "$output" =~ BYE ]]; then + break + fi + timeout=$((timeout - 1)) + if [[ $timeout -eq 0 ]]; then + run_podman ps -a + die "Timed out waiting for BYE from container" + fi + done + + run_podman rm -f -t0 $cname +}