Skip to content

Commit

Permalink
libpod: wait for healthy on main thread
Browse files Browse the repository at this point in the history
wait for the healthy status on the thread where the container lock is
held.  Otherwise, if it is performed from a go routine, a different
thread is used (since the runtime.LockOSThread() call doesn't have any
effect), causing pthread_mutex_unlock() to fail with EPERM.

Closes: containers#22651

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
  • Loading branch information
giuseppe committed May 14, 2024
1 parent 87bd775 commit b06c58b
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 19 deletions.
10 changes: 8 additions & 2 deletions libpod/container_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,10 @@ func (c *Container) Start(ctx context.Context, recursive bool) (finalErr error)
}

// Start the container
return c.start(ctx)
if err := c.start(); err != nil {
return err
}
return c.waitForHealthy(ctx)
}

// Update updates the given container.
Expand Down Expand Up @@ -194,6 +197,9 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachSt
opts.Start = true
opts.Started = startedChan

// attach and start the container on a different thread. waitForHealthy must
// be done later, as it requires to run on the same thread that holds the lock
// for the container.
if err := c.ociRuntime.Attach(c, opts); err != nil {
attachChan <- err
}
Expand All @@ -207,7 +213,7 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachSt
c.newContainerEvent(events.Attach)
}

return attachChan, nil
return attachChan, c.waitForHealthy(ctx)
}

// RestartWithTimeout restarts a running container and takes a given timeout in uint
Expand Down
32 changes: 23 additions & 9 deletions libpod/container_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,10 +330,10 @@ func (c *Container) handleRestartPolicy(ctx context.Context) (_ bool, retErr err
return false, err
}
}
if err := c.start(ctx); err != nil {
if err := c.start(); err != nil {
return false, err
}
return true, nil
return true, c.waitForHealthy(ctx)
}

// Ensure that the container is in a specific state or state.
Expand Down Expand Up @@ -1208,7 +1208,7 @@ func (c *Container) reinit(ctx context.Context, retainRetries bool) error {

// Initialize (if necessary) and start a container
// Performs all necessary steps to start a container that is not running
// Does not lock or check validity
// Does not lock or check validity, requires to run on the same thread that holds the lock for the container.
func (c *Container) initAndStart(ctx context.Context) (retErr error) {
// If we are ContainerStateUnknown, throw an error
if c.state.State == define.ContainerStateUnknown {
Expand Down Expand Up @@ -1253,11 +1253,14 @@ func (c *Container) initAndStart(ctx context.Context) (retErr error) {
}

// Now start the container
return c.start(ctx)
if err := c.start(); err != nil {
return err
}
return c.waitForHealthy(ctx)
}

// Internal, non-locking function to start a container
func (c *Container) start(ctx context.Context) error {
func (c *Container) start() error {
if c.config.Spec.Process != nil {
logrus.Debugf("Starting container %s with command %v", c.ID(), c.config.Spec.Process.Args)
}
Expand Down Expand Up @@ -1298,10 +1301,14 @@ func (c *Container) start(ctx context.Context) error {

c.newContainerEvent(events.Start)

if err := c.save(); err != nil {
return err
}
return c.save()
}

// waitForHealthy, when sdNotifyMode == SdNotifyModeHealthy, waits up to the DefaultWaitInterval
// for the container to get into the healthy state and reports the status to the notify socket.
// The function unlocks the container lock, so it must be called from the same thread that locks
// the container.
func (c *Container) waitForHealthy(ctx context.Context) error {
if c.config.SdNotifyMode != define.SdNotifyModeHealthy {
return nil
}
Expand All @@ -1315,6 +1322,9 @@ func (c *Container) start(ctx context.Context) error {
}

if _, err := c.WaitForConditionWithInterval(ctx, DefaultWaitInterval, define.HealthCheckHealthy); err != nil {
if errors.Is(err, define.ErrNoSuchCtr) {
return nil
}
return err
}

Expand Down Expand Up @@ -1566,6 +1576,7 @@ func (c *Container) unpause() error {
}

// Internal, non-locking function to restart a container
// It requires to run on the same thread that holds the lock.
func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retErr error) {
if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateRunning, define.ContainerStateStopped, define.ContainerStateExited) {
return fmt.Errorf("unable to restart a container in a paused or unknown state: %w", define.ErrCtrStateInvalid)
Expand Down Expand Up @@ -1613,7 +1624,10 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retEr
return err
}
}
return c.start(ctx)
if err := c.start(); err != nil {
return err
}
return c.waitForHealthy(ctx)
}

// mountStorage sets up the container's root filesystem
Expand Down
4 changes: 2 additions & 2 deletions libpod/oci_conmon_attach_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
package libpod

import (
"context"
"errors"
"fmt"
"io"
Expand Down Expand Up @@ -32,6 +31,7 @@ const (
// Attach to the given container.
// Does not check if state is appropriate.
// started is only required if startContainer is true.
// It does not wait for the container to be healthy, it is the caller responsibility to do so.
func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
passthrough := c.LogDriver() == define.PassthroughLogging || c.LogDriver() == define.PassthroughTTYLogging

Expand Down Expand Up @@ -86,7 +86,7 @@ func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
// If starting was requested, start the container and notify when that's
// done.
if params.Start {
if err := c.start(context.TODO()); err != nil {
if err := c.start(); err != nil {
return err
}
params.Started <- true
Expand Down
14 changes: 8 additions & 6 deletions pkg/domain/infra/abi/containers.go
Original file line number Diff line number Diff line change
Expand Up @@ -995,7 +995,10 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri
return reports, fmt.Errorf("unable to start container %s: %w", ctr.ID(), err)
}

exitCode = ic.GetContainerExitCode(ctx, ctr.Container)
exitCode, err2 := ic.ContainerWaitForExitCode(ctx, ctr.Container)
if err2 != nil {
logrus.Errorf("Waiting for container %s: %v", ctr.ID(), err2)
}
reports = append(reports, &entities.ContainerStartReport{
Id: ctr.ID(),
RawInput: ctr.rawInput,
Expand Down Expand Up @@ -1189,7 +1192,7 @@ func (ic *ContainerEngine) ContainerRun(ctx context.Context, opts entities.Conta
report.ExitCode = define.ExitCode(err)
return &report, err
}
report.ExitCode = ic.GetContainerExitCode(ctx, ctr)
report.ExitCode, _ = ic.ContainerWaitForExitCode(ctx, ctr)
if opts.Rm && !ctr.ShouldRestart(ctx) {
if err := removeContainer(ctr, false); err != nil {
if errors.Is(err, define.ErrNoSuchCtr) ||
Expand All @@ -1203,14 +1206,13 @@ func (ic *ContainerEngine) ContainerRun(ctx context.Context, opts entities.Conta
return &report, nil
}

func (ic *ContainerEngine) GetContainerExitCode(ctx context.Context, ctr *libpod.Container) int {
func (ic *ContainerEngine) ContainerWaitForExitCode(ctx context.Context, ctr *libpod.Container) (int, error) {
exitCode, err := ctr.Wait(ctx)
if err != nil {
logrus.Errorf("Waiting for container %s: %v", ctr.ID(), err)
intExitCode := int(define.ExecErrorCodeNotFound)
return intExitCode
return intExitCode, err
}
return int(exitCode)
return int(exitCode), nil
}

func (ic *ContainerEngine) ContainerLogs(ctx context.Context, namesOrIds []string, options entities.ContainerLogsOptions) error {
Expand Down
9 changes: 9 additions & 0 deletions test/system/260-sdnotify.bats
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,15 @@ READY=1" "Container log after healthcheck run"
is "$output" "running" "make sure container is still running"

run_podman rm -f -t0 $ctr

ctr=$(random_string)
run_podman run --name $ctr \
--health-cmd="touch /terminate" \
--sdnotify=healthy \
$IMAGE sh -c 'while test \! -e /terminate; do sleep 0.1; done; echo finished'
is "$output" "finished" "make sure container exited successfully"
run_podman rm -f -t0 $ctr

_stop_socat
}

Expand Down

0 comments on commit b06c58b

Please sign in to comment.