Skip to content

Commit

Permalink
cri: ensure NRI API never has nil CRI
Browse files Browse the repository at this point in the history
A nil CRIImplementation field can cause a nil pointer dereference and
panic during startup recovery.

Prior to this change, the nri.API struct would have a nil cri
(CRIImplementation) field after nri.NewAPI until nri.Register was
called.  Register is called mid-way through initialization of the CRI
plugin, but recovery for containers occurs prior to that.  Container
recovery includes establishing new exit monitors for existing containers
that were discovered.  When a container exits, NRI plugins are given the
opportunity to be notified about the lifecycle event, and this is done
by accessing that CRIImplementation field inside the nri.API.  If a
container exits prior to nri.Register being called, access to the
CRIImplementation field can cause a panic.

Here's the call-path:

* The CRI plugin starts running
  [here](/~https://github.com/containerd/containerd/blob/ae71819c4f5e67bb4d5ae76a6b735f29cc25774e/pkg/cri/server/service.go#L222)
* It then [calls into](/~https://github.com/containerd/containerd/blob/ae71819c4f5e67bb4d5ae76a6b735f29cc25774e/pkg/cri/server/service.go#L227)
  `recover()` to recover state from previous runs of containerd
* `recover()` then attempts to recover all containers through
  [`loadContainer()`](/~https://github.com/containerd/containerd/blob/ae7d74b9e21bd08260586db104a1fe04af754545/internal/cri/server/restart.go#L175)
* When `loadContainer()` finds a container that is still running, it waits
  for the task (internal containerd object) to exit and sets up
  [exit monitoring](/~https://github.com/containerd/containerd/blob/ae7d74b9e21bd08260586db104a1fe04af754545/internal/cri/server/restart.go#L391)
* Any exit that then happens must be
  [handled](/~https://github.com/containerd/containerd/blob/ae7d74b9e21bd08260586db104a1fe04af754545/internal/cri/server/events.go#L145)
* Handling an exit includes
  [deleting the Task](/~https://github.com/containerd/containerd/blob/ae7d74b9e21bd08260586db104a1fe04af754545/internal/cri/server/events.go#L188)
  and specifying [`nri.WithContainerExit`](/~https://github.com/containerd/containerd/blob/ae7d74b9e21bd08260586db104a1fe04af754545/internal/cri/nri/nri_api_linux.go#L348)
  to [notify](/~https://github.com/containerd/containerd/blob/ae7d74b9e21bd08260586db104a1fe04af754545/internal/cri/nri/nri_api_linux.go#L356)
  any subscribed NRI plugins
* NRI plugins need to know information about the pod (not just the sandbox),
  so before a plugin is notified the NRI API package
  [queries the Sandbox Store](/~https://github.com/containerd/containerd/blob/ae7d74b9e21bd08260586db104a1fe04af754545/internal/cri/nri/nri_api_linux.go#L232)
  through the CRI implementation
* The `cri` implementation member field in the `nri.API` struct is set as part of the
  [`Register()`](/~https://github.com/containerd/containerd/blob/ae7d74b9e21bd08260586db104a1fe04af754545/internal/cri/nri/nri_api_linux.go#L66) method
* The `nri.Register()` method is only called
  [much further down in the CRI `Run()` method](/~https://github.com/containerd/containerd/blob/ae71819c4f5e67bb4d5ae76a6b735f29cc25774e/pkg/cri/server/service.go#L279)

(manually backported from commit 10aec35)

Signed-off-by: Samuel Karp <samuelkarp@google.com>
  • Loading branch information
samuelkarp committed Jul 1, 2024
1 parent 0794797 commit 7f5d3c5
Show file tree
Hide file tree
Showing 6 changed files with 16 additions and 15 deletions.
5 changes: 2 additions & 3 deletions pkg/cri/cri.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import (
"github.com/containerd/containerd"
criconfig "github.com/containerd/containerd/pkg/cri/config"
"github.com/containerd/containerd/pkg/cri/constants"
"github.com/containerd/containerd/pkg/cri/nri"
"github.com/containerd/containerd/pkg/cri/sbserver"
"github.com/containerd/containerd/pkg/cri/server"
nriservice "github.com/containerd/containerd/pkg/nri"
Expand Down Expand Up @@ -145,7 +144,7 @@ func setGLogLevel() error {
}

// Get the NRI plugin, and set up our NRI API for it.
func getNRIAPI(ic *plugin.InitContext) *nri.API {
func getNRIAPI(ic *plugin.InitContext) nriservice.API {
const (
pluginType = plugin.NRIApiPlugin
pluginName = "nri"
Expand All @@ -168,5 +167,5 @@ func getNRIAPI(ic *plugin.InitContext) *nri.API {

log.G(ctx).Info("using experimental NRI integration - disable nri plugin to prevent this")

return nri.NewAPI(api)
return api
}
6 changes: 3 additions & 3 deletions pkg/cri/nri/nri_api_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,10 @@ type API struct {
nri nri.API
}

func NewAPI(nri nri.API) *API {
func NewAPI(nri nri.API, cri CRIImplementation) *API {
return &API{
nri: nri,
cri: cri,
}
}

Expand All @@ -59,12 +60,11 @@ func (a *API) IsDisabled() bool {

func (a *API) IsEnabled() bool { return !a.IsDisabled() }

func (a *API) Register(cri CRIImplementation) error {
func (a *API) Register() error {
if a.IsDisabled() {
return nil
}

a.cri = cri
nri.RegisterDomain(a)

return a.nri.Start()
Expand Down
4 changes: 2 additions & 2 deletions pkg/cri/nri/nri_api_other.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ import (
type API struct {
}

func NewAPI(nri.API) *API {
func NewAPI(nri.API, CRIImplementation) *API {
return nil
}

func (a *API) Register(CRIImplementation) error {
func (a *API) Register() error {
return nil
}

Expand Down
7 changes: 4 additions & 3 deletions pkg/cri/sbserver/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
"github.com/containerd/containerd/pkg/cri/sbserver/podsandbox"
"github.com/containerd/containerd/pkg/cri/streaming"
"github.com/containerd/containerd/pkg/kmutex"
nriservice "github.com/containerd/containerd/pkg/nri"
"github.com/containerd/containerd/plugin"
"github.com/containerd/containerd/sandbox"
"github.com/containerd/containerd/services/warning"
Expand Down Expand Up @@ -128,7 +129,7 @@ type criService struct {
}

// NewCRIService returns a new instance of CRIService
func NewCRIService(config criconfig.Config, client *containerd.Client, nri *nri.API, warn warning.Service) (CRIService, error) {
func NewCRIService(config criconfig.Config, client *containerd.Client, nriservice nriservice.API, warn warning.Service) (CRIService, error) {
var err error
labels := label.NewStore()
c := &criService{
Expand Down Expand Up @@ -197,7 +198,7 @@ func NewCRIService(config criconfig.Config, client *containerd.Client, nri *nri.
c.sandboxControllers[criconfig.ModePodSandbox] = podsandbox.New(config, client, c.sandboxStore, c.os, c, c.baseOCISpecs)
c.sandboxControllers[criconfig.ModeShim] = client.SandboxController()

c.nri = nri
c.nri = nri.NewAPI(nriservice, &criImplementation{c})

return c, nil
}
Expand Down Expand Up @@ -281,7 +282,7 @@ func (c *criService) Run(ready func()) error {
}()

// register CRI domain with NRI
if err := c.nri.Register(&criImplementation{c}); err != nil {
if err := c.nri.Register(); err != nil {
return fmt.Errorf("failed to set up NRI for CRI service: %w", err)
}

Expand Down
7 changes: 4 additions & 3 deletions pkg/cri/server/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
"github.com/containerd/containerd/pkg/cri/nri"
"github.com/containerd/containerd/pkg/cri/streaming"
"github.com/containerd/containerd/pkg/kmutex"
nriservice "github.com/containerd/containerd/pkg/nri"
"github.com/containerd/containerd/plugin"
"github.com/containerd/containerd/services/warning"
runtime_alpha "github.com/containerd/containerd/third_party/k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
Expand Down Expand Up @@ -125,7 +126,7 @@ type criService struct {
}

// NewCRIService returns a new instance of CRIService
func NewCRIService(config criconfig.Config, client *containerd.Client, nri *nri.API, warn warning.Service) (CRIService, error) {
func NewCRIService(config criconfig.Config, client *containerd.Client, nriservice nriservice.API, warn warning.Service) (CRIService, error) {
var err error
labels := label.NewStore()

Expand Down Expand Up @@ -198,7 +199,7 @@ func NewCRIService(config criconfig.Config, client *containerd.Client, nri *nri.
return nil, err
}

c.nri = nri
c.nri = nri.NewAPI(nriservice, &criImplementation{c})

return c, nil
}
Expand Down Expand Up @@ -276,7 +277,7 @@ func (c *criService) Run(ready func()) error {
}()

// register CRI domain with NRI
if err := c.nri.Register(&criImplementation{c}); err != nil {
if err := c.nri.Register(); err != nil {
return fmt.Errorf("failed to set up NRI for CRI service: %w", err)
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/nri/nri.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ type API interface {
// IsEnabled returns true if the NRI interface is enabled and initialized.
IsEnabled() bool

// Start start the NRI interface, allowing external NRI plugins to
// Start starts the NRI interface, allowing external NRI plugins to
// connect, register, and hook themselves into the lifecycle events
// of pods and containers.
Start() error
Expand Down

0 comments on commit 7f5d3c5

Please sign in to comment.