Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport of Prevent long delays in ExpirationManager.Stop due to blanking a large pending map into release/1.15.x #23322

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions changelog/23282.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
expiration: Prevent large lease loads from delaying state changes, e.g. becoming active or standby.
```
56 changes: 33 additions & 23 deletions vault/expiration.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ type ExpirationManager struct {
leaseCheckCounter *uint32

logLeaseExpirations bool
expireFunc ExpireLeaseStrategy
expireFunc atomic.Pointer[ExpireLeaseStrategy]

// testRegisterAuthFailure, if set to true, triggers an explicit failure on
// RegisterAuth to simulate a partial failure during a token creation
Expand Down Expand Up @@ -360,11 +360,11 @@ func NewExpirationManager(c *Core, view *BarrierView, e ExpireLeaseStrategy, log
leaseCheckCounter: new(uint32),

logLeaseExpirations: os.Getenv("VAULT_SKIP_LOGGING_LEASE_EXPIRATIONS") == "",
expireFunc: e,

jobManager: jobManager,
revokeRetryBase: c.expirationRevokeRetryBase,
}
exp.expireFunc.Store(&e)
if exp.revokeRetryBase == 0 {
exp.revokeRetryBase = revokeRetryBase
}
Expand Down Expand Up @@ -846,6 +846,8 @@ func (m *ExpirationManager) processRestore(ctx context.Context, leaseID string)
return nil
}

func expireNoop(ctx context.Context, manager *ExpirationManager, s string, n *namespace.Namespace) {}

// Stop is used to prevent further automatic revocations.
// This must be called before sealing the view.
func (m *ExpirationManager) Stop() error {
Expand All @@ -860,27 +862,24 @@ func (m *ExpirationManager) Stop() error {
close(m.quitCh)

m.pendingLock.Lock()
// Replacing the entire map would cause a race with
// a simultaneous WalkTokens, which doesn't hold pendingLock.
m.pending.Range(func(key, value interface{}) bool {
info := value.(pendingInfo)
info.timer.Stop()
m.pending.Delete(key)
return true
})
// We don't want any lease timers that fire to do anything; they can wait
// for the next ExpirationManager to handle them.
newStrategy := ExpireLeaseStrategy(expireNoop)
m.expireFunc.Store(&newStrategy)
oldPending := m.pending
m.pending, m.nonexpiring, m.irrevocable = sync.Map{}, sync.Map{}, sync.Map{}
m.leaseCount = 0
m.nonexpiring.Range(func(key, value interface{}) bool {
m.nonexpiring.Delete(key)
return true
})
m.uniquePolicies = make(map[string][]string)
m.irrevocable.Range(func(key, _ interface{}) bool {
m.irrevocable.Delete(key)
return true
})
m.irrevocableLeaseCount = 0
m.pendingLock.Unlock()

go oldPending.Range(func(key, value interface{}) bool {
info := value.(pendingInfo)
// We need to stop the timers to prevent memory leaks.
info.timer.Stop()
return true
})

if m.inRestoreMode() {
for {
if !m.inRestoreMode() {
Expand Down Expand Up @@ -1881,7 +1880,8 @@ func (m *ExpirationManager) updatePendingInternal(le *leaseEntry) {
leaseID, namespace := le.LeaseID, le.namespace
// Extend the timer by the lease total
timer := time.AfterFunc(leaseTotal, func() {
m.expireFunc(m.quitContext, m, leaseID, namespace)
expFn := *m.expireFunc.Load() // Load and deref the pointer
expFn(m.quitContext, m, leaseID, namespace)
})
pending = pendingInfo{
timer: timer,
Expand Down Expand Up @@ -2471,8 +2471,13 @@ func (m *ExpirationManager) WalkTokens(walkFn ExpirationWalkFunction) error {
return true
}

m.pending.Range(callback)
m.nonexpiring.Range(callback)
m.pendingLock.RLock()
toWalk := []sync.Map{m.pending, m.nonexpiring}
m.pendingLock.RUnlock()

for _, m := range toWalk {
m.Range(callback)
}

return nil
}
Expand All @@ -2495,8 +2500,13 @@ func (m *ExpirationManager) walkLeases(walkFn leaseWalkFunction) error {
return walkFn(key.(string), expireTime)
}

m.pending.Range(callback)
m.nonexpiring.Range(callback)
m.pendingLock.RLock()
toWalk := []sync.Map{m.pending, m.nonexpiring}
m.pendingLock.RUnlock()

for _, m := range toWalk {
m.Range(callback)
}

return nil
}
Expand Down