Skip to content

Commit

Permalink
Add ability to perform an etcd on-demand snapshot via cli (k3s-io#2819)
Browse files Browse the repository at this point in the history
* add ability to perform an etcd on-demand snapshot via cli

(cherry picked from commit 1322901)
Signed-off-by: Brian Downs <brian.downs@gmail.com>
  • Loading branch information
briandowns committed Mar 15, 2021
1 parent 64017c5 commit ca55efa
Show file tree
Hide file tree
Showing 15 changed files with 204 additions and 25 deletions.
22 changes: 22 additions & 0 deletions cmd/etcdsnapshot/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package main

import (
"os"

"github.com/rancher/k3s/pkg/cli/cmds"
"github.com/rancher/k3s/pkg/cli/etcdsnapshot"
"github.com/rancher/k3s/pkg/configfilearg"
"github.com/sirupsen/logrus"
"github.com/urfave/cli"
)

// main builds a CLI app whose only command is the one returned by
// cmds.NewEtcdSnapshotCommand, then runs it with the process arguments after
// they have been passed through configfilearg.MustParse. Any error returned
// by the app is logged via logrus.Fatal.
func main() {
	app := cmds.NewApp()

	snapshotCmd := cmds.NewEtcdSnapshotCommand(etcdsnapshot.Run)
	app.Commands = []cli.Command{snapshotCmd}

	args := configfilearg.MustParse(os.Args)
	err := app.Run(args)
	if err != nil {
		logrus.Fatal(err)
	}
}
6 changes: 3 additions & 3 deletions cmd/k3s/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ func main() {
cmds.NewCRICTL(externalCLIAction("crictl")),
cmds.NewCtrCommand(externalCLIAction("ctr")),
cmds.NewCheckConfigCommand(externalCLIAction("check-config")),
cmds.NewEtcdSnapshotCommand(wrap(version.Program+"-"+cmds.EtcdSnapshotCommand, os.Args)),
}

err := app.Run(os.Args)
if err != nil {
if err := app.Run(os.Args); err != nil {
logrus.Fatal(err)
}
}
Expand Down Expand Up @@ -85,7 +85,7 @@ func stageAndRunCLI(cli *cli.Context, cmd string, args []string) error {
return stageAndRun(dataDir, cmd, args)
}

func stageAndRun(dataDir string, cmd string, args []string) error {
func stageAndRun(dataDir, cmd string, args []string) error {
dir, err := extract(dataDir)
if err != nil {
return errors.Wrap(err, "extracting data")
Expand Down
2 changes: 2 additions & 0 deletions cmd/server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/rancher/k3s/pkg/cli/cmds"
"github.com/rancher/k3s/pkg/cli/crictl"
"github.com/rancher/k3s/pkg/cli/ctr"
"github.com/rancher/k3s/pkg/cli/etcdsnapshot"
"github.com/rancher/k3s/pkg/cli/kubectl"
"github.com/rancher/k3s/pkg/cli/server"
"github.com/rancher/k3s/pkg/configfilearg"
Expand Down Expand Up @@ -42,6 +43,7 @@ func main() {
cmds.NewKubectlCommand(kubectl.Run),
cmds.NewCRICTL(crictl.Run),
cmds.NewCtrCommand(ctr.Run),
cmds.NewEtcdSnapshotCommand(etcdsnapshot.Run),
}

err := app.Run(configfilearg.MustParse(os.Args))
Expand Down
2 changes: 2 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/rancher/k3s/pkg/cli/agent"
"github.com/rancher/k3s/pkg/cli/cmds"
"github.com/rancher/k3s/pkg/cli/crictl"
"github.com/rancher/k3s/pkg/cli/etcdsnapshot"
"github.com/rancher/k3s/pkg/cli/kubectl"
"github.com/rancher/k3s/pkg/cli/server"
"github.com/rancher/k3s/pkg/configfilearg"
Expand All @@ -26,6 +27,7 @@ func main() {
cmds.NewAgentCommand(agent.Run),
cmds.NewKubectlCommand(kubectl.Run),
cmds.NewCRICTL(crictl.Run),
cmds.NewEtcdSnapshotCommand(etcdsnapshot.Run),
}

if err := app.Run(configfilearg.MustParse(os.Args)); err != nil {
Expand Down
39 changes: 39 additions & 0 deletions pkg/cli/cmds/etcd_snapshot.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package cmds

import (
"github.com/rancher/k3s/pkg/version"
"github.com/urfave/cli"
)

const EtcdSnapshotCommand = "etcd-snapshot"

// NewEtcdSnapshotCommand returns the CLI command named by EtcdSnapshotCommand.
// The supplied action is installed as the command's Action.
//
// Besides the shared debug/logging flags, the command accepts:
//   - data-dir (alias d): stored in ServerConfig.DataDir
//   - name: stored in ServerConfig.EtcdSnapshotName, defaulting to "on-demand"
//   - dir: stored in ServerConfig.EtcdSnapshotDir
func NewEtcdSnapshotCommand(action func(*cli.Context) error) cli.Command {
	dataDirFlag := cli.StringFlag{
		Name:        "data-dir,d",
		Usage:       "(data) Folder to hold state default /var/lib/rancher/" + version.Program + " or ${HOME}/.rancher/" + version.Program + " if not root",
		Destination: &ServerConfig.DataDir,
	}
	nameFlag := &cli.StringFlag{
		Name:        "name",
		Usage:       "(db) Set the base name of the etcd on-demand snapshot (appended with UNIX timestamp).",
		Destination: &ServerConfig.EtcdSnapshotName,
		Value:       "on-demand",
	}
	dirFlag := &cli.StringFlag{
		Name:        "dir",
		Usage:       "(db) Directory to save etcd on-demand snapshot. (default: ${data-dir}/db/snapshots)",
		Destination: &ServerConfig.EtcdSnapshotDir,
	}

	return cli.Command{
		Name:            EtcdSnapshotCommand,
		Usage:           "Trigger an immediate etcd snapshot",
		Action:          action,
		SkipFlagParsing: false,
		SkipArgReorder:  true,
		Flags: []cli.Flag{
			DebugFlag,
			LogFile,
			AlsoLogToStderr,
			dataDirFlag,
			nameFlag,
			dirFlag,
		},
	}
}
7 changes: 7 additions & 0 deletions pkg/cli/cmds/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ type Server struct {
ClusterResetRestorePath string
EncryptSecrets bool
StartupHooks []func(context.Context, <-chan struct{}, string) error
EtcdSnapshotName string
EtcdDisableSnapshots bool
EtcdSnapshotDir string
EtcdSnapshotCron string
Expand Down Expand Up @@ -220,6 +221,12 @@ func NewServerCommand(action func(*cli.Context) error) cli.Command {
Usage: "(db) Disable automatic etcd snapshots",
Destination: &ServerConfig.EtcdDisableSnapshots,
},
&cli.StringFlag{
Name: "etcd-snapshot-name",
Usage: "(db) Set the base name of etcd snapshots. Default: etcd-snapshot-<unix-timestamp>",
Destination: &ServerConfig.EtcdSnapshotName,
Value: "etcd-snapshot",
},
&cli.StringFlag{
Name: "etcd-snapshot-schedule-cron",
Usage: "(db) Snapshot interval time in cron spec. eg. every 5 hours '* */5 * * *'",
Expand Down
62 changes: 62 additions & 0 deletions pkg/cli/etcdsnapshot/etcd_snapshot.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package etcdsnapshot

import (
"context"
"errors"
"os"
"path/filepath"

"github.com/erikdubbelboer/gspt"
"github.com/rancher/k3s/pkg/cli/cmds"
"github.com/rancher/k3s/pkg/cluster"
"github.com/rancher/k3s/pkg/daemons/config"
"github.com/rancher/k3s/pkg/etcd"
"github.com/rancher/k3s/pkg/server"
"github.com/rancher/wrangler/pkg/signals"
"github.com/urfave/cli"
)

// Run is the CLI action for the etcd snapshot command. It initializes logging
// through cmds.InitLogging and, if that succeeds, hands off to run with the
// package-level cmds.ServerConfig. The first error encountered is returned.
func Run(app *cli.Context) error {
	err := cmds.InitLogging()
	if err == nil {
		err = run(app, &cmds.ServerConfig)
	}
	return err
}

// run performs a single on-demand etcd snapshot using the settings in cfg.
//
// It resolves the server data directory, builds a minimal server.Config
// (agent disabled, retention set to 0, etcd client TLS material taken from
// <dataDir>/tls/etcd), verifies that the managed etcd database has been
// initialized, bootstraps the cluster, and finally requests the snapshot.
// The app argument is not used by this function.
func run(app *cli.Context, cfg *cmds.Server) error {
	gspt.SetProcTitle(os.Args[0])

	dataDir, err := server.ResolveDataDir(cfg.DataDir)
	if err != nil {
		return err
	}
	etcdTLSDir := filepath.Join(dataDir, "tls", "etcd")

	var serverConfig server.Config
	serverConfig.DisableAgent = true

	control := &serverConfig.ControlConfig
	control.DataDir = dataDir
	control.EtcdSnapshotName = cfg.EtcdSnapshotName
	control.EtcdSnapshotDir = cfg.EtcdSnapshotDir
	control.EtcdSnapshotRetention = 0 // disable retention check
	control.Runtime = &config.ControlRuntime{
		ETCDServerCA:   filepath.Join(etcdTLSDir, "server-ca.crt"),
		ClientETCDCert: filepath.Join(etcdTLSDir, "client.crt"),
		ClientETCDKey:  filepath.Join(etcdTLSDir, "client.key"),
	}

	ctx := signals.SetupSignalHandler(context.Background())

	ready, err := etcd.NewETCD().IsInitialized(ctx, control)
	switch {
	case err != nil:
		return err
	case !ready:
		return errors.New("managed etcd database has not been initialized")
	}

	// Use a name that does not shadow the imported cluster package.
	c := cluster.New(control)
	if err := c.Bootstrap(ctx); err != nil {
		return err
	}
	return c.Snapshot(ctx, control)
}
1 change: 1 addition & 0 deletions pkg/cli/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ func run(app *cli.Context, cfg *cmds.Server, leaderControllers server.CustomCont
serverConfig.ControlConfig.DisableControllerManager = cfg.DisableControllerManager
serverConfig.ControlConfig.ClusterInit = cfg.ClusterInit
serverConfig.ControlConfig.EncryptSecrets = cfg.EncryptSecrets
serverConfig.ControlConfig.EtcdSnapshotName = cfg.EtcdSnapshotName
serverConfig.ControlConfig.EtcdSnapshotCron = cfg.EtcdSnapshotCron
serverConfig.ControlConfig.EtcdSnapshotDir = cfg.EtcdSnapshotDir
serverConfig.ControlConfig.EtcdSnapshotRetention = cfg.EtcdSnapshotRetention
Expand Down
10 changes: 10 additions & 0 deletions pkg/cluster/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

"github.com/rancher/k3s/pkg/bootstrap"
"github.com/rancher/k3s/pkg/clientaccess"
"github.com/rancher/k3s/pkg/daemons/config"
"github.com/rancher/k3s/pkg/version"
"github.com/sirupsen/logrus"
)
Expand Down Expand Up @@ -147,3 +148,12 @@ func (c *Cluster) bootstrap(ctx context.Context) error {
func (c *Cluster) bootstrapStamp() string {
	// The file name is "joined-" followed by keyHash of the configured token,
	// and it lives in the "db" directory beneath the configured data dir.
	stampName := "joined-" + keyHash(c.config.Token)
	return filepath.Join(c.config.DataDir, "db", stampName)
}

// Snapshot forwards the snapshot request to the cluster's managed database
// driver. If the cluster has no managed database driver, an error is returned
// instead.
func (c *Cluster) Snapshot(ctx context.Context, config *config.Control) error {
	if db := c.managedDB; db != nil {
		return db.Snapshot(ctx, config)
	}
	return errors.New("unable to perform etcd snapshot on non-etcd system")
}
1 change: 1 addition & 0 deletions pkg/cluster/managed/drivers.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ type Driver interface {
EndpointName() string
GetMembersClientURLs(ctx context.Context) ([]string, error)
RemoveSelf(ctx context.Context) error
Snapshot(ctx context.Context, config *config.Control) error
}

func RegisterDriver(d Driver) {
Expand Down
1 change: 1 addition & 0 deletions pkg/daemons/config/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ type Control struct {
EncryptSecrets bool
TLSMinVersion uint16
TLSCipherSuites []uint16
EtcdSnapshotName string
EtcdDisableSnapshots bool
EtcdSnapshotDir string
EtcdSnapshotCron string
Expand Down
68 changes: 49 additions & 19 deletions pkg/etcd/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,6 @@ func getClientConfig(ctx context.Context, runtime *config.ControlRuntime, endpoi
if err != nil {
return nil, err
}

cfg := &etcd.Config{
Endpoints: endpoints,
TLS: tlsConfig,
Expand All @@ -422,7 +421,6 @@ func getClientConfig(ctx context.Context, runtime *config.ControlRuntime, endpoi
DialKeepAliveTime: defaultKeepAliveTime,
DialKeepAliveTimeout: defaultKeepAliveTimeout,
}

return cfg, nil
}

Expand Down Expand Up @@ -726,48 +724,80 @@ func snapshotDir(config *config.Control) (string, error) {
return config.EtcdSnapshotDir, nil
}

// snapshot attempts to save a new snapshot to the configured directory, and then clean up any old
// snapshots in excess of the retention limits.
func (e *ETCD) snapshot(ctx context.Context) {
// preSnapshotSetup makes sure the fields a snapshot relies on are populated.
// On-demand snapshots run before the usual etcd setup has happened, so any
// missing piece is filled in here:
//   - when there is no client yet, e.config falls back to the supplied config
//     (only if unset) and a client is created from e.config.Runtime;
//   - when e.runtime is unset, it is taken from the supplied config.
//
// An error is returned only if creating the client fails; in that case
// e.runtime is left untouched.
func (e *ETCD) preSnapshotSetup(ctx context.Context, config *config.Control) error {
	needClient := e.client == nil

	if needClient && e.config == nil {
		e.config = config
	}
	if needClient {
		newClient, err := getClient(ctx, e.config.Runtime, endpoint)
		if err != nil {
			return err
		}
		// Only store the client once it has been created successfully.
		e.client = newClient
	}

	if e.runtime == nil {
		e.runtime = config.Runtime
	}
	return nil
}

// Snapshot attempts to save a new snapshot to the configured directory and then, when a
// retention limit of at least 1 is configured, cleans up old snapshots in excess of that limit.
// It is used both by the internal cron snapshot system and for on-demand snapshots.
func (e *ETCD) Snapshot(ctx context.Context, config *config.Control) error {
if err := e.preSnapshotSetup(ctx, config); err != nil {
return err
}

status, err := e.client.Status(ctx, endpoint)
if err != nil {
logrus.Errorf("Failed to check etcd status for snapshot: %v", err)
return
return errors.Wrap(err, "failed to check etcd status for snapshot")
}

if status.IsLearner {
logrus.Warnf("Skipping snapshot: not supported for learner")
return
return nil
}

snapshotDir, err := snapshotDir(e.config)
if err != nil {
logrus.Errorf("Failed to get the snapshot dir: %v", err)
return
return errors.Wrap(err, "failed to get the snapshot dir")
}

cfg, err := getClientConfig(ctx, e.runtime, endpoint)
if err != nil {
logrus.Errorf("Failed to get config for etcd snapshot: %v", err)
return
return errors.Wrap(err, "failed to get config for etcd snapshot")
}

snapshotPath := filepath.Join(snapshotDir, snapshotPrefix+strconv.Itoa(int(time.Now().Unix())))
snapshotName := fmt.Sprintf("%s-%d", e.config.EtcdSnapshotName, time.Now().Unix())
snapshotPath := filepath.Join(snapshotDir, snapshotName)

logrus.Infof("Saving etcd snapshot to %s", snapshotPath)

if err := snapshot.NewV3(nil).Save(ctx, *cfg, snapshotPath); err != nil {
logrus.Errorf("Failed to save snapshot: %v", err)
return
return errors.Wrap(err, "failed to save snapshot")
}
if err := snapshotRetention(e.config.EtcdSnapshotRetention, snapshotDir); err != nil {
logrus.Errorf("Failed to apply snapshot retention: %v", err)
return

// check if we need to perform a retention check
if e.config.EtcdSnapshotRetention >= 1 {
if err := snapshotRetention(e.config.EtcdSnapshotRetention, snapshotDir); err != nil {
return errors.Wrap(err, "failed to apply snapshot retention")
}
}

return nil
}

// setSnapshotFunction schedules snapshots at the configured interval
func (e *ETCD) setSnapshotFunction(ctx context.Context) {
e.cron.AddFunc(e.config.EtcdSnapshotCron, func() { e.snapshot(ctx) })
e.cron.AddFunc(e.config.EtcdSnapshotCron, func() {
if err := e.Snapshot(ctx, e.config); err != nil {
logrus.Error(err)
}
})
}

// Restore performs a restore of the ETCD datastore from
Expand Down
4 changes: 2 additions & 2 deletions pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ const (
ETCDRoleLabelKey = "node-role.kubernetes.io/etcd"
)

func resolveDataDir(dataDir string) (string, error) {
// ResolveDataDir passes dataDir through datadir.Resolve and returns the
// "server" subdirectory of the result, together with any error reported by
// datadir.Resolve. The path is built even when an error is returned, so
// callers should check the error before using it.
func ResolveDataDir(dataDir string) (string, error) {
	resolved, err := datadir.Resolve(dataDir)
	serverDir := filepath.Join(resolved, "server")
	return serverDir, err
}
Expand Down Expand Up @@ -348,7 +348,7 @@ func setupDataDirAndChdir(config *config.Control) error {
err error
)

config.DataDir, err = resolveDataDir(config.DataDir)
config.DataDir, err = ResolveDataDir(config.DataDir)
if err != nil {
return err
}
Expand Down
2 changes: 2 additions & 0 deletions scripts/build
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ rm -f \
bin/containerd-shim-runc-v1 \
bin/containerd-shim-runc-v2 \
bin/k3s-server \
bin/k3s-etcd-snapshot \
bin/kubectl \
bin/crictl \
bin/ctr
Expand Down Expand Up @@ -105,6 +106,7 @@ echo Building server
CGO_ENABLED=1 "${GO}" build -tags "$TAGS" -ldflags "$VERSIONFLAGS $LDFLAGS $STATIC_SQLITE" -o bin/containerd ./cmd/server/main.go
ln -s containerd ./bin/k3s-agent
ln -s containerd ./bin/k3s-server
ln -s containerd ./bin/k3s-etcd-snapshot
ln -s containerd ./bin/kubectl
ln -s containerd ./bin/crictl
ln -s containerd ./bin/ctr
Expand Down
Loading

0 comments on commit ca55efa

Please sign in to comment.