Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ability to perform an etcd on-demand snapshot via cli #2819

Merged
merged 19 commits into from
Jan 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions cmd/etcdsnapshot/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package main

import (
"os"

"github.com/rancher/k3s/pkg/cli/cmds"
"github.com/rancher/k3s/pkg/cli/etcdsnapshot"
"github.com/rancher/k3s/pkg/configfilearg"
"github.com/sirupsen/logrus"
"github.com/urfave/cli"
)

// main is the entry point of the standalone etcd-snapshot binary. It creates
// the shared CLI app, registers the etcd-snapshot command (backed by
// etcdsnapshot.Run) as its only command, and runs it with the process
// arguments after they have been passed through configfilearg.MustParse.
// Any error returned by the app is reported through logrus.Fatal.
func main() {
	app := cmds.NewApp()

	snapshotCmd := cmds.NewEtcdSnapshotCommand(etcdsnapshot.Run)
	app.Commands = []cli.Command{snapshotCmd}

	args := configfilearg.MustParse(os.Args)
	runErr := app.Run(args)
	if runErr != nil {
		logrus.Fatal(runErr)
	}
}
8 changes: 4 additions & 4 deletions cmd/k3s/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ func main() {
cmds.NewCRICTL(externalCLIAction("crictl", dataDir)),
cmds.NewCtrCommand(externalCLIAction("ctr", dataDir)),
cmds.NewCheckConfigCommand(externalCLIAction("check-config", dataDir)),
cmds.NewEtcdSnapshotCommand(wrap(version.Program+"-"+cmds.EtcdSnapshotCommand, dataDir, os.Args)),
}

err := app.Run(os.Args)
if err != nil {
if err := app.Run(os.Args); err != nil {
logrus.Fatal(err)
}
}
Expand Down Expand Up @@ -92,7 +92,7 @@ func externalCLI(cli, dataDir string, args []string) error {
return stageAndRun(dataDir, cli, append([]string{cli}, args...))
}

func wrap(cmd string, dataDir string, args []string) func(ctx *cli.Context) error {
func wrap(cmd, dataDir string, args []string) func(ctx *cli.Context) error {
return func(ctx *cli.Context) error {
return stageAndRunCLI(ctx, cmd, dataDir, args)
}
Expand All @@ -107,7 +107,7 @@ func stageAndRunCLI(cli *cli.Context, cmd string, dataDir string, args []string)
return stageAndRun(dataDir, cmd, args)
}

func stageAndRun(dataDir string, cmd string, args []string) error {
func stageAndRun(dataDir, cmd string, args []string) error {
dir, err := extract(dataDir)
if err != nil {
return errors.Wrap(err, "extracting data")
Expand Down
2 changes: 2 additions & 0 deletions cmd/server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/rancher/k3s/pkg/cli/cmds"
"github.com/rancher/k3s/pkg/cli/crictl"
"github.com/rancher/k3s/pkg/cli/ctr"
"github.com/rancher/k3s/pkg/cli/etcdsnapshot"
"github.com/rancher/k3s/pkg/cli/kubectl"
"github.com/rancher/k3s/pkg/cli/server"
"github.com/rancher/k3s/pkg/configfilearg"
Expand Down Expand Up @@ -42,6 +43,7 @@ func main() {
cmds.NewKubectlCommand(kubectl.Run),
cmds.NewCRICTL(crictl.Run),
cmds.NewCtrCommand(ctr.Run),
cmds.NewEtcdSnapshotCommand(etcdsnapshot.Run),
}

err := app.Run(configfilearg.MustParse(os.Args))
Expand Down
2 changes: 2 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/rancher/k3s/pkg/cli/agent"
"github.com/rancher/k3s/pkg/cli/cmds"
"github.com/rancher/k3s/pkg/cli/crictl"
"github.com/rancher/k3s/pkg/cli/etcdsnapshot"
"github.com/rancher/k3s/pkg/cli/kubectl"
"github.com/rancher/k3s/pkg/cli/server"
"github.com/rancher/k3s/pkg/configfilearg"
Expand All @@ -26,6 +27,7 @@ func main() {
cmds.NewAgentCommand(agent.Run),
cmds.NewKubectlCommand(kubectl.Run),
cmds.NewCRICTL(crictl.Run),
cmds.NewEtcdSnapshotCommand(etcdsnapshot.Run),
}

if err := app.Run(configfilearg.MustParse(os.Args)); err != nil {
Expand Down
39 changes: 39 additions & 0 deletions pkg/cli/cmds/etcd_snapshot.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package cmds

import (
"github.com/rancher/k3s/pkg/version"
"github.com/urfave/cli"
)

const EtcdSnapshotCommand = "etcd-snapshot"

// NewEtcdSnapshotCommand returns the CLI command definition for
// EtcdSnapshotCommand ("etcd-snapshot"), wired to run the given action.
//
// Besides the shared DebugFlag, LogFile and AlsoLogToStderr flags, the
// command accepts:
//   - data-dir (alias d): stored in ServerConfig.DataDir
//   - name: stored in ServerConfig.EtcdSnapshotName, defaulting to "on-demand"
//   - dir: stored in ServerConfig.EtcdSnapshotDir
func NewEtcdSnapshotCommand(action func(*cli.Context) error) cli.Command {
	dataDirUsage := "(data) Folder to hold state default /var/lib/rancher/" + version.Program + " or ${HOME}/.rancher/" + version.Program + " if not root"

	snapshotFlags := []cli.Flag{
		DebugFlag,
		LogFile,
		AlsoLogToStderr,
		cli.StringFlag{
			Name:        "data-dir,d",
			Usage:       dataDirUsage,
			Destination: &ServerConfig.DataDir,
		},
		&cli.StringFlag{
			Name:        "name",
			Usage:       "(db) Set the base name of the etcd on-demand snapshot (appended with UNIX timestamp).",
			Destination: &ServerConfig.EtcdSnapshotName,
			Value:       "on-demand",
		},
		&cli.StringFlag{
			Name:        "dir",
			Usage:       "(db) Directory to save etcd on-demand snapshot. (default: ${data-dir}/db/snapshots)",
			Destination: &ServerConfig.EtcdSnapshotDir,
		},
	}

	return cli.Command{
		Name:            EtcdSnapshotCommand,
		Usage:           "Trigger an immediate etcd snapshot",
		SkipFlagParsing: false,
		SkipArgReorder:  true,
		Action:          action,
		Flags:           snapshotFlags,
	}
}
7 changes: 7 additions & 0 deletions pkg/cli/cmds/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ type Server struct {
ClusterResetRestorePath string
EncryptSecrets bool
StartupHooks []func(context.Context, <-chan struct{}, string) error
EtcdSnapshotName string
EtcdDisableSnapshots bool
EtcdSnapshotDir string
EtcdSnapshotCron string
Expand Down Expand Up @@ -214,6 +215,12 @@ func NewServerCommand(action func(*cli.Context) error) cli.Command {
Usage: "(db) Disable automatic etcd snapshots",
Destination: &ServerConfig.EtcdDisableSnapshots,
},
&cli.StringFlag{
Name: "etcd-snapshot-name",
Usage: "(db) Set the base name of etcd snapshots. Default: etcd-snapshot-<unix-timestamp>",
Destination: &ServerConfig.EtcdSnapshotName,
Value: "etcd-snapshot",
},
&cli.StringFlag{
Name: "etcd-snapshot-schedule-cron",
Usage: "(db) Snapshot interval time in cron spec. eg. every 5 hours '* */5 * * *'",
Expand Down
62 changes: 62 additions & 0 deletions pkg/cli/etcdsnapshot/etcd_snapshot.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package etcdsnapshot

import (
"context"
"errors"
"os"
"path/filepath"

"github.com/erikdubbelboer/gspt"
"github.com/rancher/k3s/pkg/cli/cmds"
"github.com/rancher/k3s/pkg/cluster"
"github.com/rancher/k3s/pkg/daemons/config"
"github.com/rancher/k3s/pkg/etcd"
"github.com/rancher/k3s/pkg/server"
"github.com/rancher/wrangler/pkg/signals"
"github.com/urfave/cli"
)

// Run is the CLI action for the etcd-snapshot command. It initializes
// logging via cmds.InitLogging and, if that succeeds, delegates to run with
// the package-level cmds.ServerConfig. Any error from either step is
// returned to the caller unchanged.
func Run(app *cli.Context) error {
	logErr := cmds.InitLogging()
	if logErr != nil {
		return logErr
	}

	return run(app, &cmds.ServerConfig)
}

// run performs one on-demand etcd snapshot using the settings in cfg.
//
// It resolves the server data directory, builds a minimal server
// configuration (agent disabled, snapshot name/dir taken from cfg, and the
// etcd CA/client certificate paths under <dataDir>/tls/etcd), verifies that
// the managed etcd database has been initialized, bootstraps the cluster
// object, and finally asks it to take the snapshot.
//
// It returns the first error encountered, or an error stating that the
// managed etcd database has not been initialized.
func run(app *cli.Context, cfg *cmds.Server) error {
	gspt.SetProcTitle(os.Args[0])

	dataDir, err := server.ResolveDataDir(cfg.DataDir)
	if err != nil {
		return err
	}
	etcdTLSDir := filepath.Join(dataDir, "tls", "etcd")

	var serverConfig server.Config
	serverConfig.DisableAgent = true

	// All remaining settings live on the embedded control config.
	control := &serverConfig.ControlConfig
	control.DataDir = dataDir
	control.EtcdSnapshotName = cfg.EtcdSnapshotName
	control.EtcdSnapshotDir = cfg.EtcdSnapshotDir
	control.EtcdSnapshotRetention = 0 // disable retention check
	control.Runtime = &config.ControlRuntime{
		ETCDServerCA:   filepath.Join(etcdTLSDir, "server-ca.crt"),
		ClientETCDCert: filepath.Join(etcdTLSDir, "client.crt"),
		ClientETCDKey:  filepath.Join(etcdTLSDir, "client.key"),
	}

	ctx := signals.SetupSignalHandler(context.Background())

	ready, err := etcd.NewETCD().IsInitialized(ctx, control)
	if err != nil {
		return err
	}
	if !ready {
		return errors.New("managed etcd database has not been initialized")
	}

	c := cluster.New(control)
	if bootErr := c.Bootstrap(ctx); bootErr != nil {
		return bootErr
	}

	return c.Snapshot(ctx, control)
}
1 change: 1 addition & 0 deletions pkg/cli/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ func run(app *cli.Context, cfg *cmds.Server) error {
serverConfig.ControlConfig.DisableKubeProxy = cfg.DisableKubeProxy
serverConfig.ControlConfig.ClusterInit = cfg.ClusterInit
serverConfig.ControlConfig.EncryptSecrets = cfg.EncryptSecrets
serverConfig.ControlConfig.EtcdSnapshotName = cfg.EtcdSnapshotName
serverConfig.ControlConfig.EtcdSnapshotCron = cfg.EtcdSnapshotCron
serverConfig.ControlConfig.EtcdSnapshotDir = cfg.EtcdSnapshotDir
serverConfig.ControlConfig.EtcdSnapshotRetention = cfg.EtcdSnapshotRetention
Expand Down
10 changes: 10 additions & 0 deletions pkg/cluster/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

"github.com/rancher/k3s/pkg/bootstrap"
"github.com/rancher/k3s/pkg/clientaccess"
"github.com/rancher/k3s/pkg/daemons/config"
"github.com/rancher/k3s/pkg/version"
"github.com/sirupsen/logrus"
)
Expand Down Expand Up @@ -147,3 +148,12 @@ func (c *Cluster) bootstrap(ctx context.Context) error {
func (c *Cluster) bootstrapStamp() string {
return filepath.Join(c.config.DataDir, "db/joined-"+keyHash(c.config.Token))
}

// Snapshot forwards a snapshot request to the cluster's managed database
// driver, passing ctx and config through unchanged and returning the
// driver's result. When the cluster has no managed database driver
// (c.managedDB is nil) it returns an error instead.
func (c *Cluster) Snapshot(ctx context.Context, config *config.Control) error {
	if driver := c.managedDB; driver != nil {
		return driver.Snapshot(ctx, config)
	}
	return errors.New("unable to perform etcd snapshot on non-etcd system")
}
1 change: 1 addition & 0 deletions pkg/cluster/managed/drivers.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ type Driver interface {
Test(ctx context.Context) error
Restore(ctx context.Context) error
EndpointName() string
Snapshot(ctx context.Context, config *config.Control) error
}

func RegisterDriver(d Driver) {
Expand Down
1 change: 1 addition & 0 deletions pkg/daemons/config/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ type Control struct {
EncryptSecrets bool
TLSMinVersion uint16
TLSCipherSuites []uint16
EtcdSnapshotName string
EtcdDisableSnapshots bool
EtcdSnapshotDir string
EtcdSnapshotCron string
Expand Down
68 changes: 49 additions & 19 deletions pkg/etcd/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,6 @@ func getClientConfig(ctx context.Context, runtime *config.ControlRuntime, endpoi
if err != nil {
return nil, err
}

cfg := &etcd.Config{
Endpoints: endpoints,
TLS: tlsConfig,
Expand All @@ -417,7 +416,6 @@ func getClientConfig(ctx context.Context, runtime *config.ControlRuntime, endpoi
DialKeepAliveTime: defaultKeepAliveTime,
DialKeepAliveTimeout: defaultKeepAliveTimeout,
}

return cfg, nil
}

Expand Down Expand Up @@ -712,48 +710,80 @@ func snapshotDir(config *config.Control) (string, error) {
return config.EtcdSnapshotDir, nil
}

// snapshot attempts to save a new snapshot to the configured directory, and then clean up any old
// snapshots in excess of the retention limits.
func (e *ETCD) snapshot(ctx context.Context) {
// preSnapshotSetup fills in whatever state Snapshot needs that has not been
// set on e yet. On-demand snapshots run before the normal etcd setup has
// populated these fields, so they are derived from the supplied config:
//   - if e has no client, e.config falls back to config (when unset) and a
//     client is created from e.config.Runtime;
//   - if e has no runtime, config.Runtime is used.
//
// Fields that are already set are left untouched. The only error returned is
// the one from creating the client.
func (e *ETCD) preSnapshotSetup(ctx context.Context, config *config.Control) error {
	if e.client == nil {
		// The client is built from e.config, so make sure that exists first.
		if e.config == nil {
			e.config = config
		}

		etcdClient, err := getClient(ctx, e.config.Runtime, endpoint)
		if err != nil {
			return err
		}
		e.client = etcdClient
	}

	if e.runtime == nil {
		e.runtime = config.Runtime
	}

	return nil
}

// Snapshot attempts to save a new snapshot to the configured directory and then, when a
// retention limit of at least 1 is configured, cleans up any old snapshots in excess of that
// limit. It is used both by the internal cron snapshot scheduler and for on-demand snapshots.
func (e *ETCD) Snapshot(ctx context.Context, config *config.Control) error {
if err := e.preSnapshotSetup(ctx, config); err != nil {
return err
}

status, err := e.client.Status(ctx, endpoint)
if err != nil {
logrus.Errorf("Failed to check etcd status for snapshot: %v", err)
return
return errors.Wrap(err, "failed to check etcd status for snapshot")
}

if status.IsLearner {
logrus.Warnf("Skipping snapshot: not supported for learner")
return
return nil
}

snapshotDir, err := snapshotDir(e.config)
if err != nil {
logrus.Errorf("Failed to get the snapshot dir: %v", err)
return
return errors.Wrap(err, "failed to get the snapshot dir")
}

cfg, err := getClientConfig(ctx, e.runtime, endpoint)
if err != nil {
logrus.Errorf("Failed to get config for etcd snapshot: %v", err)
return
return errors.Wrap(err, "failed to get config for etcd snapshot")
}

snapshotPath := filepath.Join(snapshotDir, snapshotPrefix+strconv.Itoa(int(time.Now().Unix())))
snapshotName := fmt.Sprintf("%s-%d", e.config.EtcdSnapshotName, time.Now().Unix())
snapshotPath := filepath.Join(snapshotDir, snapshotName)

logrus.Infof("Saving etcd snapshot to %s", snapshotPath)

if err := snapshot.NewV3(nil).Save(ctx, *cfg, snapshotPath); err != nil {
logrus.Errorf("Failed to save snapshot: %v", err)
return
return errors.Wrap(err, "failed to save snapshot")
}
if err := snapshotRetention(e.config.EtcdSnapshotRetention, snapshotDir); err != nil {
logrus.Errorf("Failed to apply snapshot retention: %v", err)
return

// check if we need to perform a retention check
if e.config.EtcdSnapshotRetention >= 1 {
if err := snapshotRetention(e.config.EtcdSnapshotRetention, snapshotDir); err != nil {
return errors.Wrap(err, "failed to apply snapshot retention")
}
}

return nil
}

// setSnapshotFunction schedules snapshots at the configured interval
func (e *ETCD) setSnapshotFunction(ctx context.Context) {
e.cron.AddFunc(e.config.EtcdSnapshotCron, func() { e.snapshot(ctx) })
e.cron.AddFunc(e.config.EtcdSnapshotCron, func() {
if err := e.Snapshot(ctx, e.config); err != nil {
logrus.Error(err)
}
})
}

// Restore performs a restore of the ETCD datastore from
Expand Down
4 changes: 2 additions & 2 deletions pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ const (
ControlPlaneRoleLabelKey = "node-role.kubernetes.io/control-plane"
)

func resolveDataDir(dataDir string) (string, error) {
func ResolveDataDir(dataDir string) (string, error) {
dataDir, err := datadir.Resolve(dataDir)
return filepath.Join(dataDir, "server"), err
}
Expand Down Expand Up @@ -322,7 +322,7 @@ func setupDataDirAndChdir(config *config.Control) error {
err error
)

config.DataDir, err = resolveDataDir(config.DataDir)
config.DataDir, err = ResolveDataDir(config.DataDir)
if err != nil {
return err
}
Expand Down
Loading