Skip to content

Commit

Permalink
Add etcd snapshot and restore
Browse files Browse the repository at this point in the history
Signed-off-by: galal-hussein <hussein.galal.ahmed.11@gmail.com>
  • Loading branch information
galal-hussein committed Aug 12, 2020
1 parent 4a68698 commit 3ead6fa
Show file tree
Hide file tree
Showing 10 changed files with 680 additions and 0 deletions.
20 changes: 20 additions & 0 deletions pkg/cli/cmds/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"github.com/rancher/k3s/pkg/version"
"github.com/rancher/spur/cli"
"github.com/rancher/spur/cli/altsrc"
"time"
)

const (
Expand Down Expand Up @@ -54,6 +55,9 @@ type Server struct {
ClusterInit bool
ClusterReset bool
EncryptSecrets bool
SnapshotDir string
SnapshotInterval time.Duration
RestorePath string
}

var ServerConfig Server
Expand Down Expand Up @@ -201,6 +205,22 @@ func NewServerCommand(action func(*cli.Context) error) *cli.Command {
Destination: &ServerConfig.DatastoreKeyFile,
EnvVars: []string{version.ProgramUpper + "_DATASTORE_KEYFILE"},
},
&cli.DurationFlag{
Name: "snapshot-interval",
Usage: "(db) snapshot interval time",
Destination: &ServerConfig.SnapshotInterval,
Value: 5 * time.Minute,
},
&cli.StringFlag{
Name: "snapshot-dir",
Usage: "(db) directory to save db snapshots",
Destination: &ServerConfig.SnapshotDir,
},
&cli.StringFlag{
Name: "snapshot-restore-path",
Usage: "(db) Snapshot restore path",
Destination: &ServerConfig.RestorePath,
},
&cli.StringFlag{
Name: "default-local-storage-path",
Usage: "(storage) Default local storage path for local provisioner storage class",
Expand Down
3 changes: 3 additions & 0 deletions pkg/cli/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@ func run(app *cli.Context, cfg *cmds.Server) error {
serverConfig.ControlConfig.ClusterInit = cfg.ClusterInit
serverConfig.ControlConfig.ClusterReset = cfg.ClusterReset
serverConfig.ControlConfig.EncryptSecrets = cfg.EncryptSecrets
serverConfig.ControlConfig.SnapshotInterval = cfg.SnapshotInterval
serverConfig.ControlConfig.SnapshotDir = cfg.SnapshotDir
serverConfig.ControlConfig.RestorePath = cfg.RestorePath

if serverConfig.ControlConfig.SupervisorPort == 0 {
serverConfig.ControlConfig.SupervisorPort = serverConfig.ControlConfig.HTTPSPort
Expand Down
4 changes: 4 additions & 0 deletions pkg/cluster/managed.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ func (c *Cluster) start(ctx context.Context) error {
return c.managedDB.Reset(ctx, c.clientAccessInfo)
}

if c.config.RestorePath != "" {
return c.managedDB.Restore(ctx)
}

return c.managedDB.Start(ctx, c.clientAccessInfo)
}

Expand Down
1 change: 1 addition & 0 deletions pkg/cluster/managed/drivers.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ type Driver interface {
Reset(ctx context.Context, clientAccessInfo *clientaccess.Info) error
Start(ctx context.Context, clientAccessInfo *clientaccess.Info) error
Test(ctx context.Context, clientAccessInfo *clientaccess.Info) error
Restore(ctx context.Context) error
EndpointName() string
}

Expand Down
4 changes: 4 additions & 0 deletions pkg/daemons/config/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"net/http"
"sort"
"strings"
"time"

"github.com/rancher/kine/pkg/endpoint"
"github.com/rancher/wrangler-api/pkg/generated/controllers/core"
Expand Down Expand Up @@ -127,6 +128,9 @@ type Control struct {
EncryptSecrets bool
TLSMinVersion uint16
TLSCipherSuites []uint16
SnapshotDir string
SnapshotInterval time.Duration
RestorePath string

BindAddress string
SANs []string
Expand Down
93 changes: 93 additions & 0 deletions pkg/etcd/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"net/url"
"os"
"path/filepath"
"strconv"
"strings"
"time"

Expand All @@ -23,6 +24,7 @@ import (
"github.com/rancher/k3s/pkg/daemons/executor"
"github.com/sirupsen/logrus"
etcd "go.etcd.io/etcd/clientv3"
"go.etcd.io/etcd/clientv3/snapshot"
"go.etcd.io/etcd/etcdserver/etcdserverpb"
utilnet "k8s.io/apimachinery/pkg/util/net"
)
Expand Down Expand Up @@ -89,6 +91,25 @@ func nameFile(config *config.Control) string {
return filepath.Join(dataDir(config), "name")
}

// snapshotDir returns the directory that etcd snapshots are written to.
//
// When config.SnapshotDir is set it is returned as-is, without checking that
// it exists. Otherwise the default location <DataDir>/db/snapshots is used and
// created on demand.
//
// An error is returned when the default directory cannot be created, which
// includes the case where the path already exists but is not a directory.
func snapshotDir(config *config.Control) (string, error) {
	if config.SnapshotDir != "" {
		return config.SnapshotDir, nil
	}

	defaultSnapshotDir := filepath.Join(config.DataDir, "db", "snapshots")
	// MkdirAll is a no-op for an existing directory and fails for an existing
	// non-directory, so one call covers every case without a separate Stat
	// (and never yields an empty path together with a nil error).
	if err := os.MkdirAll(defaultSnapshotDir, 0755); err != nil {
		return "", err
	}
	return defaultSnapshotDir, nil
}

func (e *ETCD) IsInitialized(ctx context.Context, config *config.Control) (bool, error) {
if s, err := os.Stat(walDir(config)); err == nil && s.IsDir() {
return true, nil
Expand Down Expand Up @@ -120,6 +141,46 @@ func (e *ETCD) Reset(ctx context.Context, clientAccessInfo *clientaccess.Info) e
return e.newCluster(ctx, true)
}

// Restore replaces the local etcd data directory with the contents of the
// snapshot at e.config.RestorePath and then starts a new cluster from it via
// newCluster(ctx, true).
//
// The existing data directory is first moved aside to "<datadir>-old". If that
// directory is already present, a previous restore is assumed to have
// completed: an informational message is logged and the process exits with
// status 0 so the operator can restart without --snapshot-restore-path.
//
// An error is returned when no restore path is configured, the snapshot file
// cannot be stat'ed, "<datadir>-old" exists but is not a directory, the data
// directory cannot be moved aside, or the snapshot restore itself fails. On a
// failed restore the original data directory is moved back on a best-effort
// basis so that a later attempt is not mistaken for a finished restore.
func (e *ETCD) Restore(ctx context.Context) error {
	currentDataDir := dataDir(e.config)
	oldDataDir := currentDataDir + "-old"

	// check the old etcd data dir
	info, err := os.Stat(oldDataDir)
	switch {
	case err == nil && info.IsDir():
		logrus.Infof("etcd already restored from a snapshot, restart without --snapshot-restore-path flag now. Backup and delete ${datadir}/server/db on each peer etcd server and rejoin the nodes")
		os.Exit(0)
	case err == nil:
		// Exists but is not a directory: do not silently report success
		// without restoring anything.
		return fmt.Errorf("%s exists but is not a directory, cannot restore etcd snapshot", oldDataDir)
	case !os.IsNotExist(err):
		return err
	}

	if e.config.RestorePath == "" {
		return fmt.Errorf("no etcd restore path was specified")
	}
	// make sure snapshot exists before restoration
	if _, err := os.Stat(e.config.RestorePath); err != nil {
		return err
	}
	// move the data directory out of the way so the restore writes a fresh one
	if err := os.Rename(currentDataDir, oldDataDir); err != nil {
		return err
	}

	sManager := snapshot.NewV3(nil)
	if err := sManager.Restore(snapshot.RestoreConfig{
		SnapshotPath:   e.config.RestorePath,
		Name:           e.name,
		OutputDataDir:  currentDataDir,
		OutputWALDir:   walDir(e.config),
		PeerURLs:       []string{e.peerURL()},
		InitialCluster: fmt.Sprintf("%s=%s", e.name, e.peerURL()),
	}); err != nil {
		// Roll back: drop whatever the failed restore left behind and put the
		// original data directory back. Anything at currentDataDir was created
		// after the rename above, so removing it cannot touch the original data.
		// NOTE(review): presumably walDir lives under dataDir — confirm, or the
		// WAL output would need separate cleanup.
		if rmErr := os.RemoveAll(currentDataDir); rmErr != nil {
			logrus.Errorf("failed to clean up %s after failed restore: %v", currentDataDir, rmErr)
		} else if mvErr := os.Rename(oldDataDir, currentDataDir); mvErr != nil {
			logrus.Errorf("failed to move %s back to %s after failed restore: %v", oldDataDir, currentDataDir, mvErr)
		}
		return err
	}

	if err := e.setName(); err != nil {
		return err
	}

	return e.newCluster(ctx, true)
}

func (e *ETCD) Start(ctx context.Context, clientAccessInfo *clientaccess.Info) error {
existingCluster, err := e.IsInitialized(ctx, e.config)
if err != nil {
Expand All @@ -130,6 +191,8 @@ func (e *ETCD) Start(ctx context.Context, clientAccessInfo *clientaccess.Info) e
Register(ctx, e, e.config.Runtime.Core.Core().V1().Node())
return nil
}
// starting snapshot thread
go e.Snapshot(ctx)

if existingCluster {
opt, err := executor.CurrentETCDOptions()
Expand Down Expand Up @@ -480,3 +543,33 @@ func (e *ETCD) clientURLs(ctx context.Context, clientAccessInfo *clientaccess.In
}
return clientURLs, memberList, nil
}

// Snapshot periodically saves an etcd snapshot until ctx is cancelled.
//
// One snapshot is attempted every e.config.SnapshotInterval. Each snapshot is
// written to the directory returned by snapshotDir, in a file named
// "etcd-snapshot<unix-seconds>" where the timestamp is the tick time. Failures
// are logged and the loop carries on with the next tick; nothing is returned
// to the caller, so this is intended to be run in its own goroutine.
//
// A non-positive interval disables periodic snapshots (time.NewTicker would
// panic on it).
func (e *ETCD) Snapshot(ctx context.Context) {
	interval := e.config.SnapshotInterval
	if interval <= 0 {
		logrus.Warnf("etcd snapshots disabled: invalid snapshot interval %s", interval)
		return
	}

	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			// Stop with the rest of the server instead of leaking the goroutine.
			return
		case snapshotTime := <-ticker.C:
			// Receive exactly one tick per snapshot so the configured interval
			// is honoured.
			logrus.Infof("Taking etcd snapshot at %s", snapshotTime.String())
			sManager := snapshot.NewV3(nil)
			tlsConfig, err := toTLSConfig(e.runtime)
			if err != nil {
				logrus.Errorf("failed to get tls config for etcd: %v", err)
				continue
			}
			etcdConfig := etcd.Config{
				Endpoints: []string{"https://127.0.0.1:2379"},
				TLS:       tlsConfig,
				Context:   ctx,
			}
			dir, err := snapshotDir(e.config)
			if err != nil {
				logrus.Errorf("failed to get the snapshot dir: %v", err)
				// Without a directory the snapshot would land relative to the
				// working directory; skip this round instead.
				continue
			}
			snapshotPath := filepath.Join(dir, "etcd-snapshot"+strconv.Itoa(int(snapshotTime.Unix())))

			if err := sManager.Save(ctx, etcdConfig, snapshotPath); err != nil {
				logrus.Errorf("failed to save snapshot %s: %v", snapshotPath, err)
				continue
			}
		}
	}
}
16 changes: 16 additions & 0 deletions vendor/go.etcd.io/etcd/clientv3/snapshot/doc.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 35 additions & 0 deletions vendor/go.etcd.io/etcd/clientv3/snapshot/util.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 3ead6fa

Please sign in to comment.