diff --git a/etcdserver/api/snap/snapshotter.go b/etcdserver/api/snap/snapshotter.go index 7e7933374c9..409aed386b6 100644 --- a/etcdserver/api/snap/snapshotter.go +++ b/etcdserver/api/snap/snapshotter.go @@ -226,6 +226,9 @@ func (s *Snapshotter) snapNames() ([]string, error) { if err != nil { return nil, err } + if err = s.cleanupSnapdir(names); err != nil { + return nil, err + } snaps := checkSuffix(s.lg, names) if len(snaps) == 0 { return nil, ErrNoSnapshot @@ -253,3 +256,21 @@ func checkSuffix(lg *zap.Logger, names []string) []string { } return snaps } + +// cleanupSnapdir removes any files that should not be in the snapshot directory: +// - db.tmp prefixed files that can be orphaned by defragmentation +func (s *Snapshotter) cleanupSnapdir(filenames []string) error { + for _, filename := range filenames { + if strings.HasPrefix(filename, "db.tmp") { + if s.lg != nil { + s.lg.Info("found orphaned defragmentation file; deleting", zap.String("path", filename)) + } else { + plog.Infof("found orphaned defragmentation file; deleting: %s", filename) + } + if rmErr := os.Remove(filepath.Join(s.dir, filename)); rmErr != nil && !os.IsNotExist(rmErr) { + return fmt.Errorf("failed to remove orphaned defragmentation file %s: %v", filename, rmErr) + } + } + } + return nil +} diff --git a/mvcc/backend/backend.go b/mvcc/backend/backend.go index bffd74950b4..6cbf3e66709 100644 --- a/mvcc/backend/backend.go +++ b/mvcc/backend/backend.go @@ -369,13 +369,24 @@ func (b *backend) defrag() error { b.batchTx.tx = nil - tmpdb, err := bolt.Open(b.db.Path()+".tmp", 0600, boltOpenOptions) + // Create a temporary file to ensure we start with a clean slate. + // Snapshotter.cleanupSnapdir cleans up any of these that are found during startup. + dir := filepath.Dir(b.db.Path()) + temp, err := ioutil.TempFile(dir, "db.tmp.*") + if err != nil { + return err + } + options := *boltOpenOptions + options.OpenFile = func(path string, i int, mode os.FileMode) (file *os.File, err error) { + return temp, nil + } + tdbp := temp.Name() + tmpdb, err := bolt.Open(tdbp, 0600, &options) if err != nil { return err } dbp := b.db.Path() - tdbp := tmpdb.Path() size1, sizeInUse1 := b.Size(), b.SizeInUse() if b.lg != nil { b.lg.Info( @@ -387,11 +398,17 @@ func (b *backend) defrag() error { zap.String("current-db-size-in-use", humanize.Bytes(uint64(sizeInUse1))), ) } - + // gofail: var defragBeforeCopy struct{} err = defragdb(b.db, tmpdb, defragLimit) if err != nil { tmpdb.Close() - os.RemoveAll(tmpdb.Path()) + if rmErr := os.RemoveAll(tmpdb.Path()); rmErr != nil { + if b.lg != nil { + b.lg.Error("failed to remove db.tmp after defragmentation completed", zap.Error(rmErr)) + } else { + plog.Fatalf("failed to remove db.tmp after defragmentation completed: %v", rmErr) + } + } return err } @@ -411,6 +428,7 @@ func (b *backend) defrag() error { plog.Fatalf("cannot close database (%s)", err) } } + // gofail: var defragBeforeRename struct{} err = os.Rename(tdbp, dbp) if err != nil { if b.lg != nil {