-
Notifications
You must be signed in to change notification settings - Fork 654
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
tests/robustness: init with powerfailure case #622
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Runs the bbolt robustness suite on every push and pull request.
name: Robustness Test
on:
  - push
  - pull_request
permissions: read-all
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # Publish the Go version recorded in .go-version as a step output so
      # setup-go below can consume it.
      - id: goversion
        run: echo "goversion=$(cat .go-version)" >> "$GITHUB_OUTPUT"
      - uses: actions/setup-go@v4
        with:
          go-version: ${{ steps.goversion.outputs.goversion }}
      # Enable the failpoints, install the resulting bbolt binary, then run the
      # robustness tests under sudo.
      - run: |
          make gofail-enable
          # build bbolt with failpoint
          go install ./cmd/bbolt
          sudo -E PATH=$PATH make test-robustness
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
//go:build linux | ||
|
||
package robustness | ||
|
||
import ( | ||
"flag" | ||
"os" | ||
"testing" | ||
|
||
testutils "go.etcd.io/bbolt/tests/utils" | ||
) | ||
|
||
func TestMain(m *testing.M) { | ||
flag.Parse() | ||
testutils.RequiresRoot() | ||
os.Exit(m.Run()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,194 @@ | ||
//go:build linux | ||
|
||
package robustness | ||
|
||
import ( | ||
"bytes" | ||
"fmt" | ||
"io" | ||
"net/http" | ||
"net/url" | ||
"os" | ||
"os/exec" | ||
"path" | ||
"path/filepath" | ||
"strings" | ||
"testing" | ||
"time" | ||
|
||
"go.etcd.io/bbolt/tests/dmflakey" | ||
|
||
"github.com/stretchr/testify/assert" | ||
"github.com/stretchr/testify/require" | ||
"golang.org/x/sys/unix" | ||
) | ||
|
||
// TestRestartFromPowerFailure is to test data after unexpected power failure. | ||
func TestRestartFromPowerFailure(t *testing.T) { | ||
flakey := initFlakeyDevice(t, t.Name(), dmflakey.FSTypeEXT4, "") | ||
root := flakey.RootFS() | ||
|
||
dbPath := filepath.Join(root, "boltdb") | ||
|
||
args := []string{"bbolt", "bench", | ||
"-work", // keep the database | ||
"-path", dbPath, | ||
"-count=1000000000", | ||
"-batch-size=5", // separate total count into multiple truncation | ||
} | ||
|
||
logPath := filepath.Join(t.TempDir(), fmt.Sprintf("%s.log", t.Name())) | ||
logFd, err := os.Create(logPath) | ||
require.NoError(t, err) | ||
defer logFd.Close() | ||
|
||
fpURL := "127.0.0.1:12345" | ||
|
||
cmd := exec.Command(args[0], args[1:]...) | ||
cmd.Stdout = logFd | ||
cmd.Stderr = logFd | ||
cmd.Env = append(cmd.Env, "GOFAIL_HTTP="+fpURL) | ||
t.Logf("start %s", strings.Join(args, " ")) | ||
require.NoError(t, cmd.Start(), "args: %v", args) | ||
|
||
errCh := make(chan error, 1) | ||
go func() { | ||
errCh <- cmd.Wait() | ||
}() | ||
|
||
defer func() { | ||
if t.Failed() { | ||
logData, err := os.ReadFile(logPath) | ||
assert.NoError(t, err) | ||
t.Logf("dump log:\n: %s", string(logData)) | ||
} | ||
}() | ||
|
||
time.Sleep(time.Duration(time.Now().UnixNano()%5+1) * time.Second) | ||
t.Logf("simulate power failure") | ||
|
||
activeFailpoint(t, fpURL, "beforeSyncMetaPage", "panic") | ||
|
||
select { | ||
case <-time.After(10 * time.Second): | ||
t.Error("bbolt should stop with panic in seconds") | ||
assert.NoError(t, cmd.Process.Kill()) | ||
case err := <-errCh: | ||
require.Error(t, err) | ||
} | ||
require.NoError(t, flakey.PowerFailure("")) | ||
|
||
st, err := os.Stat(dbPath) | ||
require.NoError(t, err) | ||
t.Logf("db size: %d", st.Size()) | ||
|
||
t.Logf("verify data") | ||
output, err := exec.Command("bbolt", "check", dbPath).CombinedOutput() | ||
require.NoError(t, err, "bbolt check output: %s", string(output)) | ||
} | ||
|
||
// activeFailpoint actives the failpoint by http. | ||
func activeFailpoint(t *testing.T, targetUrl string, fpName, fpVal string) { | ||
u, err := url.Parse("http://" + path.Join(targetUrl, fpName)) | ||
require.NoError(t, err, "parse url %s", targetUrl) | ||
|
||
req, err := http.NewRequest("PUT", u.String(), bytes.NewBuffer([]byte(fpVal))) | ||
require.NoError(t, err) | ||
|
||
resp, err := http.DefaultClient.Do(req) | ||
require.NoError(t, err) | ||
defer resp.Body.Close() | ||
|
||
data, err := io.ReadAll(resp.Body) | ||
require.NoError(t, err) | ||
require.Equal(t, 204, resp.StatusCode, "response body: %s", string(data)) | ||
} | ||
|
||
// FlakeyDevice extends dmflakey.Flakey interface. | ||
type FlakeyDevice interface { | ||
// RootFS returns root filesystem. | ||
RootFS() string | ||
|
||
// PowerFailure simulates power failure with drop all the writes. | ||
PowerFailure(mntOpt string) error | ||
|
||
dmflakey.Flakey | ||
} | ||
|
||
// initFlakeyDevice returns FlakeyDevice instance with a given filesystem. | ||
func initFlakeyDevice(t *testing.T, name string, fsType dmflakey.FSType, mntOpt string) FlakeyDevice { | ||
imgDir := t.TempDir() | ||
|
||
flakey, err := dmflakey.InitFlakey(name, imgDir, fsType) | ||
require.NoError(t, err, "init flakey %s", name) | ||
t.Cleanup(func() { | ||
assert.NoError(t, flakey.Teardown()) | ||
}) | ||
|
||
rootDir := t.TempDir() | ||
err = unix.Mount(flakey.DevicePath(), rootDir, string(fsType), 0, mntOpt) | ||
require.NoError(t, err, "init rootfs on %s", rootDir) | ||
|
||
t.Cleanup(func() { assert.NoError(t, unmountAll(rootDir)) }) | ||
|
||
return &flakeyT{ | ||
Flakey: flakey, | ||
|
||
rootDir: rootDir, | ||
mntOpt: mntOpt, | ||
} | ||
} | ||
|
||
type flakeyT struct { | ||
dmflakey.Flakey | ||
|
||
rootDir string | ||
mntOpt string | ||
} | ||
|
||
// RootFS returns root filesystem. | ||
func (f *flakeyT) RootFS() string { | ||
return f.rootDir | ||
} | ||
|
||
// PowerFailure simulates power failure with drop all the writes. | ||
func (f *flakeyT) PowerFailure(mntOpt string) error { | ||
if err := f.DropWrites(); err != nil { | ||
return fmt.Errorf("failed to drop_writes: %w", err) | ||
} | ||
|
||
if err := unmountAll(f.rootDir); err != nil { | ||
return fmt.Errorf("failed to unmount rootfs %s: %w", f.rootDir, err) | ||
} | ||
|
||
if mntOpt == "" { | ||
mntOpt = f.mntOpt | ||
} | ||
|
||
if err := f.AllowWrites(); err != nil { | ||
return fmt.Errorf("failed to allow_writes: %w", err) | ||
} | ||
|
||
if err := unix.Mount(f.DevicePath(), f.rootDir, string(f.Filesystem()), 0, mntOpt); err != nil { | ||
return fmt.Errorf("failed to mount rootfs %s: %w", f.rootDir, err) | ||
} | ||
return nil | ||
} | ||
|
||
func unmountAll(target string) error { | ||
for i := 0; i < 50; i++ { | ||
if err := unix.Unmount(target, 0); err != nil { | ||
switch err { | ||
case unix.EBUSY: | ||
time.Sleep(500 * time.Millisecond) | ||
continue | ||
case unix.EINVAL: | ||
return nil | ||
default: | ||
return fmt.Errorf("failed to umount %s: %w", target, err) | ||
} | ||
} | ||
continue | ||
} | ||
return fmt.Errorf("failed to umount %s: %w", target, unix.EBUSY) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
package utils | ||
|
||
import ( | ||
"flag" | ||
"fmt" | ||
"os" | ||
) | ||
|
||
// enableRoot is set by the -test.root command-line flag and defaults to false.
var enableRoot bool

// init registers the -test.root flag on the default command-line flag set.
func init() {
	flag.CommandLine.BoolVar(&enableRoot, "test.root", false, "enable tests that require root")
}
|
||
// RequiresRoot requires root and the test.root flag has been set. | ||
func RequiresRoot() { | ||
if !enableRoot { | ||
fmt.Fprintln(os.Stderr, "Skip tests that require root") | ||
os.Exit(0) | ||
} | ||
|
||
if os.Getuid() != 0 { | ||
fmt.Fprintln(os.Stderr, "This test must be run as root.") | ||
os.Exit(1) | ||
} | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am thinking we should also support forcibly killing the process so that the process can exit at a random point?
This can be resolved in a followup PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah. I am thinking about introducing random panic including force-kill. Let me handle this in the follow-up. Thanks.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In this test, you inject the failure on the device (fs) after the process has already terminated. Should we inject the failure (dropWrite) before we terminate (panic) the process?
For the forcible-kill case (which we will support in a follow-up PR), we do need to inject the failure on the device (fs) after the process has already terminated.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Discussed with @fuweid; let's support more cases in follow-up PRs:
- Use gofailpoint
  - commit interval: make sure all data after the last sync is lost
  - commit interval: make sure part of the data since the last sync is lost
  - commit interval: almost no data loss
- Forcibly killing the process