-
Notifications
You must be signed in to change notification settings - Fork 2.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve tablet types to wait #17622
base: main
Are you sure you want to change the base?
Improve tablet types to wait #17622
Changes from all commits
65c00a8
abb53cb
f4f2f88
c59a642
bc1d135
97e8f3e
f96ba9f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,85 +19,19 @@ package loadkeyspace | |
import ( | ||
"os" | ||
"path" | ||
"strings" | ||
"testing" | ||
"time" | ||
|
||
"github.com/stretchr/testify/require" | ||
|
||
"vitess.io/vitess/go/test/endtoend/utils" | ||
|
||
"vitess.io/vitess/go/test/endtoend/cluster" | ||
) | ||
|
||
var ( | ||
clusterInstance *cluster.LocalProcessCluster | ||
hostname = "localhost" | ||
keyspaceName = "ks" | ||
cell = "zone1" | ||
sqlSchema = ` | ||
create table vt_user ( | ||
id bigint, | ||
name varchar(64), | ||
primary key (id) | ||
) Engine=InnoDB; | ||
create table main ( | ||
id bigint, | ||
val varchar(128), | ||
primary key(id) | ||
) Engine=InnoDB; | ||
create table test_table ( | ||
id bigint, | ||
val varchar(128), | ||
primary key(id) | ||
) Engine=InnoDB; | ||
` | ||
) | ||
|
||
func TestLoadKeyspaceWithNoTablet(t *testing.T) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test case was removed as discussed on Slack There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is an empty string an allowed value for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What I understood is that the default is empty but it fails validation, so at least one tablet type has to be provided. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Correct, the |
||
var err error | ||
|
||
clusterInstance = cluster.NewCluster(cell, hostname) | ||
defer clusterInstance.Teardown() | ||
|
||
// Start topo server | ||
err = clusterInstance.StartTopo() | ||
require.NoError(t, err) | ||
|
||
// create keyspace | ||
keyspace := &cluster.Keyspace{ | ||
Name: keyspaceName, | ||
SchemaSQL: sqlSchema, | ||
} | ||
clusterInstance.VtTabletExtraArgs = append(clusterInstance.VtTabletExtraArgs, "--queryserver-config-schema-change-signal") | ||
err = clusterInstance.StartUnshardedKeyspace(*keyspace, 0, false) | ||
require.NoError(t, err) | ||
|
||
// teardown vttablets | ||
for _, vttablet := range clusterInstance.Keyspaces[0].Shards[0].Vttablets { | ||
err = vttablet.VttabletProcess.TearDown() | ||
require.NoError(t, err) | ||
utils.TimeoutAction(t, 1*time.Minute, "timeout - teardown of VTTablet", func() bool { | ||
return vttablet.VttabletProcess.GetStatus() == "" | ||
}) | ||
} | ||
|
||
// Start vtgate with the schema_change_signal flag | ||
clusterInstance.VtGateExtraArgs = append(clusterInstance.VtGateExtraArgs, "--schema_change_signal") | ||
err = clusterInstance.StartVtgate() | ||
require.NoError(t, err) | ||
|
||
// After starting VTGate we need to leave enough time for resolveAndLoadKeyspace to reach | ||
// the schema tracking timeout (5 seconds). | ||
utils.TimeoutAction(t, 5*time.Minute, "timeout - could not find 'Unable to get initial schema reload' in 'vtgate-stderr.txt'", func() bool { | ||
logDir := clusterInstance.VtgateProcess.LogDir | ||
all, _ := os.ReadFile(path.Join(logDir, "vtgate-stderr.txt")) | ||
return strings.Contains(string(all), "Unable to get initial schema reload") | ||
}) | ||
} | ||
|
||
func TestNoInitialKeyspace(t *testing.T) { | ||
var err error | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -295,8 +295,21 @@ func Init( | |
// TabletGateway can create its own healthcheck | ||
gw := NewTabletGateway(ctx, hc, serv, cell) | ||
gw.RegisterStats() | ||
if err := gw.WaitForTablets(ctx, tabletTypesToWait); err != nil { | ||
log.Fatalf("tabletGateway.WaitForTablets failed: %v", err) | ||
|
||
// Retry loop for potential time-outs waiting for all tablets. | ||
OuterLoop: | ||
for { | ||
err := gw.WaitForTablets(ctx, tabletTypesToWait) | ||
switch { | ||
case err == nil: | ||
break OuterLoop | ||
case errors.Is(err, context.DeadlineExceeded): | ||
log.Warning("TabletGateway timed out waiting for tablets to become available - retrying.") | ||
|
||
continue | ||
Comment on lines
+306
to
+309
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this would require a new context for retry otherwise it will keep failing with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am unsure I follow what you would like the behavior to be, could you please elaborate? Thanks! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should check the parent context sent to WaitForTablets to ensure it is still valid. If the context has already expired, the retry will continue to fail with the same error. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see, thanks for the clarification. However, I am unsure this is necessary. The upstream context passed by There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That is my expectation as well. |
||
default: | ||
log.Fatalf("tabletGateway.WaitForTablets failed: %v", err) | ||
} | ||
} | ||
|
||
dynamicConfig := NewDynamicViperConfig() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need two tablets here because the `vtgate` start-up script waits for `primary,replica`, and with a single tablet we end up with no replicas.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we change vtgate to only wait for PRIMARY?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The script that starts the `vtgate` (`../common/vtgate-up.sh`) is common to all tests, not only `region_sharding`, so changing it would affect all tests. It seemed a less intrusive change to modify only this test by adding an extra `vttablet`. But if you prefer, I can do it that way. LMK.