diff --git a/.buildkite/code.pipeline.yml b/.buildkite/code.pipeline.yml
index a5c8dd4b2cb..96a7189403a 100644
--- a/.buildkite/code.pipeline.yml
+++ b/.buildkite/code.pipeline.yml
@@ -242,7 +242,7 @@ steps:
       - /tmp/e2e/**/*.log
     env:
       OASIS_E2E_COVERAGE: enable
-      OASIS_EXCLUDE_E2E: e2e/runtime/txsource-multi
+      OASIS_EXCLUDE_E2E: e2e/runtime/txsource-multi,e2e/runtime/txsource-multi-short
       TEST_BASE_DIR: /tmp
       # libp2p logging.
       IPFS_LOGGING: debug
diff --git a/.changelog/3595.internal.md b/.changelog/3595.internal.md
new file mode 100644
index 00000000000..d9df280c627
--- /dev/null
+++ b/.changelog/3595.internal.md
@@ -0,0 +1 @@
+go/e2e/txsource: add txsource SGX scenario using fewer nodes
diff --git a/go/oasis-test-runner/scenario/e2e/runtime/runtime.go b/go/oasis-test-runner/scenario/e2e/runtime/runtime.go
index e9ec7cb7e26..2a60ac825ab 100644
--- a/go/oasis-test-runner/scenario/e2e/runtime/runtime.go
+++ b/go/oasis-test-runner/scenario/e2e/runtime/runtime.go
@@ -582,6 +582,10 @@ func RegisterScenarios() error {
 	for _, s := range []scenario.Scenario{
 		// Transaction source test. Non-default, because it runs for ~6 hours.
 		TxSourceMulti,
+		// SGX version of the txsource-multi-short test. Non-default, because
+		// it is identical to txsource-multi-short, only using fewer nodes
+		// due to SGX CI instance resource constraints.
+		TxSourceMultiShortSGX,
 	} {
 		if err := cmd.RegisterNondefault(s); err != nil {
 			return err
diff --git a/go/oasis-test-runner/scenario/e2e/runtime/txsource.go b/go/oasis-test-runner/scenario/e2e/runtime/txsource.go
index 399c6562aae..30361a265b3 100644
--- a/go/oasis-test-runner/scenario/e2e/runtime/txsource.go
+++ b/go/oasis-test-runner/scenario/e2e/runtime/txsource.go
@@ -68,10 +68,38 @@ var TxSourceMultiShort scenario.Scenario = &txSourceImpl{
 	consensusPruneDisabledProbability: 0.1,
 	consensusPruneMinKept: 100,
 	consensusPruneMaxKept: 200,
-	// XXX: use no more than 2 storage, 4 compute nodes as SGX E2E test
-	// instances cannot handle any more nodes that are currently configured.
-	numStorageNodes: 2,
-	numComputeNodes: 4,
+	numValidatorNodes: 4,
+	numKeyManagerNodes: 2,
+	numStorageNodes: 2,
+	numComputeNodes: 4,
+}
+
+// TxSourceMultiShortSGX uses multiple workloads for a short time.
+var TxSourceMultiShortSGX scenario.Scenario = &txSourceImpl{
+	runtimeImpl: *newRuntimeImpl("txsource-multi-short-sgx", "", nil),
+	clientWorkloads: []string{
+		workload.NameCommission,
+		workload.NameDelegation,
+		workload.NameOversized,
+		workload.NameParallel,
+		workload.NameRegistration,
+		workload.NameRuntime,
+		workload.NameTransfer,
+	},
+	allNodeWorkloads: []string{
+		workload.NameQueries,
+	},
+	timeLimit: timeLimitShort,
+	livenessCheckInterval: livenessCheckInterval,
+	consensusPruneDisabledProbability: 0.1,
+	consensusPruneMinKept: 100,
+	consensusPruneMaxKept: 200,
+	// XXX: don't use more nodes as SGX E2E test instances cannot handle many
+	// more nodes than are currently configured.
+	numValidatorNodes: 3,
+	numKeyManagerNodes: 1,
+	numStorageNodes: 2,
+	numComputeNodes: 4,
 }
 
 // TxSourceMulti uses multiple workloads.
@@ -102,6 +130,12 @@ var TxSourceMulti scenario.Scenario = &txSourceImpl{
 	// node is restarted. Enable automatic corrupted WAL recovery for validator
 	// nodes.
 	tendermintRecoverCorruptedWAL: true,
+	// Use 4 validators so that consensus can keep making progress
+	// when a node is being killed and restarted.
+	numValidatorNodes: 4,
+	// Use 2 key managers so that at least one key manager is accessible when
+	// the other one is being killed or shut down.
+	numKeyManagerNodes: 2,
 	// Use 4 storage nodes so runtime continues to work when one of the nodes
 	// is shut down.
 	numStorageNodes: 4,
@@ -133,6 +167,10 @@ type txSourceImpl struct { // nolint: maligned
 
 	enableCrashPoints bool
 
+	numValidatorNodes int
+	numKeyManagerNodes int
+	numComputeNodes int
+
 	// Configurable number of storage nodes. If running tests with long node
 	// shutdowns enabled, make sure this is at least `MinWriteReplication+1`,
 	// so that the runtime continues to work, even if one of the nodes is shut
@@ -142,9 +180,6 @@ type txSourceImpl struct { // nolint: maligned
 	// nodes in the short test variant.
 	numStorageNodes int
 
-	// Configurable number of compute nodes.
-	numComputeNodes int
-
 	rng *rand.Rand
 	seed string
 }
@@ -336,14 +371,21 @@ func (sc *txSourceImpl) Fixture() (*oasis.NetworkFixture, error) {
 		f.Network.DefaultLogWatcherHandlerFactories = []log.WatcherHandlerFactory{}
 	}
 
-	// Use at least 4 validators so that consensus can keep making progress
-	// when a node is being killed and restarted.
-	f.Validators = []oasis.ValidatorFixture{
-		{Entity: 1},
-		{Entity: 1},
-		{Entity: 1},
-		{Entity: 1},
+	var validators []oasis.ValidatorFixture
+	for i := 0; i < sc.numValidatorNodes; i++ {
+		validators = append(validators, oasis.ValidatorFixture{
+			Entity: 1,
+		})
 	}
+	f.Validators = validators
+	var keymanagers []oasis.KeymanagerFixture
+	for i := 0; i < sc.numKeyManagerNodes; i++ {
+		keymanagers = append(keymanagers, oasis.KeymanagerFixture{
+			Runtime: 0,
+			Entity: 1,
+		})
+	}
+	f.Keymanagers = keymanagers
 	var computeWorkers []oasis.ComputeWorkerFixture
 	for i := 0; i < sc.numComputeNodes; i++ {
 		computeWorkers = append(computeWorkers, oasis.ComputeWorkerFixture{
@@ -352,10 +394,6 @@ func (sc *txSourceImpl) Fixture() (*oasis.NetworkFixture, error) {
 		})
 	}
 	f.ComputeWorkers = computeWorkers
-	f.Keymanagers = []oasis.KeymanagerFixture{
-		{Runtime: 0, Entity: 1},
-		{Runtime: 0, Entity: 1},
-	}
 	var storageWorkers []oasis.StorageWorkerFixture
 	for i := 0; i < sc.numStorageNodes; i++ {
 		storageWorkers = append(storageWorkers, oasis.StorageWorkerFixture{
@@ -672,6 +710,8 @@ func (sc *txSourceImpl) Clone() scenario.Scenario {
 		consensusPruneMaxKept: sc.consensusPruneMaxKept,
 		tendermintRecoverCorruptedWAL: sc.tendermintRecoverCorruptedWAL,
 		enableCrashPoints: sc.enableCrashPoints,
+		numValidatorNodes: sc.numValidatorNodes,
+		numKeyManagerNodes: sc.numKeyManagerNodes,
 		numStorageNodes: sc.numStorageNodes,
 		numComputeNodes: sc.numComputeNodes,
 		seed: sc.seed,