From 14a374633096a3f7cb0239c22cab597781ef1e01 Mon Sep 17 00:00:00 2001 From: Renato Costa Date: Fri, 13 Jan 2023 09:36:29 -0500 Subject: [PATCH] workload: use different random seeds by default This changes most of the workloads that take a `--seed` command line flag so that they use a different, randomly generated seed on each run. Previously, they would all default to `1`, making every run of the workload produce the same data and operations. That behavior is good for reproducing a specific pattern or behavior, but workloads that have a random element should exploit that randomness by default. When a workload is invoked, a random seed is generated and logged. Users are still able to specify their own seeds by passing the `--seed` flag. Resolves #88566. Release note (cli change): workloads that take a `--seed` argument used to default to `1`. Now, they use a randomly generated seed in each run. Users can still pass a custom seed with the `--seed` flag. --- pkg/ccl/changefeedccl/encoder_test.go | 2 + pkg/ccl/workloadccl/allccl/all_test.go | 8 ++ pkg/cmd/roachtest/tests/copy.go | 8 +- pkg/workload/BUILD.bazel | 2 + pkg/workload/bank/bank.go | 11 ++- pkg/workload/bulkingest/bulkingest.go | 11 ++- pkg/workload/cli/run.go | 10 +++ pkg/workload/csv_test.go | 4 + pkg/workload/indexes/indexes.go | 9 ++- pkg/workload/insights/insights.go | 13 ++-- pkg/workload/jsonload/json.go | 34 ++++---- pkg/workload/kv/kv.go | 9 ++- pkg/workload/ledger/generate.go | 12 +-- pkg/workload/ledger/ledger.go | 7 +- pkg/workload/movr/movr.go | 19 +++-- pkg/workload/movr/workload.go | 2 +- pkg/workload/rand/rand.go | 12 +-- pkg/workload/random.go | 103 +++++++++++++++++++++++++ pkg/workload/sqlsmith/sqlsmith.go | 11 ++- pkg/workload/tpcc/generate.go | 16 ++-- pkg/workload/tpcc/random.go | 2 +- pkg/workload/tpcc/tpcc.go | 14 ++-- pkg/workload/workload.go | 3 + pkg/workload/ycsb/ycsb.go | 15 ++-- 24 files changed, 255 insertions(+), 82 deletions(-) create mode 100644 pkg/workload/random.go diff 
--git a/pkg/ccl/changefeedccl/encoder_test.go b/pkg/ccl/changefeedccl/encoder_test.go index a861a6399e46..0e0ea7b0a72e 100644 --- a/pkg/ccl/changefeedccl/encoder_test.go +++ b/pkg/ccl/changefeedccl/encoder_test.go @@ -832,6 +832,8 @@ func TestAvroLedger(t *testing.T) { testFn := func(t *testing.T, s TestServer, f cdctest.TestFeedFactory) { ctx := context.Background() + // assertions depend on this seed + ledger.RandomSeed.Set(1) gen := ledger.FromFlags(`--customers=1`) var l workloadsql.InsertsDataLoader _, err := workloadsql.Setup(ctx, s.DB, gen, l) diff --git a/pkg/ccl/workloadccl/allccl/all_test.go b/pkg/ccl/workloadccl/allccl/all_test.go index 5946d800c00e..6d1123afcc84 100644 --- a/pkg/ccl/workloadccl/allccl/all_test.go +++ b/pkg/ccl/workloadccl/allccl/all_test.go @@ -293,6 +293,14 @@ func TestDeterministicInitialData(t *testing.T) { continue } t.Run(meta.Name, func(t *testing.T) { + // assertions depend on this seed + switch rs := meta.RandomSeed.(type) { + case *workload.Int64RandomSeed: + rs.Set(1) + case *workload.Uint64RandomSeed: + rs.Set(1) + } + if bigInitialData(meta) { skip.UnderShort(t, fmt.Sprintf(`%s involves a lot of data`, meta.Name)) } diff --git a/pkg/cmd/roachtest/tests/copy.go b/pkg/cmd/roachtest/tests/copy.go index 7b21fd066406..9eb965303249 100644 --- a/pkg/cmd/roachtest/tests/copy.go +++ b/pkg/cmd/roachtest/tests/copy.go @@ -27,6 +27,10 @@ import ( "github.com/cockroachdb/errors" ) +// we need to hardcode the random seed when loading fixtures since +// that references a static bucket in GCS +const fixturesRandomSeed = 1 + func registerCopy(r registry.Registry) { // This test imports a fully-populated Bank table. It then creates an empty // Bank schema. Finally, it performs a series of `INSERT ... 
SELECT ...` @@ -60,8 +64,8 @@ func registerCopy(r registry.Registry) { t.Status("importing Bank fixture") c.Run(ctx, c.Node(1), fmt.Sprintf( - "./workload fixtures load bank --rows=%d --payload-bytes=%d {pgurl:1}", - rows, payload)) + "./workload fixtures load bank --rows=%d --payload-bytes=%d --seed %d {pgurl:1}", + rows, payload, fixturesRandomSeed)) if _, err := db.Exec("ALTER TABLE bank.bank RENAME TO bank.bank_orig"); err != nil { t.Fatalf("failed to rename table: %v", err) } diff --git a/pkg/workload/BUILD.bazel b/pkg/workload/BUILD.bazel index 1e55140c9259..080484f07f8a 100644 --- a/pkg/workload/BUILD.bazel +++ b/pkg/workload/BUILD.bazel @@ -8,6 +8,7 @@ go_library( "csv.go", "driver.go", "pgx_helpers.go", + "random.go", "round_robin.go", "sql_runner.go", "stats.go", @@ -22,6 +23,7 @@ go_library( "//pkg/util/bufalloc", "//pkg/util/encoding/csv", "//pkg/util/log", + "//pkg/util/randutil", "//pkg/util/syncutil", "//pkg/util/timeutil", "//pkg/workload/histogram", diff --git a/pkg/workload/bank/bank.go b/pkg/workload/bank/bank.go index 580c24e5c265..f580a86daafa 100644 --- a/pkg/workload/bank/bank.go +++ b/pkg/workload/bank/bank.go @@ -42,11 +42,13 @@ const ( maxTransfer = 999 ) +// RandomSeed is the bank random seed. 
+var RandomSeed = workload.NewUint64RandomSeed() + type bank struct { flags workload.Flags connFlags *workload.ConnFlags - seed uint64 rows, batchSize int payloadBytes, ranges int } @@ -60,17 +62,18 @@ var bankMeta = workload.Meta{ Description: `Bank models a set of accounts with currency balances`, Version: `1.0.0`, PublicFacing: true, + RandomSeed: RandomSeed, New: func() workload.Generator { g := &bank{} g.flags.FlagSet = pflag.NewFlagSet(`bank`, pflag.ContinueOnError) g.flags.Meta = map[string]workload.FlagMeta{ `batch-size`: {RuntimeOnly: true}, } - g.flags.Uint64Var(&g.seed, `seed`, 1, `Key hash seed.`) g.flags.IntVar(&g.rows, `rows`, defaultRows, `Initial number of accounts in bank table.`) g.flags.IntVar(&g.batchSize, `batch-size`, defaultBatchSize, `Number of rows in each batch of initial data.`) g.flags.IntVar(&g.payloadBytes, `payload-bytes`, defaultPayloadBytes, `Size of the payload field in each initial row.`) g.flags.IntVar(&g.ranges, `ranges`, defaultRanges, `Initial number of ranges in bank table.`) + RandomSeed.AddFlag(&g.flags) g.connFlags = workload.NewConnFlags(&g.flags) return g }, @@ -138,7 +141,7 @@ func (b *bank) Tables() []workload.Table { InitialRows: workload.BatchedTuples{ NumBatches: numBatches, FillBatch: func(batchIdx int, cb coldata.Batch, a *bufalloc.ByteAllocator) { - rng := rand.NewSource(b.seed + uint64(batchIdx)) + rng := rand.NewSource(RandomSeed.Seed() + uint64(batchIdx)) rowBegin, rowEnd := batchIdx*b.batchSize, (batchIdx+1)*b.batchSize if rowEnd > b.rows { @@ -204,7 +207,7 @@ func (b *bank) Ops( ql := workload.QueryLoad{SQLDatabase: sqlDatabase} for i := 0; i < b.connFlags.Concurrency; i++ { - rng := rand.New(rand.NewSource(b.seed)) + rng := rand.New(rand.NewSource(RandomSeed.Seed())) hists := reg.GetHandle() workerFn := func(ctx context.Context) error { from := rng.Intn(b.rows) diff --git a/pkg/workload/bulkingest/bulkingest.go b/pkg/workload/bulkingest/bulkingest.go index 08095d801b2d..e857a9f0dad4 100644 --- 
a/pkg/workload/bulkingest/bulkingest.go +++ b/pkg/workload/bulkingest/bulkingest.go @@ -75,11 +75,13 @@ const ( defaultPayloadBytes = 100 ) +// RandomSeed is the bulkingest workload random seed. +var RandomSeed = workload.NewInt64RandomSeed() + type bulkingest struct { flags workload.Flags connFlags *workload.ConnFlags - seed int64 aCount, bCount, cCount, payloadBytes int generateBsFirst bool @@ -94,16 +96,17 @@ var bulkingestMeta = workload.Meta{ Name: `bulkingest`, Description: `bulkingest testdata is designed to produce a skewed distribution of KVs when ingested (in initial import or during later indexing)`, Version: `1.0.0`, + RandomSeed: RandomSeed, New: func() workload.Generator { g := &bulkingest{} g.flags.FlagSet = pflag.NewFlagSet(`bulkingest`, pflag.ContinueOnError) - g.flags.Int64Var(&g.seed, `seed`, 1, `Key hash seed.`) g.flags.IntVar(&g.aCount, `a`, 10, `number of values of A (i.e. pk prefix)`) g.flags.IntVar(&g.bCount, `b`, 10, `number of values of B (i.e. idx prefix)`) g.flags.IntVar(&g.cCount, `c`, 1000, `number of values of C (i.e. 
rows per A/B pair)`) g.flags.BoolVar(&g.generateBsFirst, `batches-by-b`, false, `generate all B batches for given A first`) g.flags.BoolVar(&g.indexBCA, `index-b-c-a`, true, `include an index on (B, C, A)`) g.flags.IntVar(&g.payloadBytes, `payload-bytes`, defaultPayloadBytes, `Size of the payload field in each row.`) + RandomSeed.AddFlag(&g.flags) g.connFlags = workload.NewConnFlags(&g.flags) return g }, @@ -154,7 +157,7 @@ func (w *bulkingest) Tables() []workload.Table { cCol := cb.ColVec(2).Int64() payloadCol := cb.ColVec(3).Bytes() - rng := rand.New(rand.NewSource(w.seed + int64(ab))) + rng := rand.New(rand.NewSource(RandomSeed.Seed() + int64(ab))) var payload []byte *alloc, payload = alloc.Alloc(w.cCount*w.payloadBytes, 0 /* extraCap */) randutil.ReadTestdataBytes(rng, payload) @@ -200,7 +203,7 @@ func (w *bulkingest) Ops( ql := workload.QueryLoad{SQLDatabase: sqlDatabase} for i := 0; i < w.connFlags.Concurrency; i++ { - rng := rand.New(rand.NewSource(w.seed)) + rng := rand.New(rand.NewSource(RandomSeed.Seed())) hists := reg.GetHandle() pad := make([]byte, w.payloadBytes) workerFn := func(ctx context.Context) error { diff --git a/pkg/workload/cli/run.go b/pkg/workload/cli/run.go index f741cb5dc1c7..39ff5598380b 100644 --- a/pkg/workload/cli/run.go +++ b/pkg/workload/cli/run.go @@ -303,6 +303,7 @@ func runInit(gen workload.Generator, urls []string, dbName string) error { } startPProfEndPoint(ctx) + maybeLogRandomSeed(ctx, gen) return runInitImpl(ctx, gen, initDB, dbName) } @@ -404,6 +405,7 @@ func runRun(gen workload.Generator, urls []string, dbName string) error { limiter = rate.NewLimiter(rate.Limit(*maxRate), 1) } + maybeLogRandomSeed(ctx, gen) o, ok := gen.(workload.Opser) if !ok { return errors.Errorf(`no operations defined for %s`, gen.Meta().Name) @@ -612,3 +614,11 @@ func runRun(gen workload.Generator, urls []string, dbName string) error { } } } + +// maybeLogRandomSeed will log the random seed used by the generator, +// if a seed is being used. 
+func maybeLogRandomSeed(ctx context.Context, gen workload.Generator) { + if randomSeed := gen.Meta().RandomSeed; randomSeed != nil { + log.Infof(ctx, "%s", randomSeed.LogMessage()) + } +} diff --git a/pkg/workload/csv_test.go b/pkg/workload/csv_test.go index e1d47726382c..af684614fae4 100644 --- a/pkg/workload/csv_test.go +++ b/pkg/workload/csv_test.go @@ -46,6 +46,8 @@ func TestHandleCSV(t *testing.T) { }, } + // assertions depend on this seed + bank.RandomSeed.Set(1) meta := bank.FromRows(0).Meta() for _, test := range tests { t.Run(test.params, func(t *testing.T) { @@ -114,6 +116,8 @@ func BenchmarkWriteCSVRows(b *testing.B) { func TestCSVRowsReader(t *testing.T) { defer leaktest.AfterTest(t)() + // assertions depend on this seed + bank.RandomSeed.Set(1) table := bank.FromRows(10).Tables()[0] r := workload.NewCSVRowsReader(table, 1, 3) b, err := io.ReadAll(r) diff --git a/pkg/workload/indexes/indexes.go b/pkg/workload/indexes/indexes.go index 9b741ada7238..48a848b916c9 100644 --- a/pkg/workload/indexes/indexes.go +++ b/pkg/workload/indexes/indexes.go @@ -43,11 +43,13 @@ const ( payload BYTES NOT NULL` ) +// RandomSeed is the indexes workload random seed. 
+var RandomSeed = workload.NewInt64RandomSeed() + type indexes struct { flags workload.Flags connFlags *workload.ConnFlags - seed int64 idxs int unique bool payload int @@ -62,15 +64,16 @@ var indexesMeta = workload.Meta{ Name: `indexes`, Description: `Indexes writes to a table with a variable number of secondary indexes`, Version: `1.0.0`, + RandomSeed: RandomSeed, New: func() workload.Generator { g := &indexes{} g.flags.FlagSet = pflag.NewFlagSet(`indexes`, pflag.ContinueOnError) - g.flags.Int64Var(&g.seed, `seed`, 1, `Key hash seed.`) g.flags.IntVar(&g.idxs, `secondary-indexes`, 1, `Number of indexes to add to the table.`) g.flags.BoolVar(&g.unique, `unique-indexes`, false, `Use UNIQUE secondary indexes.`) g.flags.IntVar(&g.payload, `payload`, 64, `Size of the unindexed payload column.`) g.flags.Uint64Var(&g.cycleLength, `cycle-length`, math.MaxUint64, `Number of keys repeatedly accessed by each writer through upserts.`) + RandomSeed.AddFlag(&g.flags) g.connFlags = workload.NewConnFlags(&g.flags) return g }, @@ -170,7 +173,7 @@ func (w *indexes) Ops( op := &indexesOp{ config: w, hists: reg.GetHandle(), - rand: rand.New(rand.NewSource(int64((i + 1)) * w.seed)), + rand: rand.New(rand.NewSource(int64((i + 1)) * RandomSeed.Seed())), buf: make([]byte, w.payload), } op.stmt = op.sr.Define(stmt) diff --git a/pkg/workload/insights/insights.go b/pkg/workload/insights/insights.go index e64c70f05f40..5b9ebbc70699 100644 --- a/pkg/workload/insights/insights.go +++ b/pkg/workload/insights/insights.go @@ -49,11 +49,13 @@ const ( var tableNames = []string{tableNameA, tableNameB} +// RandomSeed is the insights workload random seed. 
+var RandomSeed = workload.NewUint64RandomSeed() + type insights struct { flags workload.Flags connFlags *workload.ConnFlags - seed uint64 rowCount, batchSize int payloadBytes, ranges int } @@ -66,6 +68,7 @@ var insightsMeta = workload.Meta{ Name: `insights`, Description: `This workload executes queries that will be detected by insights`, Version: `1.0.0`, + RandomSeed: RandomSeed, PublicFacing: false, New: func() workload.Generator { g := &insights{} @@ -73,11 +76,11 @@ var insightsMeta = workload.Meta{ g.flags.Meta = map[string]workload.FlagMeta{ `batch-size`: {RuntimeOnly: true}, } - g.flags.Uint64Var(&g.seed, `seed`, 1, `Key hash seed.`) g.flags.IntVar(&g.rowCount, `rows`, defaultRows, `Initial number of accounts in insights table.`) g.flags.IntVar(&g.batchSize, `batch-size`, defaultBatchSize, `Number of rows in each batch of initial data.`) g.flags.IntVar(&g.payloadBytes, `payload-bytes`, defaultPayloadBytes, `Size of the payload field in each initial row.`) g.flags.IntVar(&g.ranges, `ranges`, defaultRanges, `Initial number of ranges in insights table.`) + RandomSeed.AddFlag(&g.flags) g.connFlags = workload.NewConnFlags(&g.flags) return g }, @@ -150,7 +153,7 @@ func (b *insights) Tables() []workload.Table { InitialRows: workload.BatchedTuples{ NumBatches: numBatches, FillBatch: func(batchIdx int, cb coldata.Batch, a *bufalloc.ByteAllocator) { - rng := rand.NewSource(b.seed + uint64(batchIdx)) + rng := rand.NewSource(RandomSeed.Seed() + uint64(batchIdx)) rowBegin, rowEnd := batchIdx*b.batchSize, (batchIdx+1)*b.batchSize if rowEnd > b.rowCount { @@ -207,7 +210,7 @@ func (b *insights) Ops( db.SetMaxIdleConns(b.connFlags.Concurrency + 1) ql := workload.QueryLoad{SQLDatabase: sqlDatabase} - rng := rand.New(rand.NewSource(b.seed)) + rng := rand.New(rand.NewSource(RandomSeed.Seed())) for i := 0; i < b.connFlags.Concurrency; i++ { temp := i hists := reg.GetHandle() @@ -273,7 +276,7 @@ func joinOnNonIndexColumn(ctx context.Context, db *gosql.DB) error { func 
orderByOnNonIndexColumn(ctx context.Context, db *gosql.DB, rowCount int) error { rowLimit := (rand.Uint32() % uint32(rowCount)) + 1 _, err := db.ExecContext(ctx, ` - select balance + select balance from insights_workload_table_a order by balance desc limit $1;`, rowLimit) return err } diff --git a/pkg/workload/jsonload/json.go b/pkg/workload/jsonload/json.go index 8966261fe4d4..3d86519b0208 100644 --- a/pkg/workload/jsonload/json.go +++ b/pkg/workload/jsonload/json.go @@ -37,19 +37,22 @@ const ( jsonSchemaWithComputed = `(k BIGINT AS (v->>'key')::BIGINT STORED PRIMARY KEY, v JSONB NOT NULL)` ) +// RandomSeed is the json workload random seed. +var RandomSeed = workload.NewInt64RandomSeed() + type jsonLoad struct { flags workload.Flags connFlags *workload.ConnFlags - batchSize int - cycleLength int64 - readPercent int - writeSeq, seed int64 - sequential bool - splits int - complexity int - inverted bool - computed bool + batchSize int + cycleLength int64 + readPercent int + writeSeq int64 + sequential bool + splits int + complexity int + inverted bool + computed bool } func init() { @@ -60,7 +63,8 @@ var jsonLoadMeta = workload.Meta{ Name: `json`, Description: `JSON reads and writes to keys spread (by default, uniformly` + ` at random) across the cluster`, - Version: `1.0.0`, + Version: `1.0.0`, + RandomSeed: RandomSeed, New: func() workload.Generator { g := &jsonLoad{} g.flags.FlagSet = pflag.NewFlagSet(`json`, pflag.ContinueOnError) @@ -71,12 +75,12 @@ var jsonLoadMeta = workload.Meta{ g.flags.Int64Var(&g.cycleLength, `cycle-length`, math.MaxInt64, `Number of keys repeatedly accessed by each writer`) g.flags.IntVar(&g.readPercent, `read-percent`, 0, `Percent (0-100) of operations that are reads of existing keys`) g.flags.Int64Var(&g.writeSeq, `write-seq`, 0, `Initial write sequence value.`) - g.flags.Int64Var(&g.seed, `seed`, 1, `Key hash seed.`) g.flags.BoolVar(&g.sequential, `sequential`, false, `Pick keys sequentially instead of randomly`) 
g.flags.IntVar(&g.splits, `splits`, 0, `Number of splits to perform before starting normal operations`) g.flags.IntVar(&g.complexity, `complexity`, 20, `Complexity of generated JSON data`) g.flags.BoolVar(&g.inverted, `inverted`, false, `Whether to include an inverted index`) g.flags.BoolVar(&g.computed, `computed`, false, `Whether to use a computed primary key`) + RandomSeed.AddFlag(&g.flags) g.connFlags = workload.NewConnFlags(&g.flags) return g }, @@ -114,7 +118,7 @@ func (w *jsonLoad) Tables() []workload.Table { Splits: workload.Tuples( w.splits, func(splitIdx int) []interface{} { - rng := rand.New(rand.NewSource(w.seed + int64(splitIdx))) + rng := rand.New(rand.NewSource(RandomSeed.Seed() + int64(splitIdx))) g := newHashGenerator(&sequence{config: w, val: w.writeSeq}) return []interface{}{ int(g.hash(rng.Int63())), @@ -289,14 +293,14 @@ type hashGenerator struct { func newHashGenerator(seq *sequence) *hashGenerator { return &hashGenerator{ seq: seq, - random: rand.New(rand.NewSource(seq.config.seed)), + random: rand.New(rand.NewSource(RandomSeed.Seed())), hasher: sha1.New(), } } func (g *hashGenerator) hash(v int64) int64 { binary.BigEndian.PutUint64(g.buf[:8], uint64(v)) - binary.BigEndian.PutUint64(g.buf[8:16], uint64(g.seq.config.seed)) + binary.BigEndian.PutUint64(g.buf[8:16], uint64(RandomSeed.Seed())) g.hasher.Reset() _, _ = g.hasher.Write(g.buf[:16]) g.hasher.Sum(g.buf[:0]) @@ -327,7 +331,7 @@ type sequentialGenerator struct { func newSequentialGenerator(seq *sequence) *sequentialGenerator { return &sequentialGenerator{ seq: seq, - random: rand.New(rand.NewSource(seq.config.seed)), + random: rand.New(rand.NewSource(RandomSeed.Seed())), } } diff --git a/pkg/workload/kv/kv.go b/pkg/workload/kv/kv.go index 0cfa0be375c4..fe00311f8584 100644 --- a/pkg/workload/kv/kv.go +++ b/pkg/workload/kv/kv.go @@ -61,6 +61,9 @@ const ( )` ) +// RandomSeed is the kv workload random seed. 
+var RandomSeed = workload.NewInt64RandomSeed() + type kv struct { flags workload.Flags connFlags *workload.ConnFlags @@ -72,7 +75,6 @@ type kv struct { spanPercent int spanLimit int writesUseSelectForUpdate bool - seed int64 writeSeq string sequential bool zipfian bool @@ -101,6 +103,7 @@ var kvMeta = workload.Meta{ the current run. `, Version: `1.0.0`, + RandomSeed: RandomSeed, PublicFacing: true, New: func() workload.Generator { g := &kv{} @@ -124,7 +127,6 @@ var kvMeta = workload.Meta{ `LIMIT count for each spanning query, or 0 for no limit`) g.flags.BoolVar(&g.writesUseSelectForUpdate, `sfu-writes`, false, `Use SFU and transactional writes with a sleep after SFU.`) - g.flags.Int64Var(&g.seed, `seed`, 1, `Key hash seed.`) g.flags.BoolVar(&g.zipfian, `zipfian`, false, `Pick keys in a zipfian distribution instead of randomly.`) g.flags.BoolVar(&g.sequential, `sequential`, false, @@ -143,6 +145,7 @@ var kvMeta = workload.Meta{ `Target compression ratio for data blocks. Must be >= 1.0`) g.flags.BoolVar(&g.enum, `enum`, false, `Inject an enum column and use it`) + RandomSeed.AddFlag(&g.flags) g.connFlags = workload.NewConnFlags(&g.flags) return g }, @@ -556,7 +559,7 @@ func newHashGenerator(seq *sequence) *hashGenerator { func (g *hashGenerator) hash(v int64) int64 { binary.BigEndian.PutUint64(g.buf[:8], uint64(v)) - binary.BigEndian.PutUint64(g.buf[8:16], uint64(g.seq.config.seed)) + binary.BigEndian.PutUint64(g.buf[8:16], uint64(RandomSeed.Seed())) g.hasher.Reset() _, _ = g.hasher.Write(g.buf[:16]) g.hasher.Sum(g.buf[:0]) diff --git a/pkg/workload/ledger/generate.go b/pkg/workload/ledger/generate.go index 8f2d3f38c781..d8b81c7e9dde 100644 --- a/pkg/workload/ledger/generate.go +++ b/pkg/workload/ledger/generate.go @@ -48,7 +48,7 @@ var ledgerCustomerTypes = []*types.T{ func (w *ledger) ledgerCustomerInitialRow(rowIdx int) []interface{} { rng := w.rngPool.Get().(*rand.Rand) defer w.rngPool.Put(rng) - rng.Seed(w.seed + int64(rowIdx)) + rng.Seed(RandomSeed.Seed() + 
int64(rowIdx)) return []interface{}{ rowIdx, // id @@ -85,7 +85,7 @@ var ledgerTransactionColTypes = []*types.T{ func (w *ledger) ledgerTransactionInitialRow(rowIdx int) []interface{} { rng := w.rngPool.Get().(*rand.Rand) defer w.rngPool.Put(rng) - rng.Seed(w.seed + int64(rowIdx)) + rng.Seed(RandomSeed.Seed() + int64(rowIdx)) h := w.hashPool.Get().(hash.Hash64) defer w.hashPool.Put(h) @@ -105,7 +105,7 @@ func (w *ledger) ledgerTransactionInitialRow(rowIdx int) []interface{} { } func (w *ledger) ledgerTransactionSplitRow(splitIdx int) []interface{} { - rng := rand.New(rand.NewSource(w.seed + int64(splitIdx))) + rng := rand.New(rand.NewSource(RandomSeed.Seed() + int64(splitIdx))) u := uuid.FromUint128(uint128.FromInts(rng.Uint64(), rng.Uint64())) return []interface{}{ paymentIDPrefix + u.String(), @@ -115,7 +115,7 @@ func (w *ledger) ledgerTransactionSplitRow(splitIdx int) []interface{} { func (w *ledger) ledgerEntryInitialRow(rowIdx int) []interface{} { rng := w.rngPool.Get().(*rand.Rand) defer w.rngPool.Put(rng) - rng.Seed(w.seed + int64(rowIdx)) + rng.Seed(RandomSeed.Seed() + int64(rowIdx)) // Alternate. 
debit := rowIdx%2 == 0 @@ -157,7 +157,7 @@ func (w *ledger) ledgerEntrySplitRow(splitIdx int) []interface{} { func (w *ledger) ledgerSessionInitialRow(rowIdx int) []interface{} { rng := w.rngPool.Get().(*rand.Rand) defer w.rngPool.Put(rng) - rng.Seed(w.seed + int64(rowIdx)) + rng.Seed(RandomSeed.Seed() + int64(rowIdx)) return []interface{}{ randSessionID(rng), // session_id @@ -168,7 +168,7 @@ func (w *ledger) ledgerSessionInitialRow(rowIdx int) []interface{} { } func (w *ledger) ledgerSessionSplitRow(splitIdx int) []interface{} { - rng := rand.New(rand.NewSource(w.seed + int64(splitIdx))) + rng := rand.New(rand.NewSource(RandomSeed.Seed() + int64(splitIdx))) return []interface{}{ randSessionID(rng), } diff --git a/pkg/workload/ledger/ledger.go b/pkg/workload/ledger/ledger.go index 6aec4e31a0c8..8a865d03cb72 100644 --- a/pkg/workload/ledger/ledger.go +++ b/pkg/workload/ledger/ledger.go @@ -24,11 +24,13 @@ import ( "github.com/spf13/pflag" ) +// RandomSeed is the ledger workload random seed. 
+var RandomSeed = workload.NewInt64RandomSeed() + type ledger struct { flags workload.Flags connFlags *workload.ConnFlags - seed int64 customers int inlineArgs bool splits int @@ -52,11 +54,11 @@ var ledgerMeta = workload.Meta{ Name: `ledger`, Description: `Ledger simulates an accounting system using double-entry bookkeeping`, Version: `1.0.0`, + RandomSeed: RandomSeed, New: func() workload.Generator { g := &ledger{} g.flags.FlagSet = pflag.NewFlagSet(`ledger`, pflag.ContinueOnError) g.connFlags = workload.NewConnFlags(&g.flags) - g.flags.Int64Var(&g.seed, `seed`, 1, `Random number generator seed`) g.flags.IntVar(&g.customers, `customers`, 1000, `Number of customers`) g.flags.BoolVar(&g.inlineArgs, `inline-args`, false, `Use inline query arguments`) g.flags.IntVar(&g.splits, `splits`, 0, `Number of splits to perform before starting normal operations`) @@ -65,6 +67,7 @@ var ledgerMeta = workload.Meta{ g.flags.StringVar(&g.mix, `mix`, `balance=50,withdrawal=37,deposit=12,reversal=0`, `Weights for the transaction mix.`) + RandomSeed.AddFlag(&g.flags) return g }, } diff --git a/pkg/workload/movr/movr.go b/pkg/workload/movr/movr.go index 36ade4bc6779..96165d78eb41 100644 --- a/pkg/workload/movr/movr.go +++ b/pkg/workload/movr/movr.go @@ -175,11 +175,13 @@ var cities = []struct { {city: "rome", region: "europe-west1"}, } +// RandomSeed is the movr workload random seed. 
+var RandomSeed = workload.NewUint64RandomSeed() + type movr struct { flags workload.Flags connFlags *workload.ConnFlags - seed uint64 users, vehicles, rides, histories cityDistributor numPromoCodes int numUserPromoCodes int @@ -203,11 +205,11 @@ var movrMeta = workload.Meta{ Name: `movr`, Description: `MovR is a fictional vehicle sharing company`, Version: `1.0.0`, + RandomSeed: RandomSeed, PublicFacing: true, New: func() workload.Generator { g := &movr{} g.flags.FlagSet = pflag.NewFlagSet(`movr`, pflag.ContinueOnError) - g.flags.Uint64Var(&g.seed, `seed`, 1, `Key hash seed.`) g.flags.IntVar(&g.users.numRows, `num-users`, 50, `Initial number of users.`) g.flags.IntVar(&g.vehicles.numRows, `num-vehicles`, 15, `Initial number of vehicles.`) g.flags.IntVar(&g.rides.numRows, `num-rides`, 500, `Initial number of rides.`) @@ -235,6 +237,7 @@ Otherwise defaults to the gateway_region.`, g.flags.IntVar(&g.numPromoCodes, `num-promo-codes`, 1000, `Initial number of promo codes.`) g.flags.IntVar(&g.numUserPromoCodes, `num-user-promos`, 5, `Initial number of promo codes in use.`) g.flags.IntVar(&g.ranges, `num-ranges`, 9, `Initial number of ranges to break the tables into`) + RandomSeed.AddFlag(&g.flags) g.connFlags = workload.NewConnFlags(&g.flags) g.creationTime = time.Date(2019, 1, 2, 3, 4, 5, 6, time.UTC) return g @@ -607,7 +610,7 @@ func (d cityDistributor) randRowInCity(rng *rand.Rand, cityIdx int) int { } func (g *movr) movrUsersInitialRow(rowIdx int) []interface{} { - rng := rand.New(rand.NewSource(g.seed + uint64(rowIdx))) + rng := rand.New(rand.NewSource(RandomSeed.Seed() + uint64(rowIdx))) cityIdx := g.users.cityForRow(rowIdx) city := cities[cityIdx] @@ -625,7 +628,7 @@ func (g *movr) movrUsersInitialRow(rowIdx int) []interface{} { } func (g *movr) movrVehiclesInitialRow(rowIdx int) []interface{} { - rng := rand.New(rand.NewSource(g.seed + uint64(rowIdx))) + rng := rand.New(rand.NewSource(RandomSeed.Seed() + uint64(rowIdx))) cityIdx := g.vehicles.cityForRow(rowIdx) 
city := cities[cityIdx] @@ -650,7 +653,7 @@ func (g *movr) movrVehiclesInitialRow(rowIdx int) []interface{} { } func (g *movr) movrRidesInitialRow(rowIdx int) []interface{} { - rng := rand.New(rand.NewSource(g.seed + uint64(rowIdx))) + rng := rand.New(rand.NewSource(RandomSeed.Seed() + uint64(rowIdx))) cityIdx := g.rides.cityForRow(rowIdx) city := cities[cityIdx] @@ -680,7 +683,7 @@ func (g *movr) movrRidesInitialRow(rowIdx int) []interface{} { } func (g *movr) movrVehicleLocationHistoriesInitialRow(rowIdx int) []interface{} { - rng := rand.New(rand.NewSource(g.seed + uint64(rowIdx))) + rng := rand.New(rand.NewSource(RandomSeed.Seed() + uint64(rowIdx))) cityIdx := g.histories.cityForRow(rowIdx) city := cities[cityIdx] @@ -699,7 +702,7 @@ func (g *movr) movrVehicleLocationHistoriesInitialRow(rowIdx int) []interface{} } func (g *movr) movrPromoCodesInitialRow(rowIdx int) []interface{} { - rng := rand.New(rand.NewSource(g.seed + uint64(rowIdx))) + rng := rand.New(rand.NewSource(RandomSeed.Seed() + uint64(rowIdx))) code := strings.ToLower(strings.Join(g.faker.Words(rng, 3), `_`)) code = fmt.Sprintf("%d_%s", rowIdx, code) description := g.faker.Paragraph(rng) @@ -718,7 +721,7 @@ func (g *movr) movrPromoCodesInitialRow(rowIdx int) []interface{} { } func (g *movr) movrUserPromoCodesInitialRow(rowIdx int) []interface{} { - rng := rand.New(rand.NewSource(g.seed + uint64(rowIdx))) + rng := rand.New(rand.NewSource(RandomSeed.Seed() + uint64(rowIdx))) // Make evenly-spaced UUIDs sorted in the same order as the rows. 
var id uuid.UUID id.DeterministicV4(uint64(rowIdx), uint64(g.users.numRows)) diff --git a/pkg/workload/movr/workload.go b/pkg/workload/movr/workload.go index 4cdcd673abf0..3bb899aa04c7 100644 --- a/pkg/workload/movr/workload.go +++ b/pkg/workload/movr/workload.go @@ -314,7 +314,7 @@ func (m *movr) Ops( } worker := movrWorker{ db: db, - rng: rand.New(rand.NewSource(m.seed)), + rng: rand.New(rand.NewSource(RandomSeed.Seed())), faker: m.faker, creationTime: m.creationTime, activeRides: []rideInfo{}, diff --git a/pkg/workload/rand/rand.go b/pkg/workload/rand/rand.go index bda2e86d1eae..7efb81bcbfef 100644 --- a/pkg/workload/rand/rand.go +++ b/pkg/workload/rand/rand.go @@ -33,14 +33,15 @@ import ( "github.com/spf13/pflag" ) +// RandomSeed is the rand workload random seed. +var RandomSeed = workload.NewInt64RandomSeed() + type random struct { flags workload.Flags connFlags *workload.ConnFlags batchSize int - seed int64 - tableName string tables int @@ -56,6 +57,7 @@ func init() { var randMeta = workload.Meta{ Name: `rand`, Description: `random writes to table`, + RandomSeed: RandomSeed, Version: `1.0.0`, New: func() workload.Generator { g := &random{} @@ -67,9 +69,9 @@ var randMeta = workload.Meta{ g.flags.StringVar(&g.tableName, `table`, ``, `Table to write to`) g.flags.IntVar(&g.batchSize, `batch`, 1, `Number of rows to insert in a single SQL statement`) g.flags.StringVar(&g.method, `method`, `upsert`, `Choice of DML name: insert, upsert, ioc-update (insert on conflict update), ioc-nothing (insert on conflict no nothing)`) - g.flags.Int64Var(&g.seed, `seed`, 1, `Key hash seed.`) g.flags.StringVar(&g.primaryKey, `primary-key`, ``, `ioc-update and ioc-nothing require primary key`) g.flags.IntVar(&g.nullPct, `null-percent`, 5, `Percent random nulls`) + RandomSeed.AddFlag(&g.flags) g.connFlags = workload.NewConnFlags(&g.flags) return g }, @@ -89,7 +91,7 @@ func (w *random) Hooks() workload.Hooks { // Tables implements the Generator interface. 
func (w *random) Tables() []workload.Table { tables := make([]workload.Table, w.tables) - rng := rand.New(rand.NewSource(w.seed)) + rng := rand.New(rand.NewSource(RandomSeed.Seed())) for i := 0; i < w.tables; i++ { createTable := randgen.RandCreateTable(rng, "table", rng.Int(), false /* isMultiRegion */) ctx := tree.NewFmtCtx(tree.FmtParsable) @@ -291,7 +293,7 @@ AND i.indisprimary`, relid) hists: reg.GetHandle(), db: db, cols: nonComputedCols, - rng: rand.New(rand.NewSource(w.seed + int64(i))), + rng: rand.New(rand.NewSource(RandomSeed.Seed() + int64(i))), writeStmt: writeStmt, } ql.WorkerFns = append(ql.WorkerFns, op.run) diff --git a/pkg/workload/random.go b/pkg/workload/random.go new file mode 100644 index 000000000000..2cdd8e1ceeb2 --- /dev/null +++ b/pkg/workload/random.go @@ -0,0 +1,103 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package workload + +import ( + "fmt" + + "github.com/cockroachdb/cockroach/pkg/util/randutil" +) + +const ( + flagName = "seed" + flagDescription = "Random seed. Must be the same in 'init' and 'run'. Default changes in each run" +) + +// RandomSeed is the interface used by the workload runner to print +// the seed used in a run. +type RandomSeed interface { + LogMessage() string +} + +// The following structs implement the same logic for int64 and uint64 +// seeds, respectively. The distinction exists because currently some +// workloads use the `math/rand` package, while others use +// `golang.org/x/exp/rand`: the former uses int64 seeds and the latter +// uses uint64 seeds. + +// Int64RandomSeed implements the RandomSeed interface for workloads +// that take an int64 seed. 
+type Int64RandomSeed struct { + seed int64 +} + +// NewInt64RandomSeed creates a new Int64RandomSeed. +func NewInt64RandomSeed() *Int64RandomSeed { + _, seed := randutil.NewPseudoRand() + return &Int64RandomSeed{seed: seed} +} + +// Set is a test-only function. +func (rs *Int64RandomSeed) Set(val int64) { + rs.seed = val +} + +// Seed returns the underlying rng seed. +func (rs *Int64RandomSeed) Seed() int64 { + return rs.seed +} + +// AddFlag adds a `seed` command line flag to a command that can be +// used by the caller to specify a custom random seed; if no seed is +// passed, a random one is used. Particularly useful in workloads that +// implement `init` and `run`, where the same seed needs to be passed. +func (rs *Int64RandomSeed) AddFlag(flags *Flags) { + flags.Int64Var(&rs.seed, flagName, rs.Seed(), flagDescription) +} + +// LogMessage returns a string that can be used for logging the chosen +// random seed. +func (rs *Int64RandomSeed) LogMessage() string { + return fmt.Sprintf("random seed: %d", rs.seed) +} + +// Uint64RandomSeed implements the RandomSeed interface for workloads +// that take a uint64 seed. +type Uint64RandomSeed struct { + seed uint64 +} + +// NewUint64RandomSeed creates a new Uint64RandomSeed. +func NewUint64RandomSeed() *Uint64RandomSeed { + _, seed := randutil.NewPseudoRand() + return &Uint64RandomSeed{seed: uint64(seed)} +} + +// Set is a test-only function. +func (rs *Uint64RandomSeed) Set(val uint64) { + rs.seed = val +} + +// Seed returns the underlying rng seed. +func (rs *Uint64RandomSeed) Seed() uint64 { + return rs.seed +} + +// AddFlag has the same semantics as `AddFlag` for Int64RandomSeed. +func (rs *Uint64RandomSeed) AddFlag(flags *Flags) { + flags.Uint64Var(&rs.seed, flagName, rs.Seed(), flagDescription) +} + +// LogMessage returns a string that can be used for logging the chosen +// random seed. 
+func (rs *Uint64RandomSeed) LogMessage() string { + return fmt.Sprintf("random seed: %d", rs.seed) +} diff --git a/pkg/workload/sqlsmith/sqlsmith.go b/pkg/workload/sqlsmith/sqlsmith.go index 44bb61681129..ae71b320a9d7 100644 --- a/pkg/workload/sqlsmith/sqlsmith.go +++ b/pkg/workload/sqlsmith/sqlsmith.go @@ -26,11 +26,13 @@ import ( "github.com/spf13/pflag" ) +// RandomSeed is the sqlsmith workload random seed. +var RandomSeed = workload.NewInt64RandomSeed() + type sqlSmith struct { flags workload.Flags connFlags *workload.ConnFlags - seed int64 tables int errorSettings int } @@ -51,13 +53,14 @@ var sqlSmithMeta = workload.Meta{ Name: `sqlsmith`, Description: `sqlsmith is a random SQL query generator`, Version: `1.0.0`, + RandomSeed: RandomSeed, New: func() workload.Generator { g := &sqlSmith{} g.flags.FlagSet = pflag.NewFlagSet(`sqlsmith`, pflag.ContinueOnError) - g.flags.Int64Var(&g.seed, `seed`, 1, `Key hash seed.`) g.flags.IntVar(&g.tables, `tables`, 1, `Number of tables.`) g.flags.IntVar(&g.errorSettings, `error-sensitivity`, 0, `SQLSmith's sensitivity to errors. 0=ignore all errors. 1=quit on internal errors. 2=quit on any error.`) + RandomSeed.AddFlag(&g.flags) g.connFlags = workload.NewConnFlags(&g.flags) return g }, @@ -75,7 +78,7 @@ func (g *sqlSmith) Hooks() workload.Hooks { // Tables implements the Generator interface. 
func (g *sqlSmith) Tables() []workload.Table { - rng := rand.New(rand.NewSource(g.seed)) + rng := rand.New(rand.NewSource(RandomSeed.Seed())) var tables []workload.Table for idx := 0; idx < g.tables; idx++ { schema := randgen.RandCreateTable(rng, "table", idx, false /* isMultiRegion */) @@ -139,7 +142,7 @@ func (g *sqlSmith) Ops( ql := workload.QueryLoad{SQLDatabase: sqlDatabase} for i := 0; i < g.connFlags.Concurrency; i++ { - rng := rand.New(rand.NewSource(g.seed + int64(i))) + rng := rand.New(rand.NewSource(RandomSeed.Seed() + int64(i))) smither, err := sqlsmith.NewSmither(db, rng) if err != nil { return workload.QueryLoad{}, err diff --git a/pkg/workload/tpcc/generate.go b/pkg/workload/tpcc/generate.go index 678f7db57311..e0eef2833707 100644 --- a/pkg/workload/tpcc/generate.go +++ b/pkg/workload/tpcc/generate.go @@ -76,7 +76,7 @@ var itemTypes = []*types.T{ func (w *tpcc) tpccItemInitialRowBatch(rowIdx int, cb coldata.Batch, a *bufalloc.ByteAllocator) { l := w.localsPool.Get().(*generateLocals) defer w.localsPool.Put(l) - l.rng.Seed(w.seed + uint64(rowIdx)) + l.rng.Seed(RandomSeed.Seed() + uint64(rowIdx)) ao := aCharsOffset(l.rng.Intn(len(aCharsAlphabet))) iID := rowIdx + 1 @@ -119,7 +119,7 @@ func (w *tpcc) tpccWarehouseInitialRowBatch( ) { l := w.localsPool.Get().(*generateLocals) defer w.localsPool.Put(l) - l.rng.Seed(w.seed + uint64(rowIdx)) + l.rng.Seed(RandomSeed.Seed() + uint64(rowIdx)) no := numbersOffset(l.rng.Intn(len(numbersAlphabet))) lo := lettersOffset(l.rng.Intn(len(lettersAlphabet))) @@ -177,7 +177,7 @@ var stockTypes = []*types.T{ func (w *tpcc) tpccStockInitialRowBatch(rowIdx int, cb coldata.Batch, a *bufalloc.ByteAllocator) { l := w.localsPool.Get().(*generateLocals) defer w.localsPool.Put(l) - l.rng.Seed(w.seed + uint64(rowIdx)) + l.rng.Seed(RandomSeed.Seed() + uint64(rowIdx)) ao := aCharsOffset(l.rng.Intn(len(aCharsAlphabet))) sID := (rowIdx % numStockPerWarehouse) + 1 @@ -249,7 +249,7 @@ func (w *tpcc) tpccDistrictInitialRowBatch( ) { l := 
w.localsPool.Get().(*generateLocals) defer w.localsPool.Put(l) - l.rng.Seed(w.seed + uint64(rowIdx)) + l.rng.Seed(RandomSeed.Seed() + uint64(rowIdx)) ao := aCharsOffset(l.rng.Intn(len(aCharsAlphabet))) no := numbersOffset(l.rng.Intn(len(numbersAlphabet))) lo := lettersOffset(l.rng.Intn(len(lettersAlphabet))) @@ -322,7 +322,7 @@ func (w *tpcc) tpccCustomerInitialRowBatch( ) { l := w.localsPool.Get().(*generateLocals) defer w.localsPool.Put(l) - l.rng.Seed(w.seed + uint64(rowIdx)) + l.rng.Seed(RandomSeed.Seed() + uint64(rowIdx)) ao := aCharsOffset(l.rng.Intn(len(aCharsAlphabet))) no := numbersOffset(l.rng.Intn(len(numbersAlphabet))) lo := lettersOffset(l.rng.Intn(len(lettersAlphabet))) @@ -420,7 +420,7 @@ var historyTypes = []*types.T{ func (w *tpcc) tpccHistoryInitialRowBatch(rowIdx int, cb coldata.Batch, a *bufalloc.ByteAllocator) { l := w.localsPool.Get().(*generateLocals) defer w.localsPool.Put(l) - l.rng.Seed(w.seed + uint64(rowIdx)) + l.rng.Seed(RandomSeed.Seed() + uint64(rowIdx)) ao := aCharsOffset(l.rng.Intn(len(aCharsAlphabet))) // This used to be a V4 uuid made through the normal `uuid.MakeV4` @@ -485,7 +485,7 @@ func (w *tpcc) tpccOrderInitialRowBatch(rowIdx int, cb coldata.Batch, a *bufallo // NB: numOrderLines is not allowed to use precomputed random data, make sure // it stays that way. See 4.3.2.1. - l.rng.Seed(w.seed + uint64(rowIdx)) + l.rng.Seed(RandomSeed.Seed() + uint64(rowIdx)) numOrderLines := randInt(l.rng.Rand, minOrderLinesPerOrder, maxOrderLinesPerOrder) oID := (rowIdx % numOrdersPerDistrict) + 1 @@ -606,7 +606,7 @@ func (w *tpcc) tpccOrderLineInitialRowBatch( // NB: numOrderLines is not allowed to use precomputed random data, make sure // it stays that way. See 4.3.2.1. 
- l.rng.Seed(w.seed + uint64(orderRowIdx)) + l.rng.Seed(RandomSeed.Seed() + uint64(orderRowIdx)) numOrderLines := int(randInt(l.rng.Rand, minOrderLinesPerOrder, maxOrderLinesPerOrder)) // NB: There is one batch of order_line rows per order diff --git a/pkg/workload/tpcc/random.go b/pkg/workload/tpcc/random.go index 416ec901026e..0afc9be99ef4 100644 --- a/pkg/workload/tpcc/random.go +++ b/pkg/workload/tpcc/random.go @@ -21,7 +21,7 @@ var cLastTokens = [...]string{ "ESE", "ANTI", "CALLY", "ATION", "EING"} func (w *tpcc) initNonUniformRandomConstants() { - rng := rand.New(rand.NewSource(w.seed)) + rng := rand.New(rand.NewSource(RandomSeed.Seed())) w.cLoad = rng.Intn(256) w.cItemID = rng.Intn(1024) w.cCustomerID = rng.Intn(8192) diff --git a/pkg/workload/tpcc/tpcc.go b/pkg/workload/tpcc/tpcc.go index 918ea2c6f846..f41b6547b834 100644 --- a/pkg/workload/tpcc/tpcc.go +++ b/pkg/workload/tpcc/tpcc.go @@ -33,11 +33,13 @@ import ( "golang.org/x/sync/errgroup" ) +// RandomSeed is the tpcc workload random seed. 
+var RandomSeed = workload.NewUint64RandomSeed() + type tpcc struct { flags workload.Flags connFlags *workload.ConnFlags - seed uint64 warehouses int activeWarehouses int nowString []byte @@ -157,6 +159,7 @@ var tpccMeta = workload.Meta{ ` using a rich schema of multiple tables`, Version: `2.2.0`, PublicFacing: true, + RandomSeed: RandomSeed, New: func() workload.Generator { g := &tpcc{} g.flags.FlagSet = pflag.NewFlagSet(`tpcc`, pflag.ContinueOnError) @@ -184,7 +187,6 @@ var tpccMeta = workload.Meta{ `deprecated-fk-indexes`: {RuntimeOnly: true}, } - g.flags.Uint64Var(&g.seed, `seed`, 1, `Random number generator seed`) g.flags.IntVar(&g.warehouses, `warehouses`, 1, `Number of warehouses for loading`) g.flags.BoolVar(&g.fks, `fks`, true, `Add the foreign keys`) g.flags.BoolVar(&g.deprecatedFkIndexes, `deprecated-fk-indexes`, false, `Add deprecated foreign keys (needed when running against v20.1 or below clusters)`) @@ -221,6 +223,7 @@ var tpccMeta = workload.Meta{ g.flags.BoolVar(&g.separateColumnFamilies, `families`, false, `Use separate column families for dynamic and static columns`) g.flags.BoolVar(&g.replicateStaticColumns, `replicate-static-columns`, false, "Create duplicate indexes for all static columns in district, items and warehouse tables, such that each zone or rack has them locally.") g.flags.BoolVar(&g.localWarehouses, `local-warehouses`, false, `Force transactions to use a local warehouse in all cases (in violation of the TPC-C specification)`) + RandomSeed.AddFlag(&g.flags) g.connFlags = workload.NewConnFlags(&g.flags) // Hardcode this since it doesn't seem like anyone will want to change @@ -526,9 +529,10 @@ func (w *tpcc) Hooks() workload.Hooks { // Tables implements the Generator interface. 
func (w *tpcc) Tables() []workload.Table { - aCharsInit := workloadimpl.PrecomputedRandInit(rand.New(rand.NewSource(w.seed)), precomputedLength, aCharsAlphabet) - lettersInit := workloadimpl.PrecomputedRandInit(rand.New(rand.NewSource(w.seed)), precomputedLength, lettersAlphabet) - numbersInit := workloadimpl.PrecomputedRandInit(rand.New(rand.NewSource(w.seed)), precomputedLength, numbersAlphabet) + seed := RandomSeed.Seed() + aCharsInit := workloadimpl.PrecomputedRandInit(rand.New(rand.NewSource(seed)), precomputedLength, aCharsAlphabet) + lettersInit := workloadimpl.PrecomputedRandInit(rand.New(rand.NewSource(seed)), precomputedLength, lettersAlphabet) + numbersInit := workloadimpl.PrecomputedRandInit(rand.New(rand.NewSource(seed)), precomputedLength, numbersAlphabet) if w.localsPool == nil { w.localsPool = &sync.Pool{ New: func() interface{} { diff --git a/pkg/workload/workload.go b/pkg/workload/workload.go index d980ec00386b..0524c4109945 100644 --- a/pkg/workload/workload.go +++ b/pkg/workload/workload.go @@ -144,6 +144,9 @@ type Meta struct { Name string // Description is a short description of this generator. Description string + // RandomSeed points to the random seed to be used by this + // generator, if any. + RandomSeed RandomSeed // Details optionally allows specifying longer, more in-depth usage details. Details string // Version is a semantic version for this generator. It should be bumped diff --git a/pkg/workload/ycsb/ycsb.go b/pkg/workload/ycsb/ycsb.go index e40f4532a3d4..708b14061855 100644 --- a/pkg/workload/ycsb/ycsb.go +++ b/pkg/workload/ycsb/ycsb.go @@ -84,11 +84,13 @@ const ( timeFormatTemplate = `2006-01-02 15:04:05.000000-07:00` ) +// RandomSeed is the ycsb workload random seed. 
+var RandomSeed = workload.NewUint64RandomSeed() + type ycsb struct { flags workload.Flags connFlags *workload.ConnFlags - seed uint64 timeString bool insertHash bool zeroPadding int @@ -115,6 +117,7 @@ var ycsbMeta = workload.Meta{ Name: `ycsb`, Description: `YCSB is the Yahoo! Cloud Serving Benchmark`, Version: `1.0.0`, + RandomSeed: RandomSeed, PublicFacing: true, New: func() workload.Generator { g := &ycsb{} @@ -122,7 +125,6 @@ var ycsbMeta = workload.Meta{ g.flags.Meta = map[string]workload.FlagMeta{ `workload`: {RuntimeOnly: true}, } - g.flags.Uint64Var(&g.seed, `seed`, 1, `Key hash seed.`) g.flags.BoolVar(&g.timeString, `time-string`, false, `Prepend field[0-9] data with current time in microsecond precision.`) g.flags.BoolVar(&g.insertHash, `insert-hash`, true, `Key to be hashed or ordered.`) g.flags.IntVar(&g.zeroPadding, `zero-padding`, 1, `Key using "insert-hash=false" has zeros padded to left to make this length of digits.`) @@ -138,6 +140,7 @@ var ycsbMeta = workload.Meta{ g.flags.StringVar(&g.scanLengthDistribution, `scan-length-distribution`, `uniform`, `Distribution for scan length generation [zipfian, uniform]. Primarily used for workload E.`) g.flags.Uint64Var(&g.minScanLength, `min-scan-length`, 1, `The minimum length for scan operations. Primarily used for workload E.`) g.flags.Uint64Var(&g.maxScanLength, `max-scan-length`, 1000, `The maximum length for scan operations. 
Primarily used for workload E.`)
+	RandomSeed.AddFlag(&g.flags)
 
 	// TODO(dan): g.flags.Uint64Var(&g.maxWrites, `max-writes`,
 	//  7*24*3600*1500,  // 7 days at 5% writes and 30k ops/s
@@ -345,7 +348,7 @@ func (g *ycsb) Tables() []workload.Table {
 				config:   g,
 				hashFunc: fnv.New64(),
 			}
-			rng := rand.NewSource(g.seed + uint64(batchIdx))
+			rng := rand.NewSource(RandomSeed.Seed() + uint64(batchIdx))
 
 			var tmpbuf [fieldLength]byte
 			for rowIdx := rowBegin; rowIdx < rowEnd; rowIdx++ {
@@ -426,7 +429,7 @@ func (g *ycsb) Ops(
 	rowCounter := NewAcknowledgedCounter((uint64)(g.recordCount))
 
 	var requestGen randGenerator
-	requestGenRng := rand.New(rand.NewSource(g.seed))
+	requestGenRng := rand.New(rand.NewSource(RandomSeed.Seed()))
 	switch strings.ToLower(g.requestDistribution) {
 	case "zipfian":
 		requestGen, err = NewZipfGenerator(
@@ -444,7 +447,7 @@
 	}
 
 	var scanLengthGen randGenerator
-	scanLengthGenRng := rand.New(rand.NewSource(g.seed + 1))
+	scanLengthGenRng := rand.New(rand.NewSource(RandomSeed.Seed() + 1))
 	switch strings.ToLower(g.scanLengthDistribution) {
 	case "zipfian":
 		scanLengthGen, err = NewZipfGenerator(scanLengthGenRng, g.minScanLength, g.maxScanLength, defaultTheta, false /* verbose */)
@@ -477,6 +480,7 @@
 		}
 	}
 	conn, err := db.Conn(ctx)
 	if err != nil {
 		return workload.QueryLoad{}, err
 	}
+	rng := rand.New(rand.NewSource(RandomSeed.Seed() + uint64(i)))
@@ -509,7 +513,6 @@
 		updateStmts[i] = stmt
 	}
 
-	rng := rand.New(rand.NewSource(g.seed + uint64(i)))
 	w := &ycsbWorker{
 		config:   g,
 		hists:    reg.GetHandle(),