Skip to content

Commit

Permalink
workload: fix non-determinism in TPC-H data generation
Browse files (browse the repository at this point in the history)
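This commit fixes the non-determinism in the TPC-H data generation by
having randPartName build a local slice for the random permutation of
indexes into randPartNames. Previously the slice was stored in the pooled
generateLocals and swapped in place, so its contents carried over from one
call to the next. It also changes the permutation algorithm to a modified
(partial) Fisher–Yates shuffle, in which the swap index j for position i is
drawn from [i, len(namePerm)) rather than from the whole slice.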

Fixes #93958

Release note: None
  • Loading branch information
rytaft committed Jan 4, 2023
1 parent e503f18 commit 9bc1393
Show file tree
Hide file tree
Showing 5 changed files with 13 additions and 22 deletions.
4 changes: 1 addition & 3 deletions pkg/ccl/workloadccl/allccl/all_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,6 @@ func hashTableInitialData(
func TestDeterministicInitialData(t *testing.T) {
defer leaktest.AfterTest(t)()

skip.WithIssue(t, 93958, "flaky test")

// There are other tests that run initial data generation under race, so we
// don't get anything from running this one under race as well.
skip.UnderRace(t, "uninteresting under race")
Expand All @@ -280,7 +278,7 @@ func TestDeterministicInitialData(t *testing.T) {
`sqlsmith`: 0xcbf29ce484222325,
`startrek`: 0xa0249fbdf612734c,
`tpcc`: 0xab32e4f5e899eb2f,
`tpch`: 0xe013881749bb67e8,
`tpch`: 0xe4fd28db230b9149,
`ycsb`: 0x1244ea1c29ef67f6,
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/workload/tpch/generate.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ func (w *tpch) tpchPartInitialRowBatch(batchIdx int, cb coldata.Batch, a *bufall
// P_PARTKEY unique within [SF * 200,000].
cb.ColVec(0).Int64()[0] = int64(partKey)
// P_NAME generated by concatenating five unique randomly selected part name strings.
cb.ColVec(1).Bytes().Set(0, randPartName(rng, l.namePerm, a))
cb.ColVec(1).Bytes().Set(0, randPartName(rng, a))
m, mfgr := randMfgr(rng, a)
// P_MFGR text appended with digit ["Manufacturer#",M], where M = random value [1,5].
cb.ColVec(2).Bytes().Set(0, mfgr) //
Expand Down
13 changes: 9 additions & 4 deletions pkg/workload/tpch/random.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,16 @@ const nPartNames = 5

// randPartName concatenates 5 random unique strings from randPartNames, separated
// by spaces.
func randPartName(rng *rand.Rand, namePerm []int, a *bufalloc.ByteAllocator) []byte {
// do nPartNames iterations of rand.Perm, to get a random 5-subset of the
// indexes into randPartNames.
func randPartName(rng *rand.Rand, a *bufalloc.ByteAllocator) []byte {
namePerm := make([]int, len(randPartNames))
for i := range namePerm {
namePerm[i] = i
}
// Create a random 5-subset of the indexes into randPartNames using a modified
// Fisher–Yates shuffle.
for i := 0; i < nPartNames; i++ {
j := rng.Intn(len(namePerm))
// N.B. Correctness requires that i <= j < len(namePerm)
j := rng.Intn(len(namePerm)-i) + i
namePerm[i], namePerm[j] = namePerm[j], namePerm[i]
}
var buf []byte
Expand Down
6 changes: 1 addition & 5 deletions pkg/workload/tpch/random_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,7 @@ func TestRandPartName(t *testing.T) {
rng := rand.New(rand.NewSource(uint64(timeutil.Now().UnixNano())))
seen := make(map[string]int)
runOneRound := func() {
namePerm := make([]int, len(randPartNames))
for i := range namePerm {
namePerm[i] = i
}
res := randPartName(rng, namePerm, &a)
res := randPartName(rng, &a)
names := strings.Split(string(res), " ")
assert.Equal(t, len(names), nPartNames)
seenLocal := make(map[string]int)
Expand Down
10 changes: 1 addition & 9 deletions pkg/workload/tpch/tpch.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,9 +190,6 @@ func (w *tpch) Hooks() workload.Hooks {
type generateLocals struct {
rng *rand.Rand

// namePerm is a slice of ordinals into randPartNames.
namePerm []int

orderData *orderSharedRandomData
}

Expand All @@ -201,13 +198,8 @@ func (w *tpch) Tables() []workload.Table {
if w.localsPool == nil {
w.localsPool = &sync.Pool{
New: func() interface{} {
namePerm := make([]int, len(randPartNames))
for i := range namePerm {
namePerm[i] = i
}
return &generateLocals{
rng: rand.New(rand.NewSource(uint64(timeutil.Now().UnixNano()))),
namePerm: namePerm,
rng: rand.New(rand.NewSource(uint64(timeutil.Now().UnixNano()))),
orderData: &orderSharedRandomData{
partKeys: make([]int, 0, 7),
shipDates: make([]int64, 0, 7),
Expand Down

0 comments on commit 9bc1393

Please sign in to comment.