Skip to content

Commit

Permalink
randgen: add PopulateTableWithRandomData
Browse files Browse the repository at this point in the history
PopulateTableWithRandData populates the caller's table with random data. This helper
function aims to make it easier for engineers to develop randomized tests that
leverage randgen / sqlsmith.

I considered adding random insert statements into sqlsmith's randtables setup,
however the high probability of a faulty insert statement would cause the whole
setup to fail. See cockroachdb#75159

Informs cockroachdb#72345

Release note: None
  • Loading branch information
msbutler committed Feb 3, 2022
1 parent a5158c4 commit dd227d8
Show file tree
Hide file tree
Showing 7 changed files with 262 additions and 5 deletions.
1 change: 1 addition & 0 deletions bazel-out
3 changes: 0 additions & 3 deletions pkg/internal/sqlsmith/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,6 @@ func randTablesN(r *rand.Rand, n int) string {
sb.WriteString(stmt.String())
sb.WriteString(";\n")
}

// TODO(mjibson): add random INSERTs.

return sb.String()
}

Expand Down
20 changes: 18 additions & 2 deletions pkg/sql/randgen/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,23 @@ go_library(

go_test(
name = "randgen_test",
srcs = ["mutator_test.go"],
srcs = [
"main_test.go",
"mutator_test.go",
"schema_test.go",
],
embed = [":randgen"],
deps = ["//pkg/util/randutil"],
deps = [
"//pkg/base",
"//pkg/security",
"//pkg/security/securitytest",
"//pkg/server",
"//pkg/sql/sem/tree",
"//pkg/testutils/serverutils",
"//pkg/testutils/sqlutils",
"//pkg/testutils/testcluster",
"//pkg/util/leaktest",
"//pkg/util/randutil",
"@com_github_stretchr_testify//require",
],
)
33 changes: 33 additions & 0 deletions pkg/sql/randgen/main_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package randgen

import (
"os"
"testing"

"github.com/cockroachdb/cockroach/pkg/security"
"github.com/cockroachdb/cockroach/pkg/security/securitytest"
"github.com/cockroachdb/cockroach/pkg/server"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
)

//go:generate ../../util/leaktest/add-leaktest.sh *_test.go

// TestMain is the entry point for this package's tests. It installs the
// embedded security assets as the asset loader, seeds randutil for tests, and
// registers the test server and test cluster factories before running the
// tests and exiting with their result code.
func TestMain(m *testing.M) {
	// Package-wide test fixtures.
	security.SetAssetLoader(securitytest.EmbeddedAssets)
	randutil.SeedForTests()

	// Factories used by serverutils to start servers and clusters.
	serverutils.InitTestServerFactory(server.TestServerFactory)
	serverutils.InitTestClusterFactory(testcluster.TestClusterFactory)

	exitCode := m.Run()
	os.Exit(exitCode)
}
2 changes: 2 additions & 0 deletions pkg/sql/randgen/mutator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@ import (
"strings"
"testing"

"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
)

func TestPostgresMutator(t *testing.T) {
defer leaktest.AfterTest(t)()
q := `
CREATE TABLE t (s STRING FAMILY fam1, b BYTES, FAMILY fam2 (b), PRIMARY KEY (s ASC, b DESC), INDEX (s) STORING (b))
PARTITION BY LIST (s)
Expand Down
132 changes: 132 additions & 0 deletions pkg/sql/randgen/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/colinfo"
"github.com/cockroachdb/cockroach/pkg/sql/parser"
"github.com/cockroachdb/cockroach/pkg/sql/rowenc"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/types"
Expand Down Expand Up @@ -186,6 +187,137 @@ func RandCreateTableWithColumnIndexNumberGenerator(
return res[0].(*tree.CreateTable)
}

// parseCreateStatement parses createStmtSQL and returns the resulting AST as a
// *tree.CreateTable. It returns an error if the string fails to parse, if it
// does not contain exactly one statement, or if that statement is not a
// CREATE TABLE statement.
func parseCreateStatement(createStmtSQL string) (*tree.CreateTable, error) {
	var p parser.Parser
	stmts, err := p.Parse(createStmtSQL)
	if err != nil {
		return nil, err
	}
	if len(stmts) != 1 {
		// This branch is taken for zero statements as well as for several, so
		// report the actual count instead of claiming there was "more than one".
		return nil, errors.Errorf(
			"expected exactly one statement in CREATE TABLE string, found %d", len(stmts))
	}
	tableStmt, ok := stmts[0].AST.(*tree.CreateTable)
	if !ok {
		return nil, errors.Errorf(
			"AST could not be cast to *tree.CreateTable, found %T", stmts[0].AST)
	}
	return tableStmt, nil
}

// generateInsertStmtVals generates one parenthesized, comma-separated tuple of
// random values, suitable for use after the VALUES keyword in an INSERT
// statement.
//
// colTypes and nullable are parallel slices: nullable[j] is passed to
// RandDatum when generating a value for colTypes[j]. All randomness is drawn
// from rng so that the output is reproducible from the caller's seed.
//
// The builder is returned by value; callers should only read it (e.g. via
// String()) and not write to the returned copy.
func generateInsertStmtVals(rng *rand.Rand, colTypes []*types.T, nullable []bool) strings.Builder {
	var valBuilder strings.Builder
	valBuilder.WriteString("(")
	for j, typ := range colTypes {
		if j > 0 {
			valBuilder.WriteString(", ")
		}
		var d tree.Datum
		if typ.Family() == types.OidFamily {
			// choose 0 or 1 as the OID value as the value must be less than the
			// number of tables in the database.
			d = tree.NewDOid(tree.DInt(rng.Intn(2)))
		} else if rng.Intn(10) < 4 {
			// 40% of the time, use a corner case value
			d = randInterestingDatum(rng, typ)
		}
		if d == nil {
			// No corner case value was chosen (or produced), so fall back to a
			// plain random datum.
			d = RandDatum(rng, typ, nullable[j])
		}
		valBuilder.WriteString(tree.AsStringWithFlags(d, tree.FmtParsable))
	}
	valBuilder.WriteString(")")
	return valBuilder
}

// PopulateTableWithRandData populates the provided table with numRows rows of
// random data.
//
// The table's schema is discovered by running SHOW CREATE TABLE on tableName
// and parsing the result. Hidden and computed columns are skipped. An error is
// returned without inserting anything if:
//   - the table cannot be found or its CREATE statement cannot be parsed,
//   - a column has a foreign key reference or a CHECK constraint,
//   - a column's type is not statically known,
//   - an OID column has a uniqueness constraint and numRows > 2.
//
// Rows are inserted one statement at a time. Failed inserts are tolerated
// (e.g. UNIQUE violations) until more than numRows*10 of them have occurred,
// at which point an error wrapping the last insert failure is returned.
func PopulateTableWithRandData(rng *rand.Rand, db *gosql.DB, tableName string, numRows int) error {
	var ignored, createStmtSQL string
	res := db.QueryRow(fmt.Sprintf("SHOW CREATE TABLE %s", tableName))
	if err := res.Scan(&ignored, &createStmtSQL); err != nil {
		return errors.Wrapf(err, "table does not exist in db")
	}
	createStmt, err := parseCreateStatement(createStmtSQL)
	if err != nil {
		return errors.Wrapf(err, "failed to determine table schema")
	}

	// Populate helper objects for insert statement creation and error out if a
	// column's constraints will make it impossible to execute random insert
	// statements.
	defs := createStmt.Defs
	colTypes := make([]*types.T, 0, len(defs))
	nullable := make([]bool, 0, len(defs))
	var colNameBuilder strings.Builder
	comma := ""
	for _, def := range defs {
		col, ok := def.(*tree.ColumnTableDef)
		if !ok {
			// Only column definitions contribute to the insert statement.
			continue
		}
		if col.References.Table != nil {
			// Given that this function only populates an individual table without
			// considering other tables in the database, populating a column with a
			// foreign key reference can be nearly impossible.
			return errors.Errorf("cannot populate column with foreign key reference")
		}
		if len(col.CheckExprs) != 0 {
			// RandDatum is unaware of CHECK constraints, so populating a column with
			// CHECK constraints can be nearly impossible.
			return errors.Errorf("cannot populate column with CHECK constraint")
		}
		colType, ok := col.Type.(*types.T)
		if !ok {
			// Return an error rather than panicking on a failed type assertion.
			return errors.Errorf(
				"cannot populate column %s: type is not statically known", col.Name.String())
		}
		if colType.Family() == types.OidFamily && col.Unique.IsUnique && numRows > 2 {
			// For OID columns, PopulateTableWithRandData randomly chooses 0 or 1 as a value,
			// which means it's impossible to obey a uniqueness constraint if numRows>2.
			return errors.Errorf("cannot populate oid column with uniqueness constraint when numrows>2")
		}
		if col.Computed.Computed || col.Hidden {
			// cannot insert values into hidden or computed columns, so skip adding
			// them to the list of columns to insert data into
			continue
		}
		colTypes = append(colTypes, colType)
		nullable = append(nullable, col.Nullable.Nullability == tree.Null)
		colNameBuilder.WriteString(comma)
		colNameBuilder.WriteString(col.Name.String())
		comma = ", "
	}
	colNames := colNameBuilder.String()

	var (
		success int // number of successfully executed insert statements
		fail    int // number of failed insert statements
	)
	maxTries := numRows * 10
	for success < numRows {
		valBuilder := generateInsertStmtVals(rng, colTypes, nullable)
		insertStmt := fmt.Sprintf("INSERT INTO %s (%s) VALUES %s;",
			tableName,
			colNames,
			valBuilder.String())
		if _, err := db.Exec(insertStmt); err != nil {
			// Inserting into an arbitrary table with a UNIQUE constraint can be finicky,
			// so allow some room for error!
			fail++
			if fail > maxTries {
				// This could mean PopulateTableWithRandData or RandDatum couldn't
				// handle this table's schema. consider filing a bug.
				return errors.Wrapf(err,
					"could not populate %d rows for table with schema\n\t%s\n"+
						"-- only %d successful insert attempts out of %d total attempts",
					numRows, createStmt.String(), success, success+fail)
			}
			continue
		}
		success++
	}
	return nil
}

// GenerateRandInterestingTable takes a gosql.DB connection and creates
// a table with all the types in randInterestingDatums and rows of the
// interesting datums.
Expand Down
76 changes: 76 additions & 0 deletions pkg/sql/randgen/schema_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Copyright 2022 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package randgen

import (
"context"
"fmt"
"strings"
"testing"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
"github.com/stretchr/testify/require"
)

// TestPopulateTableWithRandData generates some random tables and passes if
// at least one of those tables is successfully populated with the requested
// number of rows.
func TestPopulateTableWithRandData(t *testing.T) {
	defer leaktest.AfterTest(t)()

	ctx := context.Background()
	s, dbConn, _ := serverutils.StartServer(t, base.TestServerArgs{})
	defer s.Stopper().Stop(ctx)

	rng, _ := randutil.NewTestRand()

	sqlDB := sqlutils.MakeSQLRunner(dbConn)
	sqlDB.Exec(t, "CREATE DATABASE rand")

	// Turn off auto stats collection to prevent out of memory errors on stress tests
	sqlDB.Exec(t, "SET CLUSTER SETTING sql.stats.automatic_collection.enabled = false")

	tablePrefix := "table"
	numTables := 10

	stmts := RandCreateTables(rng, tablePrefix, numTables,
		PartialIndexMutator,
	)

	var sb strings.Builder
	for _, stmt := range stmts {
		sb.WriteString(tree.SerializeForDisplay(stmt))
		sb.WriteString(";\n")
	}
	sqlDB.Exec(t, sb.String())

	// To prevent the test from being flaky, pass the test if
	// PopulateTableWithRandData works at least once.
	success := false
	for i := 1; i <= numTables; i++ {
		tableName := tablePrefix + fmt.Sprint(i)
		// Always request at least one row: with zero rows no INSERT is ever
		// executed, so the test would pass without exercising anything.
		numRows := 1 + rng.Intn(30)
		err := PopulateTableWithRandData(rng, dbConn, tableName, numRows)
		if err != nil {
			t.Log(err)
			continue
		}
		res := sqlDB.QueryStr(t, fmt.Sprintf("SELECT count(*) FROM %s", tableName))
		require.Equal(t, fmt.Sprint(numRows), res[0][0])
		success = true
		break
	}
	require.True(t, success, "could not populate any of the %d random tables", numTables)
}

0 comments on commit dd227d8

Please sign in to comment.