Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Give a little more information in dolt_diff_* when there is a pk change #8631

Merged
merged 8 commits into from
Dec 17, 2024
2 changes: 1 addition & 1 deletion go/libraries/doltcore/sqle/dtables/commit_diff_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ func (dt *CommitDiffTable) LookupPartitions(ctx *sql.Context, i sql.IndexLookup)
fromSch: dt.targetSchema,
}

isDiffable, err := dp.isDiffablePartition(ctx)
isDiffable, _, err := dp.isDiffablePartition(ctx)
if err != nil {
return nil, err
}
Expand Down
35 changes: 27 additions & 8 deletions go/libraries/doltcore/sqle/dtables/diff_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -685,30 +685,39 @@ func (dp DiffPartition) GetRowIter(ctx *sql.Context, ddb *doltdb.DoltDB, joiner
// isDiffablePartition checks if the commit pair for this partition is "diffable".
// If the primary key sets changed between the two commits, it may not be
// possible to diff them.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably helpful for future code readers to explain simpleDiff and fuzzyDiff in the function docs and maybe how they are intended to be used.

func (dp *DiffPartition) isDiffablePartition(ctx *sql.Context) (bool, error) {
func (dp *DiffPartition) isDiffablePartition(ctx *sql.Context) (simpleDiff bool, fuzzyDiff bool, err error) {
// dp.to is nil when a table has been deleted previously. In this case, we return
// false, to stop processing diffs, since that previously deleted table is considered
// a logically different table and we don't want to mix the diffs together.
if dp.to == nil {
return false, nil
return false, false, nil
}

// dp.from is nil when the to commit created a new table
if dp.from == nil {
return true, nil
return true, false, nil
}

fromSch, err := dp.from.GetSchema(ctx)
if err != nil {
return false, err
return false, false, err
}

toSch, err := dp.to.GetSchema(ctx)
if err != nil {
return false, err
return false, false, err
}

easyDiff := schema.ArePrimaryKeySetsDiffable(dp.from.Format(), fromSch, toSch)
if easyDiff {
return true, false, nil
}

return schema.ArePrimaryKeySetsDiffable(dp.from.Format(), fromSch, toSch), nil
_, _, err = schema.MapSchemaBasedOnTagAndName(fromSch, toSch)
if err == nil {
return false, true, nil
}
return false, false, nil
}

type partitionSelectFunc func(*sql.Context, DiffPartition) (bool, error)
Expand Down Expand Up @@ -762,6 +771,7 @@ type DiffPartitions struct {
selectFunc partitionSelectFunc
toSch schema.Schema
fromSch schema.Schema
stopNext bool
}

// processCommit is called in a commit iteration loop. Adds partitions when it finds a commit and its parent that have
Expand Down Expand Up @@ -821,6 +831,10 @@ func (dps *DiffPartitions) processCommit(ctx *sql.Context, cmHash hash.Hash, cm
}

func (dps *DiffPartitions) Next(ctx *sql.Context) (sql.Partition, error) {
if dps.stopNext {
return nil, io.EOF
}

for {
cmHash, optCmt, err := dps.cmItr.Next(ctx)
if err != nil {
Expand Down Expand Up @@ -852,16 +866,21 @@ func (dps *DiffPartitions) Next(ctx *sql.Context) (sql.Partition, error) {

if next != nil {
// If we can't diff this commit with its parent, don't traverse any lower
canDiff, err := next.isDiffablePartition(ctx)
simpleDiff, fuzzyDiff, err := next.isDiffablePartition(ctx)
if err != nil {
return nil, err
}

if !canDiff {
if !simpleDiff && !fuzzyDiff {
ctx.Warn(PrimaryKeyChangeWarningCode, fmt.Sprintf(PrimaryKeyChangeWarning, next.fromName, next.toName))
return nil, io.EOF
}

if !simpleDiff && fuzzyDiff {
ctx.Warn(PrimaryKeyChangeWarningCode, fmt.Sprintf(PrimaryKeyChangeWarning, next.fromName, next.toName))
dps.stopNext = true
}

return *next, nil
}
}
Expand Down
10 changes: 7 additions & 3 deletions go/libraries/doltcore/sqle/dtables/prolly_row_conv.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,14 @@ func (c ProllyRowConverter) putFields(ctx context.Context, tup val.Tuple, proj v
virtualOffset := 0
for i, j := range proj {
if j == -1 {
// Skip over virtual columns in non-pk cols as they are not stored
if !isPk && c.inSchema.GetNonPKCols().GetByIndex(i).Virtual {
virtualOffset++
nonPkCols := c.inSchema.GetNonPKCols()
if len(nonPkCols.GetColumns()) > i {
// Skip over virtual columns in non-pk cols as they are not stored
if !isPk && nonPkCols.GetByIndex(i).Virtual {
virtualOffset++
}
}

continue
}

Expand Down
39 changes: 36 additions & 3 deletions go/libraries/doltcore/sqle/enginetest/dolt_queries_diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -528,14 +528,44 @@ var DiffSystemTableScriptTests = []queries.ScriptTest{
},
{
Query: "SELECT COUNT(*) FROM DOLT_DIFF_t;",
Expected: []sql.Row{{1}},
Expected: []sql.Row{{7}},
},
{
Query: "SELECT to_pk, to_c1, from_pk, from_c1, diff_type FROM DOLT_DIFF_t where to_commit=@Commit4;",
Expected: []sql.Row{{7, 8, nil, nil, "added"}},
},
},
},
{
// Similar to previous test, but with one row to avoid ordering issues.
Name: "altered keyless table add pk", // https://github.com/dolthub/dolt/issues/8625
SetUpScript: []string{
"create table tbl (i int, j int);",
"insert into tbl values (42, 23);",
"call dolt_commit('-Am', 'commit1');",
"alter table tbl add primary key(i);",
"call dolt_commit('-am', 'commit2');",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "SELECT to_i,to_j,from_i,from_j,diff_type FROM dolt_diff_tbl;",
// Output in the situation is admittedly wonky. Updating the PK leaves in a place where we can't really render
// the diff, but we want to show something. In this case, the 'pk' column tag changes, so in the last two rows
macneale4 marked this conversation as resolved.
Show resolved Hide resolved
// of the output you see we add "nil,23" and remove "nil,23" when in fact those columns were "42" with a different
// tag.
//
// In the past we just returned an empty set in this case. The
// warning is kind of essential to understand what is happening.
Expected: []sql.Row{
{42, 23, nil, nil, "added"},
{nil, nil, nil, 23, "removed"},
},
ExpectedWarningsCount: 1,
ExpectedWarning: 1105,
ExpectedWarningMessageSubstring: "due to primary key set change",
macneale4 marked this conversation as resolved.
Show resolved Hide resolved
},
},
},
{
Name: "table with commit column should maintain its data in diff",
SetUpScript: []string{
Expand Down Expand Up @@ -713,8 +743,10 @@ var Dolt1DiffSystemTableScripts = []queries.ScriptTest{
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "SELECT to_pk1, to_pk2, from_pk1, from_pk2, diff_type from dolt_diff_t;",
Expected: []sql.Row{{"2", "2", nil, nil, "added"}},
Query: "SELECT to_pk1, to_pk2, from_pk1, from_pk2, diff_type from dolt_diff_t;",
Expected: []sql.Row{
{"2", "2", nil, nil, "added"},
},
},
},
},
Expand Down Expand Up @@ -5298,6 +5330,7 @@ var CommitDiffSystemTableScriptTests = []queries.ScriptTest{
},
},
},

{
Name: "added and dropped table",
SetUpScript: []string{
Expand Down
73 changes: 46 additions & 27 deletions integration-tests/bats/sql-diff.bats
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,10 @@ SQL
done
}

@test "sql-diff: supports multiple primary keys" {
run_2pk5col_ints() {
local query_name=$1

# Initial setup
dolt checkout -b firstbranch
dolt sql <<SQL
CREATE TABLE test (
Expand All @@ -605,40 +608,56 @@ CREATE TABLE test (
PRIMARY KEY (pk1,pk2)
);
SQL
dolt table import -u test `batshelper 2pk5col-ints.csv`
dolt table import -u test $(batshelper 2pk5col-ints.csv)
dolt add .
dolt commit -m "create/init table test"

# for each query file in helper/queries/2pk5col-ints/
# run query on db, create sql diff patch, confirm they're equivalent
dolt branch newbranch
for query in delete add update single_pk_update all_pk_update create_table
do
dolt checkout newbranch
dolt sql < $BATS_TEST_DIRNAME/helper/queries/2pk5col-ints/$query.sql
dolt add .
dolt diff -r sql
dolt commit -m "applied $query query "

# confirm a difference exists
# Apply the query
dolt checkout newbranch
dolt sql < "$BATS_TEST_DIRNAME/helper/queries/2pk5col-ints/${query_name}.sql"
dolt add .
dolt diff -r sql
dolt commit -m "applied ${query_name} query"

run dolt diff -r sql firstbranch newbranch
[ "$status" -eq 0 ]
[ ! "$output" = "" ]
# Confirm a difference exists
run dolt diff -r sql firstbranch newbranch
[ "$status" -eq 0 ]
[ ! "$output" = "" ]

dolt diff -r sql firstbranch > patch.sql newbranch
dolt checkout firstbranch
dolt sql < patch.sql
rm patch.sql
dolt add .
dolt commit -m "Reconciled with newbranch"
# Generate patch, apply on firstbranch, and verify no differences
dolt diff -r sql firstbranch > patch.sql newbranch
dolt checkout firstbranch
dolt sql < patch.sql
rm patch.sql
dolt add .
dolt commit -m "Reconciled with newbranch"

# confirm that both branches have the same content
run dolt diff -r sql firstbranch newbranch
[ "$status" -eq 0 ]
[ "$output" = "" ]
done
# Confirm branches are identical
run dolt diff -r sql firstbranch newbranch
[ "$status" -eq 0 ]
[ "$output" = "" ]
}

@test "sql-diff: supports multiple primary keys (delete)" {
run_2pk5col_ints "delete"
}
@test "sql-diff: supports multiple primary keys (add)" {
run_2pk5col_ints "add"
}
@test "sql-diff: supports multiple primary keys (update)" {
run_2pk5col_ints "update"
}
@test "sql-diff: supports multiple primary keys (single_pk_update)" {
run_2pk5col_ints "single_pk_update"
}
@test "sql-diff: supports multiple primary keys (all_pk_update)" {
run_2pk5col_ints "all_pk_update"
}
@test "sql-diff: supports multiple primary keys (create_table)" {
run_2pk5col_ints "create_table"
}


@test "sql-diff: escapes values for MySQL string literals" {
# https://dev.mysql.com/doc/refman/8.0/en/string-literals.html
Expand Down
Loading