diff --git a/go/libraries/doltcore/sqle/dtables/commit_diff_table.go b/go/libraries/doltcore/sqle/dtables/commit_diff_table.go index 4eeee2a812..dcc50a12d5 100644 --- a/go/libraries/doltcore/sqle/dtables/commit_diff_table.go +++ b/go/libraries/doltcore/sqle/dtables/commit_diff_table.go @@ -216,7 +216,7 @@ func (dt *CommitDiffTable) LookupPartitions(ctx *sql.Context, i sql.IndexLookup) fromSch: dt.targetSchema, } - isDiffable, err := dp.isDiffablePartition(ctx) + isDiffable, _, err := dp.isDiffablePartition(ctx) if err != nil { return nil, err } diff --git a/go/libraries/doltcore/sqle/dtables/diff_table.go b/go/libraries/doltcore/sqle/dtables/diff_table.go index c69b8f6cd3..a70f24a5ac 100644 --- a/go/libraries/doltcore/sqle/dtables/diff_table.go +++ b/go/libraries/doltcore/sqle/dtables/diff_table.go @@ -684,31 +684,42 @@ func (dp DiffPartition) GetRowIter(ctx *sql.Context, ddb *doltdb.DoltDB, joiner // isDiffablePartition checks if the commit pair for this partition is "diffable". // If the primary key sets changed between the two commits, it may not be -// possible to diff them. -func (dp *DiffPartition) isDiffablePartition(ctx *sql.Context) (bool, error) { +// possible to diff them. We return two bools: simpleDiff is returned if the primary key sets are close enough that we +// can confidently merge the diff (using schema.ArePrimaryKeySetsDiffable). fuzzyDiff is returned if the primary key +// sets are not close enough to merge the diff, but we can still make an approximate comparison (using schema.MapSchemaBasedOnTagAndName). +func (dp *DiffPartition) isDiffablePartition(ctx *sql.Context) (simpleDiff bool, fuzzyDiff bool, err error) { // dp.to is nil when a table has been deleted previously. In this case, we return // false, to stop processing diffs, since that previously deleted table is considered // a logically different table and we don't want to mix the diffs together. if dp.to == nil { - return false, nil + return false, false, nil } // dp.from is nil when the to commit created a new table if dp.from == nil { - return true, nil + return true, false, nil } fromSch, err := dp.from.GetSchema(ctx) if err != nil { - return false, err + return false, false, err } toSch, err := dp.to.GetSchema(ctx) if err != nil { - return false, err + return false, false, err + } + + easyDiff := schema.ArePrimaryKeySetsDiffable(dp.from.Format(), fromSch, toSch) + if easyDiff { + return true, false, nil } - return schema.ArePrimaryKeySetsDiffable(dp.from.Format(), fromSch, toSch), nil + _, _, err = schema.MapSchemaBasedOnTagAndName(fromSch, toSch) + if err == nil { + return false, true, nil + } + return false, false, nil } type partitionSelectFunc func(*sql.Context, DiffPartition) (bool, error) @@ -762,6 +773,7 @@ type DiffPartitions struct { selectFunc partitionSelectFunc toSch schema.Schema fromSch schema.Schema + stopNext bool } // processCommit is called in a commit iteration loop. Adds partitions when it finds a commit and its parent that have @@ -821,6 +833,10 @@ func (dps *DiffPartitions) processCommit(ctx *sql.Context, cmHash hash.Hash, cm } func (dps *DiffPartitions) Next(ctx *sql.Context) (sql.Partition, error) { + if dps.stopNext { + return nil, io.EOF + } + for { cmHash, optCmt, err := dps.cmItr.Next(ctx) if err != nil { @@ -852,16 +868,21 @@ func (dps *DiffPartitions) Next(ctx *sql.Context) (sql.Partition, error) { if next != nil { // If we can't diff this commit with its parent, don't traverse any lower - canDiff, err := next.isDiffablePartition(ctx) + simpleDiff, fuzzyDiff, err := next.isDiffablePartition(ctx) if err != nil { return nil, err } - if !canDiff { + if !simpleDiff && !fuzzyDiff { ctx.Warn(PrimaryKeyChangeWarningCode, fmt.Sprintf(PrimaryKeyChangeWarning, next.fromName, next.toName)) return nil, io.EOF } + if !simpleDiff && fuzzyDiff { + ctx.Warn(PrimaryKeyChangeWarningCode, fmt.Sprintf(PrimaryKeyChangeWarning, next.fromName, next.toName)) + dps.stopNext = true + } + return *next, nil } } diff --git a/go/libraries/doltcore/sqle/dtables/prolly_row_conv.go b/go/libraries/doltcore/sqle/dtables/prolly_row_conv.go index f842494ef0..687fcc21b2 100644 --- a/go/libraries/doltcore/sqle/dtables/prolly_row_conv.go +++ b/go/libraries/doltcore/sqle/dtables/prolly_row_conv.go @@ -115,10 +115,14 @@ func (c ProllyRowConverter) putFields(ctx context.Context, tup val.Tuple, proj v virtualOffset := 0 for i, j := range proj { if j == -1 { - // Skip over virtual columns in non-pk cols as they are not stored - if !isPk && c.inSchema.GetNonPKCols().GetByIndex(i).Virtual { - virtualOffset++ + nonPkCols := c.inSchema.GetNonPKCols() + if len(nonPkCols.GetColumns()) > i { + // Skip over virtual columns in non-pk cols as they are not stored + if !isPk && nonPkCols.GetByIndex(i).Virtual { + virtualOffset++ + } } + continue } diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_queries_diff.go b/go/libraries/doltcore/sqle/enginetest/dolt_queries_diff.go index 9824ae67d9..0f432ff542 100644 --- a/go/libraries/doltcore/sqle/enginetest/dolt_queries_diff.go +++ b/go/libraries/doltcore/sqle/enginetest/dolt_queries_diff.go @@ -528,7 +528,7 @@ var DiffSystemTableScriptTests = []queries.ScriptTest{ }, { Query: "SELECT COUNT(*) FROM DOLT_DIFF_t;", - Expected: []sql.Row{{1}}, + Expected: []sql.Row{{7}}, }, { Query: "SELECT to_pk, to_c1, from_pk, from_c1, diff_type FROM DOLT_DIFF_t where to_commit=@Commit4;", @@ -536,6 +536,36 @@ var DiffSystemTableScriptTests = []queries.ScriptTest{ }, }, }, + { + // Similar to previous test, but with one row to avoid ordering issues. + Name: "altered keyless table add pk", // https://github.com/dolthub/dolt/issues/8625 + SetUpScript: []string{ + "create table tbl (i int, j int);", + "insert into tbl values (42, 23);", + "call dolt_commit('-Am', 'commit1');", + "alter table tbl add primary key(i);", + "call dolt_commit('-am', 'commit2');", + }, + Assertions: []queries.ScriptTestAssertion{ + { + Query: "SELECT to_i,to_j,from_i,from_j,diff_type FROM dolt_diff_tbl;", + // Output in the situation is admittedly wonky. Updating the PK leaves in a place where we can't really render + // the diff, but we want to show something. In this case, the 'pk' column tag changes, so in the last two rows + // of the output you see we add "nil,23" and remove "nil,23" when in fact those columns were "42" with a different + // tag. + // + // In the past we just returned an empty set in this case. The + // warning is kind of essential to understand what is happening. + Expected: []sql.Row{ + {42, 23, nil, nil, "added"}, + {nil, nil, nil, 23, "removed"}, + }, + ExpectedWarningsCount: 1, + ExpectedWarning: 1105, + ExpectedWarningMessageSubstring: "due to primary key set change", + }, + }, + }, { Name: "table with commit column should maintain its data in diff", SetUpScript: []string{ @@ -713,8 +743,10 @@ var Dolt1DiffSystemTableScripts = []queries.ScriptTest{ }, Assertions: []queries.ScriptTestAssertion{ { - Query: "SELECT to_pk1, to_pk2, from_pk1, from_pk2, diff_type from dolt_diff_t;", - Expected: []sql.Row{{"2", "2", nil, nil, "added"}}, + Query: "SELECT to_pk1, to_pk2, from_pk1, from_pk2, diff_type from dolt_diff_t;", + Expected: []sql.Row{ + {"2", "2", nil, nil, "added"}, + }, }, }, }, @@ -5298,6 +5330,7 @@ var CommitDiffSystemTableScriptTests = []queries.ScriptTest{ }, }, }, + { Name: "added and dropped table", SetUpScript: []string{ diff --git a/integration-tests/bats/sql-diff.bats b/integration-tests/bats/sql-diff.bats index 92b7842aa3..6ab7356384 100644 --- a/integration-tests/bats/sql-diff.bats +++ b/integration-tests/bats/sql-diff.bats @@ -591,7 +591,10 @@ SQL done } -@test "sql-diff: supports multiple primary keys" { +run_2pk5col_ints() { + local query_name=$1 + + # Initial setup dolt checkout -b firstbranch dolt sql < patch.sql newbranch - dolt checkout firstbranch - dolt sql < patch.sql - rm patch.sql - dolt add . - dolt commit -m "Reconciled with newbranch" + # Generate patch, apply on firstbranch, and verify no differences + dolt diff -r sql firstbranch > patch.sql newbranch + dolt checkout firstbranch + dolt sql < patch.sql + rm patch.sql + dolt add . + dolt commit -m "Reconciled with newbranch" - # confirm that both branches have the same content - run dolt diff -r sql firstbranch newbranch - [ "$status" -eq 0 ] - [ "$output" = "" ] - done + # Confirm branches are identical + run dolt diff -r sql firstbranch newbranch + [ "$status" -eq 0 ] + [ "$output" = "" ] +} + +@test "sql-diff: supports multiple primary keys (delete)" { + run_2pk5col_ints "delete" +} +@test "sql-diff: supports multiple primary keys (add)" { + run_2pk5col_ints "add" } +@test "sql-diff: supports multiple primary keys (update)" { + run_2pk5col_ints "update" +} +@test "sql-diff: supports multiple primary keys (single_pk_update)" { + run_2pk5col_ints "single_pk_update" +} +@test "sql-diff: supports multiple primary keys (all_pk_update)" { + run_2pk5col_ints "all_pk_update" +} +@test "sql-diff: supports multiple primary keys (create_table)" { + run_2pk5col_ints "create_table" +} + @test "sql-diff: escapes values for MySQL string literals" { # https://dev.mysql.com/doc/refman/8.0/en/string-literals.html