Skip to content

Commit

Permalink
props: add methods to EquivGroups for use in FuncDepSet
Browse files Browse the repository at this point in the history
This commit adds several new methods along with unit tests to `EquivGroups`
to prepare its use in tracking equivalencies in `FuncDepSet`.

Informs cockroachdb#83963

Release note: None
  • Loading branch information
DrewKimball committed Dec 16, 2024
1 parent ce8d5f8 commit 890c31c
Show file tree
Hide file tree
Showing 4 changed files with 661 additions and 76 deletions.
2 changes: 1 addition & 1 deletion pkg/sql/opt/norm/join_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ func (c *CustomFuncs) GetEquivColsWithEquivTypeWithEquivGroups(
}

// Compute all equivalent columns.
eqCols := equivSet.Group(col)
eqCols := equivSet.GroupForCol(col)

eqCols.ForEach(func(i opt.ColumnID) {
// Only include columns that have the same type as col.
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/opt/props/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ go_test(
"//pkg/sql/sem/tree",
"//pkg/sql/sem/volatility",
"//pkg/sql/types",
"//pkg/util/buildutil",
"//pkg/util/randutil",
"@com_github_cockroachdb_errors//:errors",
"@com_github_stretchr_testify//require",
Expand Down
290 changes: 254 additions & 36 deletions pkg/sql/opt/props/equiv_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,26 +30,149 @@ func (eq *EquivGroups) Reset() {
eq.groups = eq.groups[:0]
}

// Add adds the given equivalent columns to the EquivGroups. If possible, the
// columns are added to an existing group. Otherwise, a new one is created.
func (eq *EquivGroups) Add(equivCols opt.ColSet) {
// Empty reports whether the EquivGroups currently holds no equivalence groups.
func (eq *EquivGroups) Empty() bool {
	if buildutil.CrdbTestBuild {
		// In test builds, run the verify self-check when the method exits.
		defer eq.verify()
	}
	groupCount := len(eq.groups)
	return groupCount == 0
}

// GroupCount reports how many equiv groups the set currently stores.
func (eq *EquivGroups) GroupCount() int {
	if buildutil.CrdbTestBuild {
		// In test builds, run the verify self-check when the method exits.
		defer eq.verify()
	}
	count := len(eq.groups)
	return count
}

// Group returns the equiv group stored at position idx. The returned ColSet
// should be considered immutable. idx must satisfy 0 <= idx < GroupCount();
// any other value panics with an assertion error.
func (eq *EquivGroups) Group(idx int) opt.ColSet {
	if buildutil.CrdbTestBuild {
		// In test builds, run the verify self-check when the method exits.
		defer eq.verify()
	}
	// Reject negative indexes as well as too-large ones, so that every
	// out-of-range access is reported through the same assertion error rather
	// than a raw runtime index-out-of-range panic.
	if idx < 0 || idx >= len(eq.groups) {
		panic(errors.AssertionFailedf("invalid equiv group index %d", idx))
	}
	return eq.groups[idx]
}

// GroupForCol looks up the equiv group that contains col and returns it. When
// no group contains col, the empty ColSet is returned. The returned set should
// not be mutated without being copied first.
func (eq *EquivGroups) GroupForCol(col opt.ColumnID) opt.ColSet {
	if buildutil.CrdbTestBuild {
		// In test builds, run the verify self-check when the method exits.
		defer eq.verify()
	}
	for _, group := range eq.groups {
		if group.Contains(col) {
			return group
		}
	}
	// No group mentions col.
	return opt.ColSet{}
}

// ContainsCol reports whether col is a member of any equiv group (a column
// will be in at most one group).
func (eq *EquivGroups) ContainsCol(col opt.ColumnID) bool {
	if buildutil.CrdbTestBuild {
		// In test builds, run the verify self-check when the method exits.
		defer eq.verify()
	}
	found := false
	// Stop scanning as soon as one group is found to contain col.
	for idx := 0; idx < len(eq.groups) && !found; idx++ {
		found = eq.groups[idx].Contains(col)
	}
	return found
}

// AreColsEquiv reports whether the left and right columns are equivalent. A
// column is always equivalent to itself, even if it appears in no group.
func (eq *EquivGroups) AreColsEquiv(left, right opt.ColumnID) bool {
	if buildutil.CrdbTestBuild {
		// In test builds, run the verify self-check when the method exits.
		defer eq.verify()
	}
	if left == right {
		return true
	}
	for idx := range eq.groups {
		group := &eq.groups[idx]
		hasLeft, hasRight := group.Contains(left), group.Contains(right)
		// The first group that mentions either column decides the answer: the
		// columns are equivalent only if that same group holds both of them.
		if hasLeft || hasRight {
			return hasLeft && hasRight
		}
	}
	return false
}

// AreAllColsEquiv reports whether every column in cols is equivalent to every
// other column in cols. Sets with zero or one column are trivially equivalent.
func (eq *EquivGroups) AreAllColsEquiv(cols opt.ColSet) bool {
	if buildutil.CrdbTestBuild {
		// In test builds, run the verify self-check when the method exits.
		defer eq.verify()
	}
	if cols.Len() < 2 {
		return true
	}
	for idx := range eq.groups {
		group := eq.groups[idx]
		if !group.Intersects(cols) {
			continue
		}
		// The first group that overlaps cols decides the answer: all of cols
		// must be contained in that one group.
		return cols.SubsetOf(group)
	}
	// None of the groups mentions any of the columns.
	return false
}

// ComputeEquivClosureNoCopy expands cols with every column of each equiv group
// that cols intersects, and returns the result. Note that the given ColSet is
// mutated and returned directly rather than copied.
func (eq *EquivGroups) ComputeEquivClosureNoCopy(cols opt.ColSet) opt.ColSet {
	if buildutil.CrdbTestBuild {
		// In test builds, run the verify self-check when the method exits.
		defer eq.verify()
	}
	for idx := range eq.groups {
		group := eq.groups[idx]
		if !group.Intersects(cols) {
			continue
		}
		cols.UnionWith(group)
		if group.Len() == cols.Len() {
			// cols was just unioned with group, so equal lengths mean every
			// column of cols lies inside this one group. No other group can
			// contribute, so stop early.
			break
		}
	}
	return cols
}

// AddNoCopy adds the given equivalent columns to the EquivGroups. If possible,
// the columns are added to an existing group. Otherwise, a new one is created.
// Sets with fewer than two columns are ignored as trivial equivalences.
// NOTE: the given ColSet may be added to the EquivGroups without being copied,
// so it must be considered immutable after it is passed to AddNoCopy.
func (eq *EquivGroups) AddNoCopy(equivCols opt.ColSet) {
if buildutil.CrdbTestBuild {
defer eq.verify()
}
if equivCols.Len() <= 1 {
// This is a trivial equivalence.
return
}
// Attempt to add the equivalence to an existing group.
for i := range eq.groups {
if eq.groups[i].Intersects(equivCols) {
if equivCols.SubsetOf(eq.groups[i]) {
// No-op
// The equivalence is already contained in the set.
return
}
// NOTE(review): the in-place UnionWith below is followed by a second,
// non-mutating merge of the same sets; this looks like old and new versions
// of the statement rendered together — confirm which one is intended.
eq.groups[i].UnionWith(equivCols)
if eq.groups[i].SubsetOf(equivCols) {
// Avoid the copy.
eq.groups[i] = equivCols
} else {
eq.groups[i] = eq.groups[i].Union(equivCols)
}
// The grown group may now overlap groups stored after it; fold those in.
eq.tryMergeGroups(i)
return
}
}
// Make a new equivalence group.
// NOTE(review): equivCols is appended twice here (once copied, once not),
// which would store two identical groups — likely old and new lines rendered
// together; confirm.
eq.groups = append(eq.groups, equivCols.Copy())
eq.groups = append(eq.groups, equivCols)
}

// AddFromFDs adds all equivalence relations from the given FuncDepSet to the
Expand All @@ -61,49 +184,155 @@ func (eq *EquivGroups) AddFromFDs(fdset *FuncDepSet) {
for i := range fdset.deps {
fd := &fdset.deps[i]
if fd.equiv {
eq.Add(fd.from.Union(fd.to))
eq.AddNoCopy(fd.from.Union(fd.to))
}
}
}

// AreColsEquiv indicates whether the given columns are equivalent.
func (eq *EquivGroups) AreColsEquiv(left, right opt.ColumnID) bool {
// TranslateColsStrict remaps the column IDs of each equiv group according to
// the given "from" and "to" lists. It requires that all columns in each group
// are present in the "from" list, and that the "from" and "to" lists are the
// same length.
func (eq *EquivGroups) TranslateColsStrict(fromCols, toCols opt.ColList) {
if buildutil.CrdbTestBuild {
defer eq.verify()
}
// It is possible that the same column shows up more than once in either of
// the lists. In other words, a column can map to more than one column, and
// two different columns can map to the same column. The former is handled
// by TranslateColSetStrict, and may add a column to an equiv group. The
// latter can merge two equiv groups, so we need to handle it here.
//
// seenCols tracks every "to" column visited so far; dupCols tracks the "to"
// columns whose duplicate mapping has already been processed, so that each
// duplicated target is handled only once.
var seenCols, dupCols opt.ColSet
for _, toCol := range toCols {
if seenCols.Contains(toCol) && !dupCols.Contains(toCol) {
// Collect every "from" column that maps to this repeated "to" column and
// record them as equivalent before translating.
var equiv opt.ColSet
for i, fromCol := range fromCols {
if toCols[i] == toCol {
equiv.Add(fromCol)
}
}
eq.AddNoCopy(equiv)
dupCols.Add(toCol)
}
seenCols.Add(toCol)
}
for i := range eq.groups {
// NOTE(review): left and right are not declared in this function; the next
// two lines look like leftovers from AreColsEquiv rendered into this body —
// confirm.
if eq.groups[i].Contains(left) {
return eq.groups[i].Contains(right)
eq.groups[i] = opt.TranslateColSetStrict(eq.groups[i], fromCols, toCols)
}
// Handle the case when multiple "in" columns map to the same "out" column,
// which could result in removal of an equiv group.
eq.removeTrivialGroups()
}

// ProjectCols removes all columns from the EquivGroups that are not in the
// given ColSet, then calls removeTrivialGroups to drop any equiv group left
// with fewer than two columns.
func (eq *EquivGroups) ProjectCols(cols opt.ColSet) {
if buildutil.CrdbTestBuild {
defer eq.verify()
}
for i := range eq.groups {
// Only groups that have columns outside of cols need to be replaced.
if !eq.groups[i].SubsetOf(cols) {
eq.groups[i] = eq.groups[i].Intersection(cols)
}
// NOTE(review): right and left are not declared in this function; the next
// two lines look like leftovers from AreColsEquiv rendered into this body —
// confirm.
if eq.groups[i].Contains(right) {
return eq.groups[i].Contains(left)
}
eq.removeTrivialGroups()
}

// PartitionBy divides the equiv groups according to the given columns. If an
// equiv group intersects the given ColSet but is not a subset, it is split into
// the intersection and difference with the given ColSet. Ex:
//
// eq := [(1-3), (4-8), (9-12)]
// eq.PartitionBy(1,5,6)
// eq == [(2,3), (4,7,8), (5,6), (9-12)]
//
// * In the example, the (1-3) group is split into (1) and (2,3). Since the (1)
// group only has a single column, it is discarded as a trivial equivalence.
// * The (4-8) group is split into (4,7,8) and (5,6). Since both subsets have
// at least two columns, both are kept in the EquivGroups.
// * Finally, the (9-12) group does not intersect the given cols, and so is not
// split.
func (eq *EquivGroups) PartitionBy(cols opt.ColSet) {
if buildutil.CrdbTestBuild {
defer eq.verify()
}
// Iterate from the last original group down to the first. Groups appended
// during the loop land past the starting index and are never revisited.
for i := len(eq.groups) - 1; i >= 0; i-- {
if eq.groups[i].Intersects(cols) && !eq.groups[i].SubsetOf(cols) {
left, right := eq.groups[i].Intersection(cols), eq.groups[i].Difference(cols)
// Blank out the split group; removeTrivialGroups below deletes the now-empty
// slot.
eq.groups[i] = opt.ColSet{}
if left.Len() > 1 {
eq.groups = append(eq.groups, left)
}
if right.Len() > 1 {
eq.groups = append(eq.groups, right)
}
}
}
// NOTE(review): this method declares no result, so the return below cannot
// belong here; it looks like a leftover line from another function — confirm.
return false
eq.removeTrivialGroups()
}

// Group returns the group of columns equivalent to the given column. It
// returns the empty set if no such group exists. The returned should not be
// mutated without being copied first.
func (eq *EquivGroups) Group(col opt.ColumnID) opt.ColSet {
// CopyFrom copies the given EquivGroups into this EquivGroups, replacing any
// existing data. Copying an EquivGroups onto itself leaves it unchanged.
func (eq *EquivGroups) CopyFrom(other *EquivGroups) {
	if buildutil.CrdbTestBuild {
		// In test builds, run the verify self-check when the method exits.
		defer eq.verify()
	}
	if eq == other {
		// Self-copy must be a no-op. Without this guard the Reset below would
		// truncate the very groups that are about to be copied, silently
		// discarding every equivalence.
		return
	}
	eq.Reset()
	eq.AppendFromDisjoint(other)
}

// AppendFromDisjoint appends every equiv group of other to this EquivGroups.
// The caller must guarantee that the groups of the two sets are disjoint; no
// merging is attempted.
func (eq *EquivGroups) AppendFromDisjoint(other *EquivGroups) {
	if buildutil.CrdbTestBuild {
		other.verify()
		defer eq.verify()
	}
	if total := len(eq.groups) + len(other.groups); total > cap(eq.groups) {
		// Grow to exactly the required capacity, carrying the existing groups
		// over into the new backing slice.
		grown := make([]opt.ColSet, len(eq.groups), total)
		copy(grown, eq.groups)
		eq.groups = grown
	}
	// The groups are shared rather than deep-copied, relying on equiv groups
	// never being modified in-place.
	eq.groups = append(eq.groups, other.groups...)
}

// String renders the equiv groups as a bracketed, comma-separated list, using
// each group's own String form, e.g. "[<group>, <group>]".
func (eq *EquivGroups) String() string {
if buildutil.CrdbTestBuild {
defer eq.verify()
}
ret := "["
for i := range eq.groups {
// NOTE(review): col is not declared in this method and it returns a string,
// not a ColSet; the next two lines look like leftovers from the old Group
// lookup rendered into this body — confirm.
if eq.groups[i].Contains(col) {
return eq.groups[i]
// Separate every group after the first with ", ".
if i > 0 {
ret += ", "
}
ret += eq.groups[i].String()
}
// NOTE(review): same leftover concern for the ColSet return below.
return opt.ColSet{}
return ret + "]"
}

// tryMergeGroups attempts to merge the equality group at the given index with
// any of the *following* groups. If a group can be merged, it is removed after
// its columns are added to the given group.
func (eq *EquivGroups) tryMergeGroups(idx int) {
if buildutil.CrdbTestBuild {
defer eq.verify()
}
// Walk backwards so that the swap-with-last removal below only moves groups
// that have already been examined.
for i := len(eq.groups) - 1; i > idx; i-- {
if eq.groups[idx].Intersects(eq.groups[i]) {
// NOTE(review): the in-place UnionWith is immediately followed by a
// non-mutating Union of the same sets; this looks like old and new versions
// of the statement rendered together — confirm which one is intended.
eq.groups[idx].UnionWith(eq.groups[i])
eq.groups[idx] = eq.groups[idx].Union(eq.groups[i])
// Remove group i by overwriting it with the last group, clearing the vacated
// last slot, and shrinking the slice by one.
eq.groups[i] = eq.groups[len(eq.groups)-1]
eq.groups[len(eq.groups)-1] = opt.ColSet{}
eq.groups = eq.groups[:len(eq.groups)-1]
}
}
}

// removeTrivialGroups removes groups with zero or one columns, which may be
// added by methods like makePartition.
func (eq *EquivGroups) removeTrivialGroups() {
for i := len(eq.groups) - 1; i >= 0; i-- {
if eq.groups[i].Len() <= 1 {
eq.groups[i] = eq.groups[len(eq.groups)-1]
eq.groups[len(eq.groups)-1] = opt.ColSet{}
eq.groups = eq.groups[:len(eq.groups)-1]
Expand All @@ -123,14 +352,3 @@ func (eq *EquivGroups) verify() {
seen.UnionWith(group)
}
}

// String renders the equiv groups as a bracketed list whose entries are
// separated by ", ", using each group's own String form.
func (eq *EquivGroups) String() string {
	out := "["
	// The separator is empty before the first group and ", " afterwards.
	sep := ""
	for idx := range eq.groups {
		out += sep + eq.groups[idx].String()
		sep = ", "
	}
	return out + "]"
}
Loading

0 comments on commit 890c31c

Please sign in to comment.