From 5625e79d44f30b02df7014ff0f500bf86e1a7905 Mon Sep 17 00:00:00 2001
From: Thorarinn Sigurdsson <thorarinnsigurdsson@gmail.com>
Date: Fri, 22 Oct 2021 10:12:46 -0700
Subject: [PATCH] fix(core): fix task batch partitioning algorithm

The old batch partitioning algorithm wasn't correctly merging partition
classes when transitive relationships with more than one "hop" were
present.

This could lead to tasks being processed twice when using the `--force`
flag for certain commands.
---
 core/src/task-graph.ts          |  8 +++---
 core/src/util/util.ts           | 48 ++++++++++++++++++++++-----------
 core/test/unit/src/util/util.ts |  7 ++---
 3 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/core/src/task-graph.ts b/core/src/task-graph.ts
index b033c4e4b1..ece46de2ed 100644
--- a/core/src/task-graph.ts
+++ b/core/src/task-graph.ts
@@ -15,7 +15,7 @@ import { LogEntry, LogEntryMetadata, TaskLogStatus } from "./logger/log-entry"
 import { toGardenError, GardenBaseError } from "./exceptions"
 import { Garden } from "./garden"
 import { dedent } from "./util/string"
-import { defer, relationshipClasses, uuidv4, safeDumpYaml } from "./util/util"
+import { defer, relationshipClasses, uuidv4, safeDumpYaml, isDisjoint } from "./util/util"
 import { renderError } from "./logger/renderers"
 import { cyclesToString } from "./util/validate-dependencies"
 import { Profile } from "./util/profiling"
@@ -184,16 +184,16 @@ export class TaskGraph extends EventEmitter2 {
     })
 
     const nodesWithKeys = deduplicatedNodes.map((node) => {
-      return { node, resultKeys: this.keysWithDependencies(node) }
+      return { node, resultKeys: new Set(this.keysWithDependencies(node)) }
     })
 
     const sharesDeps = (node1withKeys, node2withKeys) => {
-      return intersection(node1withKeys.resultKeys, node2withKeys.resultKeys).length > 0
+      return !isDisjoint(node1withKeys.resultKeys, node2withKeys.resultKeys)
     }
 
     return relationshipClasses(nodesWithKeys, sharesDeps).map((cls) => {
       const nodesForBatch = cls.map((n) => n.node)
-      const resultKeys: string[] = union(...cls.map((ts) => ts.resultKeys))
+      const resultKeys: string[] = union(...cls.map((ts) => [...ts.resultKeys]))
       return new TaskNodeBatch(nodesForBatch, resultKeys)
     })
   }
diff --git a/core/src/util/util.ts b/core/src/util/util.ts
index ef1f8bf4e6..bde11bcd19 100644
--- a/core/src/util/util.ts
+++ b/core/src/util/util.ts
@@ -6,7 +6,7 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
  */
 
-import { trimEnd, omit, groupBy } from "lodash"
+import { isArray, isPlainObject, extend, mapValues, pickBy, range, trimEnd, omit, groupBy, some } from "lodash"
 import split2 = require("split2")
 import Bluebird = require("bluebird")
 import { ResolvableProps } from "bluebird"
@@ -16,7 +16,6 @@ import _spawn from "cross-spawn"
 import { readFile, writeFile } from "fs-extra"
 import { find, pick, difference, fromPairs, uniqBy } from "lodash"
 import { TimeoutError, ParameterError, RuntimeError, GardenError } from "../exceptions"
-import { isArray, isPlainObject, extend, mapValues, pickBy, range, some } from "lodash"
 import highlight from "cli-highlight"
 import chalk from "chalk"
 import { safeDump, safeLoad, DumpOptions } from "js-yaml"
@@ -522,6 +521,10 @@ export function uniqByName<T extends ObjectWithName>(array: T[]): T[] {
   return uniqBy(array, (item) => item.name)
 }
 
+export function isDisjoint<T>(set1: Set<T>, set2: Set<T>): boolean {
+  return !some([...set1], (element) => set2.has(element))
+}
+
 /**
  * Returns an array of arrays, where the elements of a given array are the elements of items for which
  * isRelated returns true for one or more elements of its class.
@@ -529,21 +532,36 @@ export function uniqByName<T extends ObjectWithName>(array: T[]): T[] {
  * I.e. an element is related to at least one element of its class, transitively.
  */
 export function relationshipClasses<I>(items: I[], isRelated: (item1: I, item2: I) => boolean): I[][] {
-  const classes: I[][] = []
-  for (const item of items) {
-    let found = false
-    for (const classIndex of range(0, classes.length)) {
-      const cls = classes[classIndex]
-      if (cls && cls.length && some(cls, (classItem) => isRelated(classItem, item))) {
-        found = true
-        cls.push(item)
+  // We start with each item in its own class.
+  //
+  // We then keep looking for relationships/connections between classes and merging them when one is found,
+  // until no two classes have elements that are related to each other.
+  const classes: I[][] = items.map((i) => [i])
+
+  let didMerge = false
+  do {
+    didMerge = false
+    for (const classIndex1 of range(0, classes.length)) {
+      for (const classIndex2 of range(0, classes.length)) {
+        if (classIndex1 === classIndex2) {
+          continue
+        }
+        const c1 = classes[classIndex1]
+        const c2 = classes[classIndex2]
+        if (some(c1, (i1) => some(c2, (i2) => isRelated(i1, i2)))) {
+          // Then we merge c1 and c2.
+          didMerge = true
+          classes.splice(classIndex2, 1)
+          classes[classIndex1] = [...c1, ...c2]
+          break
+        }
+      }
+      if (didMerge) {
+        break
       }
     }
-
-    if (!found) {
-      classes.push([item])
-    }
-  }
+  } while (didMerge)
+  // Once this point is reached, no two classes are related to each other, so we can return them.
 
   return classes
 }
diff --git a/core/test/unit/src/util/util.ts b/core/test/unit/src/util/util.ts
index 7bf9ed9a3d..cd1fc9c779 100644
--- a/core/test/unit/src/util/util.ts
+++ b/core/test/unit/src/util/util.ts
@@ -332,11 +332,12 @@ describe("util", () => {
 
   describe("relationshipClasses", () => {
     it("should correctly partition related items", () => {
-      const items = ["ab", "b", "c", "a", "cd"]
+      const items = ["a", "b", "c", "d", "e", "f", "g", "ab", "bc", "cd", "de", "fg"]
       const isRelated = (s1: string, s2: string) => includes(s1, s2) || includes(s2, s1)
+      // There's no "ef" element, so ["f", "fg", "g"] should be disjoint from the rest.
       expect(relationshipClasses(items, isRelated)).to.eql([
-        ["ab", "b", "a"],
-        ["c", "cd"],
+        ["a", "ab", "b", "bc", "c", "cd", "d", "de", "e"],
+        ["f", "fg", "g"],
       ])
     })