Skip to content

Commit

Permalink
fix(core): fix task batch partitioning algorithm
Browse files Browse the repository at this point in the history
The old batch partitioning algorithm wasn't correctly merging partition
classes when transitive relationships with more than one "hop" were
present.

This could lead to tasks being processed twice when using the `--force`
flag for certain commands.
  • Loading branch information
thsig authored and edvald committed Oct 22, 2021
1 parent bbe7527 commit 5625e79
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 22 deletions.
8 changes: 4 additions & 4 deletions core/src/task-graph.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import { LogEntry, LogEntryMetadata, TaskLogStatus } from "./logger/log-entry"
import { toGardenError, GardenBaseError } from "./exceptions"
import { Garden } from "./garden"
import { dedent } from "./util/string"
import { defer, relationshipClasses, uuidv4, safeDumpYaml } from "./util/util"
import { defer, relationshipClasses, uuidv4, safeDumpYaml, isDisjoint } from "./util/util"
import { renderError } from "./logger/renderers"
import { cyclesToString } from "./util/validate-dependencies"
import { Profile } from "./util/profiling"
Expand Down Expand Up @@ -184,16 +184,16 @@ export class TaskGraph extends EventEmitter2 {
})

const nodesWithKeys = deduplicatedNodes.map((node) => {
return { node, resultKeys: this.keysWithDependencies(node) }
return { node, resultKeys: new Set(this.keysWithDependencies(node)) }
})

const sharesDeps = (node1withKeys, node2withKeys) => {
return intersection(node1withKeys.resultKeys, node2withKeys.resultKeys).length > 0
return !isDisjoint(node1withKeys.resultKeys, node2withKeys.resultKeys)
}

return relationshipClasses(nodesWithKeys, sharesDeps).map((cls) => {
const nodesForBatch = cls.map((n) => n.node)
const resultKeys: string[] = union(...cls.map((ts) => ts.resultKeys))
const resultKeys: string[] = union(...cls.map((ts) => [...ts.resultKeys]))
return new TaskNodeBatch(nodesForBatch, resultKeys)
})
}
Expand Down
48 changes: 33 additions & 15 deletions core/src/util/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/

import { trimEnd, omit, groupBy } from "lodash"
import { isArray, isPlainObject, extend, mapValues, pickBy, range, trimEnd, omit, groupBy, some } from "lodash"
import split2 = require("split2")
import Bluebird = require("bluebird")
import { ResolvableProps } from "bluebird"
Expand All @@ -16,7 +16,6 @@ import _spawn from "cross-spawn"
import { readFile, writeFile } from "fs-extra"
import { find, pick, difference, fromPairs, uniqBy } from "lodash"
import { TimeoutError, ParameterError, RuntimeError, GardenError } from "../exceptions"
import { isArray, isPlainObject, extend, mapValues, pickBy, range, some } from "lodash"
import highlight from "cli-highlight"
import chalk from "chalk"
import { safeDump, safeLoad, DumpOptions } from "js-yaml"
Expand Down Expand Up @@ -522,28 +521,47 @@ export function uniqByName<T extends ObjectWithName>(array: T[]): T[] {
return uniqBy(array, (item) => item.name)
}

export function isDisjoint<T>(set1: Set<T>, set2: Set<T>): boolean {
return !some([...set1], (element) => set2.has(element))
}

/**
* Returns an array of arrays, where the elements of a given array are the elements of items for which
* isRelated returns true for one or more elements of its class.
*
* I.e. an element is related to at least one element of its class, transitively.
*/
export function relationshipClasses<I>(items: I[], isRelated: (item1: I, item2: I) => boolean): I[][] {
const classes: I[][] = []
for (const item of items) {
let found = false
for (const classIndex of range(0, classes.length)) {
const cls = classes[classIndex]
if (cls && cls.length && some(cls, (classItem) => isRelated(classItem, item))) {
found = true
cls.push(item)
// We start with each item in its own class.
//
// We then keep looking for relationships/connections between classes and merging them when one is found,
// until no two classes have elements that are related to each other.
const classes: I[][] = items.map((i) => [i])

let didMerge = false
do {
didMerge = false
for (const classIndex1 of range(0, classes.length)) {
for (const classIndex2 of range(0, classes.length)) {
if (classIndex1 === classIndex2) {
continue
}
const c1 = classes[classIndex1]
const c2 = classes[classIndex2]
if (some(c1, (i1) => some(c2, (i2) => isRelated(i1, i2)))) {
// Then we merge c1 and c2.
didMerge = true
classes.splice(classIndex2, 1)
classes[classIndex1] = [...c1, ...c2]
break
}
}
if (didMerge) {
break
}
}

if (!found) {
classes.push([item])
}
}
} while (didMerge)
// Once this point is reached, no two classes are related to each other, so we can return them.

return classes
}
Expand Down
7 changes: 4 additions & 3 deletions core/test/unit/src/util/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -332,11 +332,12 @@ describe("util", () => {

describe("relationshipClasses", () => {
it("should correctly partition related items", () => {
const items = ["ab", "b", "c", "a", "cd"]
const items = ["a", "b", "c", "d", "e", "f", "g", "ab", "bc", "cd", "de", "fg"]
const isRelated = (s1: string, s2: string) => includes(s1, s2) || includes(s2, s1)
// There's no "ef" element, so ["f", "fg", "g"] should be disjoint from the rest.
expect(relationshipClasses(items, isRelated)).to.eql([
["ab", "b", "a"],
["c", "cd"],
["a", "ab", "b", "bc", "c", "cd", "d", "de", "e"],
["f", "fg", "g"],
])
})

Expand Down

0 comments on commit 5625e79

Please sign in to comment.