-
Notifications
You must be signed in to change notification settings - Fork 4.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement a generic topological sort algorithm #25169
Changes from all commits
966624e
30f178f
74c7800
0e16b5b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,217 @@ | ||
// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. | ||
|
||
using System; | ||
using System.Collections.Generic; | ||
using System.Collections.Immutable; | ||
using System.Linq; | ||
using Microsoft.CodeAnalysis; | ||
using Roslyn.Test.Utilities; | ||
using Xunit; | ||
|
||
namespace Microsoft.CodeAnalysis.UnitTests.Collections | ||
{ | ||
public class TopologicalSortTests | ||
{ | ||
[Fact] | ||
public void Test01() | ||
{ | ||
int[][] successors = new int[][] | ||
{ | ||
/* 0 */ new int[] { }, // 0 has no successors | ||
/* 1 */ new int[] { }, | ||
/* 2 */ new int[] { 3 }, | ||
/* 3 */ new int[] { 1 }, | ||
/* 4 */ new int[] { 0, 1, 0, 1 }, // tolerate duplicate edges | ||
/* 5 */ new int[] { 0, 2 }, | ||
}; | ||
|
||
Func<int, IEnumerable<int>> succF = x => successors[x]; | ||
var sorted = TopologicalSort.IterativeSort<int>(new[] { 4, 5 }, succF); | ||
AssertTopologicallySorted(sorted, succF, "Test01"); | ||
Assert.Equal(6, sorted.Length); | ||
AssertEx.Equal(new[] { 4, 5, 2, 3, 1, 0 }, sorted); | ||
} | ||
|
||
[Fact] | ||
public void Test01b() | ||
{ | ||
string[][] successors = new string[][] | ||
{ | ||
/* 0 */ new string[] { }, // 0 has no successors | ||
/* 1 */ new string[] { }, | ||
/* 2 */ new string[] { "3" }, | ||
/* 3 */ new string[] { "1" }, | ||
/* 4 */ new string[] { "0", "1" }, | ||
/* 5 */ new string[] { "0", "2" }, | ||
}; | ||
|
||
Func<string, IEnumerable<string>> succF = x => successors[int.Parse(x)]; | ||
var sorted = TopologicalSort.IterativeSort<string>(new[] { "4", "5" }, succF); | ||
AssertTopologicallySorted(sorted, succF, "Test01"); | ||
Assert.Equal(6, sorted.Length); | ||
AssertEx.Equal(new[] { "4", "5", "2", "3", "1", "0" }, sorted); | ||
} | ||
|
||
[Fact] | ||
public void Test02() | ||
{ | ||
int[][] successors = new int[][] | ||
{ | ||
/* 0 */ new int[] { }, | ||
/* 1 */ new int[] { 2, 4 }, | ||
/* 2 */ new int[] { }, | ||
/* 3 */ new int[] { 2, 5 }, | ||
/* 4 */ new int[] { 2, 3 }, | ||
/* 5 */ new int[] { 2, }, | ||
/* 6 */ new int[] { 2, 7 }, | ||
/* 7 */ new int[] { } | ||
}; | ||
|
||
Func<int, IEnumerable<int>> succF = x => successors[x]; | ||
var sorted = TopologicalSort.IterativeSort<int>(new[] { 1, 6 }, succF); | ||
AssertTopologicallySorted(sorted, succF, "Test02"); | ||
Assert.Equal(7, sorted.Length); | ||
AssertEx.Equal(new[] { 1, 4, 3, 5, 6, 7, 2 }, sorted); | ||
} | ||
|
||
[Fact] | ||
public void TestCycle() | ||
{ | ||
int[][] successors = new int[][] | ||
{ | ||
/* 0 */ new int[] { }, | ||
/* 1 */ new int[] { 2, 4 }, | ||
/* 2 */ new int[] { }, | ||
/* 3 */ new int[] { 2, 5 }, | ||
/* 4 */ new int[] { 2, 3 }, | ||
/* 5 */ new int[] { 2, 1 }, | ||
/* 6 */ new int[] { 2, 7 }, | ||
/* 7 */ new int[] { } | ||
}; | ||
|
||
// 1 -> 4 -> 3 -> 5 -> 1 | ||
Assert.Throws<ArgumentException>(() => | ||
{ | ||
var sorted = TopologicalSort.IterativeSort<int>(new[] { 1 }, x => successors[x]); | ||
}); | ||
} | ||
|
||
[Theory] | ||
[InlineData(1984142830)] | ||
[InlineData(107329897)] | ||
[InlineData(136826316)] | ||
[InlineData(808774716)] | ||
[InlineData(729791148)] | ||
[InlineData(770911997)] | ||
[InlineData(1786285961)] | ||
[InlineData(321110113)] | ||
[InlineData(1686926633)] | ||
[InlineData(787934201)] | ||
[InlineData(745939035)] | ||
[InlineData(1075862430)] | ||
[InlineData(428872484)] | ||
[InlineData(489337268)] | ||
[InlineData(1976108951)] | ||
[InlineData(428397397)] | ||
[InlineData(1921108202)] | ||
[InlineData(926330127)] | ||
[InlineData(364136202)] | ||
[InlineData(1893034696)] | ||
public void TestRandom(int seed) | ||
{ | ||
int numberOfNodes = 100; | ||
Random random = new Random(seed); | ||
|
||
// First, we produce a list of integers representing a possible (reversed) | ||
// topological sort of the graph we will construct | ||
var possibleSort = Enumerable.Range(0, numberOfNodes).ToArray(); | ||
shuffle(possibleSort); | ||
|
||
// Then we produce a set of edges that is consistent with that possible topological sort | ||
int[][] successors = new int[numberOfNodes][]; | ||
for (int i = numberOfNodes-1; i >= 0; i--) | ||
{ | ||
successors[possibleSort[i]] = randomSubset((int)Math.Sqrt(i), i); | ||
} | ||
|
||
// Perform a topological sort and check it. | ||
Func<int, IEnumerable<int>> succF = x => successors[x]; | ||
var sorted = TopologicalSort.IterativeSort<int>(Enumerable.Range(0, numberOfNodes).ToArray(), succF); | ||
Assert.Equal(numberOfNodes, sorted.Length); | ||
AssertTopologicallySorted(sorted, succF, $"TestRandom(seed: {seed})"); | ||
|
||
// Now we modify the graph to add an edge from the last node to the first node, which | ||
// probably induces a cycle. Since the graph is random, it is possible that this does | ||
// not induce a cycle. However, by the construction of the graph it is almost certain | ||
// that a cycle is induced. Nevertheless, to avoid flakiness in the tests, we do not | ||
// test with actual random graphs, but with graphs based on pseudo-random sequences using | ||
// random seeds hardcoded into the tests. That way we are testing on the same graphs each | ||
// time. | ||
successors[possibleSort[0]] = successors[possibleSort[0]].Concat(new int[] { possibleSort[numberOfNodes - 1] }).ToArray(); | ||
|
||
Assert.Throws<ArgumentException>(() => | ||
{ | ||
TopologicalSort.IterativeSort<int>(Enumerable.Range(0, numberOfNodes).ToArray(), succF); | ||
}); | ||
|
||
// where | ||
void shuffle(int[] data) | ||
{ | ||
int length = data.Length; | ||
for (int t = 0; t < length - 1; t++) | ||
{ | ||
int r = random.Next(t, length); | ||
if (t != r) | ||
{ | ||
var tmp = data[t]; | ||
data[t] = data[r]; | ||
data[r] = tmp; | ||
} | ||
} | ||
} | ||
|
||
int[] randomSubset(int count, int limit) | ||
{ | ||
// We don't worry about duplicate values. That's all part of the test, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 💭 I'm not sure this is correct. If you intend to support duplicate values in the result of Specifically, it seems that duplicate values in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The predecessor count is a count of incoming edges, not preceding nodes. I'll add a test. In reply to: 171728301 [](ancestors = 171728301) |
||
// as the topological sort should tolerate duplicate edges. | ||
var result = new int[count]; | ||
for (int i = 0; i < count; i++) | ||
{ | ||
result[i] = possibleSort[random.Next(0, limit)]; | ||
} | ||
|
||
return result; | ||
} | ||
} | ||
|
||
[Fact(Skip = | ||
@"There is little additional coverage of this test over what is offered by TestRandom. | ||
However, we are keeping it in the source as it may be useful to developers who change the topological sort algorithm in the future.")] | ||
public void TestLots() | ||
{ | ||
Random random = new Random(1893034696); | ||
const int count = 100000; | ||
|
||
// Test lots more pseudo-random graphs using many different seeds. | ||
for (int i = 0; i < count; i++) | ||
{ | ||
TestRandom(random.Next()); | ||
} | ||
} | ||
|
||
private void AssertTopologicallySorted<T>(ImmutableArray<T> sorted, Func<T, IEnumerable<T>> successors, string message = null) | ||
{ | ||
var seen = new HashSet<T>(); | ||
for (int i = sorted.Length - 1; i >= 0; i--) | ||
{ | ||
var n = sorted[i]; | ||
foreach (var succ in successors(n)) | ||
{ | ||
Assert.True(seen.Contains(succ), message); | ||
} | ||
|
||
seen.Add(n); | ||
} | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. | ||
|
||
using System; | ||
using System.Collections.Generic; | ||
using System.Collections.Immutable; | ||
using System.Diagnostics; | ||
using System.Linq; | ||
using Microsoft.CodeAnalysis.PooledObjects; | ||
|
||
namespace Microsoft.CodeAnalysis | ||
{ | ||
/// <summary> | ||
/// A helper class that contains a topological sort algorithm. | ||
/// </summary> | ||
internal static class TopologicalSort | ||
{ | ||
/// <summary> | ||
/// Produce a topological sort of a given directed acyclic graph, given a set of nodes which include all nodes | ||
/// that have no predecessors. Any nodes not in the given set, but reachable through successors, will be added | ||
/// to the result. This is an iterative rather than recursive implementation, so it is unlikely to cause a stack | ||
/// overflow. | ||
/// </summary> | ||
/// <typeparam name="TNode">The type of the node</typeparam> | ||
/// <param name="nodes">Any subset of the nodes that includes all nodes with no predecessors</param> | ||
/// <param name="successors">A function mapping a node to its set of successors</param> | ||
/// <returns>A list of all reachable nodes, in which each node always precedes its successors</returns> | ||
public static ImmutableArray<TNode> IterativeSort<TNode>(IEnumerable<TNode> nodes, Func<TNode, IEnumerable<TNode>> successors) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps want an overload with ImmutableArrays as inputs, but waiting until the actual use also seems ok. #ByDesign |
||
{ | ||
// First, count the predecessors of each node | ||
PooledDictionary<TNode, int> predecessorCounts = PredecessorCounts(nodes, successors, out ImmutableArray<TNode> allNodes); | ||
|
||
// Initialize the ready set with those nodes that have no predecessors | ||
var ready = ArrayBuilder<TNode>.GetInstance(); | ||
foreach (TNode node in allNodes) | ||
{ | ||
if (predecessorCounts[node] == 0) | ||
{ | ||
ready.Push(node); | ||
} | ||
} | ||
|
||
// Process the ready set. Output a node, and decrement the predecessor count of its successors. | ||
var resultBuilder = ImmutableArray.CreateBuilder<TNode>(); | ||
while (ready.Count != 0) | ||
{ | ||
var node = ready.Pop(); | ||
resultBuilder.Add(node); | ||
foreach (var succ in successors(node)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 💭 I worry a bit about the number of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We visit each node and edge a constant number of times. We call successors(node) once to count predecessors, and then we call it once on each node to process the outgoing edges. Presuming each call allocates a new enumerator, that is 2n enumerators. If that is a problem for some caller the caller can cache. However, there is no getting around the fact that the caller must have this data allocated in memory in some form or another. In reply to: 171726056 [](ancestors = 171726056) |
||
{ | ||
var count = predecessorCounts[succ]; | ||
Debug.Assert(count != 0); | ||
predecessorCounts[succ] = count - 1; | ||
if (count == 1) | ||
{ | ||
ready.Push(succ); | ||
} | ||
} | ||
} | ||
|
||
// At this point all the nodes should have been output, otherwise there was a cycle | ||
if (predecessorCounts.Count != resultBuilder.Count) | ||
{ | ||
throw new ArgumentException("Cycle in the input graph"); | ||
} | ||
|
||
predecessorCounts.Free(); | ||
ready.Free(); | ||
return resultBuilder.ToImmutable(); | ||
} | ||
|
||
private static PooledDictionary<TNode, int> PredecessorCounts<TNode>( | ||
IEnumerable<TNode> nodes, | ||
Func<TNode, IEnumerable<TNode>> successors, | ||
out ImmutableArray<TNode> allNodes) | ||
{ | ||
var predecessorCounts = PooledDictionary<TNode, int>.GetInstance(); | ||
var counted = PooledHashSet<TNode>.GetInstance(); | ||
var toCount = ArrayBuilder<TNode>.GetInstance(); | ||
var allNodesBuilder = ArrayBuilder<TNode>.GetInstance(); | ||
toCount.AddRange(nodes); | ||
while (toCount.Count != 0) | ||
{ | ||
var n = toCount.Pop(); | ||
if (!counted.Add(n)) | ||
{ | ||
continue; | ||
} | ||
|
||
allNodesBuilder.Add(n); | ||
if (!predecessorCounts.ContainsKey(n)) | ||
{ | ||
predecessorCounts.Add(n, 0); | ||
} | ||
|
||
foreach (var succ in successors(n)) | ||
{ | ||
toCount.Push(succ); | ||
if (predecessorCounts.TryGetValue(succ, out int succPredecessorCount)) | ||
{ | ||
predecessorCounts[succ] = succPredecessorCount + 1; | ||
} | ||
else | ||
{ | ||
predecessorCounts.Add(succ, 1); | ||
} | ||
} | ||
} | ||
|
||
counted.Free(); | ||
toCount.Free(); | ||
allNodes = allNodesBuilder.ToImmutableAndFree(); | ||
return predecessorCounts; | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are we sure Random works the same between platforms/versions etc? #Closed
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just checked the docs. It seems to be specified as
Donald E. Knuth's subtractive random number generator
#ClosedThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, it is required to by the specification of this method:
Having said that, it doesn't matter very much. I doubt there exists an implementation and seed that would cause the cyclic part of the test to fail, which is the only part that can fail spuriously if a random number generator produces a very unlucky sequence. But since it is possible, I added plenty of documentation in the tests so a future failure could be diagnosed.
#Closed