Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SE: Enumerable ContentHash for ordered and unordered sets #6979

Merged
merged 4 commits into from
Mar 27, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions analyzers/src/SonarAnalyzer.Common/Helpers/HashCode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,16 @@ namespace SonarAnalyzer.Helpers
private const uint PreMultiplier = 3266489917U;
private const uint PostMultiplier = 668265263U;
private const int RotateOffset = 17;
private const int IntSeed = 393241;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not use Seed instead?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is giving "Empty" some random value? Why not keep 0 for empty? It's a valid integer hash value like any other.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seed is uint. This number is taken from https://planetmath.org/goodhashtableprimes.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted to have a seed so the first value gets transformed (x ^ 0 = x) and a good hash function should distribute the values along the available number space.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I need answers to these questions before approving.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dammit! Always the same mistake:
image

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd argue that x is already a good distribution :D


/// <summary>
/// Computes a hash of a dictionary's content by XOR-ing the combined key/value hash
/// (see <c>Combine</c>) of every entry, starting from 0.
/// </summary>
/// <remarks>
/// XOR is commutative, so the enumeration order of the entries does not influence the result.
/// An empty dictionary yields 0, because the aggregation returns its initial value unchanged.
/// </remarks>
public static int DictionaryContentHash<TKey, TValue>(IDictionary<TKey, TValue> dictionary) =>
dictionary.Aggregate(0, (seed, kvp) => seed ^ Combine(kvp.Key, kvp.Value));

/// <summary>
/// Folds the elements, in enumeration order, into a running hash via <c>Combine</c>, starting from 0.
/// </summary>
/// <remarks>An empty sequence yields 0.</remarks>
// NOTE(review): in this diff view this looks like the pre-change method replaced by the ordered/unordered variants - confirm against the merged file.
public static int EnumerableContentHash<TValue>(IEnumerable<TValue> enumerable) =>
enumerable.Aggregate(0, (seed, x) => Combine(seed, x));
/// <summary>
/// Computes a hash of a sequence by XOR-ing the hash code of every element onto <c>IntSeed</c>.
/// </summary>
/// <remarks>
/// XOR is commutative, so the enumeration order does not influence the result.
/// A null element contributes 0. An empty sequence yields <c>IntSeed</c>.
/// </remarks>
public static int EnumerableUnorderedContentHash<TValue>(IEnumerable<TValue> enumerable) =>
enumerable.Aggregate(IntSeed, (seed, x) => seed ^ (x?.GetHashCode() ?? 0));

/// <summary>
/// Folds the elements, in enumeration order, into a running hash via <c>Combine</c>, starting from <c>IntSeed</c>.
/// </summary>
/// <remarks>An empty sequence yields <c>IntSeed</c>, because the aggregation returns its initial value unchanged.</remarks>
public static int EnumerableOrderedContentHash<TValue>(IEnumerable<TValue> enumerable) =>
enumerable.Aggregate(IntSeed, Combine);
pavel-mikula-sonarsource marked this conversation as resolved.
Show resolved Hide resolved

/// <summary>
/// Combines the hash codes of two values by chaining <c>AddHash</c> calls onto <c>Seed</c>
/// and casting the result to <see cref="int"/>.
/// </summary>
/// <remarks>
/// For a null argument <c>?.GetHashCode()</c> evaluates to null and is handed to <c>AddHash</c> as such.
/// NOTE(review): <c>AddHash</c> is declared outside this view - presumably it accepts a nullable hash; confirm.
/// </remarks>
public static int Combine<T1, T2>(T1 a, T2 b) =>
(int)Seed.AddHash(a?.GetHashCode()).AddHash(b?.GetHashCode());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ public override int GetHashCode() =>
HashCode.DictionaryContentHash(OperationValue),
HashCode.DictionaryContentHash(SymbolValue),
HashCode.DictionaryContentHash(CaptureOperation),
HashCode.EnumerableContentHash(PreservedSymbols),
HashCode.EnumerableContentHash(Exceptions));
HashCode.EnumerableUnorderedContentHash(PreservedSymbols),
HashCode.EnumerableOrderedContentHash(Exceptions));

public bool Equals(ProgramState other) =>
// VisitCount is not compared, two ProgramState are equal if their current state is equal. No matter what historical path led to it.
Expand Down
86 changes: 78 additions & 8 deletions analyzers/tests/SonarAnalyzer.UnitTest/Helpers/HashCodeTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/

using HashCode = SonarAnalyzer.Helpers.HashCode;

namespace SonarAnalyzer.UnitTest.Helpers
{
[TestClass]
Expand All @@ -28,10 +30,10 @@ public class HashCodeTest
[DataRow("Lorem Ipsum")]
public void Combine_ProducesDifferentResults(string input)
{
var hash2 = SonarAnalyzer.Helpers.HashCode.Combine(input, input);
var hash3 = SonarAnalyzer.Helpers.HashCode.Combine(input, input, input);
var hash4 = SonarAnalyzer.Helpers.HashCode.Combine(input, input, input, input);
var hash5 = SonarAnalyzer.Helpers.HashCode.Combine(input, input, input, input, input);
var hash2 = HashCode.Combine(input, input);
var hash3 = HashCode.Combine(input, input, input);
var hash4 = HashCode.Combine(input, input, input, input);
var hash5 = HashCode.Combine(input, input, input, input, input);

hash2.Should().NotBe(0);
hash3.Should().NotBe(0).And.NotBe(hash2);
Expand All @@ -45,8 +47,8 @@ public void DictionaryContentHash_StableForUnsortedDictionary()
var numbers = Enumerable.Range(1, 1000);
var dict1 = numbers.ToDictionary(x => x, x => x);
var dict2 = numbers.OrderByDescending(x => x).ToDictionary(x => x, x => x);
var hashCode1 = SonarAnalyzer.Helpers.HashCode.DictionaryContentHash(dict1);
var hashCode2 = SonarAnalyzer.Helpers.HashCode.DictionaryContentHash(dict2);
var hashCode1 = HashCode.DictionaryContentHash(dict1);
var hashCode2 = HashCode.DictionaryContentHash(dict2);
hashCode1.Should().Be(hashCode2);
}

Expand All @@ -56,9 +58,77 @@ public void DictionaryContentHash_StableForImmutableDictionary()
var numbers = Enumerable.Range(1, 1000);
var dict1 = numbers.ToImmutableDictionary(x => x, x => x);
var dict2 = numbers.OrderByDescending(x => x).ToImmutableDictionary(x => x, x => x);
var hashCode1 = SonarAnalyzer.Helpers.HashCode.DictionaryContentHash(dict1);
var hashCode2 = SonarAnalyzer.Helpers.HashCode.DictionaryContentHash(dict2);
var hashCode1 = HashCode.DictionaryContentHash(dict1);
var hashCode2 = HashCode.DictionaryContentHash(dict2);
hashCode1.Should().Be(hashCode2);
}

// Checks that hashing an empty sequence gives the same value for separate empty arrays
// and for empty arrays of different element types.
[TestMethod]
public void EnumerableUnorderedContentHash_Empty()
{
// CA1825 is suppressed because the test allocates zero-length arrays with "new T[0]".
#pragma warning disable CA1825 // Avoid zero-length array allocations
pavel-mikula-sonarsource marked this conversation as resolved.
Show resolved Hide resolved
var ints = new int[0];
var strings = new string[0];

HashCode.EnumerableUnorderedContentHash(ints).Should().Be(HashCode.EnumerableUnorderedContentHash(new int[0]));
// NOTE(review): the next assertion compares "strings" with itself, unlike the int assertion above which uses a fresh array - confirm whether "new string[0]" was intended.
HashCode.EnumerableUnorderedContentHash(strings).Should().Be(HashCode.EnumerableUnorderedContentHash(strings));
// Empty arrays of different element types are expected to hash identically.
HashCode.EnumerableUnorderedContentHash(ints).Should().Be(HashCode.EnumerableUnorderedContentHash(strings));
#pragma warning restore CA1825
}

[TestMethod]
public void EnumerableUnorderedContentHash_Order()
{
    // The unordered hash must ignore element order but still react to a changed element.
    var ascending = new[] { 0, 1, 2 };
    var descending = new[] { 2, 1, 0 };
    var lastElementChanged = new[] { 0, 1, 8 };

    var ascendingHash = HashCode.EnumerableUnorderedContentHash(ascending);

    ascendingHash.Should().Be(HashCode.EnumerableUnorderedContentHash(descending)).And.NotBe(0);
    ascendingHash.Should().NotBe(HashCode.EnumerableUnorderedContentHash(lastElementChanged));
}

[TestMethod]
public void EnumerableUnorderedContentHash_DifferentLength()
{
    // Appending one more element must change the unordered hash.
    var shorter = new[] { 0, 1, 2 };
    var longer = new[] { 0, 1, 2, 3 };

    var shorterHash = HashCode.EnumerableUnorderedContentHash(shorter);
    var longerHash = HashCode.EnumerableUnorderedContentHash(longer);

    shorterHash.Should().NotBe(longerHash);
}

// Checks that hashing an empty sequence gives the same value for separate empty arrays
// and for empty arrays of different element types.
[TestMethod]
public void EnumerableOrderedContentHash_Empty()
{
    // Separate zero-length arrays are allocated on purpose, so CA1825 is suppressed for this test.
#pragma warning disable CA1825 // Avoid zero-length array allocations
    var ints = new int[0];
    var strings = new string[0];

    // Same element type, different instances.
    HashCode.EnumerableOrderedContentHash(ints).Should().Be(HashCode.EnumerableOrderedContentHash(new int[0]));
    // Compare against a fresh instance; comparing "strings" with itself could never fail.
    HashCode.EnumerableOrderedContentHash(strings).Should().Be(HashCode.EnumerableOrderedContentHash(new string[0]));
    // Different element types.
    HashCode.EnumerableOrderedContentHash(ints).Should().Be(HashCode.EnumerableOrderedContentHash(strings));
#pragma warning restore CA1825
}

[TestMethod]
public void EnumerableOrderedContentHash_Order()
{
    // The ordered hash must match for equal sequences and differ when the order or an element changes.
    var reference = new[] { 0, 1, 2 };
    var sameSequence = new[] { 0, 1, 2 };
    var reversed = new[] { 2, 1, 0 };
    var lastElementChanged = new[] { 0, 1, 8 };

    var referenceHash = HashCode.EnumerableOrderedContentHash(reference);

    referenceHash.Should().Be(HashCode.EnumerableOrderedContentHash(sameSequence)).And.NotBe(0);
    referenceHash.Should().NotBe(HashCode.EnumerableOrderedContentHash(reversed));
    referenceHash.Should().NotBe(HashCode.EnumerableOrderedContentHash(lastElementChanged));
}

[TestMethod]
public void EnumerableOrderedContentHash_DifferentLength()
{
    // Appending one more element must change the ordered hash.
    var shorter = new[] { 0, 1, 2 };
    var longer = new[] { 0, 1, 2, 3 };

    var shorterHash = HashCode.EnumerableOrderedContentHash(shorter);
    var longerHash = HashCode.EnumerableOrderedContentHash(longer);

    shorterHash.Should().NotBe(longerHash);
}
}
}