Skip to content

Commit

Permalink
Add more specialized frozen collection types. (#79794)
Browse files Browse the repository at this point in the history
* Add more specialized frozen collection types.

- Add SmallFrozenDictionary/Set, which don't do any hashing and simply iterate through arrays,
testing each element.

- Add SparseRangeInt32FrozenSet which uses a bit vector for storage.

FrozenDictionary/Set each have some constants defined to drive the heuristics that
decide when to use the small and sparse collection types. I did some preliminary tuning
of these, but this could use a detailed benchmark to pick the right values. I'll leave
the fine-tuning to a separate PR.

* Expand supported numeric sets on .NET 7+

Co-authored-by: Martin Taillefer <[email protected]>
  • Loading branch information
geeknoid and Martin Taillefer authored Jan 9, 2023
1 parent 55e1ac7 commit dcda1e0
Show file tree
Hide file tree
Showing 23 changed files with 1,169 additions and 33 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<Project Sdk="Microsoft.NET.Sdk">
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFrameworks>$(NetCoreAppCurrent);$(NetCoreAppPrevious);$(NetCoreAppMinimum);netstandard2.0;$(NetFrameworkMinimum)</TargetFrameworks>
<IsPackable>true</IsPackable>
Expand All @@ -14,7 +14,8 @@ The System.Collections.Immutable library is built-in as part of the shared frame
<Compile Include="System\Polyfills.cs" />
<Compile Include="System\Collections\ThrowHelper.cs" />
<Compile Include="$(CoreLibSharedDir)System\Collections\HashHelpers.cs" Link="System\Collections\HashHelpers.cs" />


<Compile Include="System\Collections\Frozen\Constants.cs" />
<Compile Include="System\Collections\Frozen\DefaultFrozenDictionary.cs" />
<Compile Include="System\Collections\Frozen\DefaultFrozenSet.cs" />
<Compile Include="System\Collections\Frozen\EmptyFrozenDictionary.cs" />
Expand All @@ -24,16 +25,29 @@ The System.Collections.Immutable library is built-in as part of the shared frame
<Compile Include="System\Collections\Frozen\FrozenSet.cs" />
<Compile Include="System\Collections\Frozen\FrozenSetInternalBase.cs" />
<Compile Include="System\Collections\Frozen\ImmutableArrayFactory.cs" />
<Compile Include="System\Collections\Frozen\Int32FrozenDictionary.cs" />
<Compile Include="System\Collections\Frozen\Int32FrozenSet.cs" />
<Compile Include="System\Collections\Frozen\ItemsFrozenSet.cs" />
<Compile Include="System\Collections\Frozen\KeysAndValuesFrozenDictionary.cs" />
<Compile Include="System\Collections\Frozen\LengthBucketsFrozenDictionary.cs" />
<Compile Include="System\Collections\Frozen\LengthBucketsFrozenSet.cs" />
<Compile Include="System\Collections\Frozen\OrdinalStringFrozenDictionary.cs" />
<Compile Include="System\Collections\Frozen\OrdinalStringFrozenSet.cs" />
<Compile Include="System\Collections\Frozen\String\OrdinalStringFrozenDictionary.cs" />
<Compile Include="System\Collections\Frozen\String\OrdinalStringFrozenSet.cs" />
<Compile Include="System\Collections\Frozen\SmallFrozenDictionary.cs" />
<Compile Include="System\Collections\Frozen\SmallFrozenSet.cs" />
<Compile Include="System\Collections\Frozen\ValueTypeDefaultComparerFrozenDictionary.cs" />
<Compile Include="System\Collections\Frozen\ValueTypeDefaultComparerFrozenSet.cs" />

<Compile Include="System\Collections\Frozen\Integer\IntegerFrozenDictionary.cs" Condition="$([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', 'net7.0'))" />
<Compile Include="System\Collections\Frozen\Integer\IntegerFrozenSet.cs" Condition="$([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', 'net7.0'))" />
<Compile Include="System\Collections\Frozen\Integer\SmallIntegerFrozenDictionary.cs" Condition="$([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', 'net7.0'))" />
<Compile Include="System\Collections\Frozen\Integer\SmallIntegerFrozenSet.cs" Condition="$([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', 'net7.0'))" />
<Compile Include="System\Collections\Frozen\Integer\SparseRangeIntegerFrozenSet.cs" Condition="$([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', 'net7.0'))" />

<Compile Include="System\Collections\Frozen\Int32\Int32FrozenDictionary.cs" />
<Compile Include="System\Collections\Frozen\Int32\Int32FrozenSet.cs" />
<Compile Include="System\Collections\Frozen\Int32\SmallInt32FrozenDictionary.cs" Condition="!$([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', 'net7.0'))" />
<Compile Include="System\Collections\Frozen\Int32\SmallInt32FrozenSet.cs" Condition="!$([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', 'net7.0'))" />
<Compile Include="System\Collections\Frozen\Int32\SparseRangeInt32FrozenSet.cs" Condition="!$([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', 'net7.0'))" />

<Compile Include="System\Collections\Frozen\StringComparers\ComparerPicker.cs" />
<Compile Include="System\Collections\Frozen\StringComparers\FullCaseInsensitiveAsciiStringComparer.cs" />
<Compile Include="System\Collections\Frozen\StringComparers\FullCaseInsensitiveStringComparer.cs" />
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

namespace System.Collections.Frozen
{
/// <summary>
/// A few numbers to drive implementation selection heuristics.
/// </summary>
/// <remarks>
/// These numbers were arrived at through simple benchmarks conducted against .NET 7.
/// It's worth potentially tweaking these values if the implementation of the
/// collections changes in a substantial way, or if the JIT gets smarter over time.
/// </remarks>
internal static class Constants
{
/// <summary>
/// Threshold at which we switch from scanning to hashing for non-integer collections.
/// </summary>
/// <remarks>
/// Collections holding at most this many items use the scanning-based
/// SmallFrozenDictionary/Set; larger collections use the hashing-based
/// DefaultFrozenDictionary/Set.
/// </remarks>
public const int MaxItemsInSmallFrozenCollection = 4;

/// <summary>
/// Threshold at which we switch from scanning to hashing for integer collections.
/// </summary>
/// <remarks>
/// Collections holding at most this many items use the scanning
/// SmallIntegerFrozenDictionary/Set; larger collections use the
/// hashing IntegerFrozenDictionary/Set.
/// </remarks>
public const int MaxItemsInSmallIntegerFrozenCollection = 10;

/// <summary>
/// How much free space is allowed in a sparse integer set.
/// </summary>
/// <remarks>
/// A sparse range set is selected when the span of the values divided by the
/// number of items does not exceed this factor.
/// This is a space/perf trade-off. The sparse sets just use a bit vector to
/// hold the state, so lookup is always fast. But there's a point where you're
/// using too much heap space.
/// </remarks>
public const int MaxSparsenessFactorInSparseRangeIntegerSet = 8;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using System.Numerics;
using System.Runtime.CompilerServices;

namespace System.Collections.Frozen
Expand Down Expand Up @@ -105,12 +106,93 @@ private static FrozenDictionary<TKey, TValue> Freeze<TKey, TValue>(Dictionary<TK
// the Equals/GetHashCode methods to be devirtualized and possibly inlined.
if (ReferenceEquals(comparer, EqualityComparer<TKey>.Default))
{
// In the specific case of Int32 keys, we can optimize further to reduce memory consumption by using
// the underlying FrozenHashtable's Int32 index as the keys themselves, avoiding the need to store the
// same keys yet again.
return typeof(TKey) == typeof(int) ?
(FrozenDictionary<TKey, TValue>)(object)new Int32FrozenDictionary<TValue>((Dictionary<int, TValue>)(object)source) :
new ValueTypeDefaultComparerFrozenDictionary<TKey, TValue>(source);
#if NET7_0_OR_GREATER
// Picks the frozen dictionary implementation for a dictionary whose keys are the
// primitive integer type TInt (the caller guarantees TKey == TInt) and which uses
// the default comparer.
//
// source: the dictionary to freeze.
// Returns: a scanning SmallIntegerFrozenDictionary when the item count is within
// Constants.MaxItemsInSmallIntegerFrozenCollection, otherwise a hashing
// Int32FrozenDictionary (int keys) or IntegerFrozenDictionary (other integer keys).
//
// NOTE(review): the previous version indexed keys[0] unconditionally, so this is
// presumably never reached with an empty source - confirm the caller handles Count == 0.
static FrozenDictionary<TKey, TValue> PickIntegerDictionary<TInt>(Dictionary<TKey, TValue> source)
    where TInt : struct, IBinaryInteger<TInt>
{
    if (source.Count <= Constants.MaxItemsInSmallIntegerFrozenCollection)
    {
        // Only the scanning implementation consumes the key/value arrays, so the
        // copies and the sort are done here rather than up front for every path.
        TInt[] keys = (TInt[])(object)source.Keys.ToArray();
        TValue[] values = source.Values.ToArray();

        // Sort the keys and keep each value aligned with its key.
        Array.Sort(keys, values);

        return (FrozenDictionary<TKey, TValue>)(object)new SmallIntegerFrozenDictionary<TInt, TValue>(keys, values);
    }

    if (typeof(TInt) == typeof(int))
    {
        return (FrozenDictionary<TKey, TValue>)(object)new Int32FrozenDictionary<TValue>((Dictionary<int, TValue>)(object)source);
    }

    return (FrozenDictionary<TKey, TValue>)(object)new IntegerFrozenDictionary<TInt, TValue>((Dictionary<TInt, TValue>)(object)source);
}

if (typeof(TKey) == typeof(int))
{
return PickIntegerDictionary<int>(source);
}
else if (typeof(TKey) == typeof(uint))
{
return PickIntegerDictionary<uint>(source);
}
else if (typeof(TKey) == typeof(long))
{
return PickIntegerDictionary<long>(source);
}
else if (typeof(TKey) == typeof(ulong))
{
return PickIntegerDictionary<ulong>(source);
}
else if (typeof(TKey) == typeof(short))
{
return PickIntegerDictionary<short>(source);
}
else if (typeof(TKey) == typeof(ushort))
{
return PickIntegerDictionary<ushort>(source);
}
else if (typeof(TKey) == typeof(byte))
{
return PickIntegerDictionary<byte>(source);
}
else if (typeof(TKey) == typeof(sbyte))
{
return PickIntegerDictionary<sbyte>(source);
}

#else

// Int32 keys on targets without generic math: use the scanning dictionary for small
// inputs, otherwise the hashing Int32FrozenDictionary.
if (typeof(TKey) == typeof(int))
{
    if (source.Count <= Constants.MaxItemsInSmallIntegerFrozenCollection)
    {
        // Only the scanning implementation consumes the key/value arrays, so the
        // copies and the sort are done here rather than for every Int32 dictionary.
        int[] keys = (int[])(object)source.Keys.ToArray();
        TValue[] values = source.Values.ToArray();

        // Sort the keys and keep each value aligned with its key.
        Array.Sort(keys, values);

        return (FrozenDictionary<TKey, TValue>)(object)new SmallInt32FrozenDictionary<TValue>(keys, values);
    }

    return (FrozenDictionary<TKey, TValue>)(object)new Int32FrozenDictionary<TValue>((Dictionary<int, TValue>)(object)source);
}
#endif
else
{
return new ValueTypeDefaultComparerFrozenDictionary<TKey, TValue>(source);
}
}
}
else if (typeof(TKey) == typeof(string))
Expand All @@ -132,6 +214,12 @@ private static FrozenDictionary<TKey, TValue> Freeze<TKey, TValue>(Dictionary<TK
}
}

if (source.Count <= Constants.MaxItemsInSmallFrozenCollection)
{
// use the specialized dictionary for low item counts
return new SmallFrozenDictionary<TKey, TValue>(source, comparer);
}

// No special-cases apply. Use the default frozen dictionary.
return new DefaultFrozenDictionary<TKey, TValue>(source, comparer);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using System.Numerics;

namespace System.Collections.Frozen
{
Expand Down Expand Up @@ -60,12 +63,96 @@ public static FrozenSet<T> ToFrozenSet<T>(this IEnumerable<T> source, IEqualityC
// the Equals/GetHashCode methods to be devirtualized and possibly inlined.
if (ReferenceEquals(comparer, EqualityComparer<T>.Default))
{
// In the specific case of Int32 keys, we can optimize further to reduce memory consumption by using
// the underlying FrozenHashtable's Int32 index as the values themselves, avoiding the need to store the
// same values yet again.
return typeof(T) == typeof(int) ?
(FrozenSet<T>)(object)new Int32FrozenSet((HashSet<int>)(object)uniqueValues) :
new ValueTypeDefaultComparerFrozenSet<T>(uniqueValues);
#if NET7_0_OR_GREATER
// Picks the frozen set implementation for a set whose elements are the primitive
// integer type TInt (the caller guarantees T == TInt) and which uses the default comparer.
//
// values: the de-duplicated elements to freeze.
// Returns, in order of preference: a bit-vector SparseRangeIntegerFrozenSet when the
// values are contiguous or dense enough, a scanning SmallIntegerFrozenSet for small
// counts, otherwise a hashing Int32FrozenSet (int) or IntegerFrozenSet (other types).
static FrozenSet<T> PickIntegerSet<TInt>(HashSet<T> values)
    where TInt : struct, IBinaryInteger<TInt>
{
    TInt[] sorted = (TInt[])(object)values.ToArray();
    Array.Sort(sorted);

    // After sorting, the extremes sit at the two ends of the array.
    TInt lowest = sorted[0];
    TInt highest = sorted[^1];
    int count = sorted.Length;
    ulong span = ulong.CreateTruncating(highest - lowest);

    // Contiguous means every value between lowest and highest is present.
    bool contiguous = span == (ulong)count - 1;
    if (contiguous || (span <= int.MaxValue && (int)span / count <= Constants.MaxSparsenessFactorInSparseRangeIntegerSet))
    {
        return (FrozenSet<T>)(object)new SparseRangeIntegerFrozenSet<TInt>(sorted);
    }

    if (count <= Constants.MaxItemsInSmallIntegerFrozenCollection)
    {
        return (FrozenSet<T>)(object)new SmallIntegerFrozenSet<TInt>(sorted);
    }

    if (typeof(T) == typeof(int))
    {
        return (FrozenSet<T>)(object)new Int32FrozenSet((int[])(object)sorted);
    }

    return (FrozenSet<T>)(object)new IntegerFrozenSet<TInt>((HashSet<TInt>)(object)values);
}

if (typeof(T) == typeof(int))
{
return PickIntegerSet<int>(uniqueValues);
}
else if (typeof(T) == typeof(uint))
{
return PickIntegerSet<uint>(uniqueValues);
}
else if (typeof(T) == typeof(long))
{
return PickIntegerSet<long>(uniqueValues);
}
else if (typeof(T) == typeof(ulong))
{
return PickIntegerSet<ulong>(uniqueValues);
}
else if (typeof(T) == typeof(short))
{
return PickIntegerSet<short>(uniqueValues);
}
else if (typeof(T) == typeof(ushort))
{
return PickIntegerSet<ushort>(uniqueValues);
}
else if (typeof(T) == typeof(byte))
{
return PickIntegerSet<byte>(uniqueValues);
}
else if (typeof(T) == typeof(sbyte))
{
return PickIntegerSet<sbyte>(uniqueValues);
}

#else
// Int32 elements on targets without generic math: prefer the bit-vector set when the
// values are contiguous or dense enough, then the scanning set for small counts,
// otherwise the hashing Int32FrozenSet.
if (typeof(T) == typeof(int))
{
    int[] items = (int[])(object)uniqueValues.ToArray();
    Array.Sort(items);

    int min = items[0];
    int max = items[items.Length - 1];

    // Widen before subtracting: in 32-bit arithmetic "max - min + 1" wraps for wide
    // ranges (e.g. { int.MinValue, int.MaxValue } yields 0), which would make a huge
    // range look dense and wrongly select the bit-vector set.
    long range = (long)max - min + 1;

    if ((range == items.Length) || (range <= int.MaxValue && range / items.Length <= Constants.MaxSparsenessFactorInSparseRangeIntegerSet))
    {
        return (FrozenSet<T>)(object)new SparseRangeInt32FrozenSet(items);
    }
    else if (items.Length <= Constants.MaxItemsInSmallIntegerFrozenCollection)
    {
        // Integer collections use the integer-specific small threshold, matching the
        // Int32 dictionary path and the generic-math set path.
        return (FrozenSet<T>)(object)new SmallInt32FrozenSet(items);
    }
    else
    {
        return (FrozenSet<T>)(object)new Int32FrozenSet(items);
    }
}
#endif
else
{
return new ValueTypeDefaultComparerFrozenSet<T>(uniqueValues);
}
}
}
else if (typeof(T) == typeof(string))
Expand All @@ -92,6 +179,12 @@ public static FrozenSet<T> ToFrozenSet<T>(this IEnumerable<T> source, IEqualityC
}
}

if (uniqueValues.Count <= Constants.MaxItemsInSmallFrozenCollection)
{
// use the specialized set for low item counts
return new SmallFrozenSet<T>(uniqueValues, comparer);
}

// No special-cases apply. Use the default frozen set.
return new DefaultFrozenSet<T>(uniqueValues, comparer);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,12 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections.Generic;
using System.Collections.Immutable;
using System.Diagnostics;
using System.Runtime.CompilerServices;

namespace System.Collections.Frozen
{
/// <summary>Provides a frozen dictionary to use when the key is an <see cref="int"/> and the default comparer is used.</summary>
/// <typeparam name="TValue">The type of the values in the dictionary.</typeparam>
/// <remarks>
/// This key type is specialized as a memory optimization, as the frozen hash table already contains the array of all
/// int values, and we can thus use its array as the keys rather than maintaining a duplicate copy.
Expand Down Expand Up @@ -52,9 +50,10 @@ private protected override ref readonly TValue GetValueRefOrNullRefCore(int key)
{
_hashTable.FindMatchingEntries(key, out int index, out int endIndex);

int[] hashCodes = _hashTable.HashCodes;
while (index <= endIndex)
{
if (key == _hashTable.HashCodes[index])
if (key == hashCodes[index])
{
return ref _values[index];
}
Expand Down
Loading

0 comments on commit dcda1e0

Please sign in to comment.