Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ADO.NET IHashPicker customization API + Orleans v3-compatible IHashPicker implementation #9217

Merged
merged 13 commits into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
using Microsoft.Extensions.Options;
using Orleans.Persistence.AdoNet.Storage;
using Orleans.Runtime;
using Orleans.Storage;
Expand Down Expand Up @@ -37,6 +38,20 @@ public class AdoNetGrainStorageOptions : IStorageProviderSerializerOptions

/// <inheritdoc/>
public IGrainStorageSerializer GrainStorageSerializer { get; set; }

/// <summary>
/// Gets or sets the hasher picker to use for this storage provider.
/// </summary>
public IStorageHasherPicker HashPicker { get; set; }

/// <summary>
/// Sets legacy Orleans v3-compatible hash picker to use for this storage provider. Invoke this method if you need to run
/// Orleans v7+ silo against existing Orleans v3-initialized database and keep existing grain state.
/// </summary>
public void UseOrleans3CompatibleHasher()
{
this.HashPicker = new Orleans3CompatibleStorageHashPicker();
}
}

/// <summary>
Expand Down Expand Up @@ -70,6 +85,27 @@ public void ValidateConfiguration()
{
throw new OrleansConfigurationException($"Invalid {nameof(AdoNetGrainStorageOptions)} values for {nameof(AdoNetGrainStorage)} \"{name}\". {nameof(options.ConnectionString)} is required.");
}

if (this.options.HashPicker == null)
{
throw new OrleansConfigurationException($"Invalid {nameof(AdoNetGrainStorageOptions)} values for {nameof(AdoNetGrainStorage)} {name}. {nameof(options.HashPicker)} is required.");
}
}
}

/// <summary>
/// Provides default configuration HashPicker for AdoNetGrainStorageOptions.
/// </summary>
public class DefaultAdoNetGrainStorageOptionsHashPickerConfigurator : IPostConfigureOptions<AdoNetGrainStorageOptions>
{
public void PostConfigure(string name, AdoNetGrainStorageOptions options)
{
// preserving explicitly configured HashPicker
if (options.HashPicker != null)
return;

// content-aware hashing with different pickers, unable to use standard StorageHasherPicker
options.HashPicker = new Orleans3CompatibleStorageHashPicker();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this should be setting the existing default value, new StorageHasherPicker(new[] { new OrleansDefaultHasher() });

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, it's a mistake, thanks for checking

}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ public class AdoNetGrainStorage: IGrainStorage, ILifecycleParticipant<ISiloLifec
/// <summary>
/// The hash generator used to hash natural keys, grain ID and grain type to a more narrow index.
/// </summary>
public IStorageHasherPicker HashPicker { get; set; } = new StorageHasherPicker(new[] { new OrleansDefaultHasher() });
public IStorageHasherPicker HashPicker { get; set; }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this default need to change? It seems that the current behavior should be the default, with the option for the Orleans 3.x compatible

Copy link
Contributor Author

@vladislav-prishchepa vladislav-prishchepa Nov 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now we always initialize this property in the constructor, so this initializer is ignored.
But it can be added as a fallback in the constructor to handle cases when value from options is null.


private readonly AdoNetGrainStorageOptions options;
private readonly IProviderRuntime providerRuntime;
Expand All @@ -137,6 +137,7 @@ public AdoNetGrainStorage(
this.logger = logger;
this.serviceId = clusterOptions.Value.ServiceId;
this.Serializer = options.Value.GrainStorageSerializer;
this.HashPicker = options.Value.HashPicker;
}

public void Participate(ISiloLifecycle lifecycle)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ public static IServiceCollection AddAdoNetGrainStorage(this IServiceCollection s
configureOptions?.Invoke(services.AddOptions<AdoNetGrainStorageOptions>(name));
services.ConfigureNamedOptionForLogging<AdoNetGrainStorageOptions>(name);
services.AddTransient<IPostConfigureOptions<AdoNetGrainStorageOptions>, DefaultStorageProviderSerializerOptionsConfigurator<AdoNetGrainStorageOptions>>();
services.AddTransient<IPostConfigureOptions<AdoNetGrainStorageOptions>, DefaultAdoNetGrainStorageOptionsHashPickerConfigurator>();
services.AddTransient<IConfigurationValidator>(sp => new AdoNetGrainStorageOptionsValidator(sp.GetRequiredService<IOptionsMonitor<AdoNetGrainStorageOptions>>().Get(name), name));
return services.AddGrainStorage(name, AdoNetGrainStorageFactory.Create);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
using System;

namespace Orleans.Storage
{
// Based on the version in http://home.comcast.net/~bretm/hash/7.html, which is based on that
// in http://burtleburtle.net/bob/hash/evahash.html.
// Note that we only use the version that takes three ulongs, which was written by the Orleans team.
// implementation restored from Orleans v3.7.2: https://github.com/dotnet/orleans/blob/b24e446abfd883f0e4ed614f5267eaa3331548dc/src/Orleans.Core.Abstractions/IDs/JenkinsHash.cs,
// trimmed and slightly optimized
internal static class JenkinsHash
{
private static void Mix(ref uint aa, ref uint bb, ref uint cc)
{
uint a = aa;
uint b = bb;
uint c = cc;

a -= b; a -= c; a ^= (c >> 13);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a, b, and c are refs, each operation must access the (stack) memory.
When the operations are solely done on locals, then registers can be used.

Cf. sharplab

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Out of curiosity I ran a benchmark.

| Method     | Mean      | Error     | StdDev    | Ratio | RatioSD |
|----------- |----------:|----------:|----------:|------:|--------:|
| Default    | 27.784 ns | 0.5689 ns | 0.5322 ns |  1.00 |    0.03 |
| Suggestion |  6.043 ns | 0.1590 ns | 0.1633 ns |  0.22 |    0.01 |

So roughly 4.5x faster (in the isolated benchmark).

code
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;

BenchmarkRunner.Run<Bench>();

public class Bench
{
    private uint _a = 42;
    private uint _b = 43;
    private uint _c = 44;

    [Benchmark(Baseline = true)]
    public void Default() => Mix(ref _a, ref _b, ref _c);

    [Benchmark]
    public void Suggestion() => Mix1(ref _a, ref _b, ref _c);

    private static void Mix(ref uint a, ref uint b, ref uint c)
    {
        a -= b; a -= c; a ^= (c >> 13);
        b -= c; b -= a; b ^= (a << 8);
        c -= a; c -= b; c ^= (b >> 13);
        a -= b; a -= c; a ^= (c >> 12);
        b -= c; b -= a; b ^= (a << 16);
        c -= a; c -= b; c ^= (b >> 5);
        a -= b; a -= c; a ^= (c >> 3);
        b -= c; b -= a; b ^= (a << 10);
        c -= a; c -= b; c ^= (b >> 15);
    }

    private static void Mix1(ref uint aa, ref uint bb, ref uint cc)
    {
        uint a = aa;
        uint b = bb;
        uint c = cc;

        a -= b; a -= c; a ^= (c >> 13);
        b -= c; b -= a; b ^= (a << 8);
        c -= a; c -= b; c ^= (b >> 13);
        a -= b; a -= c; a ^= (c >> 12);
        b -= c; b -= a; b ^= (a << 16);
        c -= a; c -= b; c ^= (b >> 5);
        a -= b; a -= c; a ^= (c >> 3);
        b -= c; b -= a; b ^= (a << 10);
        c -= a; c -= b; c ^= (b >> 15);

        aa = a;
        bb = b;
        cc = c;
    }
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea was to keep the implementation of the JenkinsHash exactly the same it was in 3.7.2, without introducing potentially breaking changes.

It's possible to improve it but only with a proper coverage by tests.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure that calculation optimization will bring a significant profit. We need to calculate hash only 2 times just before DbCommand execution, ~50ns improvement is unnoticeable in comparison with ADO.NET overhead.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's true, but think in a different way:

For each operation memory needs to be accessed, so if in L1 read from there, otherwise go the cache hierarchy up to RAM if not found. So we have potential for cache trashing, which can't be shown in micro-benchmarks.

When memory access is minimized, the chance for cache trashing is minimized too.
Here the code change required is trivial, so I'd go with the suggestion -- the algorithm is untouched, just at entry and exit of the method the value are read to local / written back.

Copy link
Contributor Author

@vladislav-prishchepa vladislav-prishchepa Nov 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, pushed the change

b -= c; b -= a; b ^= (a << 8);
c -= a; c -= b; c ^= (b >> 13);
a -= b; a -= c; a ^= (c >> 12);
b -= c; b -= a; b ^= (a << 16);
c -= a; c -= b; c ^= (b >> 5);
a -= b; a -= c; a ^= (c >> 3);
b -= c; b -= a; b ^= (a << 10);
c -= a; c -= b; c ^= (b >> 15);

aa = a;
bb = b;
cc = c;
}

// This is the reference implementation of the Jenkins hash.
public static uint ComputeHash(ReadOnlySpan<byte> data)
{
int len = data.Length;
uint a = 0x9e3779b9;
uint b = a;
uint c = 0;
int i = 0;

while (i <= len - 12)
{
a += (uint)data[i++] |
((uint)data[i++] << 8) |
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This kind of indexing results in a lot bound checks. Do we care about these?

Alternative is to use raw-pointers (pinning doesn't harm here).

((uint)data[i++] << 16) |
((uint)data[i++] << 24);
b += (uint)data[i++] |
((uint)data[i++] << 8) |
((uint)data[i++] << 16) |
((uint)data[i++] << 24);
c += (uint)data[i++] |
((uint)data[i++] << 8) |
((uint)data[i++] << 16) |
((uint)data[i++] << 24);
Mix(ref a, ref b, ref c);
}
c += (uint)len;
if (i < len)
a += data[i++];
if (i < len)
a += (uint)data[i++] << 8;
if (i < len)
a += (uint)data[i++] << 16;
if (i < len)
a += (uint)data[i++] << 24;
if (i < len)
b += (uint)data[i++];
if (i < len)
b += (uint)data[i++] << 8;
if (i < len)
b += (uint)data[i++] << 16;
if (i < len)
b += (uint)data[i++] << 24;
if (i < len)
c += (uint)data[i++] << 8;
if (i < len)
c += (uint)data[i++] << 16;
if (i < len)
c += (uint)data[i++] << 24;
Mix(ref a, ref b, ref c);
return c;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
using System;

namespace Orleans.Storage
{
/// <summary>
/// Orleans v3-compatible hasher implementation for non-string-only grain key ids.
/// </summary>
internal class Orleans3CompatibleHasher : IHasher
{
/// <summary>
/// <see cref="IHasher.Description"/>
/// </summary>
public string Description { get; } = $"Orleans v3 hash function ({nameof(JenkinsHash)}).";

/// <summary>
/// <see cref="IHasher.Hash(byte[])"/>.
/// </summary>
public int Hash(byte[] data) => Hash(data.AsSpan());

/// <summary>
/// <see cref="IHasher.Hash(byte[])"/>.
/// </summary>
public int Hash(ReadOnlySpan<byte> data)
{
// implementation restored from Orleans v3.7.2: https://github.com/dotnet/orleans/blob/b24e446abfd883f0e4ed614f5267eaa3331548dc/src/AdoNet/Orleans.Persistence.AdoNet/Storage/Provider/OrleansDefaultHasher.cs
return unchecked((int)JenkinsHash.ComputeHash(data));
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
using System.Collections.Generic;
using Orleans.Runtime;

namespace Orleans.Storage
{
/// <summary>
/// Orleans v3-compatible hash picker implementation for Orleans v3 -> v7+ migration scenarios.
/// </summary>
public class Orleans3CompatibleStorageHashPicker : IStorageHasherPicker
{
private readonly Orleans3CompatibleHasher _nonStringHasher;

/// <summary>
/// <see cref="IStorageHasherPicker.HashProviders"/>.
/// </summary>
public ICollection<IHasher> HashProviders { get; }

/// <summary>
/// A constructor.
/// </summary>
public Orleans3CompatibleStorageHashPicker()
{
_nonStringHasher = new();
HashProviders = [_nonStringHasher];
}

/// <summary>
/// <see cref="IStorageHasherPicker.PickHasher{T}"/>.
/// </summary>
public IHasher PickHasher<T>(
string serviceId,
string storageProviderInstanceName,
string grainType,
GrainId grainId,
IGrainState<T> grainState,
string tag = null)
{
// string-only grain keys had special behaviour in Orleans v3
if (grainId.TryGetIntegerKey(out _, out _) || grainId.TryGetGuidKey(out _, out _))
return _nonStringHasher;

// unable to cache hasher instances: content-aware behaviour, see hasher implementation for details
return new Orleans3CompatibleStringKeyHasher(_nonStringHasher, grainType);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
using System;
using System.Buffers;
using System.Text;

namespace Orleans.Storage
{
/// <summary>
/// Orleans v3-compatible hasher implementation for string-only grain key ids.
/// </summary>
internal class Orleans3CompatibleStringKeyHasher : IHasher
{
private readonly Orleans3CompatibleHasher _innerHasher;
private readonly string _grainType;

public Orleans3CompatibleStringKeyHasher(Orleans3CompatibleHasher innerHasher, string grainType)
{
_innerHasher = innerHasher;
_grainType = grainType;
}

/// <summary>
/// <see cref="IHasher.Description"/>
/// </summary>
public string Description { get; } = $"Orleans v3 hash function ({nameof(JenkinsHash)}).";

/// <summary>
/// <see cref="IHasher.Hash(byte[])"/>.
/// </summary>
public int Hash(byte[] data)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
public int Hash(byte[] data)
public int Hash(ReadOnlySpan<byte> data)

I'd go with the ROS on all these arguments.

{
// Orleans v3 treats string-only keys as integer keys with extension (AdoGrainKey.IsLongKey == true),
// so data must be extended for string-only grain keys.
// But AdoNetGrainStorage implementation also uses such code:
// ...
// var grainIdHash = HashPicker.PickHasher(serviceId, this.name, baseGrainType, grainReference, grainState).Hash(grainId.GetHashBytes());
// var grainTypeHash = HashPicker.PickHasher(serviceId, this.name, baseGrainType, grainReference, grainState).Hash(Encoding.UTF8.GetBytes(baseGrainType));
// ...
// PickHasher parameters are the same for both calls so we need to analyze data content to distinguish these cases.
// It doesn't word if string key is equal to grain type name, but we consider this edge case to be negligibly rare.

if (IsGrainTypeName(data))
return _innerHasher.Hash(data);

var extendedLength = data.Length + 8;

const int maxOnStack = 256;
var rentedBuffer = extendedLength > maxOnStack
? ArrayPool<byte>.Shared.Rent(extendedLength)
: null;

// assuming code below never throws, so calling ArrayPool.Return without try/finally block for JIT optimization

var buffer = rentedBuffer is not null
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why don't you write it as suggested (i.e. like use almost everywhere with .NET)?

Copy link
Contributor Author

@vladislav-prishchepa vladislav-prishchepa Nov 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct me if I'm wrong.

When we call ArrayPool.Rent and don't call ArrayPool.Return, the buffer we used will be collected by GC. Even if GC will return the array to pool, nobody knows when the GC kicks in. So, further calls to ArrayPool.Return will allocate new arrays (we will quickly deplete the pool by frequent ArrayPool.Rent calls) which will lead to high memory traffic. That's why I return the array explicitly as soon as possible to make it recycled by further ArrayPool.Rent calls.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

small repro

[MemoryDiagnoser]
public class PoolAllocationBenchmark
{
    [Benchmark]
    public void Test()
    {
        for (var i = 0; i < 1_000_000; i++)
        {
            UsePool();
        }
    }

    [MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.NoOptimization)]
    private static void UsePool()
    {
        var buffer  = ArrayPool<byte>.Shared.Rent(1024);
        Array.Clear(buffer);
    }
}
// * Summary *

BenchmarkDotNet v0.14.0, Windows 11 (10.0.22631.4391/23H2/2023Update/SunValley3)
12th Gen Intel Core i9-12900HX, 1 CPU, 24 logical and 16 physical cores
.NET SDK 9.0.100-rc.2.24474.11
  [Host]     : .NET 8.0.10 (8.0.1024.46610), X64 RyuJIT AVX2
  DefaultJob : .NET 8.0.10 (8.0.1024.46610), X64 RyuJIT AVX2


| Method | Mean     | Error    | StdDev   | Gen0       | Allocated |
|------- |---------:|---------:|---------:|-----------:|----------:|
| Test   | 42.28 ms | 0.596 ms | 0.557 ms | 66750.0000 | 999.45 MB |

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I've got what you mean (confused by outdated snipped), pushed a change

? rentedBuffer.AsSpan()
: stackalloc byte[maxOnStack];

buffer = buffer[..extendedLength];

data.AsSpan().CopyTo(buffer);
// buffer may contain arbitrary data, setting zeros in 'extension' segment
buffer[data.Length..].Clear();

var hash = _innerHasher.Hash(buffer);

if (rentedBuffer is not null)
ArrayPool<byte>.Shared.Return(rentedBuffer);

return hash;
}

private bool IsGrainTypeName(byte[] data)
{
// at least 1 byte per char
if (data.Length < _grainType.Length)
return false;

var grainTypeByteCount = Encoding.UTF8.GetByteCount(_grainType);
if (grainTypeByteCount != data.Length)
return false;

const int maxOnStack = 256;
var rentedBuffer = grainTypeByteCount > maxOnStack
? ArrayPool<byte>.Shared.Rent(grainTypeByteCount)
: null;

// assuming code below never throws, so calling ArrayPool.Return without try/finally block for JIT optimization

var buffer = rentedBuffer is not null
? rentedBuffer.AsSpan()
: stackalloc byte[maxOnStack];

buffer = buffer[..grainTypeByteCount];

var bytesWritten = Encoding.UTF8.GetBytes(_grainType, buffer);
var isGrainType = buffer[..bytesWritten].SequenceEqual(data);
if (rentedBuffer is not null)
ArrayPool<byte>.Shared.Return(rentedBuffer);

return isGrainType;
}
}
}