Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Redesign benchmarks for culture-specific string operations #892

Merged
merged 7 commits into from
Sep 23, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 0 additions & 136 deletions src/benchmarks/micro/corefx/System.Globalization/Perf.CompareInfo.cs

This file was deleted.

70 changes: 70 additions & 0 deletions src/benchmarks/micro/corefx/System.Globalization/StringEquality.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using BenchmarkDotNet.Attributes;
using MicroBenchmarks;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Linq;

namespace System.Globalization.Tests
{
[BenchmarkCategory(Categories.CoreFX, Categories.CoreCLR)]
public class StringEquality
{
    // Test strings built once in Setup():
    //   _value           - the reference text
    //   _same            - a distinct string instance with identical content
    //   _sameUpper       - _same upper-cased with the culture under test
    //   _diffAtFirstChar - the reference text with only its first char changed
    private string _value, _same, _sameUpper, _diffAtFirstChar;

    /// <summary>
    /// Produces the (culture, compare options) combinations that are assigned to <see cref="Options"/>.
    /// </summary>
    /// <returns>A lazily evaluated sequence of culture / <see cref="CompareOptions"/> pairs.</returns>
    public static IEnumerable<(CultureInfo CultureInfo, CompareOptions CompareOptions)> GetOptions()
    {
        // Ordinal and OrdinalIgnoreCase use a single execution path for all cultures, so we test them only for "en-US"
        yield return (new CultureInfo("en-US"), CompareOptions.Ordinal);
        yield return (new CultureInfo("en-US"), CompareOptions.OrdinalIgnoreCase);

        // the most popular culture:
        yield return (new CultureInfo("en-US"), CompareOptions.None);
        yield return (new CultureInfo("en-US"), CompareOptions.IgnoreCase);

        // two very common use cases:
        yield return (CultureInfo.InvariantCulture, CompareOptions.None);
        yield return (CultureInfo.InvariantCulture, CompareOptions.IgnoreCase);

        // IgnoreSymbols and IgnoreNonSpace are rarely used, which is why we test them only for a single culture
        yield return (new CultureInfo("en-US"), CompareOptions.IgnoreSymbols);
        yield return (new CultureInfo("en-US"), CompareOptions.IgnoreNonSpace);

        // Polish language has a lot of special characters, for example 'ch', 'rz', 'sz', 'cz' use two chars to express one ;)
        // it also has a lot of characters with accent so we use it as an example of a "complex" language
        yield return (new CultureInfo("pl-PL"), CompareOptions.None);
    }

    /// <summary>The culture and compare options used by every benchmark in this class.</summary>
    [ParamsSource(nameof(GetOptions))]
    public (CultureInfo CultureInfo, CompareOptions CompareOptions) Options;

    /// <summary>Number of characters taken from the test file to build the compared strings.</summary>
    [Params(1024)] // single execution path = single test case
    public int Count;

    /// <summary>
    /// Reads the test text and builds the four strings compared by the benchmarks.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        // we are using part of Alice's Adventures in Wonderland text as test data
        char[] characters = File.ReadAllText(CompressedFile.GetFilePath("alice29.txt")).Take(Count).ToArray();
        _value = new string(characters);
        _same = new string(characters); // same content, different instance

        // Upper-case with the culture being benchmarked rather than the machine's current culture,
        // so the test data does not depend on the locale of the host running the benchmark.
        _sameUpper = _same.ToUpper(Options.CultureInfo);

        char[] copy = characters.ToArray();
        copy[0] = (char)(copy[0] + 1); // make the very first character differ
        _diffAtFirstChar = new string(copy);
    }

    /// <summary>Compares two distinct string instances that hold identical content.</summary>
    [Benchmark] // the most work to do: the strings have the same content, but don't point to the same memory
    public int Compare_Same() => Options.CultureInfo.CompareInfo.Compare(_value, _same, Options.CompareOptions);

    /// <summary>Compares the reference text with its upper-cased copy.</summary>
    [Benchmark] // the most work to do for IgnoreCase: every char needs to be compared and uppercased
    public int Compare_Same_Upper() => Options.CultureInfo.CompareInfo.Compare(_value, _sameUpper, Options.CompareOptions);

    /// <summary>Compares the reference text with a copy whose first character differs.</summary>
    [Benchmark] // this should return quickly
    public int Compare_DifferentFirstChar() => Options.CultureInfo.CompareInfo.Compare(_value, _diffAtFirstChar, Options.CompareOptions);
}
}
46 changes: 46 additions & 0 deletions src/benchmarks/micro/corefx/System.Globalization/StringHash.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using BenchmarkDotNet.Attributes;
using MicroBenchmarks;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Linq;

namespace System.Globalization.Tests
{
[BenchmarkCategory(Categories.CoreFX, Categories.CoreCLR)]
public class StringHash
{
    /// <summary>
    /// Produces the (culture, compare options) combinations that are assigned to <see cref="Options"/>.
    /// </summary>
    /// <returns>A lazily evaluated sequence of culture / <see cref="CompareOptions"/> pairs.</returns>
    public static IEnumerable<(CultureInfo CultureInfo, CompareOptions CompareOptions)> GetOptions()
    {
        // Ordinal and OrdinalIgnoreCase use a single execution path for all cultures, so we test them only for "en-US"
        yield return (new CultureInfo("en-US"), CompareOptions.Ordinal);
        yield return (new CultureInfo("en-US"), CompareOptions.OrdinalIgnoreCase);

        yield return (new CultureInfo("en-US"), CompareOptions.None);
        yield return (new CultureInfo("en-US"), CompareOptions.IgnoreCase);

        yield return (CultureInfo.InvariantCulture, CompareOptions.None);
        yield return (CultureInfo.InvariantCulture, CompareOptions.IgnoreCase);
    }

    /// <summary>The culture and compare options used to compute the hash code.</summary>
    [ParamsSource(nameof(GetOptions))]
    public (CultureInfo CultureInfo, CompareOptions CompareOptions) Options;

    /// <summary>Number of characters taken from the test file to build the hashed string.</summary>
    [Params(
        128, // small input that fits into stack-allocated array https://github.com/dotnet/coreclr/blob/c6675ef2e22474d6222d054ae3d022c01eda9b6d/src/System.Private.CoreLib/shared/System/Globalization/CompareInfo.Unix.cs#L824
        1024 * 128)] // medium size input that fits into an array rented from ArrayPool.Shared without allocation
    public int Count;

    private string _value;

    /// <summary>Reads the test text and keeps its first <see cref="Count"/> characters as the string to hash.</summary>
    [GlobalSetup] // we are using part of Alice's Adventures in Wonderland text as test data
    public void Setup() => _value = new string(File.ReadAllText(CompressedFile.GetFilePath("alice29.txt")).Take(Count).ToArray());

    /// <summary>
    /// Computes the hash code of the test string for the configured culture and compare options.
    /// </summary>
    /// <returns>
    /// The computed hash code. It is returned instead of discarded so the benchmark harness consumes
    /// the result and the measured call cannot be treated as dead code.
    /// </returns>
    [Benchmark]
    public new int GetHashCode() => Options.CultureInfo.CompareInfo.GetHashCode(_value, Options.CompareOptions);
}
}
Loading