diff --git a/.github/scripts/run-benchmark-server.sh b/.github/scripts/run-benchmark-server.sh
index d329173fd..982356bde 100644
--- a/.github/scripts/run-benchmark-server.sh
+++ b/.github/scripts/run-benchmark-server.sh
@@ -9,12 +9,16 @@ set -euo pipefail
 
 function usage {
     echo "usage: $(basename $0) [options]"
+    echo "Required:"
+    echo "  --args          string      Arguments to pass when running the built binary (default: \"\")"
     echo "Options:"
     echo "  --help                      Show this help message"
 }
 
 while [ $# -gt 0 ]; do
   case $1 in
+    # required
+    --args) _ARGS=$2; shift 2; ;;
     # optional
     --help) usage; exit 1; ;;
     *) shift ;;
@@ -29,6 +33,7 @@ function print() {
 # parameter setup
 repo="MagicOnion"
 build_config="Release"
+args="${_ARGS:=""}"
 build_csproj="perf/BenchmarkApp/PerformanceTest.Server/PerformanceTest.Server.csproj"
 env_settings=""
 
@@ -105,10 +110,11 @@ pushd "$clone_path"
 popd
 
 # run dotnet app
-print "# Run $full_process_path"
+print "# Run $full_process_path $args"
 pushd "$output_dir"
-  # run background https://stackoverflow.com/questions/29142/getting-ssh-to-execute-a-command-in-the-background-on-target-machine
-  nohup "./$binary_name" > "${stdoutfile}" 2> "${stderrfile}" < /dev/null &
+  # use nohup to run in the background: https://stackoverflow.com/questions/29142/getting-ssh-to-execute-a-command-in-the-background-on-target-machine
+  # shellcheck disable=SC2086
+  nohup "./$binary_name" $args > "${stdoutfile}" 2> "${stderrfile}" < /dev/null &
 
   # wait 10s will be enough to start the server or not
   sleep 10s
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 6f82c007a..4f59b3d7b 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -28,21 +28,25 @@ jobs:
       matrix:
         include:
           # 1
-          - tags: "legend:h2c-linux,streams:1"
+          - tags: "legend:messagepack-h2c-linux,streams:1"
             channels: 28
             streams: 1
+            serialization: messagepack
           # 1x1
-          - tags: "legend:h2c-linux,streams:1x1"
+          - tags: "legend:messagepack-h2c-linux,streams:1x1"
             channels: 1
             streams: 1
+            serialization: messagepack
           # 70
-          - tags: "legend:h2c-linux,streams:70"
+          - tags: "legend:messagepack-h2c-linux,streams:70"
             channels: 28
             streams: 70
+            serialization: messagepack
           # 70x1
-          - tags: "legend:h2c-linux,streams:70x1"
+          - tags: "legend:messagepack-h2c-linux,streams:70x1"
             channels: 1
             streams: 70
+            serialization: messagepack
     uses: Cysharp/Actions/.github/workflows/benchmark.yaml@main
     with:
       dotnet-version: "8.0"
@@ -50,7 +54,7 @@ jobs:
       benchmark-name: "magiconion-${{ github.event.issue.number || (inputs.reuse && 'wf' || github.run_number) }}"
       benchmark-timeout: 20 # 10min (env prepare) + 5min (clone & benchmark) + 5min (spare)
       client-benchmark-script-path: ".github/scripts/run-benchmark-client.sh"
-      client-benchmark-script-args: "--args \"-u http://${BENCHMARK_SERVER_NAME}:5000 -s CI --channels ${{ matrix.channels }} --streams ${{ matrix.streams }} --validate true --tags '${{ matrix.tags }}'\""
+      client-benchmark-script-args: "--args \"-u http://${BENCHMARK_SERVER_NAME}:5000 -s CI --channels ${{ matrix.channels }} --streams ${{ matrix.streams }} --serialization ${{ matrix.serialization }} --validate true --tags ${{ matrix.tags }}\""
       server-benchmark-script-path: ".github/scripts/run-benchmark-server.sh"
-      server-benchmark-script-args: ""
+      server-benchmark-script-args: "--args \"--validate true --tags ${{ matrix.tags }}\""
     secrets: inherit
diff --git a/perf/BenchmarkApp/PerformanceTest.Client/Program.cs b/perf/BenchmarkApp/PerformanceTest.Client/Program.cs
index 251e3bef6..47a12cb87 100644
--- a/perf/BenchmarkApp/PerformanceTest.Client/Program.cs
+++ b/perf/BenchmarkApp/PerformanceTest.Client/Program.cs
@@ -278,7 +278,7 @@ static class DatadogMetricsRecorderExtensions
     public static async Task PutClientBenchmarkMetricsAsync(this DatadogMetricsRecorder recorder, ScenarioType scenario, ApplicationInformation applicationInfo, SerializationType serialization, PerformanceResult result)
     {
         var tags = MetricsTagCache.Get((recorder.TagBranch, recorder.TagLegend, recorder.TagStreams, scenario, applicationInfo, serialization), static x => [
-            $"legend:{x.scenario.ToString().ToLower()}-{x.serialization}-{x.TagLegend}{x.TagStreams}",
+            $"legend:{x.scenario.ToString().ToLower()}-{x.TagLegend}{x.TagStreams}",
             $"branch:{x.TagBranch}",
             $"streams:{x.TagStreams}",
             $"process_arch:{x.applicationInfo.ProcessArchitecture}",
diff --git a/perf/BenchmarkApp/PerformanceTest.Server/ProfileService.cs b/perf/BenchmarkApp/PerformanceTest.Server/ProfileService.cs
new file mode 100644
index 000000000..cc904ec8d
--- /dev/null
+++ b/perf/BenchmarkApp/PerformanceTest.Server/ProfileService.cs
@@ -0,0 +1,59 @@
+using PerformanceTest.Shared;
+using PerformanceTest.Shared.Reporting;
+
+namespace PerformanceTest.Server;
+
+class ProfileService : BackgroundService // Hosted service that periodically sends hardware usage metrics to Datadog.
+{
+    private readonly DatadogMetricsRecorder datadog;
+    private readonly HardwarePerformanceReporter hardwarePerformanceReporter;
+    private readonly PeriodicTimer timer;
+
+    public ProfileService(TimeProvider timeProvider, IConfiguration configuration)
+    {
+        var tagString = configuration.GetValue<string>("Tags") ?? ""; // a missing "Tags" value falls back to an empty string
+        var validate = configuration.GetValue<bool?>("Validate") ?? false; // a missing "Validate" value falls back to false
+        datadog = DatadogMetricsRecorder.Create(tagString, validate);
+        hardwarePerformanceReporter = new HardwarePerformanceReporter();
+        timer = new PeriodicTimer(TimeSpan.FromSeconds(10), timeProvider); // one report every 10 seconds
+    }
+
+    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
+    {
+        hardwarePerformanceReporter.Start();
+        while (await timer.WaitForNextTickAsync(stoppingToken)) // on each tick: take the reporter's result and send it
+        {
+            var result = hardwarePerformanceReporter.GetResult();
+            await datadog.PutServerBenchmarkMetricsAsync(ApplicationInformation.Current, result);
+        }
+    }
+}
+
+static class DatadogMetricsRecorderExtensions
+{
+    /// <summary>
+    /// Sends the server's CPU and memory usage gauges to Datadog and waits until the sends complete.
+    /// </summary>
+    /// <param name="recorder">Recorder that supplies the branch/legend/streams tags and performs the send.</param>
+    /// <param name="applicationInfo">Source of the process_arch and process_count tag values.</param>
+    /// <param name="result">Hardware usage result whose max/avg CPU and memory values are sent.</param>
+    public static async Task PutServerBenchmarkMetricsAsync(this DatadogMetricsRecorder recorder, ApplicationInformation applicationInfo, HardwarePerformanceResult result)
+    {
+        var tags = MetricsTagCache.Get((recorder.TagBranch, recorder.TagLegend, recorder.TagStreams, applicationInfo), static x => [
+            $"legend:{x.TagLegend}{x.TagStreams}",
+            $"branch:{x.TagBranch}",
+            $"streams:{x.TagStreams}",
+            $"process_arch:{x.applicationInfo.ProcessArchitecture}",
+            $"process_count:{x.applicationInfo.ProcessorCount}",
+        ]);
+
+        // Queue every send first instead of awaiting each one individually.
+        recorder.Record(recorder.SendAsync("benchmark.magiconion.server.cpu_usage_max", result.MaxCpuUsage, DatadogMetricsType.Gauge, tags, "percent"));
+        recorder.Record(recorder.SendAsync("benchmark.magiconion.server.cpu_usage_avg", result.AvgCpuUsage, DatadogMetricsType.Gauge, tags, "percent"));
+        recorder.Record(recorder.SendAsync("benchmark.magiconion.server.memory_usage_max", result.MaxMemoryUsageMB, DatadogMetricsType.Gauge, tags, "megabyte"));
+        recorder.Record(recorder.SendAsync("benchmark.magiconion.server.memory_usage_avg", result.AvgMemoryUsageMB, DatadogMetricsType.Gauge, tags, "megabyte"));
+
+        // Wait until all queued sends complete.
+        await recorder.WaitSaveAsync();
+    }
+}
diff --git a/perf/BenchmarkApp/PerformanceTest.Server/Program.cs b/perf/BenchmarkApp/PerformanceTest.Server/Program.cs
index 83624a50c..0661f8b49 100644
--- a/perf/BenchmarkApp/PerformanceTest.Server/Program.cs
+++ b/perf/BenchmarkApp/PerformanceTest.Server/Program.cs
@@ -9,6 +9,11 @@
 }
 
 var builder = WebApplication.CreateBuilder(args);
+builder.Configuration.AddCommandLine(args, new Dictionary<string, string>()
+{
+    { "--tags", "Tags" },
+    { "--validate", "Validate" },
+});
 
 builder.Logging.ClearProviders();
 
@@ -18,7 +23,9 @@
 // Add services to the container.
 builder.Services.AddGrpc();
 builder.Services.AddMagicOnion();
+builder.Services.AddSingleton(TimeProvider.System);
 builder.Services.AddHostedService<StartupService>();
+builder.Services.AddHostedService<ProfileService>();
 
 var app = builder.Build();
 
diff --git a/perf/BenchmarkApp/PerformanceTest.Server/Properties/launchSettings.json b/perf/BenchmarkApp/PerformanceTest.Server/Properties/launchSettings.json
index 0d2e73873..64b94678c 100644
--- a/perf/BenchmarkApp/PerformanceTest.Server/Properties/launchSettings.json
+++ b/perf/BenchmarkApp/PerformanceTest.Server/Properties/launchSettings.json
@@ -2,6 +2,7 @@
   "profiles": {
     "PerformanceTest.Server": {
       "commandName": "Project",
+      "commandLineArgs": "--tags legend:h2c-linux,streams:1",
       "dotnetRunMessages": true,
       "launchBrowser": false,
       "applicationUrl": "http://localhost:5000;https://localhost:5001",
diff --git a/perf/BenchmarkApp/PerformanceTest.Shared/Reporting/HardwarePerformanceReporter.cs b/perf/BenchmarkApp/PerformanceTest.Shared/Reporting/HardwarePerformanceReporter.cs
index 6a1e2aeae..a4d0d7e41 100644
--- a/perf/BenchmarkApp/PerformanceTest.Shared/Reporting/HardwarePerformanceReporter.cs
+++ b/perf/BenchmarkApp/PerformanceTest.Shared/Reporting/HardwarePerformanceReporter.cs
@@ -1,3 +1,4 @@
+using System.Collections.Concurrent;
 using System.Diagnostics;
 
 namespace PerformanceTest.Shared.Reporting;
@@ -7,8 +8,8 @@ public class HardwarePerformanceReporter
     private readonly TimeSpan samplingInterval;
     private readonly TimeProvider timeProvider;
     private readonly Process currentProcess;
-    private readonly List<double> cpuUsages;
-    private readonly List<double> memoryUsages;
+    private readonly ConcurrentBag<double> cpuUsages;
+    private readonly ConcurrentBag<double> memoryUsages;
     private CancellationTokenSource cancellationTokenSource;
     private bool running;
 
@@ -68,6 +69,7 @@ public HardwarePerformanceResult GetResult()
         var avgCpuUsage = cpuUsages.Count > 0 ? cpuUsages.Average() : 0d;
         var maxMemoryUsage = memoryUsages.Count > 0 ? memoryUsages.Max() / 1024 / 1024: 0d;
         var avgMemoryUsage = memoryUsages.Count > 0 ? memoryUsages.Average() / 1024 / 1024: 0d;
+
         cpuUsages.Clear();
         memoryUsages.Clear();