Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add metrics with correct names for Resource Monitoring #5341

Merged
merged 39 commits into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
f959973
Rework metrics
evgenyfedorov2 Jul 25, 2024
831e502
Use nameof()
evgenyfedorov2 Jul 30, 2024
c8367dc
update
evgenyfedorov2 Jul 31, 2024
06bd487
Merge branch 'main' into users/evgenyfedorov2/rework_metrics_rm
evgenyfedorov2 Jul 31, 2024
9d852f8
Update
evgenyfedorov2 Aug 1, 2024
a498ed3
small optimization
evgenyfedorov2 Aug 1, 2024
a644626
fix test
evgenyfedorov2 Aug 2, 2024
bd3d68c
Merge branch 'main' into users/evgenyfedorov2/rework_metrics_rm
evgenyfedorov2 Aug 5, 2024
728ac12
update comment
evgenyfedorov2 Aug 5, 2024
fd5bb31
Fix more comments
evgenyfedorov2 Aug 5, 2024
b9b51a1
Update comments
evgenyfedorov2 Aug 5, 2024
b92463a
Move memoryUsedPercentage calculation to Calculator
evgenyfedorov2 Aug 5, 2024
b346c5d
Rename variables
evgenyfedorov2 Aug 5, 2024
b9bec77
Rename internal methods
evgenyfedorov2 Aug 5, 2024
e5ad793
Rename private variables
evgenyfedorov2 Aug 5, 2024
c8b2960
Rename private variables
evgenyfedorov2 Aug 5, 2024
4ed789d
Rename private var
evgenyfedorov2 Aug 5, 2024
fa85475
revert
evgenyfedorov2 Aug 5, 2024
3c0698c
WIP
evgenyfedorov2 Aug 5, 2024
9ca2c35
Update
evgenyfedorov2 Aug 5, 2024
5d54584
.
evgenyfedorov2 Aug 5, 2024
80b0496
Revert "."
evgenyfedorov2 Aug 6, 2024
d13cbaf
working version
evgenyfedorov2 Aug 6, 2024
cf8cd73
Update
evgenyfedorov2 Aug 6, 2024
4f4ffd6
fix tests
evgenyfedorov2 Aug 6, 2024
44d935f
Improve tests
evgenyfedorov2 Aug 6, 2024
c582bb2
remove unused metric
evgenyfedorov2 Aug 6, 2024
bbe1d94
extract test code to methods
evgenyfedorov2 Aug 6, 2024
67843fb
Merge branch 'main' into users/evgenyfedorov2/rework_metrics_rm
evgenyfedorov2 Aug 6, 2024
1a1efc2
Add UseContainerMetricNames switch
evgenyfedorov2 Aug 8, 2024
e46ed8c
Linux snapshot provider to report CPU utilization relative to request…
evgenyfedorov2 Aug 8, 2024
a60aca4
Merge branch 'main' into users/evgenyfedorov2/rework_metrics_rm
evgenyfedorov2 Aug 9, 2024
5e56f01
Emit all metrics
evgenyfedorov2 Aug 9, 2024
f5423a7
.
evgenyfedorov2 Aug 9, 2024
55350e4
Update
evgenyfedorov2 Aug 9, 2024
19ace1c
Use explicit types instead of var
evgenyfedorov2 Aug 12, 2024
932727b
Sort const in the order
evgenyfedorov2 Aug 12, 2024
64465d1
Change xml comments
evgenyfedorov2 Aug 12, 2024
bc5cd17
Update xml comments
evgenyfedorov2 Aug 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public static ResourceUtilization CalculateUtilization(in Snapshot first, in Sna
long runtimeTickDelta = second.TotalTimeSinceStart.Ticks - first.TotalTimeSinceStart.Ticks;

// Compute the total number of ticks available on the machine during that interval
double totalSystemTicks = runtimeTickDelta * systemResources.GuaranteedCpuUnits;
evgenyfedorov2 marked this conversation as resolved.
Show resolved Hide resolved
double totalSystemTicks = runtimeTickDelta;

// fudge to avoid divide by zero
if (totalSystemTicks <= 0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ private static bool TryGetCpuUnitsFromCgroups(IFileSystem fileSystem, out float
/// <summary>
/// In cgroup v1 the CPU shares is used to determine the CPU allocation.
/// in cgroup v2 the CPU weight is used to determine the CPU allocation.
/// To calculete CPU request in cgroup v2 we need to read the CPU weight and convert it to CPU shares.
/// To calculate CPU request in cgroup v2 we need to read the CPU weight and convert it to CPU shares.
/// But for cgroup v1 we can read the CPU shares directly from the file.
/// 1024 equals 1 CPU core.
/// In cgroup v1 on some systems the location of the CPU shares file is different.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,13 @@ internal sealed class LinuxUtilizationProvider : ISnapshotProvider
private readonly object _cpuLocker = new();
private readonly object _memoryLocker = new();
private readonly ILinuxUtilizationParser _parser;
private readonly ulong _totalMemoryInBytes;
private readonly ulong _memoryLimit;
private readonly TimeSpan _cpuRefreshInterval;
private readonly TimeSpan _memoryRefreshInterval;
private readonly TimeProvider _timeProvider;
private readonly double _scale;
private readonly double _scaleForTrackerApi;
private readonly double _scaleRelativeToCpuLimit;
private readonly double _scaleRelativeToCpuRequest;
private readonly double _scaleRelativeToCpuRequestForTrackerApi;

private DateTimeOffset _refreshAfterCpu;
private DateTimeOffset _refreshAfterMemory;
Expand All @@ -37,73 +38,73 @@ public LinuxUtilizationProvider(IOptions<ResourceMonitoringOptions> options, ILi
{
_parser = parser;
_timeProvider = timeProvider ?? TimeProvider.System;
var now = _timeProvider.GetUtcNow();
DateTimeOffset now = _timeProvider.GetUtcNow();
_cpuRefreshInterval = options.Value.CpuConsumptionRefreshInterval;
_memoryRefreshInterval = options.Value.MemoryConsumptionRefreshInterval;
_refreshAfterCpu = now;
_refreshAfterMemory = now;
_totalMemoryInBytes = _parser.GetAvailableMemoryInBytes();
_memoryLimit = _parser.GetAvailableMemoryInBytes();
_previousHostCpuTime = _parser.GetHostCpuUsageInNanoseconds();
_previousCgroupCpuTime = _parser.GetCgroupCpuUsageInNanoseconds();

var hostMemory = _parser.GetHostAvailableMemory();
var hostCpus = _parser.GetHostCpuCount();
var availableCpus = _parser.GetCgroupLimitedCpus();
var cpuGuaranteedRequest = _parser.GetCgroupRequestCpu();
_scale = hostCpus / availableCpus;
_scaleForTrackerApi = hostCpus / availableCpus;
float hostCpus = _parser.GetHostCpuCount();
float cpuLimit = _parser.GetCgroupLimitedCpus();
float cpuRequest = _parser.GetCgroupRequestCpu();
_scaleRelativeToCpuLimit = hostCpus / cpuLimit;
_scaleRelativeToCpuRequest = hostCpus / cpuRequest;
_scaleRelativeToCpuRequestForTrackerApi = hostCpus; // the division by cpuRequest is performed later on in the ResourceUtilization class

#pragma warning disable CA2000 // Dispose objects before losing scope
// We don't dispose the meter because IMeterFactory handles that
// An issue on analyzer side: https://github.com/dotnet/roslyn-analyzers/issues/6912
// Related documentation: https://github.com/dotnet/docs/pull/37170
var meter = meterFactory.Create("Microsoft.Extensions.Diagnostics.ResourceMonitoring");
var meter = meterFactory.Create(nameof(Microsoft.Extensions.Diagnostics.ResourceMonitoring));
#pragma warning restore CA2000 // Dispose objects before losing scope

_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.CpuUtilization, observeValue: CpuUtilization, unit: "1");
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.MemoryUtilization, observeValue: MemoryUtilization, unit: "1");
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerCpuLimitUtilization, observeValue: () => CpuUtilization() * _scaleRelativeToCpuLimit, unit: "1");
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerMemoryLimitUtilization, observeValue: MemoryUtilization, unit: "1");
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerCpuRequestUtilization, observeValue: () => CpuUtilization() * _scaleRelativeToCpuRequest, unit: "1");

// cpuGuaranteedRequest is a CPU request for pod, for host its 1 core
// available CPUs is a CPU limit for a pod or for a host.
// _totalMemoryInBytes - Resource Memory Limit (in k8s terms)
// _totalMemoryInBytes - To keep the contract, this parameter will get the Host available memory
Resources = new SystemResources(cpuGuaranteedRequest, availableCpus, _totalMemoryInBytes, _totalMemoryInBytes);
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ProcessCpuUtilization, observeValue: () => CpuUtilization() * _scaleRelativeToCpuRequest, unit: "1");
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ProcessMemoryUtilization, observeValue: MemoryUtilization, unit: "1");

// cpuRequest is a CPU request (aka guaranteed number of CPU units) for pod, for host its 1 core
// cpuLimit is a CPU limit (aka max CPU units available) for a pod or for a host.
// _memoryLimit - Resource Memory Limit (in k8s terms)
// _memoryLimit - To keep the contract, this parameter will get the Host available memory
Resources = new SystemResources(cpuRequest, cpuLimit, _memoryLimit, _memoryLimit);
}

public double CpuUtilization()
{
var now = _timeProvider.GetUtcNow();
bool needUpdate = false;
DateTimeOffset now = _timeProvider.GetUtcNow();

lock (_cpuLocker)
{
if (now >= _refreshAfterCpu)
if (now < _refreshAfterCpu)
{
needUpdate = true;
return _cpuPercentage;
}
}

if (needUpdate)
{
var hostCpuTime = _parser.GetHostCpuUsageInNanoseconds();
var cgroupCpuTime = _parser.GetCgroupCpuUsageInNanoseconds();
long hostCpuTime = _parser.GetHostCpuUsageInNanoseconds();
long cgroupCpuTime = _parser.GetCgroupCpuUsageInNanoseconds();

lock (_cpuLocker)
lock (_cpuLocker)
{
if (now >= _refreshAfterCpu)
{
if (now >= _refreshAfterCpu)
double deltaHost = hostCpuTime - _previousHostCpuTime;
double deltaCgroup = cgroupCpuTime - _previousCgroupCpuTime;

if (deltaHost > 0 && deltaCgroup > 0)
{
var deltaHost = hostCpuTime - _previousHostCpuTime;
var deltaCgroup = cgroupCpuTime - _previousCgroupCpuTime;

if (deltaHost > 0 && deltaCgroup > 0)
{
var percentage = Math.Min(One, deltaCgroup / deltaHost * _scale);

_cpuPercentage = percentage;
_refreshAfterCpu = now.Add(_cpuRefreshInterval);
_previousCgroupCpuTime = cgroupCpuTime;
_previousHostCpuTime = hostCpuTime;
}
double percentage = Math.Min(One, deltaCgroup / deltaHost);

_cpuPercentage = percentage;
_refreshAfterCpu = now.Add(_cpuRefreshInterval);
_previousCgroupCpuTime = cgroupCpuTime;
_previousHostCpuTime = hostCpuTime;
}
}
}
Expand All @@ -113,30 +114,26 @@ public double CpuUtilization()

public double MemoryUtilization()
{
var now = _timeProvider.GetUtcNow();
bool needUpdate = false;
DateTimeOffset now = _timeProvider.GetUtcNow();

lock (_memoryLocker)
{
if (now >= _refreshAfterMemory)
if (now < _refreshAfterMemory)
{
needUpdate = true;
return _memoryPercentage;
}
}

if (needUpdate)
{
var memoryUsed = _parser.GetMemoryUsageInBytes();
ulong memoryUsed = _parser.GetMemoryUsageInBytes();

lock (_memoryLocker)
lock (_memoryLocker)
{
if (now >= _refreshAfterMemory)
{
if (now >= _refreshAfterMemory)
{
var memoryPercentage = Math.Min(One, (double)memoryUsed / _totalMemoryInBytes);
double memoryPercentage = Math.Min(One, (double)memoryUsed / _memoryLimit);

_memoryPercentage = memoryPercentage;
_refreshAfterMemory = now.Add(_memoryRefreshInterval);
}
_memoryPercentage = memoryPercentage;
_refreshAfterMemory = now.Add(_memoryRefreshInterval);
}
}

Expand All @@ -150,14 +147,14 @@ public double MemoryUtilization()
/// </remarks>
public Snapshot GetSnapshot()
{
var hostTime = _parser.GetHostCpuUsageInNanoseconds();
var cgroupTime = _parser.GetCgroupCpuUsageInNanoseconds();
var memoryUsed = _parser.GetMemoryUsageInBytes();
long hostTime = _parser.GetHostCpuUsageInNanoseconds();
long cgroupTime = _parser.GetCgroupCpuUsageInNanoseconds();
ulong memoryUsed = _parser.GetMemoryUsageInBytes();

return new Snapshot(
totalTimeSinceStart: TimeSpan.FromTicks(hostTime / Hundred),
kernelTimeSinceStart: TimeSpan.Zero,
userTimeSinceStart: TimeSpan.FromTicks((long)(cgroupTime / Hundred * _scaleForTrackerApi)),
userTimeSinceStart: TimeSpan.FromTicks((long)(cgroupTime / Hundred * _scaleRelativeToCpuRequestForTrackerApi)),
memoryUsageInBytes: memoryUsed);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ private static ResourceMonitorBuilder AddWindowsProvider(this ResourceMonitorBui
builder.PickWindowsSnapshotProvider();

_ = builder.Services
.AddActivatedSingleton<WindowsCounters>();
.AddActivatedSingleton<WindowsNetworkMetrics>();

_ = builder.Services
.AddActivatedSingleton<TcpTableInfo>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,16 @@ public readonly struct ResourceUtilization
/// <param name="systemResources">CPU and memory limits.</param>
public ResourceUtilization(double cpuUsedPercentage, ulong memoryUsedInBytes, SystemResources systemResources)
{
CpuUsedPercentage = Throw.IfLessThan(cpuUsedPercentage, 0.0);
double guaranteedCpuUnits = systemResources.GuaranteedCpuUnits;
if (guaranteedCpuUnits <= 0)
{
guaranteedCpuUnits = 1;
}

CpuUsedPercentage = Throw.IfLessThan(cpuUsedPercentage / guaranteedCpuUnits, 0.0);
MemoryUsedInBytes = Throw.IfLessThan(memoryUsedInBytes, 0);
SystemResources = systemResources;
MemoryUsedPercentage = Math.Min(Hundred, (double)MemoryUsedInBytes / SystemResources.GuaranteedMemoryInBytes * Hundred);
MemoryUsedPercentage = Math.Min(Hundred, (double)MemoryUsedInBytes / systemResources.GuaranteedMemoryInBytes * Hundred);
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;

namespace Microsoft.Extensions.Diagnostics.ResourceMonitoring;

/// <summary>
Expand All @@ -13,18 +15,42 @@ namespace Microsoft.Extensions.Diagnostics.ResourceMonitoring;
internal static class ResourceUtilizationInstruments
evgenyfedorov2 marked this conversation as resolved.
Show resolved Hide resolved
{
/// <summary>
/// Gets the CPU consumption of the running application in range <c>[0, 1]</c>.
/// The name of an instrument to retrieve CPU limit consumption of all processes running inside a container or control group in range <c>[0, 1]</c>.
/// </summary>
/// <remarks>
/// The type of an instrument is <see cref="System.Diagnostics.Metrics.ObservableGauge{T}"/>.
/// </remarks>
public const string ContainerCpuLimitUtilization = "container.cpu.limit.utilization";

/// <summary>
/// The name of an instrument to retrieve CPU request consumption of all processes running inside a container or control group in range <c>[0, 1]</c>.
/// </summary>
/// <remarks>
/// The type of an instrument is <see cref="System.Diagnostics.Metrics.ObservableGauge{T}"/>.
/// </remarks>
public const string ContainerCpuRequestUtilization = "container.cpu.request.utilization";

/// <summary>
/// The name of an instrument to retrieve memory limit consumption of all processes running inside a container or control group in range <c>[0, 1]</c>.
/// </summary>
/// <remarks>
/// The type of an instrument is <see cref="System.Diagnostics.Metrics.ObservableGauge{T}"/>.
/// </remarks>
public const string ContainerMemoryLimitUtilization = "container.memory.limit.utilization";

/// <summary>
/// The name of an instrument to retrieve CPU consumption share of the running process in range <c>[0, 1]</c>.
/// </summary>
/// <remarks>
/// The type of an instrument is <see cref="System.Diagnostics.Metrics.ObservableGauge{T}"/>.
/// </remarks>
public const string CpuUtilization = "process.cpu.utilization";
public const string ProcessCpuUtilization = "process.cpu.utilization";

/// <summary>
/// Gets the memory consumption of the running application in range <c>[0, 1]</c>.
/// The name of an instrument to retrieve memory consumption share of the running process in range <c>[0, 1]</c>.
/// </summary>
/// <remarks>
/// The type of an instrument is <see cref="System.Diagnostics.Metrics.ObservableGauge{T}"/>.
/// </remarks>
public const string MemoryUtilization = "dotnet.process.memory.virtual.utilization";
public const string ProcessMemoryUtilization = "dotnet.process.memory.virtual.utilization";
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,14 @@ namespace Microsoft.Extensions.Diagnostics.ResourceMonitoring.Windows.Interop;
internal interface IProcessInfo
{
/// <summary>
/// Retrieve the memory usage of a system.
/// Retrieves the amount of memory, in bytes, used by the current process.
/// </summary>
/// <returns>Memory usage amount in bytes.</returns>
/// <returns>The number of bytes allocated by the current process.</returns>
ulong GetCurrentProcessMemoryUsage();

/// <summary>
/// Retrieves the amount of memory, in bytes, used by the system.
/// </summary>
/// <returns>The number of bytes allocated by the system.</returns>
ulong GetMemoryUsage();
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,10 @@ public ulong GetMemoryUsage()

return memoryUsage;
}

/// <summary>
/// Reads the working set size, in bytes, of the current process.
/// </summary>
/// <returns>
/// The value of <see cref="Process.WorkingSet64"/> for the current process, converted to <see cref="ulong"/>.
/// </returns>
public ulong GetCurrentProcessMemoryUsage()
{
    // The Process object is disposed as soon as the value has been read.
    using (Process self = Process.GetCurrentProcess())
    {
        long workingSetBytes = self.WorkingSet64;
        return (ulong)workingSetBytes;
    }
}
}
Loading