Skip to content

Commit

Permalink
Merge pull request collectd#4272 from octo/6/time_metrics
Browse files Browse the repository at this point in the history
[collectd 6] Scale time metrics to seconds per second using floating point counters.
  • Loading branch information
octo authored Feb 6, 2024
2 parents 50cbc6e + cd5352f commit 67633b8
Show file tree
Hide file tree
Showing 7 changed files with 470 additions and 178 deletions.
2 changes: 1 addition & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ libavltree_la_SOURCES = \
libcommon_la_SOURCES = \
src/utils/common/common.c \
src/utils/common/common.h
libcommon_la_LIBADD = libmetric.la libstrbuf.la $(COMMON_LIBS)
libcommon_la_LIBADD = libmetric.la libstrbuf.la -lm $(COMMON_LIBS)

libheap_la_SOURCES = \
src/utils/heap/heap.c \
Expand Down
49 changes: 24 additions & 25 deletions src/cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -132,16 +132,14 @@ static const char *cpu_state_names[STATE_MAX] = {
[STATE_WAIT] = "wait", [STATE_ACTIVE] = "active",
};

#define USAGE_UNAVAILABLE -1

typedef struct {
gauge_t rate;
bool has_rate;
value_to_rate_state_t conv;

/* count is a scaled counter, so that all states in sum increase by 1.0
* (i.e. one second of CPU time) per second. */
derive_t count;
fpcounter_t count;
bool has_count;
rate_to_value_state_t to_count;
} usage_state_t;
Expand Down Expand Up @@ -473,13 +471,13 @@ static void usage_finalize(usage_t *u) {
}

/* With cpu_rate available, calculate a counter for each state that is
* normalized to microseconds. I.e. all states of one CPU sum up to 1000000
* us per second. */
* normalized to seconds. I.e. all states of one CPU sum up to 1.0 second
* per second. */
for (state_t s = 0; s < STATE_MAX; s++) {
size_t index = (cpu * STATE_MAX) + s;
usage_state_t *us = u->states + index;

us->count = -1;
us->count = NAN;
if (!us->has_rate) {
/* Ensure that us->to_count is initialized. */
rate_to_value(&(value_t){0}, 0.0, &us->to_count, DS_TYPE_DERIVE,
Expand All @@ -489,10 +487,10 @@ static void usage_finalize(usage_t *u) {

gauge_t ratio = us->rate / cpu_rate;
value_t v = {0};
int status = rate_to_value(&v, 1000000.0 * ratio, &us->to_count,
DS_TYPE_DERIVE, u->time);
int status = rate_to_value(&v, ratio, &us->to_count,
METRIC_TYPE_FPCOUNTER, u->time);
if (status == 0) {
us->count = v.derive;
us->count = v.fpcounter;
us->has_count = true;
}

Expand All @@ -503,18 +501,19 @@ static void usage_finalize(usage_t *u) {
for (state_t s = 0; s < STATE_MAX; s++) {
usage_state_t *us = &u->global[s];

us->count = -1;
us->count = NAN;
if (!us->has_rate) {
/* Ensure that us->to_count is initialized. */
rate_to_value(&(value_t){0}, 0.0, &us->to_count, DS_TYPE_DERIVE, u->time);
rate_to_value(&(value_t){0}, 0.0, &us->to_count, METRIC_TYPE_FPCOUNTER,
u->time);
continue;
}

value_t v = {0};
int status = rate_to_value(&v, 1000000.0 * state_ratio[s], &us->to_count,
DS_TYPE_DERIVE, u->time);
int status = rate_to_value(&v, state_ratio[s], &us->to_count,
METRIC_TYPE_FPCOUNTER, u->time);
if (status == 0) {
us->count = v.derive;
us->count = v.fpcounter;
us->has_count = true;
}
}
Expand Down Expand Up @@ -555,7 +554,7 @@ static gauge_t usage_ratio(usage_t *u, size_t cpu, state_t state) {
return usage_rate(u, cpu, state) / global_rate;
}

static derive_t usage_count(usage_t *u, size_t cpu, state_t state) {
static fpcounter_t usage_count(usage_t *u, size_t cpu, state_t state) {
usage_finalize(u);

usage_state_t us;
Expand All @@ -564,12 +563,12 @@ static derive_t usage_count(usage_t *u, size_t cpu, state_t state) {
} else {
size_t index = (cpu * STATE_MAX) + state;
if (index >= u->states_num) {
return USAGE_UNAVAILABLE;
return NAN;
}
us = u->states[index];
}

return us.has_count ? us.count : USAGE_UNAVAILABLE;
return us.has_count ? us.count : NAN;
}

/* Commits the number of cores */
Expand Down Expand Up @@ -598,8 +597,8 @@ static void commit_cpu_usage(usage_t *u, size_t cpu_num) {
metric_family_t fam = {
.name = "system.cpu.time",
.help = "Microseconds each logical CPU spent in each state",
.unit = "us",
.type = METRIC_TYPE_COUNTER,
.unit = "s",
.type = METRIC_TYPE_FPCOUNTER,
};

metric_t m = {0};
Expand All @@ -611,18 +610,18 @@ static void commit_cpu_usage(usage_t *u, size_t cpu_num) {

if (report_by_state) {
for (state_t state = 0; state < STATE_ACTIVE; state++) {
derive_t usage = usage_count(u, cpu_num, state);
if (usage == USAGE_UNAVAILABLE) {
fpcounter_t usage = usage_count(u, cpu_num, state);
if (isnan(usage)) {
continue;
}
metric_family_append(&fam, label_state, cpu_state_names[state],
(value_t){.derive = usage}, &m);
(value_t){.fpcounter = usage}, &m);
}
} else {
derive_t usage = usage_count(u, cpu_num, STATE_ACTIVE);
if (usage != USAGE_UNAVAILABLE) {
fpcounter_t usage = usage_count(u, cpu_num, STATE_ACTIVE);
if (!isnan(usage)) {
metric_family_append(&fam, label_state, cpu_state_names[STATE_ACTIVE],
(value_t){.derive = usage}, &m);
(value_t){.fpcounter = usage}, &m);
}
}

Expand Down
40 changes: 20 additions & 20 deletions src/cpu_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ DEF_TEST(usage_ratio) {
return 0;
}

static bool expect_usage_count(derive_t want, derive_t got, size_t cpu,
static bool expect_usage_count(fpcounter_t want, fpcounter_t got, size_t cpu,
state_t state) {
char cpu_str[64] = "CPU_ALL";
if (cpu != SIZE_MAX) {
Expand All @@ -133,14 +133,14 @@ static bool expect_usage_count(derive_t want, derive_t got, size_t cpu,

bool ok = true;
char msg[1024] = {0};
snprintf(msg, sizeof(msg), "usage_count(cpu=%s, state=\"%s\") = %" PRId64,
cpu_str, cpu_state_names[state], got);
snprintf(msg, sizeof(msg), "usage_count(cpu=%s, state=\"%s\") = %g", cpu_str,
cpu_state_names[state], got);

derive_t diff = got - want;
if (diff < -1 || diff > 1) {
if (diff < -DBL_PRECISION || diff > DBL_PRECISION) {
snprintf(msg, sizeof(msg),
"usage_count(cpu=%s, state=\"%s\") = %" PRId64 ", want %" PRId64,
cpu_str, cpu_state_names[state], got, want);
"usage_count(cpu=%s, state=\"%s\") = %g, want %g", cpu_str,
cpu_state_names[state], got, want);
ok = false;
}

Expand Down Expand Up @@ -174,8 +174,8 @@ DEF_TEST(usage_count) {
}
}

gauge_t state_time[STATE_MAX] = {0};
gauge_t sum_time = 0;
fpcounter_t state_time[STATE_MAX] = {0};
fpcounter_t sum_time = 0;
for (size_t cpu = 0; cpu < CPU_NUM; cpu++) {
derive_t active_increment = 0;
for (state_t s = 0; s < STATE_ACTIVE; s++) {
Expand All @@ -184,34 +184,34 @@ DEF_TEST(usage_count) {
active_increment += increment;
}

gauge_t want_time = 1000000.0 * CDTIME_T_TO_DOUBLE(interval) *
((gauge_t)increment) / ((gauge_t)cpu_increment[cpu]);
fpcounter_t want_time = CDTIME_T_TO_DOUBLE(interval) *
((fpcounter_t)increment) /
((fpcounter_t)cpu_increment[cpu]);
state_time[s] += want_time;
sum_time += want_time;

bool ok = expect_usage_count((derive_t)want_time,
usage_count(&usage, cpu, s), cpu, s);
bool ok =
expect_usage_count(want_time, usage_count(&usage, cpu, s), cpu, s);
ret = ret || !ok;
}

gauge_t want_active_time = 1000000.0 * CDTIME_T_TO_DOUBLE(interval) *
((gauge_t)active_increment) /
((gauge_t)cpu_increment[cpu]);
fpcounter_t want_active_time = CDTIME_T_TO_DOUBLE(interval) *
((fpcounter_t)active_increment) /
((fpcounter_t)cpu_increment[cpu]);
state_time[STATE_ACTIVE] += want_active_time;
bool ok = expect_usage_count((derive_t)want_active_time,
bool ok = expect_usage_count(want_active_time,
usage_count(&usage, cpu, STATE_ACTIVE), cpu,
STATE_ACTIVE);
ret = ret || !ok;
}

for (state_t s = 0; s < STATE_MAX; s++) {
bool ok = expect_usage_count((derive_t)state_time[s],
usage_count(&usage, CPU_ALL, s), CPU_ALL, s);
bool ok = expect_usage_count(state_time[s], usage_count(&usage, CPU_ALL, s),
CPU_ALL, s);
ret = ret || !ok;
}

EXPECT_EQ_DOUBLE(CPU_NUM * 1000000.0 * CDTIME_T_TO_DOUBLE(interval),
sum_time);
EXPECT_EQ_DOUBLE(CPU_NUM * CDTIME_T_TO_DOUBLE(interval), sum_time);

usage_reset(&usage);
return ret;
Expand Down
Loading

0 comments on commit 67633b8

Please sign in to comment.