Skip to content

Commit

Permalink
[terafoundation] Prom metrics exporter doesn't reset metrics between …
Browse files Browse the repository at this point in the history
…updates (#3747)

This PR makes the following changes:

- The `PromMetrics` class needs to reset its list of metrics on each
scrape. If it doesn't do this, then all the executions are listed, not
just the active ones. `resetMetrics()` functions were added to
`PromMetrics` and `Exporter` to reset the `prom-client` register.
- Add `prom_metrics_display_url` field to terafoundation. This value
will be used as the `url` default label added to all prom metrics.
Defaults to an empty string, making it more obvious that this field is
missing from the config.
- Include cluster analytics metrics (GET '/cluster/stats' endpoint
results) in the cluster master

ref: #3743
  • Loading branch information
busma13 authored Sep 19, 2024
1 parent 500a005 commit 5b37c44
Show file tree
Hide file tree
Showing 16 changed files with 282 additions and 34 deletions.
1 change: 1 addition & 0 deletions docs/configuration/overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ NOTE: All `asset_storage` related fields are deprecated. Please use the fields i
| **prom_metrics_enabled** | `Boolean` | `false` | Create prometheus exporters. Kubernetes clustering only |
| **prom_metrics_port** | `Number` | `3333` | Port of prometheus exporter server. Kubernetes clustering only. Metrics will be visible at `http://localhost:<PORT>/metrics` |
| **prom_metrics_add_default** | `Boolean` | `true` | Display default node metrics in prom exporter. Kubernetes clustering only |
| **prom_metrics_display_url** | `String` | `""` | Value to display as url label for prometheus metrics |
| **workers** | `Number` | `4` | Number of workers per server |

## Teraslice Configuration Reference
Expand Down
1 change: 1 addition & 0 deletions docs/development/k8s.md
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,7 @@ The `PromMetrics` class lives within `packages/terafoundation/src/api/prom-metri
| hasMetric | check if a metric exists | `(name: string) => boolean` |
| deleteMetric | delete a metric from the metric list | `(name: string) => Promise<boolean>` |
| verifyAPI | verify that the API is running | `() => boolean` |
| resetMetrics | reset the values of all metrics | `() => void` |
| shutdown | disable API and shutdown exporter server | `() => Promise<void>` |
| getDefaultLabels | retrieve the default labels set at init | `() => Record<string, string>` |

Expand Down
49 changes: 49 additions & 0 deletions docs/management-apis/endpoints-json.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,55 @@ $ curl 'localhost:5678/v1/cluster/controllers'
]
```

## GET /v1/cluster/stats

Returns a json object containing cluster analytics.

**NOTE:** The slicer object is identical to controllers and is present for backwards compatibility.

**Usage:**

```sh
$ curl 'http://localhost:5678/v1/cluster/stats'
{
"controllers": {
"processed": 2,
"failed": 0,
"queued": 0,
"job_duration": 3,
"workers_joined": 1,
"workers_disconnected": 0,
"workers_reconnected": 0
},
"slicer": {
"processed": 2,
"failed": 0,
"queued": 0,
"job_duration": 3,
"workers_joined": 1,
"workers_disconnected": 0,
"workers_reconnected": 0
}
}
```

Include the following header to receive stats in "prometheus exporter mode":
```sh
$ curl -H "Accept: application/openmetrics-text;" -sS http://localhost:5678/cluster/stats
# TYPE teraslice_slices_processed counter
teraslice_slices_processed{cluster="teraslice-dev1"} 2
# TYPE teraslice_slices_failed counter
teraslice_slices_failed{cluster="teraslice-dev1"} 0
# TYPE teraslice_slices_queued counter
teraslice_slices_queued{cluster="teraslice-dev1"} 0
# TYPE teraslice_workers_joined counter
teraslice_workers_joined{cluster="teraslice-dev1"} 1
# TYPE teraslice_workers_disconnected counter
teraslice_workers_disconnected{cluster="teraslice-dev1"} 0
# TYPE teraslice_workers_reconnected counter
teraslice_workers_reconnected{cluster="teraslice-dev1"} 0
```

## GET /v1/assets

Retrieves a list of assets
Expand Down
6 changes: 6 additions & 0 deletions packages/job-components/src/test-helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ export class TestContext implements i.Context {
prom_metrics_enabled: false,
prom_metrics_port: 3333,
prom_metrics_add_default: true,
prom_metrics_display_url: 'http://localhost',
},
teraslice: {
action_timeout: 10000,
Expand Down Expand Up @@ -499,6 +500,11 @@ export class TestContext implements i.Context {
verifyAPI(): boolean {
return ctx.mockPromMetrics !== null;
},
/**
 * Reset the mock prom metrics API by replacing its metric list with an
 * empty object. Does nothing when `ctx.mockPromMetrics` is not set.
 */
resetMetrics(): void {
    if (ctx.mockPromMetrics) {
        ctx.mockPromMetrics.metricList = {};
    }
},
async shutdown(): Promise<void> {
ctx.mockPromMetrics = null;
}
Expand Down
6 changes: 6 additions & 0 deletions packages/job-components/test/test-helpers-spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ describe('Test Helpers', () => {
tf_prom_metrics_enabled: true,
tf_prom_metrics_port: 3333,
tf_prom_metrics_add_default: false,
prom_metrics_display_url: 'http://localhost'
};

it('should be able to init a mock prom_metrics_api', async () => {
Expand Down Expand Up @@ -215,6 +216,11 @@ describe('Test Helpers', () => {
.toThrow('Metric missing_test_histogram is not setup');
});

// After resetMetrics() the mock API's metric list must be an empty object.
it('should reset metrics', () => {
    context.apis.foundation.promMetrics.resetMetrics();
    expect(context.mockPromMetrics?.metricList).toBeEmptyObject();
});

it('should shutdown', async () => {
await context.apis.foundation.promMetrics.shutdown();
expect(context.mockPromMetrics).toBeNull();
Expand Down
4 changes: 4 additions & 0 deletions packages/terafoundation/src/api/prom-metrics/exporter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,8 @@ export default class Exporter {
async deleteMetric(name: string): Promise<void> {
promClient.register.removeSingleMetric(name);
}

/**
 * Reset all metrics held by the `prom-client` register by calling
 * `promClient.register.resetMetrics()`.
 */
resetMetrics(): void {
    promClient.register.resetMetrics();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ export class PromMetrics {
const {
assignment, job_prom_metrics_add_default, job_prom_metrics_enabled,
job_prom_metrics_port, tf_prom_metrics_add_default, tf_prom_metrics_enabled,
tf_prom_metrics_port, labels, prefix, terasliceName
tf_prom_metrics_port, labels, prefix, terasliceName, prom_metrics_display_url
} = config;

const portToUse = job_prom_metrics_port || tf_prom_metrics_port;
Expand All @@ -67,6 +67,7 @@ export class PromMetrics {
this.default_labels = {
name: terasliceName,
assignment: apiConfig.assignment,
url: prom_metrics_display_url,
...apiConfig.labels
};
await this.createAPI(apiConfig);
Expand Down Expand Up @@ -405,6 +406,10 @@ export class PromMetrics {
return this.apiRunning;
}

/**
 * Reset the metrics of this API by delegating to the underlying
 * exporter's `resetMetrics()`.
 */
resetMetrics(): void {
    this.metricExporter.resetMetrics();
}

async shutdown(): Promise<void> {
this.logger.info('prom_metrics_API exporter shutdown');
try {
Expand Down
5 changes: 5 additions & 0 deletions packages/terafoundation/src/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ export function foundationSchema() {
doc: 'Display default node metrics in prom exporter',
default: true,
format: Boolean
},
prom_metrics_display_url: {
doc: 'Value to display as url label for prometheus metrics',
default: '',
format: String
}
};

Expand Down
38 changes: 30 additions & 8 deletions packages/terafoundation/test/apis/exporter-spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,19 @@ import Exporter from '../../src/api/prom-metrics/exporter.js';

describe('prometheus exporter', () => {
let exporter: Exporter;

/**
 * Scrape the exporter's `/metrics` endpoint on 127.0.0.1:3344 and return
 * the raw response body.
 *
 * `throwHttpErrors: true` is passed to `got`, so a non-success HTTP status
 * rejects instead of returning an error page as the body.
 *
 * NOTE(review): port 3344 is presumably the port the exporter is created
 * with in the `create` suite - confirm against that config.
 *
 * @returns the text of the metrics response
 */
async function getExporterMetrics(): Promise<string> {
    // Rely on got's own response typing rather than widening to `any`.
    const { body } = await got('http://127.0.0.1:3344/metrics', {
        throwHttpErrors: true
    });
    return body;
}

beforeAll(() => {
const logger = debugLogger('prometheus_exporter');
exporter = new Exporter(logger);
});

describe('create', () => {
const config: tf.PromMetricsAPIConfig = {
assignment: 'worker',
Expand All @@ -32,8 +41,9 @@ describe('prometheus exporter', () => {
expect(response.body).toBeString();
});
});

describe('delete', () => {
it('should shutdown the express server', async () => {
it('should delete a metric', async () => {
new Counter({
name: 'delete_test',
help: 'delete_test_help_message',
Expand All @@ -48,15 +58,27 @@ describe('prometheus exporter', () => {
const bodyAfter = await getExporterMetrics();
const valueAfter = bodyAfter.split('\n').filter((line: string) => line.includes('delete_test counter'))[0];
expect(valueAfter).toBe(undefined);
});
});

async function getExporterMetrics(): Promise<string> {
const response: Record<string, any> = await got('http://127.0.0.1:3344/metrics', {
throwHttpErrors: true
});
return response.body;
}
}, 3000000);
// Checks that Exporter.resetMetrics() removes a previously recorded value
// from the scraped metrics output.
describe('reset', () => {
it('should reset the prom metrics registry', async () => {
// NOTE(review): presumably registered on prom-client's default register,
// which is what the exporter serves - confirm.
const counter = new Counter({
name: 'reset_test',
help: 'reset_test_help_message',
labelNames: ['reset_test_label'],
});

// Incremented without labels, so the scrape shows an unlabelled sample.
counter.inc(100);
const bodyBefore = await getExporterMetrics();
expect(bodyBefore).toInclude('reset_test 100');

// Order matters: reset first, then scrape again to observe the effect.
exporter.resetMetrics();
const bodyAfter = await getExporterMetrics();
// Only asserts that the old value is gone, not what replaces it.
expect(bodyAfter).not.toInclude('reset_test 100');
});
});

describe('shutdown', () => {
it('should shutdown the express server', async () => {
await exporter.shutdown();
Expand Down
78 changes: 75 additions & 3 deletions packages/terafoundation/test/apis/prom-metrics-spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ describe('promMetrics foundation API', () => {
log_level: 'debug',
prom_metrics_enabled: true,
prom_metrics_port: 3333,
prom_metrics_add_default: true
prom_metrics_add_default: true,
prom_metrics_display_url: 'http://localhost'
},
teraslice: {
cluster_manager_type: 'kubernetes',
Expand All @@ -33,7 +34,8 @@ describe('promMetrics foundation API', () => {
tf_prom_metrics_port: terafoundation.prom_metrics_port,
tf_prom_metrics_add_default: terafoundation.prom_metrics_add_default,
logger: debugLogger('prom-metrics-spec-logger'),
assignment: 'worker'
assignment: 'worker',
prom_metrics_display_url: terafoundation.prom_metrics_display_url
};

beforeAll(() => {
Expand All @@ -58,7 +60,7 @@ describe('promMetrics foundation API', () => {

it('should have correct default labels', async () => {
const labels = await context.apis.foundation.promMetrics.getDefaultLabels();
expect(labels).toEqual({ assignment: 'worker', name: 'tera-test' });
expect(labels).toEqual({ assignment: 'worker', name: 'tera-test', url: 'http://localhost' });
});

it('should throw an error if promMetricsAPI is already initialized', async () => {
Expand Down Expand Up @@ -663,4 +665,74 @@ describe('promMetrics foundation API', () => {
});
});
});

/**
 * Exercises `promMetrics.resetMetrics()` end to end against a running
 * exporter. This suite builds its own context and listens on port 3337
 * with default node metrics disabled.
 */
describe('resetMetrics', () => {
    const context = {
        sysconfig: {
            terafoundation: {
                log_level: 'debug',
                prom_metrics_enabled: true,
                prom_metrics_port: 3337,
                prom_metrics_add_default: false
            },
            teraslice: {
                cluster_manager_type: 'kubernetes',
                name: 'tera-test'
            }
        },
    } as any;

    const { terafoundation, teraslice } = context.sysconfig;
    const config = {
        terasliceName: teraslice.name,
        tf_prom_metrics_enabled: terafoundation.prom_metrics_enabled,
        tf_prom_metrics_port: terafoundation.prom_metrics_port,
        tf_prom_metrics_add_default: terafoundation.prom_metrics_add_default,
        logger: debugLogger('prom-metrics-spec-logger'),
        assignment: 'master',
        prefix: 'foundation_test_'
    };

    // Label value used to pick the gauge's sample line out of the scrape.
    const uuid = '7oBd9L3sJB';

    /**
     * Scrape the exporter and return the value column of the first line
     * mentioning `uuid`, or `undefined` when no such line exists, so that a
     * missing series fails the assertion instead of throwing a TypeError.
     */
    async function scrapeGaugeValue(): Promise<string | undefined> {
        const { body } = await got(`http://127.0.0.1:${config.tf_prom_metrics_port}/metrics`, {
            throwHttpErrors: true
        });

        return body
            .split('\n')
            .find((line: string) => line.includes(uuid))
            ?.split(' ')[1];
    }

    beforeAll(async () => {
        // This sets up the API endpoints in the context.
        api(context);
        context.logger = debugLogger('terafoundation-tests');
        await context.apis.foundation.promMetrics.init(config);
    });

    afterAll(async () => {
        await context.apis.foundation.promMetrics.shutdown();
    });

    it('should reset metrics', async () => {
        // The collect callback increments the labelled series by 0.
        // NOTE(review): presumably it runs on every scrape, which is why the
        // series is still present (with value 0) after the reset - confirm.
        await context.apis.foundation.promMetrics.addGauge('gauge2', 'help message', ['uuid'], function collect(this: Gauge) {
            const defaultLabels = context.apis.foundation.promMetrics.getDefaultLabels();
            this.inc({ uuid: '7oBd9L3sJB', ...defaultLabels }, 0);
        });
        context.apis.foundation.promMetrics.inc('gauge2', { uuid: '7oBd9L3sJB' }, 200);

        expect(await scrapeGaugeValue()).toBe('200');

        context.apis.foundation.promMetrics.resetMetrics();

        expect(await scrapeGaugeValue()).toBe('0');
    });
});
});
3 changes: 2 additions & 1 deletion packages/terafoundation/test/test-context-spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ describe('TestContext', () => {
tf_prom_metrics_port: 3333,
tf_prom_metrics_add_default: false,
logger: context.logger,
assignment: 'master'
assignment: 'master',
prom_metrics_display_url: context.sysconfig.terafoundation.prom_metrics_display_url
};
expect(await context.apis.foundation.promMetrics.init(config)).toBe(true);
});
Expand Down
Loading

0 comments on commit 5b37c44

Please sign in to comment.