diff --git a/lib/charms/grafana_agent/v0/cos_agent.py b/lib/charms/grafana_agent/v0/cos_agent.py
index b72897df7..9261582f5 100644
--- a/lib/charms/grafana_agent/v0/cos_agent.py
+++ b/lib/charms/grafana_agent/v0/cos_agent.py
@@ -22,7 +22,7 @@
 Using the `COSAgentProvider` object only requires instantiating it,
 typically in the `__init__` method of your charm (the one which sends telemetry).
 
-The constructor of `COSAgentProvider` has only one required and eight optional parameters:
+The constructor of `COSAgentProvider` has only one required and nine optional parameters:
 
 ```python
 def __init__(
@@ -36,6 +36,7 @@ def __init__(
     log_slots: Optional[List[str]] = None,
     dashboard_dirs: Optional[List[str]] = None,
     refresh_events: Optional[List] = None,
+    scrape_configs: Optional[Union[List[Dict], Callable]] = None,
 ):
 ```
 
@@ -47,7 +48,8 @@ def __init__(
   the `cos_agent` interface, this is where you have to specify that.
 
 - `metrics_endpoints`: In this parameter you can specify the metrics endpoints that Grafana Agent
-  machine Charmed Operator will scrape.
+  machine Charmed Operator will scrape. The configs in this list will be merged with the configs
+  from `scrape_configs`.
 
 - `metrics_rules_dir`: The directory in which the Charmed Operator stores its metrics alert rules
   files.
@@ -63,6 +65,10 @@ def __init__(
 
 - `refresh_events`: List of events on which to refresh relation data.
 
+- `scrape_configs`: List of standard `scrape_configs` dicts, or a callable that returns such a
+  list when the configs need to be generated dynamically. The contents of this list will be
+  merged with the configs from `metrics_endpoints`.
+
 ### Example 1 - Minimal instrumentation:
@@ -91,6 +97,7 @@ def __init__(self, *args):
         self,
         relation_name="custom-cos-agent",
         metrics_endpoints=[
+            # specify "path" and "port" to scrape from localhost
             {"path": "/metrics", "port": 9000},
             {"path": "/metrics", "port": 9001},
             {"path": "/metrics", "port": 9002},
@@ -101,6 +108,48 @@ def __init__(self, *args):
         log_slots=["my-app:slot"],
         dashboard_dirs=["./src/dashboards_1", "./src/dashboards_2"],
         refresh_events=["update-status", "upgrade-charm"],
+        scrape_configs=[
+            {
+                "job_name": "custom_job",
+                "metrics_path": "/metrics",
+                "authorization": {"credentials": "bearer-token"},
+                "static_configs": [
+                    {
+                        "targets": ["localhost:9003"],
+                        "labels": {"key": "value"},
+                    },
+                ],
+            },
+        ]
+    )
+```
+
+### Example 3 - Dynamic scrape configs generation:
+
+Pass a function to the `scrape_configs` parameter to decouple the generation of the configs
+from the instantiation of the `COSAgentProvider` object.
+
+```python
+from charms.grafana_agent.v0.cos_agent import COSAgentProvider
+...
+
+class TelemetryProviderCharm(CharmBase):
+    def generate_scrape_configs(self):
+        return [
+            {
+                "job_name": "custom",
+                "metrics_path": "/metrics",
+                "static_configs": [{"targets": ["localhost:9000"]}],
+            },
+        ]
+
+    def __init__(self, *args):
+        ...
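+        # Pass the method itself, not its return value; it will be called
+        # whenever the scrape configs are needed.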
+        self._grafana_agent = COSAgentProvider(
+            self,
+            scrape_configs=self.generate_scrape_configs,
     )
 ```
 
@@ -166,12 +213,12 @@ def __init__(self, *args):
 from collections import namedtuple
 from itertools import chain
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Optional, Set, Union
+from typing import TYPE_CHECKING, Any, Callable, ClassVar, Dict, List, Optional, Set, Union
 
 import pydantic
 from cosl import JujuTopology
 from cosl.rules import AlertRules
-from ops.charm import RelationChangedEvent, RelationEvent
+from ops.charm import RelationChangedEvent
 from ops.framework import EventBase, EventSource, Object, ObjectEvents
 from ops.model import Relation, Unit
 from ops.testing import CharmType
@@ -189,15 +236,15 @@ class _MetricsEndpointDict(TypedDict):
 
 LIBID = "dc15fa84cef84ce58155fb84f6c6213a"
 LIBAPI = 0
-LIBPATCH = 4
+LIBPATCH = 5
 
 PYDEPS = ["cosl", "pydantic<2"]
 
 DEFAULT_RELATION_NAME = "cos-agent"
 DEFAULT_PEER_RELATION_NAME = "peers"
-DEFAULT_METRICS_ENDPOINT = {
-    "path": "/metrics",
-    "port": 80,
+DEFAULT_SCRAPE_CONFIG = {
+    "static_configs": [{"targets": ["localhost:80"]}],
+    "metrics_path": "/metrics",
 }
 
 logger = logging.getLogger(__name__)
@@ -295,6 +342,8 @@ def __init__(
         log_slots: Optional[List[str]] = None,
         dashboard_dirs: Optional[List[str]] = None,
         refresh_events: Optional[List] = None,
+        *,
+        scrape_configs: Optional[Union[List[dict], Callable]] = None,
     ):
         """Create a COSAgentProvider instance.
 
@@ -302,6 +351,8 @@ def __init__(
             charm: The `CharmBase` instance that is instantiating this object.
             relation_name: The name of the relation to communicate over.
             metrics_endpoints: List of endpoints in the form [{"path": path, "port": port}, ...].
+                This argument is a simplified form of `scrape_configs`.
+                The contents of this list will be merged with the contents of `scrape_configs`.
             metrics_rules_dir: Directory where the metrics rules are stored.
             logs_rules_dir: Directory where the logs rules are stored.
             recurse_rules_dirs: Whether to recurse into rule paths.
@@ -309,13 +360,18 @@ def __init__(
                 in the form ["snap-name:slot", ...].
             dashboard_dirs: Directory where the dashboards are stored.
             refresh_events: List of events on which to refresh relation data.
+            scrape_configs: List of standard `scrape_configs` dicts, or a callable that returns
+                such a list when the configs need to be generated dynamically.
+                The contents of this list will be merged with the contents of `metrics_endpoints`.
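+                A callable is re-evaluated on each refresh event.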
""" super().__init__(charm, relation_name) dashboard_dirs = dashboard_dirs or ["./src/grafana_dashboards"] self._charm = charm self._relation_name = relation_name - self._metrics_endpoints = metrics_endpoints or [DEFAULT_METRICS_ENDPOINT] + self._metrics_endpoints = metrics_endpoints or [] + self._scrape_configs = scrape_configs or [] self._metrics_rules = metrics_rules_dir self._logs_rules = logs_rules_dir self._recursive = recurse_rules_dirs @@ -331,10 +386,7 @@ def __init__( def _on_refresh(self, event): """Trigger the class to update relation data.""" - if isinstance(event, RelationEvent): - relations = [event.relation] - else: - relations = self._charm.model.relations[self._relation_name] + relations = self._charm.model.relations[self._relation_name] for relation in relations: # Before a principal is related to the grafana-agent subordinate, we'd get @@ -359,12 +411,34 @@ def _on_refresh(self, event): @property def _scrape_jobs(self) -> List[Dict]: - """Return a prometheus_scrape-like data structure for jobs.""" - job_name_prefix = self._charm.app.name - return [ - {"job_name": f"{job_name_prefix}_{key}", **endpoint} - for key, endpoint in enumerate(self._metrics_endpoints) - ] + """Return a prometheus_scrape-like data structure for jobs. + + https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config + """ + if callable(self._scrape_configs): + scrape_configs = self._scrape_configs() + else: + # Create a copy of the user scrape_configs, since we will mutate this object + scrape_configs = self._scrape_configs.copy() + + # Convert "metrics_endpoints" to standard scrape_configs, and add them in + for endpoint in self._metrics_endpoints: + scrape_configs.append( + { + "metrics_path": endpoint["path"], + "static_configs": [{"targets": [f"localhost:{endpoint['port']}"]}], + } + ) + + scrape_configs = scrape_configs or [DEFAULT_SCRAPE_CONFIG] + + # Augment job name to include the app name and a unique id (index) + for idx, scrape_config in enumerate(scrape_configs): + scrape_config["job_name"] = "_".join( + [self._charm.app.name, str(idx), scrape_config.get("job_name", "default")] + ) + + return scrape_configs @property def _metrics_alert_rules(self) -> Dict: @@ -647,15 +721,18 @@ def metrics_jobs(self) -> List[Dict]: """Parse the relation data contents and extract the metrics jobs.""" scrape_jobs = [] if data := self._principal_unit_data: - jobs = data.metrics_scrape_jobs - if jobs: - for job in jobs: - job_config = { + for job in data.metrics_scrape_jobs: + # In #220, relation schema changed from a simplified dict to the standard + # `scrape_configs`. + # This is to ensure backwards compatibility with Providers older than v0.5. 
+ if "path" in job and "port" in job and "job_name" in job: + job = { "job_name": job["job_name"], "metrics_path": job["path"], "static_configs": [{"targets": [f"localhost:{job['port']}"]}], } - scrape_jobs.append(job_config) + + scrape_jobs.append(job) return scrape_jobs diff --git a/tests/integration/relations/test_shared_db.py b/tests/integration/relations/test_shared_db.py index a7b63f876..0958cb5ad 100644 --- a/tests/integration/relations/test_shared_db.py +++ b/tests/integration/relations/test_shared_db.py @@ -42,25 +42,23 @@ async def deploy_and_relate_keystone_with_mysql( """ # Deploy keystone # Explicitly setting the series to 'focal' as it defaults to 'xenial' - await ops_test.model.deploy( + keystone_application = await ops_test.model.deploy( "keystone", - series="focal", + channel="yoga/stable", + series="jammy", application_name=keystone_application_name, num_units=number_of_units, ) - await ops_test.model.wait_for_idle( - apps=[keystone_application_name], - status="blocked", - raise_on_blocked=False, + + await ops_test.model.block_until( + lambda: {unit.workload_status for unit in keystone_application.units} == {"blocked"}, timeout=SLOW_WAIT_TIMEOUT, ) # Relate keystone to mysql await ops_test.model.relate(f"{keystone_application_name}:shared-db", f"{APP_NAME}:shared-db") - await ops_test.model.wait_for_idle( - apps=[keystone_application_name], - status="active", - raise_on_blocked=False, # both applications are blocked initially + await ops_test.model.block_until( + lambda: {unit.workload_status for unit in keystone_application.units} == {"active"}, timeout=SLOW_WAIT_TIMEOUT, )