Collect disk commitment info #133

Closed
wants to merge 1 commit into from
6 changes: 5 additions & 1 deletion src/pve_exporter/cli.py
@@ -82,6 +82,9 @@ def main():
    parser.add_argument('--collector.config', dest='collector_config',
                        action=BooleanOptionalAction, default=True,
                        help='Exposes PVE onboot status')
    parser.add_argument('--collector.volumes', dest='collector_volumes',
                        action=BooleanOptionalAction, default=True,
                        help='Exposes PVE VM resource commitments')
    parser.add_argument('config', nargs='?', default='pve.yml',
                        help='Path to configuration file (pve.yml)')
    parser.add_argument('port', nargs='?', type=int, default='9221',
@@ -97,7 +100,8 @@ def main():
        node=params.collector_node,
        cluster=params.collector_cluster,
        resources=params.collector_resources,
        config=params.collector_config
        config=params.collector_config,
        volumes=params.collector_volumes,
    )

    # Load configuration.
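
Note on the new switch: argparse's BooleanOptionalAction (Python 3.9+) generates both an enabling and a negating form, so volume collection is on by default and can be turned off with --no-collector.volumes. A minimal, standalone sketch of that behaviour (not part of this diff):

from argparse import ArgumentParser, BooleanOptionalAction

parser = ArgumentParser()
parser.add_argument('--collector.volumes', dest='collector_volumes',
                    action=BooleanOptionalAction, default=True,
                    help='Exposes PVE VM resource commitments')

print(parser.parse_args([]).collector_volumes)                          # True
print(parser.parse_args(['--no-collector.volumes']).collector_volumes)  # False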
61 changes: 56 additions & 5 deletions src/pve_exporter/collector.py
@@ -6,6 +6,8 @@
import collections
import itertools
import logging

from prometheus_client.registry import Collector
from proxmoxer import ProxmoxAPI
from proxmoxer.core import ResourceException

@@ -19,6 +21,7 @@
    'cluster',
    'resources',
    'config',
    'volumes',
])

class StatusCollector:
@@ -167,19 +170,19 @@ def collect(self): # pylint: disable=missing-docstring
            'maxdisk': GaugeMetricFamily(
                'pve_disk_size_bytes',
                'Size of storage device',
                labels=['id']),
                labels=['id', 'type', 'storage', 'node']),
            'disk': GaugeMetricFamily(
                'pve_disk_usage_bytes',
                'Disk usage in bytes',
                labels=['id']),
            'maxmem': GaugeMetricFamily(
                'pve_memory_size_bytes',
                'Size of memory',
                labels=['id']),
                labels=['id', 'node', 'type']),
            'mem': GaugeMetricFamily(
                'pve_memory_usage_bytes',
                'Memory usage in bytes',
                labels=['id']),
                labels=['id', 'node', 'type']),
            'netout': GaugeMetricFamily(
                'pve_network_transmit_bytes',
                'Number of bytes transmitted over the network',
@@ -247,10 +250,11 @@ def collect(self): # pylint: disable=missing-docstring
                label_values = [resource.get(key, '') for key in info_lookup[restype]['labels']]
                info_lookup[restype]['gauge'].add_metric(label_values, 1)

            label_values = [resource['id']]
            for key, metric_value in resource.items():
                if key in metrics:
                    metrics[key].add_metric(label_values, metric_value)
                    metric = metrics[key]
                    # Label values follow the order declared on the metric
                    # family; keys missing from the resource fall back to ''
                    # so the value count always matches the label names.
                    label_values = [resource.get(labelname, '')
                                    for labelname in metric._labelnames]
                    metric.add_metric(label_values, metric_value)

        return itertools.chain(metrics.values(), info_metrics.values())
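
With this change the label values for each metric are derived from the metric family's declared label names rather than a fixed [id] list. A minimal, standalone sketch (hypothetical resource dict, not part of this diff) of what that selection does:

from prometheus_client.core import GaugeMetricFamily

maxmem = GaugeMetricFamily('pve_memory_size_bytes', 'Size of memory',
                           labels=['id', 'node', 'type'])
resource = {'id': 'qemu/100', 'node': 'pve1', 'type': 'qemu', 'maxmem': 2147483648}

# Values are picked in the order the labels were declared on the family;
# keys missing from the resource fall back to an empty string.
label_values = [resource.get(name, '') for name in maxmem._labelnames]
maxmem.add_metric(label_values, resource['maxmem'])
# Exposition output (approximately):
# pve_memory_size_bytes{id="qemu/100",node="pve1",type="qemu"} 2.147483648e+09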

@@ -310,6 +314,51 @@ def collect(self): # pylint: disable=missing-docstring

        return metrics.values()


class VolumesCollector(Collector):
    """
    Collects info on volume sizes. The disk usage reported for a storage may
    not reflect the committed (provisioned) sizes when volumes are thinly
    allocated.
    """
    def __init__(self, pve):
        self._pve = pve
        self._log = logging.getLogger(__name__)

    def collect(self): # pylint: disable=missing-docstring
        disk_size = GaugeMetricFamily(
            'pve_volume_size_bytes',
            'Proxmox volume commitments',
            labels=['id', 'node', 'storage']
        )
        seen_shared_storages = set()
        for node in self._pve.nodes.get():
            # The nodes/{node} api call will result in requests being forwarded
            # from the api node to the target node. Those calls can fail if the
            # target node is offline or otherwise unable to respond to the
            # request. In that case it is better to just skip scraping the
            # volumes on that particular node and continue with the next one
            # in order to avoid failing the whole scrape.
            try:
                storage_api = self._pve.nodes(node['node']).storage
                for storage in storage_api.get():
                    # Skip directory storages and storages that are not active.
                    if storage['type'] == 'dir' or not storage['active']:
                        continue
                    # Shared storages are visible from every node; count their
                    # volumes only once.
                    if storage['shared'] and storage['storage'] in seen_shared_storages:
                        continue
                    seen_shared_storages.add(storage['storage'])

                    for disk in storage_api(storage['storage']).content.get():
                        disk_size.add_metric(
                            [f'disk/{node["node"]}/{disk["volid"]}',
                             node['node'], storage['storage']],
                            disk['size'])
            except ResourceException:
                self._log.exception(
                    "Exception thrown while scraping storage content from %s",
                    node['node']
                )
                continue
        return [disk_size]
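
For illustration: with a hypothetical node pve1 holding a 32 GiB LVM-thin volume whose volid is local-lvm:vm-100-disk-0, the collector above would emit a sample roughly like

pve_volume_size_bytes{id="disk/pve1/local-lvm:vm-100-disk-0",node="pve1",storage="local-lvm"} 3.4359738368e+10

Summed per storage, this series can be compared against pve_disk_size_bytes to see how far a thin-provisioned storage is over-committed.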


def collect_pve(config, host, options: CollectorsOptions):
"""Scrape a host and return prometheus text format for it"""

@@ -328,5 +377,7 @@ def collect_pve(config, host, options: CollectorsOptions):
        registry.register(ClusterNodeConfigCollector(pve))
    if options.version:
        registry.register(VersionCollector(pve))
    if options.volumes:
        registry.register(VolumesCollector(pve))

    return generate_latest(registry)