Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add prometheus for monitor #368

Merged
merged 18 commits into from
Aug 6, 2019
4 changes: 4 additions & 0 deletions src/redis_protocol/proxy/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ set(MY_PROJ_LIBS pegasus.rproxylib
pegasus.base
pegasus_geo_lib
pegasus_reporter
prometheus-cpp-pull
prometheus-cpp-push
ChenQShmily marked this conversation as resolved.
Show resolved Hide resolved
prometheus-cpp-core
curl
event
s2
pegasus_client_static
Expand Down
3 changes: 2 additions & 1 deletion src/reporter/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ set(MY_PROJ_SRC "")
# "GLOB" for non-recursive search
set(MY_SRC_SEARCH_MODE "GLOB")

set(MY_PROJ_LIBS "")
set(MY_PROJ_LIB_PATH "")
set(MY_PROJ_INC_PATH "")

dsn_add_static_library()
112 changes: 110 additions & 2 deletions src/reporter/pegasus_counter_reporter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,36 @@
#include "base/pegasus_utils.h"
#include "pegasus_io_service.h"

#include <chrono>
#include <map>
#include <memory>
#include <string>
#include <thread>
#include <sstream>
#include <iterator>
#include <regex>

using namespace ::dsn;

// Returns the local host name as reported by ::gethostname(), or an empty
// string when the call fails.
static std::string GetHostName()
{
    // Zero-initialise the buffer: POSIX leaves it unspecified whether a
    // truncated host name is null-terminated, and the std::string conversion
    // below needs a terminator.
    char hostname[1024] = {};

    // Offer one byte less than the buffer holds so the final '\0' is never
    // overwritten, whatever gethostname() writes.
    if (::gethostname(hostname, sizeof(hostname) - 1) != 0) {
        return {};
    }
    return hostname;
}

// Rewrites metrics_name in place: '@' becomes ':', and each of '.', '*', '('
// and ')' becomes '_'. Every other character is left untouched.
static void change_metrics_name(std::string &metrics_name)
{
    for (char &ch : metrics_name) {
        switch (ch) {
        case '@':
            ch = ':';
            break;
        case '.':
        case '*':
        case '(':
        case ')':
            ch = '_';
            break;
        default:
            break;
        }
    }
}

namespace pegasus {
namespace server {

Expand All @@ -43,12 +71,29 @@ pegasus_counter_reporter::pegasus_counter_reporter()
_last_report_time_ms(0),
_enable_logging(false),
_enable_falcon(false),
_falcon_port(0)
_enable_prometheus(false),
_falcon_port(0),
_prometheus_port(0)
{
}

// Destroying the reporter calls stop() before the members are torn down.
pegasus_counter_reporter::~pegasus_counter_reporter()
{
    this->stop();
}

// Reads the gateway address from the [pegasus.server] config section
// (prometheus_host, default "127.0.0.1"; prometheus_port, default 9091),
// then creates the prometheus gateway and registry and registers the
// registry with the gateway.
void pegasus_counter_reporter::prometheus_initialize()
{
    _prometheus_host = dsn_config_get_value_string(
        "pegasus.server", "prometheus_host", "127.0.0.1", "prometheus gateway host");

    // The config API hands back a 64-bit value; it is narrowed to the 16-bit port member.
    const auto configured_port = dsn_config_get_value_uint64(
        "pegasus.server", "prometheus_port", 9091, "prometheus gateway port");
    _prometheus_port = static_cast<uint16_t>(configured_port);

    ddebug("prometheus initialize: host:port(%s:%d)", _prometheus_host.c_str(), _prometheus_port);

    // The instance label is derived from the local host name.
    const auto instance_labels = prometheus::Gateway::GetInstanceLabel(GetHostName());
    const std::string port_text = std::to_string(_prometheus_port);
    _gateway = std::make_shared<prometheus::Gateway>(
        _prometheus_host, port_text, "pegasus", instance_labels);

    _registry = std::make_shared<prometheus::Registry>();
    _gateway->RegisterCollectable(_registry);
}

void pegasus_counter_reporter::falcon_initialize()
{
_falcon_host = dsn_config_get_value_string(
Expand Down Expand Up @@ -112,11 +157,19 @@ void pegasus_counter_reporter::start()
"pegasus.server", "perf_counter_enable_logging", true, "perf_counter_enable_logging");
_enable_falcon = dsn_config_get_value_bool(
"pegasus.server", "perf_counter_enable_falcon", false, "perf_counter_enable_falcon");
_enable_prometheus = dsn_config_get_value_bool("pegasus.server",
"perf_counter_enable_prometheus",
false,
"perf_counter_enable_prometheus");

if (_enable_falcon) {
falcon_initialize();
}

if (_enable_prometheus) {
prometheus_initialize();
}

event_set_log_callback(libevent_log);

_report_timer.reset(new boost::asio::deadline_timer(pegasus_io_service::instance().ios));
Expand Down Expand Up @@ -167,21 +220,76 @@ void pegasus_counter_reporter::update()

bool first_append = true;
_falcon_metric.timestamp = timestamp;

perf_counters::instance().iterate_snapshot(
[&oss, &first_append, this](const dsn::perf_counters::counter_snapshot &cs) {
_falcon_metric.metric = cs.name;
_falcon_metric.value = cs.value;
_falcon_metric.counterType = "GAUGE";

if (!first_append)
oss << ",";
_falcon_metric.encode_json_state(oss);
first_append = false;
});
oss << "]";

update_counters_to_falcon(oss.str(), timestamp);
}

if (_enable_prometheus) {
    // Mirror every perf counter in the snapshot into a prometheus gauge, then
    // push the whole registry to the gateway.
    perf_counters::instance().iterate_snapshot([this](
        const dsn::perf_counters::counter_snapshot &cs) {
        std::string metrics_name = cs.name;

        // Prometheus metric names do not accept characters such as . * ( ) @;
        // only ':' and '_' are usable as separators, so normalise the counter
        // name before using it.
        change_metrics_name(metrics_name);

        // Split a name such as
        //   "collector_app_pegasus_app_stat_multi_put_qps:1_0_p999" or
        //   "collector_app_pegasus_app_stat_multi_put_qps:1_0"
        // on the part after the last ':':
        //   app[0] = "1"            exported as the "app_id" label
        //   app[1] = "0"            exported as the "partition_count" label
        //   app[2] = "p999" or ""   exported as the "percent" label
        const int kAppFieldCount = 3;
        std::string app[kAppFieldCount] = {"", "", ""};
        std::list<std::string> lv;
        ::dsn::utils::split_args(metrics_name.c_str(), lv, ':');
        if (lv.size() > 1) {
            std::list<std::string> lv1;
            ::dsn::utils::split_args(lv.back().c_str(), lv1, '_');
            int i = 0;
            for (auto &v : lv1) {
                // The suffix may contain more than three '_'-separated pieces
                // (for example a name with extra underscores after the ':').
                // Ignore the surplus instead of writing past the end of app[].
                if (i >= kAppFieldCount) {
                    break;
                }
                app[i] = v;
                i++;
            }
        }

        // Create the gauge family for this metric name on first sight and
        // cache it; later updates reuse the cached family.
        auto it = _gauge_family_map.find(metrics_name);
        if (it == _gauge_family_map.end()) {
            auto &add_gauge_family = prometheus::BuildGauge()
                                         .Name(metrics_name)
                                         .Labels({{"service", "pegasus"},
                                                  {"cluster", _cluster_name},
                                                  {"pegasus_job", _app_name},
                                                  {"port", std::to_string(_local_port)}})
                                         .Register(*_registry);
            it = _gauge_family_map.emplace(metrics_name, &add_gauge_family).first;
        }

        auto &second_gauge = it->second->Add(
            {{"app_id", app[0]}, {"partition_count", app[1]}, {"percent", app[2]}});
        second_gauge.Set(cs.value);
    });

    // Report data to the pushgateway. Push() returns the HTTP status of the
    // request (a non-2xx value signals failure), so surface failures in the
    // log instead of dropping them silently.
    const int push_status = _gateway->Push();
    if (push_status < 200 || push_status >= 300) {
        ddebug("push metrics to prometheus gateway(%s:%d) failed, status = %d",
               _prometheus_host.c_str(),
               _prometheus_port,
               push_status);
    }
}

ddebug("update now_ms(%lld), last_report_time_ms(%lld)", now, _last_report_time_ms);
_last_report_time_ms = now;
}
Expand Down
13 changes: 12 additions & 1 deletion src/reporter/pegasus_counter_reporter.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#include <event2/http_struct.h>
#include <event2/keyvalq_struct.h>

#include <prometheus/registry.h>
#include <prometheus/gateway.h>
namespace pegasus {
namespace server {

Expand Down Expand Up @@ -42,6 +44,7 @@ class pegasus_counter_reporter : public ::dsn::utils::singleton<pegasus_counter_

private:
void falcon_initialize();
void prometheus_initialize();

void update_counters_to_falcon(const std::string &result, int64_t timestamp);

Expand Down Expand Up @@ -69,12 +72,20 @@ class pegasus_counter_reporter : public ::dsn::utils::singleton<pegasus_counter_
// perf counter flags
bool _enable_logging;
bool _enable_falcon;
bool _enable_prometheus;

// falcon related
// falcon related
std::string _falcon_host;
uint16_t _falcon_port;
std::string _falcon_path;
falcon_metric _falcon_metric;

// prometheus related
std::string _prometheus_host;
uint16_t _prometheus_port;
std::shared_ptr<prometheus::Registry> _registry;
std::shared_ptr<prometheus::Gateway> _gateway;
std::map<std::string, prometheus::Family<prometheus::Gauge> *> _gauge_family_map;
};
}
} // namespace
4 changes: 4 additions & 0 deletions src/server/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ set(MY_PROJ_LIBS
dsn.failure_detector.multimaster
dsn.replication.zookeeper_provider
pegasus_reporter
prometheus-cpp-pull
prometheus-cpp-push
prometheus-cpp-core
curl
pegasus_client_static
zookeeper_mt
event
Expand Down
5 changes: 5 additions & 0 deletions src/server/config-server.ini
Original file line number Diff line number Diff line change
Expand Up @@ -269,13 +269,18 @@ perf_counter_cluster_name = onebox
perf_counter_update_interval_seconds = 10
perf_counter_enable_logging = false
perf_counter_enable_falcon = false
perf_counter_enable_prometheus = false
perf_counter_read_capacity_unit_size = 4096
perf_counter_write_capacity_unit_size = 4096

falcon_host = 127.0.0.1
falcon_port = 1988
falcon_path = /v1/push


prometheus_host = 127.0.0.1
prometheus_port = 9091

[pegasus.collector]
cluster = onebox

Expand Down
4 changes: 4 additions & 0 deletions src/server/config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -283,13 +283,17 @@
perf_counter_update_interval_seconds = 10
perf_counter_enable_logging = false
perf_counter_enable_falcon = false
perf_counter_enable_prometheus = false
perf_counter_read_capacity_unit_size = 4096
perf_counter_write_capacity_unit_size = 4096

falcon_host = 127.0.0.1
falcon_port = 1988
falcon_path = /v1/push

prometheus_host = 127.0.0.1
prometheus_port = 9091

[pegasus.collector]
cluster = %{cluster.name}

Expand Down
4 changes: 4 additions & 0 deletions src/server/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ set(MY_PROJ_LIBS
PocoJSON
crypto
fmt
prometheus-cpp-push
prometheus-cpp-core
prometheus-cpp-pull
curl
${ROCKSDB_LINK_LIBRARIES}
pegasus.base
gtest
Expand Down