Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add scrape state metrics #1900

Open
wants to merge 23 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions core/common/http/AsynCurlRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ bool AsynCurlRunner::AddRequestToClient(unique_ptr<AsynHttpRequest>&& request) {
AppConfig::GetInstance()->GetBindInterface());
if (curl == nullptr) {
LOG_ERROR(sLogger, ("failed to send request", "failed to init curl handler")("request address", request.get()));
request->mResponse.SetCurlCode(CURL_LAST);
catdogpandas marked this conversation as resolved.
Show resolved Hide resolved
request->OnSendDone(request->mResponse);
return false;
}
Expand All @@ -103,6 +104,7 @@ bool AsynCurlRunner::AddRequestToClient(unique_ptr<AsynHttpRequest>&& request) {
LOG_ERROR(sLogger,
("failed to send request", "failed to add the easy curl handle to multi_handle")(
"errMsg", curl_multi_strerror(res))("request address", request.get()));
request->mResponse.SetCurlCode(CURL_LAST);
request->OnSendDone(request->mResponse);
curl_easy_cleanup(curl);
return false;
Expand Down Expand Up @@ -213,6 +215,7 @@ void AsynCurlRunner::HandleCompletedRequests(int& runningHandlers) {
++runningHandlers;
requestReused = true;
} else {
request->mResponse.SetCurlCode(msg->data.result);
request->OnSendDone(request->mResponse);
LOG_DEBUG(
sLogger,
Expand Down
3 changes: 3 additions & 0 deletions core/common/http/HttpResponse.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,12 @@ class HttpResponse {
}

void SetStatusCode(int32_t code) { mStatusCode = code; }
void SetCurlCode(int32_t code) { mCurlCode = code; }
int32_t GetCurlCode() { return mCurlCode; }
catdogpandas marked this conversation as resolved.
Show resolved Hide resolved

private:
int32_t mStatusCode = 0; // 0 means no response from server
int32_t mCurlCode = 0; // 0 means no error
std::map<std::string, std::string, decltype(compareHeader)*> mHeader;
std::unique_ptr<void, std::function<void(void*)>> mBody;
size_t (*mWriteCallback)(char*, size_t, size_t, void*) = nullptr;
Expand Down
1 change: 1 addition & 0 deletions core/monitor/metric_constants/MetricConstants.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ extern const std::string METRIC_PLUGIN_PROM_SUBSCRIBE_TARGETS;
extern const std::string METRIC_PLUGIN_PROM_SUBSCRIBE_TOTAL;
extern const std::string METRIC_PLUGIN_PROM_SUBSCRIBE_TIME_MS;
extern const std::string METRIC_PLUGIN_PROM_SCRAPE_TIME_MS;
extern const std::string METRIC_PLUGIN_PROM_SCRAPE_STATE;
extern const std::string METRIC_PLUGIN_PROM_SCRAPE_DELAY_TOTAL;

/**********************************************************
Expand Down
1 change: 1 addition & 0 deletions core/monitor/metric_constants/PluginMetrics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ const std::string METRIC_PLUGIN_PROM_SUBSCRIBE_TARGETS = "prom_subscribe_targets
const std::string METRIC_PLUGIN_PROM_SUBSCRIBE_TOTAL = "prom_subscribe_total";
const std::string METRIC_PLUGIN_PROM_SUBSCRIBE_TIME_MS = "prom_subscribe_time_ms";
const std::string METRIC_PLUGIN_PROM_SCRAPE_TIME_MS = "prom_scrape_time_ms";
const std::string METRIC_PLUGIN_PROM_SCRAPE_STATE = "prom_scrape_state";
catdogpandas marked this conversation as resolved.
Show resolved Hide resolved
const std::string METRIC_PLUGIN_PROM_SCRAPE_DELAY_TOTAL = "prom_scrape_delay_total";

/**********************************************************
Expand Down
149 changes: 127 additions & 22 deletions core/prometheus/PromSelfMonitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,30 @@
#include <string>
#include <unordered_map>

#include "common/StringTools.h"
#include "monitor/MetricTypes.h"
#include "monitor/metric_constants/MetricConstants.h"
using namespace std;

namespace logtail {

void PromSelfMonitorUnsafe::InitMetricManager(const std::unordered_map<std::string, MetricType>& metricKeys,
const MetricLabels& labels) {
const MetricLabels& labels) {
auto metricLabels = std::make_shared<MetricLabels>(labels);
mPluginMetricManagerPtr = std::make_shared<PluginMetricManager>(metricLabels, metricKeys, MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE);
mPluginMetricManagerPtr = std::make_shared<PluginMetricManager>(
metricLabels, metricKeys, MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE);
}

void PromSelfMonitorUnsafe::AddCounter(const std::string& metricName, uint64_t statusCode, uint64_t val) {
auto& status = StatusToString(statusCode);
auto status = StatusToString(statusCode);
if (!mMetricsCounterMap.count(metricName) || !mMetricsCounterMap[metricName].count(status)) {
mMetricsCounterMap[metricName][status] = GetOrCreateReentrantMetricsRecordRef(status)->GetCounter(metricName);
}
mMetricsCounterMap[metricName][status]->Add(val);
}

void PromSelfMonitorUnsafe::SetIntGauge(const std::string& metricName, uint64_t statusCode, uint64_t value) {
auto& status = StatusToString(statusCode);
auto status = StatusToString(statusCode);
if (!mMetricsIntGaugeMap.count(metricName) || !mMetricsIntGaugeMap[metricName].count(status)) {
mMetricsIntGaugeMap[metricName][status] = GetOrCreateReentrantMetricsRecordRef(status)->GetIntGauge(metricName);
}
Expand All @@ -43,29 +45,132 @@ ReentrantMetricsRecordRef PromSelfMonitorUnsafe::GetOrCreateReentrantMetricsReco
return mPromStatusMap[status];
}

std::string& PromSelfMonitorUnsafe::StatusToString(uint64_t status) {
static string sHttp0XX = "0XX";
static string sHttp1XX = "1XX";
static string sHttp2XX = "2XX";
static string sHttp3XX = "3XX";
static string sHttp4XX = "4XX";
static string sHttp5XX = "5XX";
std::string PromSelfMonitorUnsafe::StatusToString(uint64_t status) {
static string sHttpOther = "other";
if (status < 100) {
return sHttp0XX;
} else if (status < 200) {
return sHttp1XX;
} else if (status < 300) {
return sHttp2XX;
} else if (status < 400) {
return sHttp3XX;
} else if (status < 500) {
return sHttp4XX;
} else if (status < 500) {
return sHttp5XX;
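// status values below 100 are interpreted as curl error codes.
// NOTE(review): this assumes every curl error code is below 100. Newer libcurl releases define
// codes >= 100, and CURL_LAST (reported by AsynCurlRunner when request setup fails) grows with
// them; such values would be labelled like an HTTP status here. Confirm against the linked libcurl.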
return CurlCodeToString(status);
} else if (status < 600) {
return ToString(status);
} else {
return sHttpOther;
}
}

// Translates a curl result code into a short textual label.
//
// Only the codes listed in the table below have a dedicated label; any other
// code is reported as "ERR_UNKNOWN".
//
// @param code  numeric curl result code (0 means no error).
// @return      a copy of the label for `code`, or "ERR_UNKNOWN" if it is not in the table.
std::string PromSelfMonitorUnsafe::CurlCodeToString(uint64_t code) {
    // Immutable table: declared const so a lookup can never insert into it.
    static const std::map<uint64_t, std::string> sCurlCodeMap = {{0, "OK"},
                                                                 {7, "ERR_CONN_REFUSED"},
                                                                 {9, "ERR_ACCESS_DENIED"},
                                                                 {28, "ERR_TIMEOUT"},
                                                                 {35, "ERR_SSL_CONN_ERR"},
                                                                 {51, "ERR_SSL_CERT_ERR"},
                                                                 {52, "ERR_SERVER_RESPONSE_NONE"},
                                                                 {55, "ERR_SEND_DATA_FAILED"},
                                                                 {56, "ERR_RECV_DATA_FAILED"}};
    static const std::string sCurlOther = "ERR_UNKNOWN";

    // One lookup with find() instead of count() followed by operator[].
    const auto it = sCurlCodeMap.find(code);
    return it != sCurlCodeMap.end() ? it->second : sCurlOther;
}

// curl error codes with a dedicated label in CurlCodeToString:
// 7 Couldn't connect to server
// 9 Access denied to remote resource
// 28 Timeout was reached
// 35 SSL connect error
// 51 SSL peer certificate or SSH remote key was not OK
// 52 Server returned nothing (no headers, no data)
// 55 Failed sending data to the peer
// 56 Failure when receiving data from the peer

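// curl error codes without a dedicated error label (reported as ERR_UNKNOWN; the exception is 0, which is mapped to OK):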
// 0 No error
// 1 Unsupported protocol
// 2 Failed initialization
// 3 URL using bad/illegal format or missing URL
// 4 A requested feature, protocol or option was not found built-in in this libcurl due to a build-time decision.
// 5 Couldn't resolve proxy name
// 6 Couldn't resolve host name
// 8 Weird server reply
// 10 FTP: The server failed to connect to data port
// 11 FTP: unknown PASS reply
// 12 FTP: Accepting server connect has timed out
// 13 FTP: unknown PASV reply
// 14 FTP: unknown 227 response format
// 15 FTP: can't figure out the host in the PASV response
// 16 Error in the HTTP2 framing layer
// 17 FTP: couldn't set file type
// 18 Transferred a partial file
// 19 FTP: couldn't retrieve (RETR failed) the specified file
// 20 Unknown error
// 21 Quote command returned error
// 22 HTTP response code said error
// 23 Failed writing received data to disk/application
// 24 Unknown error
// 25 Upload failed (at start/before it took off)
// 26 Failed to open/read local data from file/application
// 27 Out of memory
// 29 Unknown error
// 30 FTP: command PORT failed
// 31 FTP: command REST failed
// 32 Unknown error
// 33 Requested range was not delivered by the server
// 34 Internal problem setting up the POST
// 36 Couldn't resume download
// 37 Couldn't read a file:// file
// 38 LDAP: cannot bind
// 39 LDAP: search failed
// 40 Unknown error
// 41 A required function in the library was not found
// 42 Operation was aborted by an application callback
// 43 A libcurl function was given a bad argument
// 44 Unknown error
// 45 Failed binding local connection end
// 46 Unknown error
// 47 Number of redirects hit maximum amount
// 48 An unknown option was passed in to libcurl
// 49 Malformed telnet option
// 50 Unknown error
// 53 SSL crypto engine not found
// 54 Can not set SSL crypto engine as default
// 57 Unknown error
// 58 Problem with the local SSL certificate
// 59 Couldn't use specified SSL cipher
// 60 Peer certificate cannot be authenticated with given CA certificates
// 61 Unrecognized or bad HTTP Content or Transfer-Encoding
// 62 Invalid LDAP URL
// 63 Maximum file size exceeded
// 64 Requested SSL level failed
// 65 Send failed since rewinding of the data stream failed
// 66 Failed to initialise SSL crypto engine
// 67 Login denied
// 68 TFTP: File Not Found
// 69 TFTP: Access Violation
// 70 Disk full or allocation exceeded
// 71 TFTP: Illegal operation
// 72 TFTP: Unknown transfer ID
// 73 Remote file already exists
// 74 TFTP: No such user
// 75 Conversion failed
// 76 Caller must register CURLOPT_CONV_ callback options
// 77 Problem with the SSL CA cert (path? access rights?)
// 78 Remote file not found
// 79 Error in the SSH layer
// 80 Failed to shut down the SSL connection
// 81 Socket not ready for send/recv
// 82 Failed to load CRL file (path? access rights?, format?)
// 83 Issuer check against peer certificate failed
// 84 FTP: The server did not accept the PRET command.
// 85 RTSP CSeq mismatch or invalid CSeq
// 86 RTSP session error
// 87 Unable to parse FTP file list
// 88 Chunk callback failed
// 89 The max connection limit is reached
// 90 SSL public key does not match pinned public key
// 91 SSL server certificate status verification FAILED
// 92 Stream error in the HTTP/2 framing layer
// 93 API function called from within callback
// 94 Unknown error

} // namespace logtail
3 changes: 2 additions & 1 deletion core/prometheus/PromSelfMonitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ class PromSelfMonitorUnsafe {

private:
ReentrantMetricsRecordRef GetOrCreateReentrantMetricsRecordRef(const std::string& status);
std::string& StatusToString(uint64_t status);
std::string StatusToString(uint64_t status);
std::string CurlCodeToString(uint64_t code);

PluginMetricManagerPtr mPluginMetricManagerPtr;
std::map<std::string, ReentrantMetricsRecordRef> mPromStatusMap;
Expand Down
22 changes: 16 additions & 6 deletions core/prometheus/schedulers/ScrapeScheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,15 @@ void ScrapeScheduler::OnMetricResult(HttpResponse& response, uint64_t timestampM
mSelfMonitor->AddCounter(METRIC_PLUGIN_PROM_SCRAPE_TIME_MS,
response.GetStatusCode(),
GetCurrentTimeInMilliSeconds() - timestampMilliSec);
if (response.GetCurlCode() != 0) {
// not 0 means curl error
mSelfMonitor->AddCounter(METRIC_PLUGIN_PROM_SCRAPE_STATE, response.GetCurlCode());
} else if (response.GetStatusCode() != 200) {
catdogpandas marked this conversation as resolved.
Show resolved Hide resolved
mSelfMonitor->AddCounter(METRIC_PLUGIN_PROM_SCRAPE_STATE, response.GetStatusCode());
} else {
// 0 means success
mSelfMonitor->AddCounter(METRIC_PLUGIN_PROM_SCRAPE_STATE, 0);
}

mScrapeTimestampMilliSec = timestampMilliSec;
mScrapeDurationSeconds = 1.0 * (GetCurrentTimeInMilliSeconds() - timestampMilliSec) / 1000;
Expand Down Expand Up @@ -247,15 +256,16 @@ void ScrapeScheduler::InitSelfMonitor(const MetricLabels& defaultLabels) {
MetricLabels labels = defaultLabels;
labels.emplace_back(METRIC_LABEL_KEY_INSTANCE, mInstance);

static const std::unordered_map<std::string, MetricType> sScrapeMetricKeys = {
{METRIC_PLUGIN_OUT_EVENTS_TOTAL, MetricType::METRIC_TYPE_COUNTER},
{METRIC_PLUGIN_OUT_SIZE_BYTES, MetricType::METRIC_TYPE_COUNTER},
{METRIC_PLUGIN_PROM_SCRAPE_TIME_MS, MetricType::METRIC_TYPE_COUNTER},
};
static const std::unordered_map<std::string, MetricType> sScrapeMetricKeys
= {{METRIC_PLUGIN_OUT_EVENTS_TOTAL, MetricType::METRIC_TYPE_COUNTER},
{METRIC_PLUGIN_OUT_SIZE_BYTES, MetricType::METRIC_TYPE_COUNTER},
{METRIC_PLUGIN_PROM_SCRAPE_TIME_MS, MetricType::METRIC_TYPE_COUNTER},
{METRIC_PLUGIN_PROM_SCRAPE_STATE, MetricType::METRIC_TYPE_COUNTER}};

mSelfMonitor->InitMetricManager(sScrapeMetricKeys, labels);

WriteMetrics::GetInstance()->PrepareMetricsRecordRef(mMetricsRecordRef, MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE, std::move(labels));
WriteMetrics::GetInstance()->PrepareMetricsRecordRef(
mMetricsRecordRef, MetricCategory::METRIC_CATEGORY_PLUGIN_SOURCE, std::move(labels));
mPromDelayTotal = mMetricsRecordRef.CreateCounter(METRIC_PLUGIN_PROM_SCRAPE_DELAY_TOTAL);
mPluginTotalDelayMs = mMetricsRecordRef.CreateCounter(METRIC_PLUGIN_TOTAL_DELAY_MS);
}
Expand Down
69 changes: 67 additions & 2 deletions core/unittest/prometheus/PromSelfMonitorUnittest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ class PromSelfMonitorUnittest : public ::testing::Test {
public:
void TestCounterAdd();
void TestIntGaugeSet();
void TestCurlCode();
};

void PromSelfMonitorUnittest::TestCounterAdd() {
Expand All @@ -22,7 +23,7 @@ void PromSelfMonitorUnittest::TestCounterAdd() {
selfMonitor->AddCounter(METRIC_PLUGIN_PROM_SUBSCRIBE_TOTAL, 200, 999);

// check result
auto metric = selfMonitor->mPromStatusMap["2XX"]->GetCounter(METRIC_PLUGIN_PROM_SUBSCRIBE_TOTAL);
auto metric = selfMonitor->mPromStatusMap["200"]->GetCounter(METRIC_PLUGIN_PROM_SUBSCRIBE_TOTAL);
APSARA_TEST_EQUAL("prom_subscribe_total", metric->GetName());
APSARA_TEST_EQUAL(999ULL, metric->GetValue());
selfMonitor->AddCounter(METRIC_PLUGIN_PROM_SUBSCRIBE_TOTAL, 200);
Expand All @@ -40,16 +41,80 @@ void PromSelfMonitorUnittest::TestIntGaugeSet() {
selfMonitor->SetIntGauge(METRIC_PLUGIN_PROM_SUBSCRIBE_TARGETS, 200, 999);

// check result
auto metric = selfMonitor->mPromStatusMap["2XX"]->GetIntGauge(METRIC_PLUGIN_PROM_SUBSCRIBE_TARGETS);
auto metric = selfMonitor->mPromStatusMap["200"]->GetIntGauge(METRIC_PLUGIN_PROM_SUBSCRIBE_TARGETS);
APSARA_TEST_EQUAL("prom_subscribe_targets", metric->GetName());
APSARA_TEST_EQUAL(999ULL, metric->GetValue());
selfMonitor->SetIntGauge(METRIC_PLUGIN_PROM_SUBSCRIBE_TARGETS, 200, 0);
APSARA_TEST_EQUAL(0ULL, metric->GetValue());
}

// Checks which mPromStatusMap key SetIntGauge files a gauge under for a range of
// status values: HTTP-style codes (200, 301), an out-of-range value (678), and
// values below 100 that are labelled as curl codes (0, 35, 88).
void PromSelfMonitorUnittest::TestCurlCode() {
    auto selfMonitor = std::make_shared<PromSelfMonitorUnsafe>();
    std::unordered_map<std::string, MetricType> testMetricKeys = {
        {METRIC_PLUGIN_PROM_SUBSCRIBE_TARGETS, MetricType::METRIC_TYPE_INT_GAUGE},
    };
    selfMonitor->InitMetricManager(testMetricKeys, MetricLabels{});

    // One row per scenario: the status passed in and the map key it is expected to land under.
    struct StatusCase {
        uint64_t status;
        const char* expectedKey;
    };
    const StatusCase cases[] = {
        {200, "200"},
        {301, "301"},
        {678, "other"},
        {0, "OK"},
        {35, "ERR_SSL_CONN_ERR"},
        {88, "ERR_UNKNOWN"},
    };

    // Plain loop in the method body (no lambda) so the test macros keep this function as their
    // enclosing scope, exactly as in the unrolled version.
    for (const auto& c : cases) {
        selfMonitor->SetIntGauge(METRIC_PLUGIN_PROM_SUBSCRIBE_TARGETS, c.status, 999);
        // check result
        auto metric = selfMonitor->mPromStatusMap[c.expectedKey]->GetIntGauge(METRIC_PLUGIN_PROM_SUBSCRIBE_TARGETS);
        APSARA_TEST_EQUAL("prom_subscribe_targets", metric->GetName());
        APSARA_TEST_EQUAL(999ULL, metric->GetValue());
        // Setting the gauge again for the same status must update the same metric object.
        selfMonitor->SetIntGauge(METRIC_PLUGIN_PROM_SUBSCRIBE_TARGETS, c.status, 0);
        APSARA_TEST_EQUAL(0ULL, metric->GetValue());
    }
}


UNIT_TEST_CASE(PromSelfMonitorUnittest, TestCounterAdd)
UNIT_TEST_CASE(PromSelfMonitorUnittest, TestIntGaugeSet)
UNIT_TEST_CASE(PromSelfMonitorUnittest, TestCurlCode)

} // namespace logtail

Expand Down
Loading