Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add scrape state metrics #1900

Merged
merged 31 commits into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
7bcec59
feat: add scrape state metrics
catdogpandas Nov 19, 2024
0cd4d62
feat: update
catdogpandas Nov 20, 2024
a15a57c
update
catdogpandas Nov 20, 2024
ca53f80
update
catdogpandas Nov 20, 2024
afe17fc
update
catdogpandas Nov 20, 2024
a59b189
update
catdogpandas Nov 20, 2024
12773ce
update
catdogpandas Nov 20, 2024
59efed5
chore: update ut
catdogpandas Nov 20, 2024
e2c84d6
chore: add ut
catdogpandas Nov 20, 2024
0c6b002
update
catdogpandas Nov 25, 2024
acfe0b2
update
catdogpandas Nov 25, 2024
1d42ba6
feat: change to autometric
catdogpandas Nov 28, 2024
a5586d9
update
catdogpandas Nov 28, 2024
0eff5b3
update
catdogpandas Nov 28, 2024
562a1a4
update
catdogpandas Nov 28, 2024
d9e6b7a
chore: add enable_scrape_state
catdogpandas Nov 28, 2024
0028a72
update
catdogpandas Nov 29, 2024
f06b4e0
Merge branch 'main' into feat/prom-curl-err-msg
catdogpandas Nov 29, 2024
8fd8b12
feat: refactor scrape_state
catdogpandas Nov 29, 2024
81a1d8e
update
catdogpandas Nov 29, 2024
a4d011a
update
catdogpandas Nov 29, 2024
0b16b06
update
catdogpandas Nov 29, 2024
1db2b6a
update
catdogpandas Nov 29, 2024
04049ae
chore: remove enable_scrape_state
catdogpandas Dec 2, 2024
e4a9fa1
chore: add ut
catdogpandas Dec 2, 2024
c0e1ed8
Merge branch 'main' into feat/prom-curl-err-msg
catdogpandas Dec 2, 2024
3bca53b
chore: add HttpCodeToState ERR_HTTP_xxx
catdogpandas Dec 3, 2024
448719f
update
catdogpandas Dec 3, 2024
6e35124
chore: update code style
catdogpandas Dec 3, 2024
0d7b61b
Merge branch 'main' into feat/prom-curl-err-msg
catdogpandas Dec 3, 2024
3fc9223
Merge branch 'main' into feat/prom-curl-err-msg
catdogpandas Dec 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions core/common/http/AsynCurlRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ bool AsynCurlRunner::AddRequestToClient(unique_ptr<AsynHttpRequest>&& request) {

if (curl == nullptr) {
LOG_ERROR(sLogger, ("failed to send request", "failed to init curl handler")("request address", request.get()));
request->mResponse.SetNetworkStatus(CURLE_FAILED_INIT);
request->OnSendDone(request->mResponse);
return false;
}
Expand All @@ -106,6 +107,7 @@ bool AsynCurlRunner::AddRequestToClient(unique_ptr<AsynHttpRequest>&& request) {
LOG_ERROR(sLogger,
("failed to send request", "failed to add the easy curl handle to multi_handle")(
"errMsg", curl_multi_strerror(res))("request address", request.get()));
request->mResponse.SetNetworkStatus(CURLE_FAILED_INIT);
request->OnSendDone(request->mResponse);
curl_easy_cleanup(curl);
return false;
Expand Down Expand Up @@ -190,6 +192,7 @@ void AsynCurlRunner::HandleCompletedRequests(int& runningHandlers) {
case CURLE_OK: {
long statusCode = 0;
curl_easy_getinfo(handler, CURLINFO_RESPONSE_CODE, &statusCode);
request->mResponse.SetNetworkStatus(CURLE_OK);
request->mResponse.SetStatusCode(statusCode);
request->OnSendDone(request->mResponse);
LOG_DEBUG(sLogger,
Expand All @@ -214,6 +217,7 @@ void AsynCurlRunner::HandleCompletedRequests(int& runningHandlers) {
++runningHandlers;
requestReused = true;
} else {
request->mResponse.SetNetworkStatus(msg->data.result);
request->OnSendDone(request->mResponse);
LOG_DEBUG(
sLogger,
Expand Down
74 changes: 74 additions & 0 deletions core/common/http/HttpResponse.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@

#pragma once

#include <curl/curl.h>

#include <cstdint>
#include <functional>
#include <map>
Expand All @@ -28,6 +30,24 @@ namespace logtail {

struct CurlTLS;

// Coarse, transport-level outcome of an HTTP request.
// HttpResponse::SetNetworkStatus() maps libcurl's CURLcode values onto these
// categories. This is intentionally left as an unscoped enum with the original
// enumerator order: existing code relies on the implicit conversion to an
// integer and on Ok being 0.
enum NetworkCode {
    Ok = 0, // no transport-level error
    ConnectionFailed,
    RemoteAccessDenied,
    SSLConnectError,
    SSLCertError,
    SSLOtherProblem,
    SendDataFailed,
    RecvDataFailed,
    TIMEOUT, // the operation timed out
    Other // any error that does not fall into one of the categories above
};

struct NetWorkStatus {
catdogpandas marked this conversation as resolved.
Show resolved Hide resolved
NetworkCode mCode = NetworkCode::Ok;
std::string mMessage;
};

bool caseInsensitiveComp(const char lhs, const char rhs);

bool compareHeader(const std::string& lhs, const std::string& rhs);
Expand Down Expand Up @@ -76,8 +96,62 @@ class HttpResponse {

void SetStatusCode(int32_t code) { mStatusCode = code; }

// Stores the outcome of a curl transfer on this response: the textual
// description reported by libcurl and the coarse NetworkCode category the
// CURLcode belongs to. Codes without a dedicated category become Other.
void SetNetworkStatus(CURLcode code) {
    // please refer to https://curl.se/libcurl/c/libcurl-errors.html
    const auto classify = [](CURLcode curlCode) -> NetworkCode {
        switch (curlCode) {
            case CURLE_OK:
                return NetworkCode::Ok;

            case CURLE_COULDNT_CONNECT:
                return NetworkCode::ConnectionFailed;

            case CURLE_LOGIN_DENIED:
            case CURLE_REMOTE_ACCESS_DENIED:
                return NetworkCode::RemoteAccessDenied;

            case CURLE_OPERATION_TIMEDOUT:
                return NetworkCode::TIMEOUT;

            case CURLE_SSL_CONNECT_ERROR:
                return NetworkCode::SSLConnectError;

            case CURLE_SSL_CERTPROBLEM:
            case CURLE_SSL_CACERT:
                return NetworkCode::SSLCertError;

            case CURLE_SEND_ERROR:
            case CURLE_SEND_FAIL_REWIND:
                return NetworkCode::SendDataFailed;

            case CURLE_RECV_ERROR:
                return NetworkCode::RecvDataFailed;

            // Remaining SSL-related failures share one bucket.
            case CURLE_SSL_PINNEDPUBKEYNOTMATCH:
            case CURLE_SSL_INVALIDCERTSTATUS:
            case CURLE_SSL_CACERT_BADFILE:
            case CURLE_SSL_CIPHER:
            case CURLE_SSL_ENGINE_NOTFOUND:
            case CURLE_SSL_ENGINE_SETFAILED:
            case CURLE_USE_SSL_FAILED:
            case CURLE_SSL_ENGINE_INITFAILED:
            case CURLE_SSL_CRL_BADFILE:
            case CURLE_SSL_ISSUER_ERROR:
            case CURLE_SSL_SHUTDOWN_FAILED:
                return NetworkCode::SSLOtherProblem;

            case CURLE_FAILED_INIT:
            default:
                return NetworkCode::Other;
        }
    };

    mNetworkStatus.mCode = classify(code);
    mNetworkStatus.mMessage = curl_easy_strerror(code);
}

// Returns a copy of the transport-level status recorded by SetNetworkStatus().
// Declared const so it can also be called on a const HttpResponse; the return
// type is unchanged, so existing callers are unaffected.
NetWorkStatus GetNetworkStatus() const { return mNetworkStatus; }
catdogpandas marked this conversation as resolved.
Show resolved Hide resolved

private:
int32_t mStatusCode = 0; // 0 means no response from server
NetWorkStatus mNetworkStatus; // 0 means no error
std::map<std::string, std::string, decltype(compareHeader)*> mHeader;
std::unique_ptr<void, std::function<void(void*)>> mBody;
size_t (*mWriteCallback)(char*, size_t, size_t, void*) = nullptr;
Expand Down
1 change: 1 addition & 0 deletions core/models/PipelineEventGroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ enum class EventGroupMetaKey {
CONTAINER_IMAGE_NAME,
CONTAINER_IMAGE_ID,

PROMETHEUS_SCRAPE_STATE,
PROMETHEUS_SCRAPE_DURATION,
PROMETHEUS_SCRAPE_RESPONSE_SIZE,
PROMETHEUS_SAMPLES_SCRAPED,
Expand Down
10 changes: 10 additions & 0 deletions core/plugin/processor/inner/ProcessorPromRelabelMetricNative.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@
#include "prometheus/Constants.h"

using namespace std;

DECLARE_FLAG_STRING(_pod_name_);
DEFINE_FLAG_BOOL(enable_scrape_state, "enable scrape_state auto metric", true);
catdogpandas marked this conversation as resolved.
Show resolved Hide resolved

namespace logtail {

const string ProcessorPromRelabelMetricNative::sName = "processor_prom_relabel_metric_native";
Expand Down Expand Up @@ -192,6 +195,13 @@ void ProcessorPromRelabelMetricNative::AddAutoMetrics(PipelineEventGroup& metric
// up metric must be the last one
bool upState = StringTo<bool>(metricGroup.GetMetadata(EventGroupMetaKey::PROMETHEUS_UP_STATE).to_string());

if (BOOL_FLAG(enable_scrape_state) && metricGroup.HasMetadata(EventGroupMetaKey::PROMETHEUS_SCRAPE_STATE)) {
auto scrapeState = metricGroup.GetMetadata(EventGroupMetaKey::PROMETHEUS_SCRAPE_STATE);
AddMetric(metricGroup, prometheus::SCRAPE_STATE, 1.0 * upState, timestamp, nanoSec, targetTags);
auto& last = metricGroup.MutableEvents()[metricGroup.GetEvents().size() - 1];
last.Cast<MetricEvent>().SetTag(METRIC_LABEL_KEY_STATUS, scrapeState);
}

AddMetric(metricGroup, prometheus::UP, 1.0 * upState, timestamp, nanoSec, targetTags);
}

Expand Down
1 change: 1 addition & 0 deletions core/prometheus/Constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ const char* const PARAM_LABEL_NAME = "__param_";
const char* const LABELS = "labels";

// auto metrics
const char* const SCRAPE_STATE = "scrape_state";
const char* const SCRAPE_DURATION_SECONDS = "scrape_duration_seconds";
const char* const SCRAPE_RESPONSE_SIZE_BYTES = "scrape_response_size_bytes";
const char* const SCRAPE_SAMPLES_LIMIT = "scrape_samples_limit";
Expand Down
23 changes: 23 additions & 0 deletions core/prometheus/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <iomanip>

#include "common/StringTools.h"
#include "http/HttpResponse.h"
#include "models/StringView.h"

using namespace std;
Expand Down Expand Up @@ -148,4 +149,26 @@ uint64_t GetRandSleepMilliSec(const std::string& key, uint64_t intervalSeconds,
randSleep -= sleepOffset;
return randSleep;
}

namespace prom {

// Converts a NetworkCode into the label string used for the scrape state.
// Returns "OK" for NetworkCode::Ok, an "ERR_*" label for each known failure
// category, and "ERR_UNKNOWN" for NetworkCode::Other or any value outside the
// enumeration.
//
// Implemented as a switch rather than a function-local static map: the
// previous version looked the key up twice (count + operator[]) and used the
// non-const operator[] on state shared by every caller.
std::string NetworkCodeToString(NetworkCode code) {
    switch (code) {
        case NetworkCode::Ok:
            return "OK";
        case NetworkCode::ConnectionFailed:
            return "ERR_CONN_FAILED";
        case NetworkCode::RemoteAccessDenied:
            return "ERR_ACCESS_DENIED";
        case NetworkCode::TIMEOUT:
            return "ERR_TIMEOUT";
        case NetworkCode::SSLConnectError:
            return "ERR_SSL_CONN_ERR";
        case NetworkCode::SSLCertError:
            return "ERR_SSL_CERT_ERR";
        case NetworkCode::SSLOtherProblem:
            return "ERR_SSL_OTHER_PROBLEM";
        case NetworkCode::SendDataFailed:
            return "ERR_SEND_DATA_FAILED";
        case NetworkCode::RecvDataFailed:
            return "ERR_RECV_DATA_FAILED";
        case NetworkCode::Other:
            return "ERR_UNKNOWN";
    }
    // Reached only for values that are not named enumerators.
    return "ERR_UNKNOWN";
}

} // namespace prom
} // namespace logtail
6 changes: 6 additions & 0 deletions core/prometheus/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include <string>

#include "common/http/HttpResponse.h"
#include "models/StringView.h"

namespace logtail {
Expand All @@ -19,4 +20,9 @@ void SplitStringView(const std::string& s, char delimiter, std::vector<StringVie
bool IsNumber(const std::string& str);

uint64_t GetRandSleepMilliSec(const std::string& key, uint64_t intervalSeconds, uint64_t currentMilliSeconds);

namespace prom {
std::string NetworkCodeToString(NetworkCode code);
}

} // namespace logtail
21 changes: 16 additions & 5 deletions core/prometheus/schedulers/ScrapeScheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,17 @@ void ScrapeScheduler::OnMetricResult(HttpResponse& response, uint64_t timestampM
response.GetStatusCode(),
GetCurrentTimeInMilliSeconds() - timestampMilliSec);

auto networkStatus = response.GetNetworkStatus();
if (networkStatus.mCode != NetworkCode::Ok) {
// not 0 means curl error
mScrapeState = prom::NetworkCodeToString(networkStatus.mCode);
} else if (response.GetStatusCode() != 200) {
catdogpandas marked this conversation as resolved.
Show resolved Hide resolved
mScrapeState = ToString(response.GetStatusCode());
catdogpandas marked this conversation as resolved.
Show resolved Hide resolved
} else {
// 0 means success
mScrapeState = prom::NetworkCodeToString(NetworkCode::Ok);
}

mScrapeTimestampMilliSec = timestampMilliSec;
mScrapeDurationSeconds = 1.0 * (GetCurrentTimeInMilliSeconds() - timestampMilliSec) / 1000;
mScrapeResponseSizeBytes = responseBody.mRawSize;
Expand All @@ -118,6 +129,7 @@ void ScrapeScheduler::OnMetricResult(HttpResponse& response, uint64_t timestampM
}

void ScrapeScheduler::SetAutoMetricMeta(PipelineEventGroup& eGroup) {
eGroup.SetMetadata(EventGroupMetaKey::PROMETHEUS_SCRAPE_STATE, mScrapeState);
eGroup.SetMetadata(EventGroupMetaKey::PROMETHEUS_SCRAPE_TIMESTAMP_MILLISEC, ToString(mScrapeTimestampMilliSec));
eGroup.SetMetadata(EventGroupMetaKey::PROMETHEUS_SCRAPE_DURATION, ToString(mScrapeDurationSeconds));
eGroup.SetMetadata(EventGroupMetaKey::PROMETHEUS_SCRAPE_RESPONSE_SIZE, ToString(mScrapeResponseSizeBytes));
Expand Down Expand Up @@ -241,11 +253,10 @@ void ScrapeScheduler::InitSelfMonitor(const MetricLabels& defaultLabels) {
MetricLabels labels = defaultLabels;
labels.emplace_back(METRIC_LABEL_KEY_INSTANCE, mInstance);

static const std::unordered_map<std::string, MetricType> sScrapeMetricKeys = {
{METRIC_PLUGIN_OUT_EVENTS_TOTAL, MetricType::METRIC_TYPE_COUNTER},
{METRIC_PLUGIN_OUT_SIZE_BYTES, MetricType::METRIC_TYPE_COUNTER},
{METRIC_PLUGIN_PROM_SCRAPE_TIME_MS, MetricType::METRIC_TYPE_COUNTER},
};
static const std::unordered_map<std::string, MetricType> sScrapeMetricKeys
= {{METRIC_PLUGIN_OUT_EVENTS_TOTAL, MetricType::METRIC_TYPE_COUNTER},
{METRIC_PLUGIN_OUT_SIZE_BYTES, MetricType::METRIC_TYPE_COUNTER},
{METRIC_PLUGIN_PROM_SCRAPE_TIME_MS, MetricType::METRIC_TYPE_COUNTER}};

mSelfMonitor->InitMetricManager(sScrapeMetricKeys, labels);

Expand Down
1 change: 1 addition & 0 deletions core/prometheus/schedulers/ScrapeScheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ class ScrapeScheduler : public BaseScheduler {
size_t mInputIndex;

// auto metrics
std::string mScrapeState;
uint64_t mScrapeTimestampMilliSec = 0;
double mScrapeDurationSeconds = 0;
uint64_t mScrapeResponseSizeBytes = 0;
Expand Down
Loading