Skip to content

Commit

Permalink
lotus metrics (#533)
Browse files Browse the repository at this point in the history
Signed-off-by: turuslan <turuslan.devbox@gmail.com>
  • Loading branch information
turuslan authored Nov 25, 2021
1 parent b285318 commit 132cdf0
Show file tree
Hide file tree
Showing 11 changed files with 279 additions and 6 deletions.
3 changes: 3 additions & 0 deletions cmake/dependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,6 @@ find_package(RapidJSON CONFIG REQUIRED)
# https://thalhammer.it/projects/jwt_cpp
hunter_add_package(jwt-cpp)
find_package(jwt-cpp CONFIG REQUIRED)

hunter_add_package(prometheus-cpp)
find_package(prometheus-cpp CONFIG REQUIRED)
5 changes: 5 additions & 0 deletions core/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ target_link_libraries(outcome
p2p::p2p
)

add_library(prometheus INTERFACE)
target_link_libraries(prometheus INTERFACE
prometheus-cpp::core
)

add_library(logger
logger.cpp
)
Expand Down
23 changes: 23 additions & 0 deletions core/common/prometheus/metrics.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/**
* Copyright Soramitsu Co., Ltd. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0
*/

#pragma once

#include <prometheus/histogram.h>
#include <prometheus/registry.h>

namespace fc {
inline auto &prometheusRegistry() {
static prometheus::Registry x;
return x;
}

constexpr std::initializer_list<double> kDefaultPrometheusMsBuckets{
0.01, 0.05, 0.1, 0.3, 0.6, 0.8, 1, 2, 3, 4, 5,
6, 8, 10, 13, 16, 20, 25, 30, 40, 50, 65,
80, 100, 130, 160, 200, 250, 300, 400, 500, 650, 800,
1000, 2000, 3000, 4000, 5000, 7500, 10000, 20000, 50000, 100000,
};
} // namespace fc
49 changes: 49 additions & 0 deletions core/common/prometheus/rpc.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/**
* Copyright Soramitsu Co., Ltd. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0
*/

#pragma once

#include "api/rpc/rpc.hpp"
#include "common/prometheus/metrics.hpp"
#include "common/prometheus/since.hpp"

namespace fc::api::rpc {
inline auto &metricApiTime() {
static auto &x{prometheus::BuildHistogram()
.Name("lotus_api_request_duration_ms")
.Help("Duration of API requests")
.Register(prometheusRegistry())};
return x;
}

inline Method metricApiTime(std::string name, Method f) {
return [name{std::move(name)}, f{std::move(f)}](
const Value &value,
Respond respond,
MakeChan make_chan,
Send send,
const Permissions &permissions) {
f(
value,
[name{std::move(name)}, respond{std::move(respond)}, since{Since{}}](
auto &&value) {
const auto time{since.ms()};
metricApiTime()
.Add({{"endpoint", name}}, kDefaultPrometheusMsBuckets)
.Observe(time);
respond(std::move(value));
},
std::move(make_chan),
std::move(send),
permissions);
};
}

inline void metricApiTime(Rpc &rpc) {
for (auto &[name, value] : rpc.ms) {
value = metricApiTime(name, value);
}
}
} // namespace fc::api::rpc
23 changes: 23 additions & 0 deletions core/common/prometheus/since.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/**
* Copyright Soramitsu Co., Ltd. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0
*/

#pragma once

#include <chrono>

namespace fc {
struct Since {
using Clock = std::chrono::steady_clock;

Clock::time_point start{Clock::now()};

template <typename T = double>
T ms() const {
return std::chrono::duration_cast<std::chrono::duration<T, std::milli>>(
Clock::now() - start)
.count();
}
};
} // namespace fc
4 changes: 4 additions & 0 deletions core/node/main/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "common/libp2p/peer/peer_info_helper.hpp"
#include "common/libp2p/soralog.hpp"
#include "common/logger.hpp"
#include "common/prometheus/rpc.hpp"
#include "drand/impl/http.hpp"
#include "markets/storage/types.hpp"
#include "node/blocksync_server.hpp"
Expand Down Expand Up @@ -209,6 +210,9 @@ namespace fc {
*node_objects.api,
std::bind(node_objects.api->AuthVerify, std::placeholders::_1))};

metricApiTime(*rpc_v1);
metricApiTime(*rpc);

std::map<std::string, std::shared_ptr<api::Rpc>> rpcs;
rpcs.emplace("/rpc/v0", rpc_v1);
rpcs.emplace("/rpc/v1", rpc);
Expand Down
38 changes: 32 additions & 6 deletions core/node/main/metrics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@

#pragma once

#include <prometheus/text_serializer.h>
#include <libp2p/common/metrics/instance_count.hpp>
#include <sstream>

#include "clock/chain_epoch_clock.hpp"
#include "clock/utc_clock.hpp"
#include "common/fd_usage.hpp"
#include "common/memory_usage.hpp"
#include "common/prometheus/metrics.hpp"
#include "node/events.hpp"
#include "node/main/builder.hpp"
#include "node/sync_job.hpp"
Expand All @@ -27,9 +29,19 @@ namespace fc::node {
}

std::string prometheus() const {
std::stringstream ss;
auto metric{[&](auto &&name, auto &&value) {
ss << name << ' ' << value << std::endl;
auto families{prometheusRegistry().Collect()};
using ::prometheus::MetricType;
auto manual{[&](MetricType type,
std::string name,
std::string help) -> ::prometheus::ClientMetric & {
auto &family{families.emplace_back()};
family.name = std::move(name);
family.help = std::move(help);
family.type = type;
return family.metric.emplace_back();
}};
auto metric{[&](std::string name, double value) {
manual(MetricType::Untyped, std::move(name), "").untyped.value = value;
}};

metric("uptime",
Expand All @@ -52,8 +64,9 @@ namespace fc::node {
metric("height_attached",
std::max(height_head, o.sync_job->metricAttachedHeight()));
metric("height_known", std::max(height_head, height_known.load()));
metric("height_expected",
o.chain_epoch_clock->epochAtTime(o.utc_clock->nowUTC()).value());
const auto height_expected{
o.chain_epoch_clock->epochAtTime(o.utc_clock->nowUTC()).value()};
metric("height_expected", height_expected);

auto car{[&](auto _size, auto _count, auto _tmp, auto &ipld) {
uint64_t size{};
Expand Down Expand Up @@ -85,7 +98,20 @@ namespace fc::node {
}
instances_lock.unlock();

return ss.str();
manual(MetricType::Gauge,
"lotus_chain_node_height",
"Current Height of the node")
.gauge.value = height_head;
manual(MetricType::Gauge,
"lotus_chain_node_height_expected",
"Expected Height of the node")
.gauge.value = height_expected;
manual(MetricType::Gauge,
"lotus_chain_node_worker_height",
"Height of workers on the node")
.gauge.value = height_head;

return ::prometheus::TextSerializer{}.Serialize(families);
}

const NodeObjects &o;
Expand Down
31 changes: 31 additions & 0 deletions core/node/pubsub_gate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include "codec/cbor/cbor_codec.hpp"
#include "common/logger.hpp"
#include "common/prometheus/metrics.hpp"
#include "primitives/block/block.hpp"
#include "primitives/cid/cid_of_cbor.hpp"

Expand Down Expand Up @@ -85,6 +86,14 @@ namespace fc::sync {
if (!gossip_->publish(blocks_topic_, buffer)) {
log()->warn("cannot publish block");
}

static auto &metric{prometheus::BuildCounter()
.Name("lotus_block_published")
.Help("Counter for total locally published blocks")
.Register(prometheusRegistry())
.Add({})};
metric.Increment();

return outcome::success();
}

Expand All @@ -93,6 +102,14 @@ namespace fc::sync {
Bytes{codec::cbor::encode(msg).value()})) {
log()->warn("cannot publish message");
}

static auto &metric{
prometheus::BuildCounter()
.Name("lotus_message_published")
.Help("Counter for total locally published messages")
.Register(prometheusRegistry())
.Add({})};
metric.Increment();
}

bool PubSubGate::onBlock(const PeerId &from, const Bytes &raw) {
Expand All @@ -102,6 +119,13 @@ namespace fc::sync {

// TODO validate

static auto &metric{prometheus::BuildCounter()
.Name("lotus_block_received")
.Help("Counter for total received blocks")
.Register(prometheusRegistry())
.Add({})};
metric.Increment();

events_->signalBlockFromPubSub(
events::BlockFromPubSub{from, CbCid::hash(cbor), std::move(bm)});

Expand Down Expand Up @@ -129,6 +153,13 @@ namespace fc::sync {
} else {
auto res = codec::cbor::decode<primitives::block::SignedMessage>(raw);
if (res) {
static auto &metric{prometheus::BuildCounter()
.Name("lotus_message_received")
.Help("Counter for total received messages")
.Register(prometheusRegistry())
.Add({})};
metric.Increment();

events_->signalMessageFromPubSub(events::MessageFromPubSub{
from,
std::move(cid_res.value()),
Expand Down
65 changes: 65 additions & 0 deletions core/vm/interpreter/impl/interpreter_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

#include <utility>

#include "common/prometheus/metrics.hpp"
#include "common/prometheus/since.hpp"
#include "const.hpp"
#include "primitives/tipset/load.hpp"
#include "vm/actor/builtin/v0/cron/cron_actor.hpp"
Expand Down Expand Up @@ -70,6 +72,61 @@ namespace fc::vm::interpreter {
std::vector<MessageReceipt> *all_receipts) const {
const auto &ipld{env_context_.ipld};

static auto &metricFailure{
prometheus::BuildCounter()
.Name("lotus_block_failure")
.Help("Counter for block validation failures")
.Register(prometheusRegistry())
.Add({})};
static auto &metricSuccess{
prometheus::BuildCounter()
.Name("lotus_block_success")
.Help("Counter for block validation successes")
.Register(prometheusRegistry())
.Add({})};
static auto &metricTotal{prometheus::BuildHistogram()
.Name("lotus_vm_applyblocks_total_ms")
.Help("Time spent applying block state")
.Register(prometheusRegistry())
.Add({}, kDefaultPrometheusMsBuckets)};
static auto &metricMessages{prometheus::BuildHistogram()
.Name("lotus_vm_applyblocks_messages")
.Help("Time spent applying block messages")
.Register(prometheusRegistry())
.Add({}, kDefaultPrometheusMsBuckets)};
static auto &metricEarly{
prometheus::BuildHistogram()
.Name("lotus_vm_applyblocks_early")
.Help("Time spent in early apply-blocks (null cron, upgrades)")
.Register(prometheusRegistry())
.Add({}, kDefaultPrometheusMsBuckets)};
static auto &metricCron{prometheus::BuildHistogram()
.Name("lotus_vm_applyblocks_cron")
.Help("Time spent in cron")
.Register(prometheusRegistry())
.Add({}, kDefaultPrometheusMsBuckets)};
static auto &metricFlush{prometheus::BuildHistogram()
.Name("lotus_vm_applyblocks_flush")
.Help("Time spent flushing vm state")
.Register(prometheusRegistry())
.Add({}, kDefaultPrometheusMsBuckets)};

bool success{false};
const Since since;
std::pair<::prometheus::Histogram *, Since> last_step;
auto nextStep{[&](auto metric) {
if (last_step.first) {
last_step.first->Observe(last_step.second.ms());
}
last_step = std::make_pair(metric, Since{});
}};
auto BOOST_OUTCOME_TRY_UNIQUE_NAME{gsl::finally([&] {
metricTotal.Observe(since.ms());
nextStep(nullptr);
(success ? metricSuccess : metricFailure).Increment();
})};
nextStep(&metricEarly);

auto on_receipt{[&](auto &receipt) {
if (all_receipts) {
all_receipts->push_back(receipt);
Expand Down Expand Up @@ -111,6 +168,8 @@ namespace fc::vm::interpreter {
env->setHeight(tipset->height());
}

nextStep(&metricMessages);

adt::Array<MessageReceipt> receipts{ipld};
MessageVisitor message_visitor{ipld, true, true};
for (const auto &block : tipset->blks) {
Expand Down Expand Up @@ -147,15 +206,21 @@ namespace fc::vm::interpreter {
on_receipt(receipt);
}

nextStep(&metricCron);

OUTCOME_TRY(cron());

nextStep(&metricFlush);

OUTCOME_TRY(new_state_root, env->state_tree->flush());
OUTCOME_TRY(env->ipld->flush(new_state_root));

OUTCOME_TRY(receipts.amt.flush());

OUTCOME_TRY(weight, getWeight(tipset));

success = true;

return Result{new_state_root, receipts.amt.cid(), std::move(weight)};
}

Expand Down
1 change: 1 addition & 0 deletions core/vm/runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ target_link_libraries(runtime
ipfs_datastore_error
keystore
message
prometheus
proofs
tipset
signature
Expand Down
Loading

0 comments on commit 132cdf0

Please sign in to comment.