From 0290d0b82c3dcd190d2ce9dad4985ee33d1f845a Mon Sep 17 00:00:00 2001
From: Howard Hinnant
Date: Mon, 6 Apr 2020 17:22:19 -0400
Subject: [PATCH] Create health_check rpc

* Gives a summary of the health of the node:
  Healthy, Warning, or Critical

* Last validated ledger age:
  <7s is Healthy,
  7s to 20s is Warning
  > 20s is Critical

* If amendment blocked, Critical

* Number of peers:
  > 7 is Healthy
  1 to 7 is Warning
  0 is Critical

* server state:
  One of full, validating or proposing is Healthy
  One of syncing, tracking or connected is Warning
  All other states are Critical

* load factor:
  <= 100 is Healthy
  101 to 999 is Warning
  >= 1000 is Critical

* If not Healthy, info field contains data that is
  considered not Healthy.

Fixes: #2809
---
Review notes (not part of the commit message; git-am ignores the text
between the "---" line and the first "diff" line):

* processHealth() handles only requests whose target is exactly
  "/health"; for any other target it returns false before building a
  response, so processRequest() can report the request as unhandled.
* Healthy and Warning both answer 200 (ok); only Critical answers 503
  (service_unavailable).
* The body's "info" member is written only for the metrics that are not
  Healthy, so a fully healthy node never touches it.
* When server info has no "validated_ledger" member the age stays at
  std::numeric_limits<int>::max(), which lands in the Critical branch.
* A ledger age of exactly 20 is Critical in the code (Warning requires
  "< 20"), although the message above reads "> 20s".
* NOTE(review): json_body and SimpleWriter are assumed to be the body and
  writer types already used by the other handlers in OverlayImpl.cpp --
  confirm against the tree.
* NOTE(review): confirm that "load_factor" as returned by getServerInfo()
  is on the scale that the 100 / 1000 thresholds expect.

 src/ripple/overlay/impl/OverlayImpl.cpp | 89 ++++++++++++++++++++++++-
 src/ripple/overlay/impl/OverlayImpl.h   |  8 +++
 2 files changed, 96 insertions(+), 1 deletion(-)

diff --git a/src/ripple/overlay/impl/OverlayImpl.cpp b/src/ripple/overlay/impl/OverlayImpl.cpp
index 44912c9ce25..b1d3f401e0d 100644
--- a/src/ripple/overlay/impl/OverlayImpl.cpp
+++ b/src/ripple/overlay/impl/OverlayImpl.cpp
@@ -1075,11 +1075,98 @@ OverlayImpl::processValidatorList(
     return true;
 }
 
+bool
+OverlayImpl::processHealth(http_request_type const& req, Handoff& handoff)
+{
+    if (req.target() != "/health")
+        return false;
+    boost::beast::http::response<json_body> msg;
+    msg.version(req.version());
+    msg.insert("Server", BuildInfo::getFullVersionString());
+    msg.insert("Content-Type", "application/json");
+    msg.insert("Connection", "close");
+
+    auto info = getServerInfo();
+
+    int last_validated_ledger_age = std::numeric_limits<int>::max();
+    if (info.isMember("validated_ledger"))
+        last_validated_ledger_age = info["validated_ledger"]["age"].asInt();
+    bool amendment_blocked = false;
+    if (info.isMember("amendment_blocked"))
+        amendment_blocked = true;
+    int number_peers = info["peers"].asInt();
+    std::string server_state = info["server_state"].asString();
+    auto load_factor = info["load_factor"].asDouble();
+
+    enum { healthy, warning, critical };
+    int health = healthy;
+    auto set_health = [&health](int state) {
+        if (health < state)
+            health = state;
+    };
+
+    if (last_validated_ledger_age >= 7)
+    {
+        msg.body()[jss::info]["validated_ledger"] = last_validated_ledger_age;
+        if (last_validated_ledger_age < 20)
+            set_health(warning);
+        else
+            set_health(critical);
+    }
+
+    if (amendment_blocked)
+    {
+        msg.body()[jss::info]["amendment_blocked"] = true;
+        set_health(critical);
+    }
+
+    if (number_peers <= 7)
+    {
+        msg.body()[jss::info]["peers"] = number_peers;
+        if (number_peers != 0)
+            set_health(warning);
+        else
+            set_health(critical);
+    }
+
+    if (!(server_state == "full" || server_state == "validating" ||
+          server_state == "proposing"))
+    {
+        msg.body()[jss::info]["server_state"] = server_state;
+        if (server_state == "syncing" || server_state == "tracking" ||
+            server_state == "connected")
+        {
+            set_health(warning);
+        }
+        else
+            set_health(critical);
+    }
+
+    if (load_factor > 100)
+    {
+        msg.body()[jss::info]["load_factor"] = load_factor;
+        if (load_factor < 1000)
+            set_health(warning);
+        else
+            set_health(critical);
+    }
+
+    if (health != critical)
+        msg.result(boost::beast::http::status::ok);
+    else
+        msg.result(boost::beast::http::status::service_unavailable);
+
+    msg.prepare_payload();
+    handoff.response = std::make_shared<SimpleWriter>(msg);
+    return true;
+}
+
 bool
 OverlayImpl::processRequest(http_request_type const& req, Handoff& handoff)
 {
     // Take advantage of || short-circuiting
-    return processCrawl(req, handoff) || processValidatorList(req, handoff);
+    return processCrawl(req, handoff) || processValidatorList(req, handoff) ||
+        processHealth(req, handoff);
 }
 
 Overlay::PeerSequence
diff --git a/src/ripple/overlay/impl/OverlayImpl.h b/src/ripple/overlay/impl/OverlayImpl.h
index b36004d51fb..099b90556db 100644
--- a/src/ripple/overlay/impl/OverlayImpl.h
+++ b/src/ripple/overlay/impl/OverlayImpl.h
@@ -396,6 +396,14 @@ class OverlayImpl : public Overlay
     bool
     processValidatorList(http_request_type const& req, Handoff& handoff);
 
+    /** Handles health requests. Health returns information about the
+        health of the node.
+
+        @return true if the request was handled.
+    */
+    bool
+    processHealth(http_request_type const& req, Handoff& handoff);
+
     /** Handles non-peer protocol requests.
 
         @return true if the request was handled.