diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp index 4ca1d2d8388..a2812eb50b5 100644 --- a/libraries/chain/controller.cpp +++ b/libraries/chain/controller.cpp @@ -427,11 +427,13 @@ struct controller_impl { emit( self.irreversible_block, *bitr ); + // blog.append could fail due to failures like running out of space. + // Do it before commit so that in case it throws, DB can be rolled back. + blog.append( (*bitr)->block, packed_transaction::cf_compression_type::none ); + db.commit( (*bitr)->block_num ); root_id = (*bitr)->id; - blog.append( (*bitr)->block, packed_transaction::cf_compression_type::none ); - auto rbitr = rbi.begin(); while( rbitr != rbi.end() && rbitr->blocknum <= (*bitr)->block_num ) { reversible_blocks.remove( *rbitr ); diff --git a/plugins/CMakeLists.txt b/plugins/CMakeLists.txt index 5d1491a1e76..2e0314789e5 100644 --- a/plugins/CMakeLists.txt +++ b/plugins/CMakeLists.txt @@ -11,6 +11,7 @@ add_subdirectory(history_api_plugin) add_subdirectory(state_history_plugin) add_subdirectory(trace_api_plugin) add_subdirectory(signature_provider_plugin) +add_subdirectory(resource_monitor_plugin) add_subdirectory(wallet_plugin) add_subdirectory(wallet_api_plugin) add_subdirectory(txn_test_gen_plugin) diff --git a/plugins/chain_plugin/CMakeLists.txt b/plugins/chain_plugin/CMakeLists.txt index 83059eb5456..e93657213c2 100644 --- a/plugins/chain_plugin/CMakeLists.txt +++ b/plugins/chain_plugin/CMakeLists.txt @@ -8,5 +8,5 @@ if(EOSIO_ENABLE_DEVELOPER_OPTIONS) target_compile_definitions(chain_plugin PUBLIC EOSIO_DEVELOPER) endif() -target_link_libraries( chain_plugin eosio_chain appbase ) -target_include_directories( chain_plugin PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include" "${CMAKE_CURRENT_SOURCE_DIR}/../chain_interface/include" "${CMAKE_CURRENT_SOURCE_DIR}/../../libraries/appbase/include") +target_link_libraries( chain_plugin eosio_chain appbase resource_monitor_plugin ) +target_include_directories( chain_plugin PUBLIC 
"${CMAKE_CURRENT_SOURCE_DIR}/include" "${CMAKE_CURRENT_SOURCE_DIR}/../chain_interface/include" "${CMAKE_CURRENT_SOURCE_DIR}/../../libraries/appbase/include" "${CMAKE_CURRENT_SOURCE_DIR}/../resource_monitor_plugin/include") diff --git a/plugins/chain_plugin/chain_plugin.cpp b/plugins/chain_plugin/chain_plugin.cpp index ab248fd5b8e..6092f037248 100644 --- a/plugins/chain_plugin/chain_plugin.cpp +++ b/plugins/chain_plugin/chain_plugin.cpp @@ -15,6 +15,8 @@ #include +#include + #include #include @@ -741,6 +743,11 @@ void chain_plugin::plugin_initialize(const variables_map& options) { my->chain_config->state_dir = app().data_dir() / config::default_state_dir_name; my->chain_config->read_only = my->readonly; + if (auto resmon_plugin = app().find_plugin()) { + resmon_plugin->monitor_directory(my->chain_config->blocks_dir); + resmon_plugin->monitor_directory(my->chain_config->state_dir); + } + if( options.count( "chain-state-db-size-mb" )) my->chain_config->state_size = options.at( "chain-state-db-size-mb" ).as() * 1024 * 1024; diff --git a/plugins/producer_plugin/producer_plugin.cpp b/plugins/producer_plugin/producer_plugin.cpp index 2d97f185ce2..ecc9ae94151 100644 --- a/plugins/producer_plugin/producer_plugin.cpp +++ b/plugins/producer_plugin/producer_plugin.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -798,6 +799,10 @@ void producer_plugin::plugin_initialize(const boost::program_options::variables_ EOS_ASSERT( fc::is_directory(my->_snapshots_dir), snapshot_directory_not_found_exception, "No such directory '${dir}'", ("dir", my->_snapshots_dir.generic_string()) ); + + if (auto resmon_plugin = app().find_plugin()) { + resmon_plugin->monitor_directory(my->_snapshots_dir); + } } my->_incoming_block_subscription = app().get_channel().subscribe( diff --git a/plugins/resource_monitor_plugin/CMakeLists.txt b/plugins/resource_monitor_plugin/CMakeLists.txt new file mode 100644 index 00000000000..fff4bb30ae7 --- /dev/null +++ 
b/plugins/resource_monitor_plugin/CMakeLists.txt @@ -0,0 +1,10 @@ +file(GLOB HEADERS "include/eosio/resource_monitor_plugin/*.hpp") +add_library( resource_monitor_plugin + resource_monitor_plugin.cpp + system_file_space_provider.cpp + ${HEADERS} ) + +target_link_libraries( resource_monitor_plugin appbase fc chain_plugin) +target_include_directories( resource_monitor_plugin PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include" ) + +add_subdirectory( test ) diff --git a/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp b/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp new file mode 100644 index 00000000000..d8b49499bdc --- /dev/null +++ b/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/file_space_handler.hpp @@ -0,0 +1,162 @@ +#pragma once + +#include +#include + +#include +#include + +namespace bfs = boost::filesystem; + +namespace eosio::resource_monitor { + template + class file_space_handler { + public: + file_space_handler(SpaceProvider&& space_provider, boost::asio::io_context& ctx) + :space_provider(std::move(space_provider)), + timer{ctx} + { + } + + void set_sleep_time(uint32_t sleep_time) { + sleep_time_in_secs = sleep_time; + } + + // warning_threshold must be less than shutdown_threshold. + // set them together so it is simpler to check. 
+ void set_threshold(uint32_t new_threshold, uint32_t new_warning_threshold) { + EOS_ASSERT(new_warning_threshold < new_threshold, chain::plugin_config_exception, + "warning_threshold ${new_warning_threshold} must be less than threshold ${new_threshold}", ("new_warning_threshold", new_warning_threshold) ("new_threshold", new_threshold)); + + shutdown_threshold = new_threshold; + warning_threshold = new_warning_threshold; + } + + void set_shutdown_on_exceeded(bool new_shutdown_on_exceeded) { + shutdown_on_exceeded = new_shutdown_on_exceeded; + } + + bool is_threshold_exceeded() const { + // Go over each monitored file system + for (auto& fs: filesystems) { + boost::system::error_code ec; + auto info = space_provider.get_space(fs.path_name, ec); + if ( ec ) { + // As the system is running and this plugin is not a critical + // part of the system, we should not exit. + // Just report the failure and continue; + wlog( "Unable to get space info for ${path_name}: [code: ${ec}] ${message}. Ignore this failure.", + ("path_name", fs.path_name.string()) + ("ec", ec.value()) + ("message", ec.message())); + + continue; + } + + if ( info.available < fs.warning_available ) { + wlog("Space usage on ${path}'s file system approaching threshold. 
available: ${available}, warning_available: ${warning_available}", ("path", fs.path_name.string()) ("available", info.available) ("warning_available", fs.warning_available)); + if ( shutdown_on_exceeded ) { + wlog("nodeos will shutdown when space usage exceeds threshold ${threshold}%", ("threshold", shutdown_threshold)); + } + + if ( info.available < fs.shutdown_available ) { + wlog("Space usage on ${path}'s file system exceeded threshold ${threshold}%, available: ${available}, Capacity: ${capacity}, shutdown_available: ${shutdown_available}", ("path", fs.path_name.string()) ("threshold", shutdown_threshold) ("available", info.available) ("capacity", info.capacity) ("shutdown_available", fs.shutdown_available)); + + return true; + } + } + } + + return false; + } + + void add_file_system(const bfs::path& path_name) { + // Get detailed information of the path + struct stat statbuf; + auto status = space_provider.get_stat(path_name.string().c_str(), &statbuf); + EOS_ASSERT(status == 0, chain::plugin_config_exception, + "Failed to run stat on ${path} with status ${status}", ("path", path_name.string())("status", status)); + + dlog("${path_name}'s file system to be monitored", ("path_name", path_name.string())); + + // If the file system containing the path is already + // in the filesystem list, do not add it again + for (auto& fs: filesystems) { + if (statbuf.st_dev == fs.st_dev) { // Two files belong to the same file system if their device IDs are the same. + dlog("${path_name}'s file system already monitored", ("path_name", path_name.string())); + + return; + } + } + + // For efficiency, precalculate threshold values to avoid calculating it + // everytime we check space usage. Since bfs::space returns + // available amount, we use minimum available amount as threshold. 
+ boost::system::error_code ec; + auto info = space_provider.get_space(path_name, ec); + EOS_ASSERT(!ec, chain::plugin_config_exception, + "Unable to get space info for ${path_name}: [code: ${ec}] ${message}", + ("path_name", path_name.string()) + ("ec", ec.value()) + ("message", ec.message())); + + auto shutdown_available = (100 - shutdown_threshold) * (info.capacity / 100); // (100 - shutdown_threshold)/100 is the percentage of minimum number of available bytes the file system must maintain + auto warning_available = (100 - warning_threshold) * (info.capacity / 100); + + // Add to the list + filesystems.emplace_back(statbuf.st_dev, shutdown_available, path_name, warning_available); + + ilog("${path_name}'s file system monitored. shutdown_available: ${shutdown_available}, capacity: ${capacity}, threshold: ${threshold}", ("path_name", path_name.string()) ("shutdown_available", shutdown_available) ("capacity", info.capacity) ("threshold", shutdown_threshold) ); + } + + void space_monitor_loop() { + if ( is_threshold_exceeded() && shutdown_on_exceeded ) { + wlog("Shutting down"); + appbase::app().quit(); // This will gracefully stop Nodeos + return; + } + + timer.expires_from_now( boost::posix_time::seconds( sleep_time_in_secs )); + timer.async_wait([this](auto& ec) { + if ( ec ) { + wlog("Exit due to error: ${rc}, message: ${message}", + ("ec", ec.value()) + ("message", ec.message())); + return; + } else { + // Loop over + space_monitor_loop(); + } + }); + } + + private: + SpaceProvider space_provider; + + boost::asio::deadline_timer timer; + + uint32_t sleep_time_in_secs {2}; + uint32_t shutdown_threshold {90}; + uint32_t warning_threshold {85}; + bool shutdown_on_exceeded {true}; + + struct filesystem_info { + dev_t st_dev; // device id of file system containing "file_path" + uintmax_t shutdown_available {0}; // minimum number of available bytes the file system must maintain + bfs::path path_name; + uintmax_t warning_available {0}; // warning is issued when 
available number of bytes drops below warning_available + + filesystem_info(dev_t dev, uintmax_t available, const bfs::path& path, uintmax_t warning) + : st_dev(dev), + shutdown_available(available), + path_name(path), + warning_available(warning) + { + } + }; + + // Stores file systems to be monitored. Duplicate + // file systems are not stored. + std::vector filesystems; + }; +} diff --git a/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/resource_monitor_plugin.hpp b/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/resource_monitor_plugin.hpp new file mode 100644 index 00000000000..739ef99b0d4 --- /dev/null +++ b/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/resource_monitor_plugin.hpp @@ -0,0 +1,29 @@ +#pragma once +#include +#include + +namespace eosio { + +using namespace appbase; + +class resource_monitor_plugin : public appbase::plugin { +public: + resource_monitor_plugin( ); + virtual ~resource_monitor_plugin(); + + APPBASE_PLUGIN_REQUIRES( (chain_plugin) ) + virtual void set_program_options(options_description&, options_description& cfg) override; + + void plugin_initialize(const variables_map& options); + void plugin_startup(); + void plugin_shutdown(); + + // Called by plugins and other components to request + // directory monitoring + void monitor_directory(const bfs::path& path); + +private: + std::unique_ptr my; +}; + +} diff --git a/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/system_file_space_provider.hpp b/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/system_file_space_provider.hpp new file mode 100644 index 00000000000..f76f974fc86 --- /dev/null +++ b/plugins/resource_monitor_plugin/include/eosio/resource_monitor_plugin/system_file_space_provider.hpp @@ -0,0 +1,21 @@ +#pragma once + +#include +#include + +namespace bfs = boost::filesystem; + +namespace eosio::resource_monitor { + class system_file_space_provider { + public: + 
system_file_space_provider() + { + } + + // Wrapper for Linux stat + int get_stat(const char *path, struct stat *buf) const; + + // Wrapper for boost file system space + bfs::space_info get_space(const bfs::path& p, boost::system::error_code& ec) const; + }; +} diff --git a/plugins/resource_monitor_plugin/resource_monitor_plugin.cpp b/plugins/resource_monitor_plugin/resource_monitor_plugin.cpp new file mode 100644 index 00000000000..674e552172d --- /dev/null +++ b/plugins/resource_monitor_plugin/resource_monitor_plugin.cpp @@ -0,0 +1,172 @@ +/** + It was reported from a customer that when file system which + "data/blocks" belongs to is running out of space, the producer + continued to produce blocks and update state but the blocks log was + "corrupted" in that it no longer contained all the irreversible blocks. + It was also observed that when file system which "data/state" + belongs to is running out of space, nodeos will crash with SIGBUS as + the state file is unable to acquire new pages. + + The solution is to have a dedicated plugin to monitor resource + usages (file system space now, CPU, memory, and networking + bandwidth in the future). + The plugin uses a thread to periodically check space usage of file + systems of directories being monitored. If space used + is over a predefined threshold, a graceful shutdown is initiated. 
+**/ + +#include +#include +#include + +#include + +#include +#include // set_os_thread_name + +#include + +#include + +#include + +using namespace eosio::resource_monitor; + +namespace bfs = boost::filesystem; + +namespace eosio { + static appbase::abstract_plugin& _resource_monitor_plugin = app().register_plugin(); + +class resource_monitor_plugin_impl { +public: + resource_monitor_plugin_impl() + :space_handler(system_file_space_provider(), ctx) + { + } + + void set_program_options(options_description&, options_description& cfg) { + cfg.add_options() + ( "resource-monitor-interval-seconds", bpo::value()->default_value(def_interval_in_secs), + "Time in seconds between two consecutive checks of resource usage. Should be between 1 and 300" ) + ( "resource-monitor-space-threshold", bpo::value()->default_value(def_space_threshold), + "Threshold in terms of percentage of used space vs total space. If used space is above (threshold - 5%), a warning is generated. Unless resource-monitor-not-shutdown-on-threshold-exceeded is enabled, a graceful shutdown is initiated if used space is above the threshold. The value should be between 6 and 99" ) + ( "resource-monitor-not-shutdown-on-threshold-exceeded", + "Used to indicate nodeos will not shutdown when threshold is exceeded." 
) + ; + } + + void plugin_initialize(const appbase::variables_map& options) { + dlog("plugin_initialize"); + + auto interval = options.at("resource-monitor-interval-seconds").as(); + EOS_ASSERT(interval >= interval_low && interval <= interval_high, chain::plugin_config_exception, + "\"resource-monitor-interval-seconds\" must be between ${interval_low} and ${interval_high}", ("interval_low", interval_low) ("interval_high", interval_high)); + space_handler.set_sleep_time(interval); + ilog("Monitoring interval set to ${interval}", ("interval", interval)); + + auto threshold = options.at("resource-monitor-space-threshold").as(); + EOS_ASSERT(threshold >= space_threshold_low && threshold <= space_threshold_high, chain::plugin_config_exception, + "\"resource-monitor-space-threshold\" must be between ${space_threshold_low} and ${space_threshold_high}", ("space_threshold_low", space_threshold_low) ("space_threshold_high", space_threshold_high)); + space_handler.set_threshold(threshold, threshold - space_threshold_warning_diff); + ilog("Space usage threshold set to ${threshold}", ("threshold", threshold)); + + if (options.count("resource-monitor-not-shutdown-on-threshold-exceeded")) { + // If set, not shutdown + space_handler.set_shutdown_on_exceeded(false); + ilog("Shutdown flag when threshold exceeded set to false"); + } else { + // Default will shut down + space_handler.set_shutdown_on_exceeded(true); + ilog("Shutdown flag when threshold exceeded set to true"); + } + } + + // Start main thread + void plugin_startup() { + ilog("Creating and starting monitor thread"); + + // By now all plugins are initialized. + // Find out filesystems containing the directories requested + // so far. + for ( auto& dir: directories_registered ) { + space_handler.add_file_system( dir ); + + // A directory like "data" contains subdirectories like + // "block". Those subdirectories can mount on different + // file systems. Make sure they are taken care of. 
+ for (bfs::directory_iterator itr(dir); itr != bfs::directory_iterator(); ++itr) { + if (fc::is_directory(itr->path())) { + space_handler.add_file_system( itr->path() ); + } + } + } + + monitor_thread = std::thread( [this] { + fc::set_os_thread_name( "resmon" ); // console_appender uses 9 chars for thread name reporting. + space_handler.space_monitor_loop(); + + ctx.run(); + } ); + } + + // System is shutting down. + void plugin_shutdown() { + ilog("shutdown..."); + + ctx.stop(); + + // Wait for the thread to end + if ( monitor_thread.joinable() ) monitor_thread.join(); + + ilog("exit shutdown"); + } + + void monitor_directory(const bfs::path& path) { + dlog("${path} registered to be monitored", ("path", path.string())); + directories_registered.push_back(path); + } + +private: + std::thread monitor_thread; + std::vector directories_registered; + + static constexpr uint32_t def_interval_in_secs = 2; + static constexpr uint32_t interval_low = 1; + static constexpr uint32_t interval_high = 300; + + static constexpr uint32_t def_space_threshold = 90; // in percentage + static constexpr uint32_t space_threshold_low = 6; // in percentage + static constexpr uint32_t space_threshold_high = 99; // in percentage + static constexpr uint32_t space_threshold_warning_diff = 5; // Warning issued when space used reached (threshold - space_threshold_warning_diff). 
space_threshold_warning_diff must be smaller than space_threshold_low + + boost::asio::io_context ctx; + + using file_space_handler_t = file_space_handler; + file_space_handler_t space_handler; +}; + +resource_monitor_plugin::resource_monitor_plugin():my(std::make_unique()) {} + +resource_monitor_plugin::~resource_monitor_plugin() {} + +void resource_monitor_plugin::set_program_options(options_description& cli, options_description& cfg) { + my->set_program_options(cli, cfg); +} + +void resource_monitor_plugin::plugin_initialize(const variables_map& options) { + my->plugin_initialize(options); +} + +void resource_monitor_plugin::plugin_startup() { + my->plugin_startup(); +} + +void resource_monitor_plugin::plugin_shutdown() { + my->plugin_shutdown(); +} + +void resource_monitor_plugin::monitor_directory(const bfs::path& path) { + my->monitor_directory( path ); +} + +} // namespace diff --git a/plugins/resource_monitor_plugin/system_file_space_provider.cpp b/plugins/resource_monitor_plugin/system_file_space_provider.cpp new file mode 100644 index 00000000000..6086bd871c2 --- /dev/null +++ b/plugins/resource_monitor_plugin/system_file_space_provider.cpp @@ -0,0 +1,15 @@ +#include + +namespace bfs = boost::filesystem; + +namespace eosio::resource_monitor { + int system_file_space_provider::get_stat(const char *path, struct stat *buf) const { + return stat(path, buf); + } + + bfs::space_info system_file_space_provider::get_space(const bfs::path& p, boost::system::error_code& ec) const { + return bfs::space(p, ec); + } + + using bfs::directory_iterator; +} diff --git a/plugins/resource_monitor_plugin/test/CMakeLists.txt b/plugins/resource_monitor_plugin/test/CMakeLists.txt new file mode 100644 index 00000000000..b22bcdc647e --- /dev/null +++ b/plugins/resource_monitor_plugin/test/CMakeLists.txt @@ -0,0 +1,22 @@ +add_executable( test_threshold test_threshold.cpp ) +target_link_libraries( test_threshold resource_monitor_plugin ) +target_include_directories( test_threshold 
PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include" ) + +add_test(NAME test_threshold COMMAND plugins/resource_monitor_plugin/test/test_threshold WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_executable( test_monitor_loop test_monitor_loop.cpp ) +target_link_libraries( test_monitor_loop resource_monitor_plugin ) +target_include_directories( test_monitor_loop PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include" ) + +add_test(NAME test_monitor_loop COMMAND plugins/resource_monitor_plugin/test/test_monitor_loop WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_executable( test_add_file_system test_add_file_system.cpp ) +target_link_libraries( test_add_file_system resource_monitor_plugin ) +target_include_directories( test_add_file_system PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include" ) + +add_test(NAME test_add_file_system COMMAND plugins/resource_monitor_plugin/test/test_add_file_system WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_executable( test_resmon_plugin test_resmon_plugin.cpp ) +target_link_libraries( test_resmon_plugin resource_monitor_plugin ) +target_include_directories( test_resmon_plugin PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include" ) +add_test(NAME test_resmon_plugin COMMAND plugins/resource_monitor_plugin/test/test_resmon_plugin WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) diff --git a/plugins/resource_monitor_plugin/test/test_add_file_system.cpp b/plugins/resource_monitor_plugin/test/test_add_file_system.cpp new file mode 100644 index 00000000000..b9508aa6eac --- /dev/null +++ b/plugins/resource_monitor_plugin/test/test_add_file_system.cpp @@ -0,0 +1,138 @@ +#define BOOST_TEST_MODULE add_file_system +#include + +#include + +#include + +using namespace eosio; +using namespace eosio::resource_monitor; +using namespace boost::system; + +struct add_file_system_fixture { + struct mock_space_provider { + mock_space_provider(add_file_system_fixture& fixture) + :fixture(fixture) + {} + + int get_stat(const char *path, struct stat *buf) const { + return fixture.mock_get_stat(path, buf); 
+ } + + bfs::space_info get_space(const bfs::path& p, boost::system::error_code& ec) const { + return fixture.mock_get_space(p, ec); + } + + add_file_system_fixture& fixture; + }; + + boost::asio::io_context ctx; + + using file_space_handler_t = file_space_handler; + add_file_system_fixture() + : space_handler(mock_space_provider(*this), ctx) + { + } + + void add_file_system(const bfs::path& path_name) { + space_handler.add_file_system(path_name); + } + + void set_threshold(uint32_t threshold, uint32_t warning_threshold) { + space_handler.set_threshold( threshold, warning_threshold ); + } + + bool is_threshold_exceeded() const { + return space_handler.is_threshold_exceeded(); + } + + void test_add_file_systems_common(std::vector& capacity, std::vector& available, std::vector& devs) { + mock_get_space = [ i = 0, capacity, available ]( const bfs::path& p, boost::system::error_code& ec) mutable -> bfs::space_info { + ec = boost::system::errc::make_error_code(errc::success); + + bfs::space_info rc; + rc.capacity = capacity[i]; + rc.available = available[i]; + i++; + + return rc; + }; + + mock_get_stat = [ j = 0, devs ]( const char *path, struct stat *buf ) mutable -> int { + buf->st_dev = devs[j]; + j++; + + return 0; + }; + + set_threshold(80, 75); + + for (auto k = 0; k < capacity.size(); k++) { + add_file_system("/test" + std::to_string(k)); + } + } + + // fixture data and methods + std::function mock_get_space; + std::function mock_get_stat; + + file_space_handler_t space_handler; +}; + +BOOST_AUTO_TEST_SUITE(space_handler_tests) + BOOST_FIXTURE_TEST_CASE(get_stat_failure, add_file_system_fixture) + { + mock_get_stat = []( const char *path, struct stat *buf ) -> int { + return 1; // anything other than 0 is an error in stat + }; + + BOOST_REQUIRE_THROW(add_file_system("/test"), chain::plugin_config_exception); + } + + BOOST_FIXTURE_TEST_CASE(get_space_failure, add_file_system_fixture) + { + mock_get_space = []( const bfs::path& p, boost::system::error_code& ec) -> 
bfs::space_info { + ec = boost::system::errc::make_error_code(errc::no_such_file_or_directory); + bfs::space_info rc; + return rc; + }; + + mock_get_stat = []( const char *path, struct stat *buf ) -> int { + buf->st_dev = 0; + return 0; + }; + + BOOST_REQUIRE_THROW(add_file_system("/test"), chain::plugin_config_exception); + } + + BOOST_FIXTURE_TEST_CASE(different_file_systems, add_file_system_fixture) + { + std::vector capacity {1000000, 2000000, 3000000, 4000000}; + std::vector available {500000, 1500000, 2500000, 3500000}; + std::vector devs {0, 1, 2, 3}; + + // As long as no exceptions, it is considered success. + BOOST_REQUIRE_NO_THROW(test_add_file_systems_common(capacity, available, devs)); + } + + BOOST_FIXTURE_TEST_CASE(same_file_system, add_file_system_fixture) + { + std::vector capacity {1000000, 2000000, 3000000, 4000000}; + std::vector available {500000, 1500000, 2500000, 3500000}; + std::vector devs {0, 0, 0, 0}; + + // As long as no exceptions, it is considered success. + BOOST_REQUIRE_NO_THROW(test_add_file_systems_common(capacity, available, devs)); + } + + BOOST_FIXTURE_TEST_CASE(mixed_file_systems, add_file_system_fixture) + { + std::vector capacity {1000000, 2000000, 3000000, 4000000, 50000}; + std::vector available {500000, 1500000, 2500000, 3500000, 20000}; + std::vector devs {0, 2, 2, 0, 3}; + + // As long as no exceptions, it is considered success. 
+ BOOST_REQUIRE_NO_THROW(test_add_file_systems_common(capacity, available, devs)); + } + +BOOST_AUTO_TEST_SUITE_END() diff --git a/plugins/resource_monitor_plugin/test/test_monitor_loop.cpp b/plugins/resource_monitor_plugin/test/test_monitor_loop.cpp new file mode 100644 index 00000000000..faf6e6b42cf --- /dev/null +++ b/plugins/resource_monitor_plugin/test/test_monitor_loop.cpp @@ -0,0 +1,159 @@ +#define BOOST_TEST_MODULE monitor_loop +#include + +#include + +#include + +using namespace eosio; +using namespace eosio::resource_monitor; +using namespace boost::system; + +struct space_handler_fixture { + struct mock_space_provider { + mock_space_provider(space_handler_fixture& fixture) + :fixture(fixture) + {} + + int get_stat(const char *path, struct stat *buf) const { + return fixture.mock_get_stat(path, buf); + } + + bfs::space_info get_space(const bfs::path& p, boost::system::error_code& ec) const { + return fixture.mock_get_space(p, ec); + } + + space_handler_fixture& fixture; + }; + + boost::asio::io_context ctx; + + using file_space_handler_t = file_space_handler; + space_handler_fixture() + : space_handler(mock_space_provider( *this ), ctx) + { + } + + void add_file_system(const bfs::path& path_name) { + space_handler.add_file_system( path_name ); + } + + void set_threshold(uint32_t threshold, uint32_t warning_threshold) { + space_handler.set_threshold( threshold, warning_threshold ); + } + + void set_sleep_time(uint32_t sleep_time) { + space_handler.set_sleep_time( sleep_time ); + } + + void set_shutdown_on_exceeded(bool shutdown_on_exceeded) { + space_handler.set_shutdown_on_exceeded(shutdown_on_exceeded); + } + + bool is_threshold_exceeded() const { + return space_handler.is_threshold_exceeded(); + } + + void space_monitor_loop() { + return space_handler.space_monitor_loop(); + } + + bool test_loop_common(int num_loops, int interval) + { + mock_get_space = [ i = 0, num_loops ]( const bfs::path& p, boost::system::error_code& ec) mutable -> bfs::space_info { 
+ ec = boost::system::errc::make_error_code(errc::success); + + bfs::space_info rc; + rc.capacity = 1000000; + + if ( i < num_loops + 1 ) { // "+ 1" for the get_space in add_file_system + rc.available = 300000; + } else { + rc.available = 100000; + } + + i++; + + return rc; + }; + + mock_get_stat = []( const char *path, struct stat *buf ) -> int { + buf->st_dev = 0; + return 0; + }; + + set_threshold(80, 75); + set_shutdown_on_exceeded(true); + set_sleep_time(interval); + add_file_system("/test"); + + auto start = std::chrono::system_clock::now(); + + auto monitor_thread = std::thread( [this] { + space_monitor_loop(); + ctx.run(); + }); + + monitor_thread.join(); + + auto end = std::chrono::system_clock::now(); + std::chrono::duration test_duration = end - start; + + // For tests to be repeatable on any platforms under any loads, + // particularly for longer runs, + // we just make sure the test duration is longer than a margin + // of theoretical duration. + bool finished_in_time = (test_duration >= std::chrono::duration((num_loops - 1) * interval)); + + return finished_in_time; + } + + // fixture data and methods + std::function mock_get_space; + std::function mock_get_stat; + + file_space_handler_t space_handler; +}; + +BOOST_AUTO_TEST_SUITE(monitor_loop_tests) + BOOST_FIXTURE_TEST_CASE(zero_loop, space_handler_fixture) + { + BOOST_TEST( test_loop_common(0, 1) ); + } + + BOOST_FIXTURE_TEST_CASE(one_loop_1_secs_interval, space_handler_fixture) + { + BOOST_TEST( test_loop_common(1, 1) ); + } + + BOOST_FIXTURE_TEST_CASE(two_loops_1_sec_interval, space_handler_fixture) + { + BOOST_TEST( test_loop_common(2, 1) ); + } + + BOOST_FIXTURE_TEST_CASE(ten_loops_1_sec_interval, space_handler_fixture) + { + BOOST_TEST( test_loop_common(10, 1) ); + } + + BOOST_FIXTURE_TEST_CASE(one_loop_5_secs_interval, space_handler_fixture) + { + BOOST_TEST( test_loop_common(1, 5) ); + } + + BOOST_FIXTURE_TEST_CASE(two_loops_5_sec_interval, space_handler_fixture) + { + BOOST_TEST( 
test_loop_common(2, 5) ); + } + + BOOST_FIXTURE_TEST_CASE(ten_loops_5_sec_interval, space_handler_fixture) + { + BOOST_TEST( test_loop_common(10, 5) ); + } + + BOOST_FIXTURE_TEST_CASE(one_hundred_twenty_loops_1_sec_interval, space_handler_fixture) + { + BOOST_TEST( test_loop_common(120, 1) ); + } + +BOOST_AUTO_TEST_SUITE_END() diff --git a/plugins/resource_monitor_plugin/test/test_resmon_plugin.cpp b/plugins/resource_monitor_plugin/test/test_resmon_plugin.cpp new file mode 100644 index 00000000000..1c47e1302da --- /dev/null +++ b/plugins/resource_monitor_plugin/test/test_resmon_plugin.cpp @@ -0,0 +1,145 @@ +#define BOOST_TEST_MODULE test_resmom_plugin +#include + +#include + +#include + +using namespace eosio; +using namespace boost::system; + +namespace bfs = boost::filesystem; + +// For program options +namespace bpo = boost::program_options; +using bpo::options_description; +using bpo::variables_map; + +struct resmon_fixture { + void set_program_options() { + options_description dummy; + _my.set_program_options(dummy, _cfg); + } + + void initialize(const std::vector& args){ + // We only have at most 3 arguments. OK to hardcode in test + // programs. 
+ const char* argv[10]; + EOS_ASSERT(args.size() < 10, chain::plugin_exception, "number of arguments (${size}) must be less than 10", ("size", args.size())); + + // argv[0] is program name, no need to fill in + for (auto i=0; i& arg) { + set_program_options(); + initialize(arg); + } + + void plugin_startup(const std::vector& dirs, int runTimeSecs=3) { + set_options({"--resource-monitor-interval-seconds=1"}); + + for (auto& dir: dirs) { + _my.monitor_directory(dir); + } + + _my.plugin_startup(); + std::this_thread::sleep_for( std::chrono::milliseconds(runTimeSecs*1000) ); + _my.plugin_shutdown(); + } + + resource_monitor_plugin _my; + options_description _cfg; +}; + +BOOST_AUTO_TEST_SUITE(resmon_plugin_tests) + BOOST_FIXTURE_TEST_CASE(intervalTooBig, resmon_fixture) + { + BOOST_REQUIRE_THROW(set_options({"--resource-monitor-interval-seconds=301"}), chain::plugin_config_exception); + } + + BOOST_FIXTURE_TEST_CASE(intervalTooSmall, resmon_fixture) + { + BOOST_REQUIRE_THROW(set_options({"--resource-monitor-interval-seconds=0"}), chain::plugin_config_exception); + } + + BOOST_FIXTURE_TEST_CASE(intervalLowBound, resmon_fixture) + { + BOOST_REQUIRE_NO_THROW(set_options({"--resource-monitor-interval-seconds=1"})); + } + + BOOST_FIXTURE_TEST_CASE(intervalMiddle, resmon_fixture) + { + BOOST_REQUIRE_NO_THROW(set_options({"--resource-monitor-interval-seconds=150"})); + } + + BOOST_FIXTURE_TEST_CASE(intervalHighBound, resmon_fixture) + { + BOOST_REQUIRE_NO_THROW(set_options({"--resource-monitor-interval-seconds=300"})); + } + + BOOST_FIXTURE_TEST_CASE(thresholdTooBig, resmon_fixture) + { + BOOST_REQUIRE_THROW(set_options({"--resource-monitor-space-threshold=100"}), chain::plugin_config_exception); + } + + BOOST_FIXTURE_TEST_CASE(thresholdTooSmall, resmon_fixture) + { + BOOST_REQUIRE_THROW(set_options({"--resource-monitor-space-threshold=5"}), chain::plugin_config_exception); + } + + BOOST_FIXTURE_TEST_CASE(thresholdLowBound, resmon_fixture) + { + 
BOOST_REQUIRE_NO_THROW(set_options({"--resource-monitor-space-threshold=6"})); + } + + BOOST_FIXTURE_TEST_CASE(thresholdMiddle, resmon_fixture) + { + BOOST_REQUIRE_NO_THROW(set_options({"--resource-monitor-space-threshold=60"})); + } + + BOOST_FIXTURE_TEST_CASE(thresholdHighBound, resmon_fixture) + { + BOOST_REQUIRE_NO_THROW(set_options({"--resource-monitor-space-threshold=99"})); + } + + BOOST_FIXTURE_TEST_CASE(noShutdown, resmon_fixture) + { + BOOST_REQUIRE_NO_THROW(set_options({"--resource-monitor-not-shutdown-on-threshold-exceeded"})); + } + + BOOST_FIXTURE_TEST_CASE(startupNormal, resmon_fixture) + { + BOOST_REQUIRE_NO_THROW( plugin_startup({"/tmp"})); + } + + BOOST_FIXTURE_TEST_CASE(startupDuplicateDirs, resmon_fixture) + { + BOOST_REQUIRE_NO_THROW( plugin_startup({"/tmp", "/tmp"})); + } + + BOOST_FIXTURE_TEST_CASE(startupMultDirs, resmon_fixture) + { + // Under "/" are multiple file systems + BOOST_REQUIRE_NO_THROW( plugin_startup({"/", "/tmp"})); + } + + BOOST_FIXTURE_TEST_CASE(startupNoExistingDirs, resmon_fixture) + { + // "hsdfgd983" is a random name that is expected not to exist + BOOST_REQUIRE_THROW( plugin_startup({"/tmp", "hsdfgd983"}), chain::plugin_config_exception); + } + + BOOST_FIXTURE_TEST_CASE(startupLongRun, resmon_fixture) + { + BOOST_REQUIRE_NO_THROW( plugin_startup({"/tmp"}, 120)); + } +BOOST_AUTO_TEST_SUITE_END() diff --git a/plugins/resource_monitor_plugin/test/test_threshold.cpp b/plugins/resource_monitor_plugin/test/test_threshold.cpp new file mode 100644 index 00000000000..5f3e4d61039 --- /dev/null +++ b/plugins/resource_monitor_plugin/test/test_threshold.cpp @@ -0,0 +1,301 @@ +#define BOOST_TEST_MODULE threshold +#include + +#include + +#include + +using namespace eosio; +using namespace eosio::resource_monitor; +using namespace boost::system; + +struct threshold_fixture { + struct mock_space_provider { + mock_space_provider(threshold_fixture& fixture) + :fixture(fixture) + {} + + int get_stat(const char *path, struct stat *buf) const { + return 
fixture.mock_get_stat(path, buf); + } + + bfs::space_info get_space(const bfs::path& p, boost::system::error_code& ec) const { + return fixture.mock_get_space(p, ec); + } + + threshold_fixture& fixture; + }; + + boost::asio::io_context ctx; + + using file_space_handler_t = file_space_handler; + threshold_fixture() + : space_handler(mock_space_provider(*this), ctx) + { + } + + void add_file_system(const bfs::path& path_name) { + space_handler.add_file_system(path_name); + } + + void set_threshold(uint32_t threshold, uint32_t warning_threshold) { + space_handler.set_threshold(threshold, warning_threshold); + } + + bool is_threshold_exceeded() const { + return space_handler.is_threshold_exceeded(); + } + + void set_shutdown_on_exceeded(bool shutdown_on_exceeded) { + space_handler.set_shutdown_on_exceeded(shutdown_on_exceeded); + } + + bool test_threshold_common(std::map& available, std::map& dev, uint32_t warning_threshold=75) + { + mock_get_space = [available]( const bfs::path& p, boost::system::error_code& ec) mutable -> bfs::space_info { + ec = boost::system::errc::make_error_code(errc::success); + + bfs::space_info rc; + rc.capacity = 1000000; + rc.available = available[p]; + + return rc; + }; + + mock_get_stat = [dev]( const char *path, struct stat *buf ) mutable -> int { + bfs::path name = path; + buf->st_dev = dev[name]; + + return 0; + }; + + set_threshold(80, warning_threshold); + set_shutdown_on_exceeded(true); + + for (auto i = 0; i < available.size(); i++) { + add_file_system("/test" + std::to_string(i)); + } + + return is_threshold_exceeded(); + } + + // fixture data and methods + std::function mock_get_space; + std::function mock_get_stat; + + file_space_handler_t space_handler; +}; + +BOOST_AUTO_TEST_SUITE(threshol_tests) + BOOST_FIXTURE_TEST_CASE(equal_to_threshold, threshold_fixture) + { + std::map availables {{"/test0", 200000}}; + std::map devs {{"/test0", 0}}; + + BOOST_TEST( !test_threshold_common(availables, devs) ); + } + + 
BOOST_FIXTURE_TEST_CASE(above_threshold_1_byte, threshold_fixture) + { + std::map availables {{"/test0", 199999}}; + std::map devs {{"/test0", 0}}; + + BOOST_TEST( test_threshold_common(availables, devs) ); + } + + BOOST_FIXTURE_TEST_CASE(above_threshold_1000_byte, threshold_fixture) + { + std::map availables {{"/test0", 199000}}; + std::map devs {{"/test0", 0}}; + + BOOST_TEST( test_threshold_common(availables, devs) ); + } + + BOOST_FIXTURE_TEST_CASE(within_warning, threshold_fixture) + { + std::map availables {{"/test0", 249999}}; + std::map devs {{"/test0", 0}}; + + BOOST_TEST( !test_threshold_common(availables, devs) ); + } + + BOOST_FIXTURE_TEST_CASE(not_yet_warning, threshold_fixture) + { + std::map availables {{"/test0", 250001}}; + std::map devs {{"/test0", 0}}; + + BOOST_TEST( !test_threshold_common(availables, devs) ); + } + + BOOST_FIXTURE_TEST_CASE(below_threshold_1_byte, threshold_fixture) + { + std::map availables {{"/test0", 200001}}; + std::map devs {{"/test0", 0}}; + + BOOST_TEST( !test_threshold_common(availables, devs) ); + } + + BOOST_FIXTURE_TEST_CASE(below_threshold_500_byte, threshold_fixture) + { + std::map availables {{"/test0", 200500}}; + std::map devs {{"/test0", 0}}; + + BOOST_TEST( !test_threshold_common(availables, devs) ); + } + + BOOST_FIXTURE_TEST_CASE(first_file_system_over_threshold, threshold_fixture) + { + std::map availables {{"/test0", 199999}, + {"/test1", 200500}}; + std::map devs {{"/test0", 0}, + {"/test1", 1}}; + + BOOST_TEST( test_threshold_common(availables, devs) ); + } + + BOOST_FIXTURE_TEST_CASE(second_file_system_over_threshold, threshold_fixture) + { + std::map availables {{"/test0", 300000}, + {"/test1", 100000}}; + std::map devs {{"/test0", 0}, + {"/test1", 1}}; + + BOOST_TEST( test_threshold_common(availables, devs) ); + } + + BOOST_FIXTURE_TEST_CASE(no_file_system_over_threshold, threshold_fixture) + { + std::map availables {{"/test0", 300000}, + {"/test1", 200000}}; + std::map devs {{"/test0", 0}, + 
{"/test1", 1}}; + + BOOST_TEST( !test_threshold_common(availables, devs) ); + } + + BOOST_FIXTURE_TEST_CASE(both_file_systems_over_threshold, threshold_fixture) + { + std::map availables {{"/test0", 150000}, + {"/test1", 100000}}; + std::map devs {{"/test0", 0}, + {"/test1", 1}}; + + BOOST_TEST( test_threshold_common(availables, devs) ); + } + + BOOST_FIXTURE_TEST_CASE(one_of_three_over_threshold, threshold_fixture) + { + std::map availables {{"/test0", 300000}, + {"/test1", 199999}, + {"/test2", 250000}}; + std::map devs {{"/test0", 0}, + {"/test1", 1}, + {"/test2", 2}}; + + BOOST_TEST( test_threshold_common(availables, devs) ); + } + + BOOST_FIXTURE_TEST_CASE(one_of_three_over_threshold_dup, threshold_fixture) + { + std::map availables {{"/test0", 100000}, + {"/test1", 250000}, + {"/test2", 250000}}; + std::map devs {{"/test0", 0}, + {"/test1", 1}, // dup + {"/test2", 1}}; // dup + + BOOST_TEST( test_threshold_common(availables, devs) ); + } + + BOOST_FIXTURE_TEST_CASE(none_of_three_over_threshold, threshold_fixture) + { + std::map availables {{"/test0", 300000}, + {"/test1", 200000}, + {"/test2", 250000}}; + std::map devs {{"/test0", 0}, + {"/test1", 1}, + {"/test2", 2}}; + + BOOST_TEST( !test_threshold_common(availables, devs) ); + } + + BOOST_FIXTURE_TEST_CASE(none_of_three_over_threshold_dup, threshold_fixture) + { + std::map availables {{"/test0", 800000}, + {"/test1", 550000}, + {"/test2", 550000}}; + std::map devs {{"/test0", 0}, + {"/test1", 1}, // dup + {"/test2", 1}}; // dup + + BOOST_TEST( !test_threshold_common(availables, devs) ); + } + + BOOST_FIXTURE_TEST_CASE(warning_threshold_equal_to_threshold, threshold_fixture) + { + std::map availables {{"/test0", 150000}}; + std::map devs {{"/test0", 0}}; + + BOOST_REQUIRE_THROW(test_threshold_common(availables, devs, 80), chain::plugin_config_exception); + } + + BOOST_FIXTURE_TEST_CASE(warning_threshold_greater_than_threshold, threshold_fixture) + { + std::map availables {{"/test0", 150000}}; + std::map 
devs {{"/test0", 0}}; + + BOOST_REQUIRE_THROW( test_threshold_common(availables, devs, 85), chain::plugin_config_exception ); + } + + BOOST_FIXTURE_TEST_CASE(warning_threshold_less_than_threshold, threshold_fixture) + { + std::map availables {{"/test0", 200000}}; + std::map devs {{"/test0", 0}}; + + BOOST_TEST( !test_threshold_common(availables, devs, 70) ); + } + + BOOST_FIXTURE_TEST_CASE(get_space_failure_in_middle, threshold_fixture) + { + mock_get_space = [ i = 0 ]( const bfs::path& p, boost::system::error_code& ec) mutable -> bfs::space_info { + if ( i == 3 ) { + ec = boost::system::errc::make_error_code(errc::no_such_file_or_directory); + } else { + ec = boost::system::errc::make_error_code(errc::success); + } + + bfs::space_info rc; + rc.capacity = 1000000; + rc.available = 200500; + + i++; + + return rc; + }; + + mock_get_stat = []( const char *path, struct stat *buf ) -> int { + buf->st_dev = 0; + return 0; + }; + + set_threshold(80, 75); + add_file_system("/test"); + + auto expected_response = false; + + auto actual_response_0 = is_threshold_exceeded(); + auto actual_response_1 = is_threshold_exceeded(); + auto actual_response_2 = is_threshold_exceeded(); + auto actual_response_3 = is_threshold_exceeded(); + auto actual_response_4 = is_threshold_exceeded(); + auto actual_response_5 = is_threshold_exceeded(); + + BOOST_TEST(expected_response == actual_response_0); + BOOST_TEST(expected_response == actual_response_1); + BOOST_TEST(expected_response == actual_response_2); + BOOST_TEST(expected_response == actual_response_3); + BOOST_TEST(expected_response == actual_response_4); + BOOST_TEST(expected_response == actual_response_5); + } + +BOOST_AUTO_TEST_SUITE_END() diff --git a/plugins/state_history_plugin/state_history_plugin.cpp b/plugins/state_history_plugin/state_history_plugin.cpp index 60a256f30d7..1d77fa44494 100644 --- a/plugins/state_history_plugin/state_history_plugin.cpp +++ b/plugins/state_history_plugin/state_history_plugin.cpp @@ -1,4 +1,5 @@ 
#include +#include #include #include #include @@ -390,6 +391,8 @@ void state_history_plugin::plugin_initialize(const variables_map& options) { state_history_dir = app().data_dir() / dir_option; else state_history_dir = dir_option; + if (auto resmon_plugin = app().find_plugin()) + resmon_plugin->monitor_directory(state_history_dir); auto ip_port = options.at("state-history-endpoint").as(); auto port = ip_port.substr(ip_port.find(':') + 1, ip_port.size()); diff --git a/plugins/trace_api_plugin/trace_api_plugin.cpp b/plugins/trace_api_plugin/trace_api_plugin.cpp index 4cc02231984..0fffa1d5eec 100644 --- a/plugins/trace_api_plugin/trace_api_plugin.cpp +++ b/plugins/trace_api_plugin/trace_api_plugin.cpp @@ -7,6 +7,8 @@ #include +#include + #include using namespace eosio::trace_api; @@ -114,6 +116,8 @@ struct trace_api_common_impl { trace_dir = app().data_dir() / dir_option; else trace_dir = dir_option; + if (auto resmon_plugin = app().find_plugin()) + resmon_plugin->monitor_directory(trace_dir); slice_stride = options.at("trace-slice-stride").as(); @@ -428,4 +432,4 @@ void trace_api_rpc_plugin::handle_sighup() { fc::logger::update( logger_name, _log ); } -} \ No newline at end of file +} diff --git a/programs/nodeos/CMakeLists.txt b/programs/nodeos/CMakeLists.txt index e5eddb218a9..90664975f35 100644 --- a/programs/nodeos/CMakeLists.txt +++ b/programs/nodeos/CMakeLists.txt @@ -55,6 +55,7 @@ target_link_libraries( ${NODE_EXECUTABLE_NAME} PRIVATE -Wl,${whole_archive_flag} txn_test_gen_plugin -Wl,${no_whole_archive_flag} PRIVATE -Wl,${whole_archive_flag} db_size_api_plugin -Wl,${no_whole_archive_flag} PRIVATE -Wl,${whole_archive_flag} producer_api_plugin -Wl,${no_whole_archive_flag} + PRIVATE -Wl,${whole_archive_flag} resource_monitor_plugin -Wl,${no_whole_archive_flag} PRIVATE -Wl,${whole_archive_flag} test_control_plugin -Wl,${no_whole_archive_flag} PRIVATE -Wl,${whole_archive_flag} test_control_api_plugin -Wl,${no_whole_archive_flag} PRIVATE -Wl,${build_id_flag} diff 
--git a/programs/nodeos/main.cpp b/programs/nodeos/main.cpp index 3db036957ea..65ea5066bb4 100644 --- a/programs/nodeos/main.cpp +++ b/programs/nodeos/main.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -118,13 +119,19 @@ int main(int argc, char** argv) .default_unix_socket_path = "", .default_http_port = 8888 }); - if(!app().initialize(argc, argv)) { + if(!app().initialize(argc, argv)) { const auto& opts = app().get_options(); if( opts.count("help") || opts.count("version") || opts.count("full-version") || opts.count("print-default-config") ) { return SUCCESS; } return INITIALIZE_FAIL; } + if (auto resmon_plugin = app().find_plugin()) { + resmon_plugin->monitor_directory(app().data_dir()); + } else { + elog("resource_monitor_plugin failed to initialize"); + return INITIALIZE_FAIL; + } initialize_logging(); ilog( "${name} version ${ver} ${fv}", ("name", nodeos::config::node_executable_name)("ver", app().version_string()) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 2e3181ae675..47291cf0bd3 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -54,6 +54,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/light_validation_sync_test.py ${CMAKE configure_file(${CMAKE_CURRENT_SOURCE_DIR}/eosio_blocklog_prune_test.py ${CMAKE_CURRENT_BINARY_DIR}/eosio_blocklog_prune_test.py COPYONLY) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cli_test.py ${CMAKE_CURRENT_BINARY_DIR}/cli_test.py COPYONLY) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/plugin_http_api_test.py ${CMAKE_CURRENT_BINARY_DIR}/plugin_http_api_test.py COPYONLY) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/resource_monitor_plugin_test.py ${CMAKE_CURRENT_BINARY_DIR}/resource_monitor_plugin_test.py COPYONLY) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/rodeos_test.py ${CMAKE_CURRENT_BINARY_DIR}/rodeos_test.py COPYONLY) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/test_filter.wasm ${CMAKE_CURRENT_BINARY_DIR}/test_filter.wasm COPYONLY) @@ -173,6 +174,9 @@ set_property(TEST 
plugin_http_api_test PROPERTY LABELS nonparallelizable_tests) add_subdirectory(se_tests) +add_test(NAME resource_monitor_plugin_test COMMAND tests/resource_monitor_plugin_test.py WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) +set_property(TEST resource_monitor_plugin_test PROPERTY LABELS long_running_tests) + if(ENABLE_COVERAGE_TESTING) set(Coverage_NAME ${PROJECT_NAME}_coverage) diff --git a/tests/resource_monitor_plugin_test.py b/tests/resource_monitor_plugin_test.py new file mode 100755 index 00000000000..899a6b9a94d --- /dev/null +++ b/tests/resource_monitor_plugin_test.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python3 + +from testUtils import Utils +from Cluster import Cluster +from TestHelper import TestHelper + +import random +import subprocess +import signal +import os +import shutil +import re + +Print=Utils.Print +errorExit=Utils.errorExit + +stagingDir="rsmStaging" +dataDir=stagingDir+"/data" +configDir=stagingDir+"/etc" +traceDir=dataDir+"/traceDir" + +loggingFile=configDir+"/logging.json" +stderrFile=dataDir + "/stderr.txt" + +testNum=0 + +# We need debug level to get more information about nodeos process +logging="""{ + "includes": [], + "appenders": [{ + "name": "stderr", + "type": "console", + "args": { + "stream": "std_error", + "level_colors": [{ + "level": "debug", + "color": "green" + },{ + "level": "warn", + "color": "brown" + },{ + "level": "error", + "color": "red" + } + ] + }, + "enabled": true + } + ], + "loggers": [{ + "name": "default", + "level": "debug", + "enabled": true, + "additivity": false, + "appenders": [ + "stderr" + ] + } + ] +}""" + +def cleanDirectories(): + os.path.exists(stagingDir) and shutil.rmtree(stagingDir) + +def prepareDirectories(): + # Prepare own directories so we don't depend on others to make sure + # tests are repeatable + cleanDirectories() + os.makedirs(stagingDir) + os.makedirs(dataDir) + os.makedirs(configDir) + + with open(loggingFile, "w") as textFile: + print(logging,file=textFile) + +def runNodeos(extraNodeosArgs, 
myTimeout): + """Startup nodeos, wait for timeout (before forced shutdown) and collect output.""" + if debug: Print("Launching nodeos process.") + cmd="programs/nodeos/nodeos --config-dir rsmStaging/etc -e -p eosio --plugin eosio::chain_api_plugin --plugin eosio::history_api_plugin --data-dir " + dataDir + " " + + cmd=cmd + extraNodeosArgs; + if debug: Print("cmd: %s" % (cmd)) + with open(stderrFile, 'w') as serr: + proc=subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=serr) + + try: + proc.communicate(timeout=myTimeout) + except (subprocess.TimeoutExpired) as _: + if debug: Print("Timed out\n") + proc.send_signal(signal.SIGKILL) + +def isMsgInStderrFile(msg): + msgFound=False + with open(stderrFile) as errFile: + for line in errFile: + if msg in line: + msgFound=True + break + return msgFound + +def testCommon(title, extraNodeosArgs, expectedMsgs): + global testNum + testNum+=1 + Print("Test %d: %s" % (testNum, title)) + + prepareDirectories() + + timeout=120 # Leave sufficient time so that nodeos can start up fully on any platform + runNodeos(extraNodeosArgs, timeout) + + for msg in expectedMsgs: + if not isMsgInStderrFile(msg): + errorExit ("Log should have contained \"%s\"" % (expectedMsgs)) + +def testAll(): + testCommon("Resmon enabled: all arguments", "--plugin eosio::resource_monitor_plugin --resource-monitor-space-threshold=85 --resource-monitor-interval-seconds=5 --resource-monitor-not-shutdown-on-threshold-exceeded", ["threshold set to 85", "interval set to 5", "Shutdown flag when threshold exceeded set to false", "Creating and starting monitor thread"]) + + # default arguments and default directories to be monitored + testCommon("Resmon not enabled: no arguments", "", ["interval set to 2", "threshold set to 90", "Shutdown flag when threshold exceeded set to true", "Creating and starting monitor thread", "snapshots's file system to be monitored", "blocks's file system to be monitored", "state's file system to be monitored"]) + + # default 
arguments with registered directories + testCommon("Resmon not enabled: Producer, Chain, State History and Trace Api", "--plugin eosio::state_history_plugin --state-history-dir=/tmp/state-history --disable-replay-opts --plugin eosio::trace_api_plugin --trace-dir=/tmp/trace --trace-no-abis", ["interval set to 2", "threshold set to 90", "Shutdown flag when threshold exceeded set to true", "snapshots's file system to be monitored", "blocks's file system to be monitored", "state's file system to be monitored", "state-history's file system to be monitored", "trace's file system to be monitored", "Creating and starting monitor thread"]) + + testCommon("Resmon enabled: Producer, Chain, State History and Trace Api", "--plugin eosio::resource_monitor_plugin --plugin eosio::state_history_plugin --state-history-dir=/tmp/state-history --disable-replay-opts --plugin eosio::trace_api_plugin --trace-dir=/tmp/trace --trace-no-abis --resource-monitor-space-threshold=80 --resource-monitor-interval-seconds=3", ["snapshots's file system to be monitored", "blocks's file system to be monitored", "state's file system to be monitored", "state-history's file system to be monitored", "trace's file system to be monitored", "Creating and starting monitor thread", "threshold set to 80", "interval set to 3", "Shutdown flag when threshold exceeded set to true"]) + +args = TestHelper.parse_args({"--keep-logs","--dump-error-details","-v","--leave-running","--clean-run"}) +debug=args.v +pnodes=1 +topo="mesh" +delay=1 +chainSyncStrategyStr=Utils.SyncResyncTag +total_nodes = pnodes +killCount=1 +killSignal=Utils.SigKillTag + +killEosInstances= not args.leave_running +dumpErrorDetails=args.dump_error_details +keepLogs=args.keep_logs +killAll=args.clean_run + +seed=1 +Utils.Debug=debug +testSuccessful=False + +cluster=Cluster(walletd=True) + +try: + TestHelper.printSystemInfo("BEGIN") + + cluster.setChainStrategy(chainSyncStrategyStr) + + cluster.killall(allInstances=killAll) + cluster.cleanup() + + if 
cluster.launch(pnodes=pnodes, totalNodes=total_nodes, topo=topo,delay=delay, dontBootstrap=True) is False: + errorExit("Failed to stand up eos cluster.") + cluster.killall(allInstances=killAll) + + testAll() + + testSuccessful=True +finally: + if debug: Print("Cleanup in finally block.") + cleanDirectories() + TestHelper.shutdown(cluster, None, testSuccessful, killEosInstances, False, keepLogs, killAll, dumpErrorDetails) + +if debug: Print("Exiting test, exit value 0.") +exit(0)