From 0f0b6c675baeca1f779c29f0b0b09b2f452a8944 Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Mon, 17 Apr 2023 20:46:07 -0700 Subject: [PATCH 01/33] replace async future with packed_task and thread to avoid blocking future destructor --- driver/connection_handler.cc | 2 +- driver/connection_handler.h | 2 +- driver/failover.h | 38 ++-- driver/failover_handler.cc | 14 +- driver/failover_reader_handler.cc | 192 ++++++++++++------- driver/failover_writer_handler.cc | 140 ++++++++------ driver/host_info.cc | 2 +- driver/host_info.h | 2 +- driver/monitor.cc | 1 + driver/mylog.cc | 10 +- unit_testing/failover_reader_handler_test.cc | 53 ++--- unit_testing/failover_writer_handler_test.cc | 50 ++--- 12 files changed, 293 insertions(+), 213 deletions(-) diff --git a/driver/connection_handler.cc b/driver/connection_handler.cc index d934b9f0c..b2eac55d0 100644 --- a/driver/connection_handler.cc +++ b/driver/connection_handler.cc @@ -60,7 +60,7 @@ SQLRETURN CONNECTION_HANDLER::do_connect(DBC* dbc_ptr, DataSource* ds, bool fail return dbc_ptr->connect(ds, failover_enabled); } -CONNECTION_PROXY* CONNECTION_HANDLER::connect(const std::shared_ptr& host_info, DataSource* ds) { +CONNECTION_PROXY* CONNECTION_HANDLER::connect(std::shared_ptr host_info, DataSource* ds) { if (dbc == nullptr || host_info == nullptr) { return nullptr; diff --git a/driver/connection_handler.h b/driver/connection_handler.h index d5423527a..1753c95c0 100644 --- a/driver/connection_handler.h +++ b/driver/connection_handler.h @@ -52,7 +52,7 @@ class CONNECTION_HANDLER { virtual ~CONNECTION_HANDLER(); virtual SQLRETURN do_connect(DBC* dbc_ptr, DataSource* ds, bool failover_enabled); - virtual CONNECTION_PROXY* connect(const std::shared_ptr& host_info, DataSource* ds); + virtual CONNECTION_PROXY* connect(std::shared_ptr host_info, DataSource* ds); void update_connection(CONNECTION_PROXY* new_connection, const std::string& new_host_name); private: diff --git a/driver/failover.h b/driver/failover.h index 82c45cd54..9cd967967 100644 --- a/driver/failover.h +++ b/driver/failover.h @@ -78,20 +78,20 @@ class FAILOVER_READER_HANDLER { ~FAILOVER_READER_HANDLER(); - READER_FAILOVER_RESULT failover( + std::shared_ptr failover( std::shared_ptr topology_info); - virtual READER_FAILOVER_RESULT get_reader_connection( + virtual std::shared_ptr get_reader_connection( std::shared_ptr topology_info, - FAILOVER_SYNC& f_sync); + std::shared_ptr f_sync); std::vector> build_hosts_list( - const std::shared_ptr& topology_info, + std::shared_ptr topology_info, bool contain_writers); - READER_FAILOVER_RESULT get_connection_from_hosts( + std::shared_ptr get_connection_from_hosts( std::vector> hosts_list, - FAILOVER_SYNC& global_sync); + std::shared_ptr global_sync); protected: int reader_connect_timeout_ms = 30000; // 30 sec @@ -138,7 +138,7 @@ class FAILOVER_WRITER_HANDLER { int writer_failover_timeout_ms, int read_topology_interval_ms, int reconnect_writer_interval_ms, unsigned long dbc_id, bool enable_logging = false); ~FAILOVER_WRITER_HANDLER(); - WRITER_FAILOVER_RESULT failover( + std::shared_ptr failover( std::shared_ptr current_topology); protected: @@ -222,7 +222,7 @@ class FAILOVER { bool is_writer_connected(); protected: - bool connect(const std::shared_ptr& host_info); + bool connect(std::shared_ptr host_info); void sleep(int miliseconds); void release_new_connection(); std::shared_ptr connection_handler; @@ -242,8 +242,8 @@ class CONNECT_TO_READER_HANDLER : public FAILOVER { void operator()( std::shared_ptr reader, - FAILOVER_SYNC& f_sync, - READER_FAILOVER_RESULT& result); + std::shared_ptr f_sync, + std::shared_ptr result); }; class RECONNECT_TO_WRITER_HANDLER : public FAILOVER { @@ -256,15 +256,15 @@ class RECONNECT_TO_WRITER_HANDLER : public FAILOVER { void operator()( std::shared_ptr original_writer, - FAILOVER_SYNC& f_sync, - WRITER_FAILOVER_RESULT& result); + std::shared_ptr f_sync, + std::shared_ptr result); private: int reconnect_interval_ms; bool is_current_host_writer( - const std::shared_ptr& original_writer, - const std::shared_ptr& latest_topology); + std::shared_ptr original_writer, + std::shared_ptr latest_topology); }; class WAIT_NEW_WRITER_HANDLER : public FAILOVER { @@ -279,8 +279,8 @@ class WAIT_NEW_WRITER_HANDLER : public FAILOVER { void operator()( std::shared_ptr original_writer, - FAILOVER_SYNC& f_sync, - WRITER_FAILOVER_RESULT& result); + std::shared_ptr f_sync, + std::shared_ptr result); private: // TODO - initialize in constructor and define constant for default value @@ -288,11 +288,11 @@ class WAIT_NEW_WRITER_HANDLER : public FAILOVER { std::shared_ptr reader_handler; std::shared_ptr current_topology; CONNECTION_PROXY* reader_connection = nullptr; // To retrieve latest topology - std::shared_ptr current_reader_host; + std::shared_ptr current_reader_host = nullptr; void refresh_topology_and_connect_to_new_writer( - std::shared_ptr original_writer, FAILOVER_SYNC& f_sync); - void connect_to_reader(FAILOVER_SYNC& f_sync); + std::shared_ptr original_writer, std::shared_ptr f_sync); + void connect_to_reader(std::shared_ptr f_sync); bool connect_to_writer(std::shared_ptr writer_candidate); void clean_up_reader_connection(); }; diff --git a/driver/failover_handler.cc b/driver/failover_handler.cc index 3e60b6240..02319b1b5 100644 --- a/driver/failover_handler.cc +++ b/driver/failover_handler.cc @@ -471,9 +471,9 @@ bool FAILOVER_HANDLER::failover_to_reader(const char*& new_error_code, const cha MYLOG_DBC_TRACE(dbc, "[FAILOVER_HANDLER] Starting reader failover procedure."); auto result = failover_reader_handler->failover(current_topology); - if (result.connected) { - current_host = result.new_host; - connection_handler->update_connection(result.new_connection, current_host->get_host()); + if (result->connected) { + current_host = result->new_host; + connection_handler->update_connection(result->new_connection, current_host->get_host()); new_error_code = "08S02"; error_msg = "The active SQL connection has changed."; MYLOG_DBC_TRACE(dbc, @@ -494,20 +494,20 @@ bool FAILOVER_HANDLER::failover_to_writer(const char*& new_error_code, const cha MYLOG_DBC_TRACE(dbc, "[FAILOVER_HANDLER] Starting writer failover procedure."); auto result = failover_writer_handler->failover(current_topology); - if (!result.connected) { + if (!result->connected) { MYLOG_DBC_TRACE(dbc, "[FAILOVER_HANDLER] Unable to establish SQL connection to writer node."); new_error_code = "08S01"; error_msg = "The active SQL connection was lost."; return false; } - if (result.is_new_host) { + if (result->is_new_host) { // connected to a new writer host; take it over - current_topology = result.new_topology; + current_topology = result->new_topology; current_host = current_topology->get_writer(); } connection_handler->update_connection( - result.new_connection, result.new_topology->get_writer()->get_host()); + result->new_connection, result->new_topology->get_writer()->get_host()); new_error_code = "08S02"; error_msg = "The active SQL connection has changed."; diff --git a/driver/failover_reader_handler.cc b/driver/failover_reader_handler.cc index fc6d73d5b..7893d3f58 100644 --- a/driver/failover_reader_handler.cc +++ b/driver/failover_reader_handler.cc @@ -58,65 +58,81 @@ FAILOVER_READER_HANDLER::~FAILOVER_READER_HANDLER() {} // Function called to start the Reader Failover process. // This process will generate a list of available hosts: First readers that are up, then readers marked as down, then writers. // If it goes through the list and does not succeed to connect, it tries again, endlessly. -READER_FAILOVER_RESULT FAILOVER_READER_HANDLER::failover( +std::shared_ptr FAILOVER_READER_HANDLER::failover( std::shared_ptr current_topology) { - - READER_FAILOVER_RESULT reader_result(false, nullptr, nullptr); + auto empty_result = std::make_shared(false, nullptr, nullptr); if (!current_topology || current_topology->total_hosts() == 0) { - return reader_result; + return empty_result; } - FAILOVER_SYNC global_sync(1); + const auto start = std::chrono::steady_clock::now(); + auto global_sync = std::make_shared(1); - auto reader_result_future = std::async(std::launch::async, [=, &global_sync, &reader_result]() { - std::vector> hosts_list; - while (!global_sync.is_completed()) { - hosts_list = build_hosts_list(current_topology, !enable_strict_reader_failover); - reader_result = get_connection_from_hosts(hosts_list, global_sync); - if (reader_result.connected) { - global_sync.mark_as_complete(true); - return; + std::packaged_task reader_failover_task ([=] { + while (!global_sync->is_completed()) { + auto hosts_list = build_hosts_list(current_topology, !enable_strict_reader_failover); + auto reader_result = get_connection_from_hosts(hosts_list, global_sync); + if (reader_result->connected) { + global_sync->mark_as_complete(true); + return reader_result; } // TODO Think of changes to the strategy if it went // through all the hosts and did not connect. std::this_thread::sleep_for(std::chrono::seconds(READER_CONNECT_INTERVAL_SEC)); } - }); - - global_sync.wait_and_complete(max_failover_timeout_ms); + return empty_result; + }); + + auto reader_result_future = reader_failover_task.get_future(); + std::thread reader_failover_thread(std::move(reader_failover_task)); + + // Wait for task complete signal with specified timeout + global_sync->wait_and_complete(max_failover_timeout_ms); + // Constantly polling for results until timeout + while (true) { + if (reader_result_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { + reader_failover_thread.join(); + MYLOG_TRACE(logger.get(), dbc_id, "[FAILOVER_READER_HANDLER] Successfully connected to the reader instance."); + return reader_result_future.get(); + } - if (reader_result_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { - reader_result_future.get(); + // No result it ready, update remaining timeout + const auto duration = std::chrono::duration_cast(std::chrono::steady_clock::now() - start); + const auto remaining_wait_ms = max_failover_timeout_ms - duration.count(); + if (remaining_wait_ms <= 0) { + // Reader failover timed out + reader_failover_thread.detach(); + MYLOG_TRACE(logger.get(), dbc_id, "[FAILOVER_READER_HANDLER] Failed to connect to the reader instance."); + return empty_result; + } } - - return reader_result; } // Function to connect to a reader host. Often used to query/update the topology. // If it goes through the list of readers and fails to connect, it tries again, endlessly. // This function only tries to connect to reader hosts. -READER_FAILOVER_RESULT FAILOVER_READER_HANDLER::get_reader_connection( +std::shared_ptr FAILOVER_READER_HANDLER::get_reader_connection( std::shared_ptr topology_info, - FAILOVER_SYNC& f_sync) { + std::shared_ptr f_sync) { // We build a list of all readers, up then down, without writers. auto hosts = build_hosts_list(topology_info, false); - while (!f_sync.is_completed()) { + while (!f_sync->is_completed()) { auto reader_result = get_connection_from_hosts(hosts, f_sync); // TODO Think of changes to the strategy if it went through all the readers and did not connect. - if (reader_result.connected) { + if (reader_result->connected) { return reader_result; } } // Return a false result if the connection request has been cancelled. - return READER_FAILOVER_RESULT(false, nullptr, nullptr); + return std::make_shared(false, nullptr, nullptr); } // Function that reads the topology and builds a list of hosts to connect to, in order of priority. // boolean include_writers indicate whether one wants to append the writers to the end of the list or not. std::vector> FAILOVER_READER_HANDLER::build_hosts_list( - const std::shared_ptr& topology_info, + std::shared_ptr topology_info, bool include_writers) { std::vector> hosts_list; @@ -153,8 +169,8 @@ std::vector> FAILOVER_READER_HANDLER::build_hosts_lis return hosts_list; } -READER_FAILOVER_RESULT FAILOVER_READER_HANDLER::get_connection_from_hosts( - std::vector> hosts_list, FAILOVER_SYNC& global_sync) { +std::shared_ptr FAILOVER_READER_HANDLER::get_connection_from_hosts( + std::vector> hosts_list, std::shared_ptr global_sync) { size_t total_hosts = hosts_list.size(); size_t i = 0; @@ -162,64 +178,94 @@ READER_FAILOVER_RESULT FAILOVER_READER_HANDLER::get_connection_from_hosts( // This loop should end once it reaches the end of the list without a successful connection. // The function calling it already has a neverending loop looking for a connection. // Ending this loop will allow the calling function to update the list or change strategy if this failed. - while (!global_sync.is_completed() && i < total_hosts) { + while (!global_sync->is_completed() && i < total_hosts) { + const auto start = std::chrono::steady_clock::now(); + // This boolean verifies if the next host in the list is also the last, meaning there's no host for the second thread. - bool odd_hosts_number = (i + 1 == total_hosts); + const bool odd_hosts_number = (i + 1 == total_hosts); - FAILOVER_SYNC local_sync(1); + auto local_sync = std::make_shared(1); if (!odd_hosts_number) { - local_sync.increment_task(); + local_sync->increment_task(); } CONNECT_TO_READER_HANDLER first_connection_handler(connection_handler, topology_service, dbc_id, logger != nullptr); - std::future first_connection_future; - READER_FAILOVER_RESULT first_connection_result(false, nullptr, nullptr); + auto first_connection_result = std::make_shared(false, nullptr, nullptr); CONNECT_TO_READER_HANDLER second_connection_handler(connection_handler, topology_service, dbc_id, logger != nullptr); - std::future second_connection_future; - READER_FAILOVER_RESULT second_connection_result(false, nullptr, nullptr); + auto second_connection_result = std::make_shared(false, nullptr, nullptr); std::shared_ptr first_reader_host = hosts_list.at(i); - first_connection_future = std::async(std::launch::async, std::ref(first_connection_handler), - std::ref(first_reader_host), std::ref(local_sync), - std::ref(first_connection_result)); - if (!odd_hosts_number) { - std::shared_ptr second_reader_host = hosts_list.at(i + 1); - second_connection_future = std::async(std::launch::async, std::ref(second_connection_handler), - std::ref(second_reader_host), std::ref(local_sync), - std::ref(second_connection_result)); - } + std::packaged_task first_reader_task(first_connection_handler); + std::packaged_task second_reader_task(second_connection_handler); - local_sync.wait_and_complete(reader_connect_timeout_ms); + auto result1 = first_reader_task.get_future(); + auto result2 = second_reader_task.get_future(); - if (first_connection_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { - first_connection_future.get(); - } - if (!odd_hosts_number && - second_connection_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { + std::thread task_thread1(std::move(first_reader_task), first_reader_host,local_sync, first_connection_result); + + std::thread task_thread2; - second_connection_future.get(); + if (!odd_hosts_number) { + std::shared_ptr second_reader_host = hosts_list.at(i + 1); + task_thread2 = std::thread(std::move(second_reader_task), second_reader_host, local_sync,second_connection_result); } - if (first_connection_result.connected) { - MYLOG_TRACE(logger.get(), dbc_id, + // Wait for task complete signal with specified timeout + local_sync->wait_and_complete(reader_connect_timeout_ms); + + // Constantly polling for results until timeout + while (true) { + // Check if first reader task result is ready + if (result1.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { + task_thread1.join(); + if (!odd_hosts_number) { + task_thread2.detach(); + } + result1.get(); + if (first_connection_result->connected) { + MYLOG_TRACE(logger.get(), dbc_id, "[FAILOVER_READER_HANDLER] Connected to reader: %s", - first_connection_result.new_host->get_host_port_pair().c_str()); - return first_connection_result; - } else if (!odd_hosts_number && second_connection_result.connected) { - MYLOG_TRACE(logger.get(), dbc_id, + first_connection_result->new_host->get_host_port_pair().c_str()); + + return first_connection_result; + } + break; + } + + // Check if second reader task result is ready if there is one + if (!odd_hosts_number && + result2.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { + task_thread1.detach(); + task_thread2.join(); + result2.get(); + if (second_connection_result->connected) { + MYLOG_TRACE(logger.get(), dbc_id, "[FAILOVER_READER_HANDLER] Connected to reader: %s", - second_connection_result.new_host->get_host_port_pair().c_str()); - return second_connection_result; + second_connection_result->new_host->get_host_port_pair().c_str()); + + return second_connection_result; + } + break; + } + + // No result it ready, update remaining timeout + const auto duration = std::chrono::duration_cast(std::chrono::steady_clock::now() - start); + const auto remaining_wait_ms = reader_connect_timeout_ms - duration.count(); + if (remaining_wait_ms <= 0) { + // None has connected. We move on and try new hosts. + task_thread1.detach(); + task_thread2.detach(); + i += 2; + std::this_thread::sleep_for(std::chrono::seconds(READER_CONNECT_INTERVAL_SEC)); + break; + } } - // None has connected. We move on and try new hosts. - i += 2; - std::this_thread::sleep_for(std::chrono::seconds(READER_CONNECT_INTERVAL_SEC)); } // The operation was either cancelled either reached the end of the list without connecting. - return READER_FAILOVER_RESULT(false, nullptr, nullptr); + return std::make_shared(false, nullptr, nullptr); } // *** CONNECT_TO_READER_HANDLER @@ -234,10 +280,10 @@ CONNECT_TO_READER_HANDLER::~CONNECT_TO_READER_HANDLER() {} void CONNECT_TO_READER_HANDLER::operator()( std::shared_ptr reader, - FAILOVER_SYNC& f_sync, - READER_FAILOVER_RESULT& result) { + std::shared_ptr f_sync, + std::shared_ptr result) { - if (reader && !f_sync.is_completed()) { + if (reader && !f_sync->is_completed()) { MYLOG_TRACE(logger.get(), dbc_id, "[CONNECT_TO_READER_HANDLER] Trying to connect to reader: %s", @@ -245,12 +291,14 @@ void CONNECT_TO_READER_HANDLER::operator()( if (connect(reader)) { topology_service->mark_host_up(reader); - if (f_sync.is_completed()) { + if (f_sync->is_completed()) { // If another thread finishes first, or both timeout, this thread is canceled. release_new_connection(); } else { - result = READER_FAILOVER_RESULT(true, reader, std::move(this->new_connection)); - f_sync.mark_as_complete(true); + result->connected =true; + result->new_host = reader; + result->new_connection = std::move(this->new_connection); + f_sync->mark_as_complete(true); MYLOG_TRACE( logger.get(), dbc_id, "[CONNECT_TO_READER_HANDLER] Connected to reader: %s", @@ -263,8 +311,8 @@ void CONNECT_TO_READER_HANDLER::operator()( logger.get(), dbc_id, "[CONNECT_TO_READER_HANDLER] Failed to connect to reader: %s", reader->get_host_port_pair().c_str()); - if (!f_sync.is_completed()) { - f_sync.mark_as_complete(false); + if (!f_sync->is_completed()) { + f_sync->mark_as_complete(false); } } } diff --git a/driver/failover_writer_handler.cc b/driver/failover_writer_handler.cc index 0baa81e24..edeeec7bc 100644 --- a/driver/failover_writer_handler.cc +++ b/driver/failover_writer_handler.cc @@ -86,7 +86,7 @@ bool FAILOVER::is_writer_connected() { return new_connection && new_connection->is_connected(); } -bool FAILOVER::connect(const std::shared_ptr& host_info) { +bool FAILOVER::connect(std::shared_ptr host_info) { new_connection = connection_handler->connect(host_info, nullptr); return is_writer_connected(); } @@ -117,8 +117,8 @@ RECONNECT_TO_WRITER_HANDLER::~RECONNECT_TO_WRITER_HANDLER() {} void RECONNECT_TO_WRITER_HANDLER::operator()( std::shared_ptr original_writer, - FAILOVER_SYNC& f_sync, - WRITER_FAILOVER_RESULT& result) { + std::shared_ptr f_sync, + std::shared_ptr result) { if (original_writer) { MYLOG_TRACE(logger.get(), dbc_id, @@ -126,7 +126,7 @@ void RECONNECT_TO_WRITER_HANDLER::operator()( "re-connect to the current writer instance: %s", original_writer->get_host_port_pair().c_str()); - while (!f_sync.is_completed()) { + while (!f_sync->is_completed()) { if (connect(original_writer)) { auto latest_topology = topology_service->get_topology(new_connection, true); @@ -134,12 +134,14 @@ void RECONNECT_TO_WRITER_HANDLER::operator()( is_current_host_writer(original_writer, latest_topology)) { topology_service->mark_host_up(original_writer); - if (f_sync.is_completed()) { + if (f_sync->is_completed()) { break; } - result = WRITER_FAILOVER_RESULT(true, false, latest_topology, - std::move(new_connection)); - f_sync.mark_as_complete(true); + result->connected = true; + result->is_new_host = false; + result->new_topology = latest_topology; + result->new_connection = std::move(new_connection); + f_sync->mark_as_complete(true); MYLOG_TRACE(logger.get(), dbc_id, "[RECONNECT_TO_WRITER_HANDLER] [TaskA] Finished"); return; } @@ -155,8 +157,8 @@ void RECONNECT_TO_WRITER_HANDLER::operator()( } bool RECONNECT_TO_WRITER_HANDLER::is_current_host_writer( - const std::shared_ptr& original_writer, - const std::shared_ptr& latest_topology) { + std::shared_ptr original_writer, + std::shared_ptr latest_topology) { auto original_instance = original_writer->instance_name; if (original_instance.empty()) return false; auto latest_writer = latest_topology->get_writer(); @@ -182,20 +184,22 @@ WAIT_NEW_WRITER_HANDLER::~WAIT_NEW_WRITER_HANDLER() {} void WAIT_NEW_WRITER_HANDLER::operator()( std::shared_ptr original_writer, - FAILOVER_SYNC& f_sync, - WRITER_FAILOVER_RESULT& result) { + std::shared_ptr f_sync, + std::shared_ptr result) { MYLOG_TRACE(logger.get(), dbc_id, "[WAIT_NEW_WRITER_HANDLER] [TaskB] Attempting to connect to a new writer instance"); - while (!f_sync.is_completed()) { + while (!f_sync->is_completed()) { if (!is_writer_connected()) { connect_to_reader(f_sync); refresh_topology_and_connect_to_new_writer(original_writer, f_sync); clean_up_reader_connection(); } else { - result = WRITER_FAILOVER_RESULT(true, true, current_topology, - std::move(new_connection)); - f_sync.mark_as_complete(true); + result->connected = true; + result->is_new_host = true; + result->new_topology = current_topology; + result->new_connection = std::move(new_connection); + f_sync->mark_as_complete(true); MYLOG_TRACE(logger.get(), dbc_id, "[WAIT_NEW_WRITER_HANDLER] [TaskB] Finished"); return; } @@ -209,16 +213,16 @@ void WAIT_NEW_WRITER_HANDLER::operator()( } // Connect to a reader to later retrieve the latest topology -void WAIT_NEW_WRITER_HANDLER::connect_to_reader(FAILOVER_SYNC& f_sync) { - while (!f_sync.is_completed()) { +void WAIT_NEW_WRITER_HANDLER::connect_to_reader(std::shared_ptr f_sync) { + while (!f_sync->is_completed()) { auto connection_result = reader_handler->get_reader_connection(current_topology, f_sync); - if (connection_result.connected && connection_result.new_connection->is_connected()) { - reader_connection = connection_result.new_connection; - current_reader_host = connection_result.new_host; + if (connection_result->connected && connection_result->new_connection->is_connected()) { + reader_connection = connection_result->new_connection; + current_reader_host = connection_result->new_host; MYLOG_TRACE( logger.get(), dbc_id, "[WAIT_NEW_WRITER_HANDLER] [TaskB] Connected to reader: %s", - connection_result.new_host->get_host_port_pair().c_str()); + connection_result->new_host->get_host_port_pair().c_str()); break; } MYLOG_TRACE(logger.get(), dbc_id, "[WAIT_NEW_WRITER_HANDLER] [TaskB] Failed to connect to any reader."); @@ -227,8 +231,8 @@ void WAIT_NEW_WRITER_HANDLER::connect_to_reader(FAILOVER_SYNC& f_sync) { // Use just connected reader to refresh topology and try to connect to a new writer void WAIT_NEW_WRITER_HANDLER::refresh_topology_and_connect_to_new_writer( - std::shared_ptr original_writer, FAILOVER_SYNC& f_sync) { - while (!f_sync.is_completed()) { + std::shared_ptr original_writer, std::shared_ptr f_sync) { + while (!f_sync->is_completed()) { auto latest_topology = topology_service->get_topology(reader_connection, true); if (latest_topology->total_hosts() > 0) { current_topology = latest_topology; @@ -291,17 +295,20 @@ FAILOVER_WRITER_HANDLER::FAILOVER_WRITER_HANDLER( FAILOVER_WRITER_HANDLER::~FAILOVER_WRITER_HANDLER() {} -WRITER_FAILOVER_RESULT FAILOVER_WRITER_HANDLER::failover( +std::shared_ptr FAILOVER_WRITER_HANDLER::failover( std::shared_ptr current_topology) { if (!current_topology || current_topology->total_hosts() == 0) { MYLOG_TRACE(logger.get(), dbc_id, "[FAILOVER_WRITER_HANDLER] Failover was called with " "an invalid (null or empty) topology"); - return WRITER_FAILOVER_RESULT(false, false, nullptr, nullptr); + return std::make_shared(false, false, nullptr, nullptr); } - FAILOVER_SYNC failover_sync(2); + const auto start = std::chrono::steady_clock::now(); + + auto failover_sync = std::make_shared(2); + // Constructing the function objects RECONNECT_TO_WRITER_HANDLER reconnect_handler( connection_handler, topology_service, reconnect_writer_interval_ms, dbc_id, logger != nullptr); @@ -312,42 +319,63 @@ WRITER_FAILOVER_RESULT FAILOVER_WRITER_HANDLER::failover( auto original_writer = current_topology->get_writer(); topology_service->mark_host_down(original_writer); - auto reconnect_result = WRITER_FAILOVER_RESULT(false, false, nullptr, nullptr); - auto new_writer_result = WRITER_FAILOVER_RESULT(false, false, nullptr, nullptr); + auto reconnect_result = std::make_shared(false, false, nullptr, nullptr); + auto new_writer_result = std::make_shared(false, false, nullptr, nullptr); - // Try reconnecting to the original writer host - auto reconnect_future = std::async(std::launch::async, std::ref(reconnect_handler), - original_writer, std::ref(failover_sync), - std::ref(reconnect_result)); - - // Concurrently see if topology has changed and try connecting to a new writer - auto new_writer_future = std::async(std::launch::async, std::ref(new_writer_handler), - std::cref(original_writer), std::ref(failover_sync), - std::ref(new_writer_result)); + std::packaged_task reconnect_task(reconnect_handler); + std::packaged_task wait_new_writer_task(new_writer_handler); - failover_sync.wait_and_complete(writer_failover_timeout_ms); + auto reconnect_future = reconnect_task.get_future(); + auto wait_new_writer_future = wait_new_writer_task.get_future(); - if (reconnect_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { - reconnect_future.get(); - } + std::thread reconnect_thread(std::move(reconnect_task), original_writer, failover_sync, reconnect_result); + std::thread wait_new_writer_thread(std::move(wait_new_writer_task), original_writer, failover_sync, new_writer_result); - if (new_writer_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { - new_writer_future.get(); - } + // Wait for task complete signal with specified timeout + failover_sync->wait_and_complete(writer_failover_timeout_ms); - if (reconnect_result.connected) { - MYLOG_TRACE(logger.get(), dbc_id, + // Constantly polling for results until timeout + while (true) { + // Check if reconnect task result is ready + if (reconnect_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { + reconnect_thread.join(); + wait_new_writer_thread.detach(); + reconnect_future.get(); + if (reconnect_result->connected) { + MYLOG_TRACE(logger.get(), dbc_id, "[FAILOVER_WRITER_HANDLER] Successfully re-connected to the current writer instance: %s", - reconnect_result.new_topology->get_writer()->get_host_port_pair().c_str()); - return reconnect_result; - } else if (new_writer_result.connected) { - MYLOG_TRACE(logger.get(), dbc_id, + reconnect_result->new_topology->get_writer()->get_host_port_pair().c_str()); + return reconnect_result; + } + break; + } + + // Check if wait new writer task result is ready + if (wait_new_writer_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { + reconnect_thread.detach(); + wait_new_writer_thread.join(); + wait_new_writer_future.get(); + if (new_writer_result->connected) { + MYLOG_TRACE(logger.get(), dbc_id, "[FAILOVER_WRITER_HANDLER] Successfully connected to the new writer instance: %s", - new_writer_result.new_topology->get_writer()->get_host_port_pair().c_str()); - return new_writer_result; + new_writer_result->new_topology->get_writer()->get_host_port_pair().c_str()); + return new_writer_result; + } + break; + } + + // No result it ready, update remaining timeout + const auto duration = std::chrono::duration_cast(std::chrono::steady_clock::now() - start); + const auto remaining_wait_ms = writer_failover_timeout_ms - duration.count(); + if (remaining_wait_ms <= 0) { + // Writer failover timed out + reconnect_thread.detach(); + wait_new_writer_thread.detach(); + MYLOG_TRACE(logger.get(), dbc_id, "[FAILOVER_WRITER_HANDLER] Failed to connect to the writer instance."); + return std::make_shared(false, false, nullptr, nullptr); + } } - // timeout - MYLOG_TRACE(logger.get(), dbc_id, "[FAILOVER_WRITER_HANDLER] Failed to connect to the writer instance."); - return WRITER_FAILOVER_RESULT(false, false, nullptr, nullptr); + // Writer failover finished but not connected + return std::make_shared(false, false, nullptr, nullptr); } diff --git a/driver/host_info.cc b/driver/host_info.cc index 05ef63488..ba9e3beba 100644 --- a/driver/host_info.cc +++ b/driver/host_info.cc @@ -113,6 +113,6 @@ void HOST_INFO::mark_as_writer(bool writer) { } // Check if two host info have same instance name -bool HOST_INFO::is_host_same(const std::shared_ptr& h1, const std::shared_ptr& h2) { +bool HOST_INFO::is_host_same(std::shared_ptr h1, std::shared_ptr h2) { return h1->instance_name == h2->instance_name; } diff --git a/driver/host_info.h b/driver/host_info.h index d9d442edd..e5c64a420 100644 --- a/driver/host_info.h +++ b/driver/host_info.h @@ -57,7 +57,7 @@ class HOST_INFO { bool is_host_down(); bool is_host_writer(); void mark_as_writer(bool writer); - static bool is_host_same(const std::shared_ptr& h1, const std::shared_ptr& h2); + static bool is_host_same(std::shared_ptr h1, std::shared_ptr h2); static constexpr int NO_PORT = -1; // used to be properties - TODO - remove the ones that are not necessary diff --git a/driver/monitor.cc b/driver/monitor.cc index eac8cef25..6bba5a17e 100644 --- a/driver/monitor.cc +++ b/driver/monitor.cc @@ -79,6 +79,7 @@ MONITOR::~MONITOR() { } if (this->connection_proxy) { + this->connection_proxy->delete_ds(); delete this->connection_proxy; this->connection_proxy = nullptr; } diff --git a/driver/mylog.cc b/driver/mylog.cc index 54c22b73c..e81eae3be 100644 --- a/driver/mylog.cc +++ b/driver/mylog.cc @@ -63,10 +63,12 @@ void trace_print(FILE *file, unsigned long dbc_id, const char *fmt, ...) { pid_t pid; pid = getpid(); #endif - - fprintf(file, "%s - Process ID %ld - DBC ID %lu - %s\n", time_buf, pid, - dbc_id, buf.data()); - fflush(file); + { + std::lock_guard guard(log_file_mutex); + fprintf(file, "%s - Process ID %ld - DBC ID %lu - %s\n", time_buf, pid, + dbc_id, buf.data()); + fflush(file); + } } } diff --git a/unit_testing/failover_reader_handler_test.cc b/unit_testing/failover_reader_handler_test.cc index 4a8569f7a..8e2d47d9e 100644 --- a/unit_testing/failover_reader_handler_test.cc +++ b/unit_testing/failover_reader_handler_test.cc @@ -65,7 +65,7 @@ class FailoverReaderHandlerTest : public testing::Test { std::shared_ptr mock_ts; std::shared_ptr mock_connection_handler; - MOCK_FAILOVER_SYNC mock_sync; + std::shared_ptr mock_sync; static void SetUpTestSuite() { reader_a_host = std::make_shared("reader-a-host" + HOST_SUFFIX, 1234, UP, false); @@ -99,7 +99,8 @@ class FailoverReaderHandlerTest : public testing::Test { mock_ts = std::make_shared(); mock_connection_handler = std::make_shared(); - EXPECT_CALL(mock_sync, is_completed()).WillRepeatedly(Return(false)); + mock_sync = std::make_shared(); + EXPECT_CALL(*mock_sync, is_completed()).WillRepeatedly(Return(false)); } void TearDown() override { @@ -229,10 +230,10 @@ TEST_F(FailoverReaderHandlerTest, GetConnectionFromHosts_Failure) { FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, 60000, 30000, false, 0); auto hosts_list = reader_handler.build_hosts_list(topology, true); - READER_FAILOVER_RESULT result = reader_handler.get_connection_from_hosts(hosts_list, std::ref(mock_sync)); + auto result = reader_handler.get_connection_from_hosts(hosts_list, mock_sync); - EXPECT_FALSE(result.connected); - EXPECT_THAT(result.new_connection, nullptr); + EXPECT_FALSE(result->connected); + EXPECT_THAT(result->new_connection, nullptr); } // Verify that reader failover handler connects to a reader node that is marked up. @@ -252,11 +253,11 @@ TEST_F(FailoverReaderHandlerTest, GetConnectionFromHosts_Success_Reader) { FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, 60000, 30000, false, 0); auto hosts_list = reader_handler.build_hosts_list(topology, true); - READER_FAILOVER_RESULT result = reader_handler.get_connection_from_hosts(hosts_list, std::ref(mock_sync)); + READER_FAILOVER_RESULT result = reader_handler.get_connection_from_hosts(hosts_list, mock_sync); - EXPECT_TRUE(result.connected); - EXPECT_THAT(result.new_connection, mock_reader_a_proxy); - EXPECT_FALSE(result.new_host->is_host_writer()); + EXPECT_TRUE(result->connected); + EXPECT_THAT(result->new_connection, mock_reader_a_proxy); + EXPECT_FALSE(result->new_host->is_host_writer()); // Explicit delete on reader A as it is returned as valid connection/result delete mock_reader_a_proxy; @@ -278,11 +279,11 @@ TEST_F(FailoverReaderHandlerTest, GetConnectionFromHosts_Success_Writer) { FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, 60000, 30000, false, 0); auto hosts_list = reader_handler.build_hosts_list(topology, true); - READER_FAILOVER_RESULT result = reader_handler.get_connection_from_hosts(hosts_list, std::ref(mock_sync)); + READER_FAILOVER_RESULT result = reader_handler.get_connection_from_hosts(hosts_list, mock_sync); - EXPECT_TRUE(result.connected); - EXPECT_THAT(result.new_connection, mock_writer_proxy); - EXPECT_TRUE(result.new_host->is_host_writer()); + EXPECT_TRUE(result->connected); + EXPECT_THAT(result->new_connection, mock_writer_proxy); + EXPECT_TRUE(result->new_host->is_host_writer()); // Explicit delete as it is returned as result & is not deconstructed during failover delete mock_writer_proxy; @@ -315,11 +316,11 @@ TEST_F(FailoverReaderHandlerTest, GetConnectionFromHosts_FastestHost) { FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, 60000, 30000, false, 0); auto hosts_list = reader_handler.build_hosts_list(topology, true); - READER_FAILOVER_RESULT result = reader_handler.get_connection_from_hosts(hosts_list, std::ref(mock_sync)); + READER_FAILOVER_RESULT result = reader_handler.get_connection_from_hosts(hosts_list, mock_sync); - EXPECT_TRUE(result.connected); - EXPECT_THAT(result.new_connection, mock_reader_a_proxy); - EXPECT_FALSE(result.new_host->is_host_writer()); + EXPECT_TRUE(result->connected); + EXPECT_THAT(result->new_connection, mock_reader_a_proxy); + EXPECT_FALSE(result->new_host->is_host_writer()); // Explicit delete on reader A as it is returned as a valid result delete mock_reader_a_proxy; @@ -354,10 +355,10 @@ TEST_F(FailoverReaderHandlerTest, GetConnectionFromHosts_Timeout) { FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, 60000, 1000, false, 0); auto hosts_list = reader_handler.build_hosts_list(topology, true); - READER_FAILOVER_RESULT result = reader_handler.get_connection_from_hosts(hosts_list, std::ref(mock_sync)); + READER_FAILOVER_RESULT result = reader_handler.get_connection_from_hosts(hosts_list, mock_sync); - EXPECT_FALSE(result.connected); - EXPECT_THAT(result.new_connection, nullptr); + EXPECT_FALSE(result->connected); + EXPECT_THAT(result->new_connection, nullptr); } // Verify that reader failover handler fails to connect to any reader node or @@ -375,8 +376,8 @@ TEST_F(FailoverReaderHandlerTest, Failover_Failure) { FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, 3000, 1000, false, 0); READER_FAILOVER_RESULT result = reader_handler.failover(topology); - EXPECT_FALSE(result.connected); - EXPECT_THAT(result.new_connection, nullptr); + EXPECT_FALSE(result->connected); + EXPECT_THAT(result->new_connection, nullptr); } // Verify that reader failover handler connects to a faster reader node. @@ -412,10 +413,10 @@ TEST_F(FailoverReaderHandlerTest, Failover_Success_Reader) { FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, 60000, 30000, false, 0); READER_FAILOVER_RESULT result = reader_handler.failover(current_topology); - EXPECT_TRUE(result.connected); - EXPECT_THAT(result.new_connection, mock_reader_a_proxy); - EXPECT_FALSE(result.new_host->is_host_writer()); - EXPECT_EQ("reader-a-host", result.new_host->instance_name); + EXPECT_TRUE(result->connected); + EXPECT_THAT(result->new_connection, mock_reader_a_proxy); + EXPECT_FALSE(result->new_host->is_host_writer()); + EXPECT_EQ("reader-a-host", result->new_host->instance_name); // Explicit delete on reader A as it's returned as a valid result delete mock_reader_a_proxy; diff --git a/unit_testing/failover_writer_handler_test.cc b/unit_testing/failover_writer_handler_test.cc index 86b94b7e5..431b9b988 100644 --- a/unit_testing/failover_writer_handler_test.cc +++ b/unit_testing/failover_writer_handler_test.cc @@ -137,9 +137,9 @@ TEST_F(FailoverWriterHandlerTest, ReconnectToWriter_TaskBEmptyReaderResult) { mock_ts, mock_reader_handler, mock_connection_handler, 5000, 2000, 2000, 0); auto result = writer_handler.failover(current_topology); - EXPECT_TRUE(result.connected); - EXPECT_FALSE(result.is_new_host); - EXPECT_THAT(result.new_connection, mock_writer_proxy); + EXPECT_TRUE(result->connected); + EXPECT_FALSE(result->is_new_host); + EXPECT_THAT(result->new_connection, mock_writer_proxy); // Explicit delete on writer connection as it's returned as a valid result delete mock_writer_proxy; @@ -198,9 +198,9 @@ TEST_F(FailoverWriterHandlerTest, ReconnectToWriter_SlowReaderA) { mock_ts, mock_reader_handler, mock_connection_handler, 60000, 5000, 5000, 0); const auto result = writer_handler.failover(current_topology); - EXPECT_TRUE(result.connected); - EXPECT_FALSE(result.is_new_host); - EXPECT_THAT(result.new_connection, mock_writer_proxy); + EXPECT_TRUE(result->connected); + EXPECT_FALSE(result->is_new_host); + EXPECT_THAT(result->new_connection, mock_writer_proxy); // Explicit delete on writer connection as it's returned as a valid result delete mock_writer_proxy; @@ -243,9 +243,9 @@ TEST_F(FailoverWriterHandlerTest, ReconnectToWriter_TaskBDefers) { mock_ts, mock_reader_handler, mock_connection_handler, 60000, 2000, 2000, 0); auto result = writer_handler.failover(current_topology); - EXPECT_TRUE(result.connected); - EXPECT_FALSE(result.is_new_host); - EXPECT_THAT(result.new_connection, mock_writer_proxy); + EXPECT_TRUE(result->connected); + EXPECT_FALSE(result->is_new_host); + EXPECT_THAT(result->new_connection, mock_writer_proxy); // Explicit delete on writer connection as it's returned as a valid result delete mock_writer_proxy; @@ -306,12 +306,12 @@ TEST_F(FailoverWriterHandlerTest, ConnectToReaderA_SlowWriter) { mock_ts, mock_reader_handler, mock_connection_handler, 60000, 5000, 5000, 0); auto result = writer_handler.failover(current_topology); - EXPECT_TRUE(result.connected); - EXPECT_TRUE(result.is_new_host); - EXPECT_THAT(result.new_connection, mock_new_writer_proxy); - EXPECT_EQ(3, result.new_topology->total_hosts()); + EXPECT_TRUE(result->connected); + EXPECT_TRUE(result->is_new_host); + EXPECT_THAT(result->new_connection, mock_new_writer_proxy); + EXPECT_EQ(3, result->new_topology->total_hosts()); EXPECT_EQ(new_writer_instance_name, - result.new_topology->get_writer()->instance_name); + result->new_topology->get_writer()->instance_name); // Explicit delete on new writer connection as it's returned as a valid result delete mock_new_writer_proxy; @@ -365,12 +365,12 @@ TEST_F(FailoverWriterHandlerTest, ConnectToReaderA_TaskADefers) { mock_ts, mock_reader_handler, mock_connection_handler, 60000, 5000, 5000, 0); auto result = writer_handler.failover(current_topology); - EXPECT_TRUE(result.connected); - EXPECT_TRUE(result.is_new_host); - EXPECT_THAT(result.new_connection, mock_new_writer_proxy); - EXPECT_EQ(4, result.new_topology->total_hosts()); + EXPECT_TRUE(result->connected); + EXPECT_TRUE(result->is_new_host); + EXPECT_THAT(result->new_connection, mock_new_writer_proxy); + EXPECT_EQ(4, result->new_topology->total_hosts()); EXPECT_EQ(new_writer_instance_name, - result.new_topology->get_writer()->instance_name); + result->new_topology->get_writer()->instance_name); // Explicit delete on new writer connection as it's returned as a valid result delete mock_new_writer_proxy; @@ -434,9 +434,9 @@ TEST_F(FailoverWriterHandlerTest, FailedToConnect_FailoverTimeout) { mock_ts, mock_reader_handler, mock_connection_handler, 1000, 2000, 2000, 0); auto result = writer_handler.failover(current_topology); - EXPECT_FALSE(result.connected); - EXPECT_FALSE(result.is_new_host); - EXPECT_THAT(result.new_connection, nullptr); + EXPECT_FALSE(result->connected); + EXPECT_FALSE(result->is_new_host); + EXPECT_THAT(result->new_connection, nullptr); // delete reader b explicitly, since get_reader_connection() is mocked delete mock_reader_b_proxy; @@ -483,9 +483,9 @@ TEST_F(FailoverWriterHandlerTest, FailedToConnect_TaskAFailed_TaskBWriterFailed) mock_ts, mock_reader_handler, mock_connection_handler, 5000, 2000, 2000, 0); auto result = writer_handler.failover(current_topology); - EXPECT_FALSE(result.connected); - EXPECT_FALSE(result.is_new_host); - EXPECT_THAT(result.new_connection, nullptr); + EXPECT_FALSE(result->connected); + EXPECT_FALSE(result->is_new_host); + EXPECT_THAT(result->new_connection, nullptr); // delete reader b explicitly, since get_reader_connection() is mocked delete mock_reader_b_proxy; From 1924ab82b7cb4bae14680d32b06c62776770091e Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Tue, 18 Apr 2023 13:58:33 -0700 Subject: [PATCH 02/33] fix unit tests --- driver/failover_reader_handler.cc | 5 ++-- unit_testing/failover_reader_handler_test.cc | 27 ++++++++++++-------- unit_testing/failover_writer_handler_test.cc | 18 ++++++------- unit_testing/mock_objects.h | 6 ++--- 4 files changed, 30 insertions(+), 26 deletions(-) diff --git a/driver/failover_reader_handler.cc b/driver/failover_reader_handler.cc index 7893d3f58..66b8bbf5c 100644 --- a/driver/failover_reader_handler.cc +++ b/driver/failover_reader_handler.cc @@ -206,9 +206,8 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ std::thread task_thread1(std::move(first_reader_task), first_reader_host,local_sync, first_connection_result); std::thread task_thread2; - if (!odd_hosts_number) { - std::shared_ptr second_reader_host = hosts_list.at(i + 1); + auto second_reader_host = hosts_list.at(i + 1); task_thread2 = std::thread(std::move(second_reader_task), second_reader_host, local_sync,second_connection_result); } @@ -257,11 +256,11 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ // None has connected. We move on and try new hosts. task_thread1.detach(); task_thread2.detach(); - i += 2; std::this_thread::sleep_for(std::chrono::seconds(READER_CONNECT_INTERVAL_SEC)); break; } } + i += 2; } // The operation was either cancelled either reached the end of the list without connecting. diff --git a/unit_testing/failover_reader_handler_test.cc b/unit_testing/failover_reader_handler_test.cc index 8e2d47d9e..c8aa96b5b 100644 --- a/unit_testing/failover_reader_handler_test.cc +++ b/unit_testing/failover_reader_handler_test.cc @@ -38,6 +38,7 @@ using ::testing::_; using ::testing::AnyNumber; +using ::testing::AtLeast; using ::testing::Invoke; using ::testing::Mock; using ::testing::Return; @@ -91,6 +92,8 @@ class FailoverReaderHandlerTest : public testing::Test { void SetUp() override { allocate_odbc_handles(env, dbc, ds); + ds->save_queries = true; + ds->allow_reader_connections = true; reader_a_host->set_host_state(UP); reader_b_host->set_host_state(UP); @@ -253,7 +256,7 @@ TEST_F(FailoverReaderHandlerTest, GetConnectionFromHosts_Success_Reader) { FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, 60000, 30000, false, 0); auto hosts_list = reader_handler.build_hosts_list(topology, true); - READER_FAILOVER_RESULT result = reader_handler.get_connection_from_hosts(hosts_list, mock_sync); + auto result = reader_handler.get_connection_from_hosts(hosts_list, mock_sync); EXPECT_TRUE(result->connected); EXPECT_THAT(result->new_connection, mock_reader_a_proxy); @@ -279,7 +282,7 @@ TEST_F(FailoverReaderHandlerTest, GetConnectionFromHosts_Success_Writer) { FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, 60000, 30000, false, 0); auto hosts_list = reader_handler.build_hosts_list(topology, true); - READER_FAILOVER_RESULT result = reader_handler.get_connection_from_hosts(hosts_list, mock_sync); + auto result = reader_handler.get_connection_from_hosts(hosts_list, mock_sync); EXPECT_TRUE(result->connected); EXPECT_THAT(result->new_connection, mock_writer_proxy); @@ -316,7 +319,7 @@ TEST_F(FailoverReaderHandlerTest, GetConnectionFromHosts_FastestHost) { FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, 60000, 30000, false, 0); auto hosts_list = reader_handler.build_hosts_list(topology, true); - READER_FAILOVER_RESULT result = reader_handler.get_connection_from_hosts(hosts_list, mock_sync); + auto result = reader_handler.get_connection_from_hosts(hosts_list, mock_sync); EXPECT_TRUE(result->connected); EXPECT_THAT(result->new_connection, mock_reader_a_proxy); @@ -355,7 +358,7 @@ TEST_F(FailoverReaderHandlerTest, GetConnectionFromHosts_Timeout) { FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, 60000, 1000, false, 0); auto hosts_list = reader_handler.build_hosts_list(topology, true); - READER_FAILOVER_RESULT result = reader_handler.get_connection_from_hosts(hosts_list, mock_sync); + auto result = reader_handler.get_connection_from_hosts(hosts_list, mock_sync); EXPECT_FALSE(result->connected); EXPECT_THAT(result->new_connection, nullptr); @@ -368,13 +371,13 @@ TEST_F(FailoverReaderHandlerTest, Failover_Failure) { EXPECT_CALL(*mock_connection_handler, connect(_, nullptr)).WillRepeatedly(Return(nullptr)); - EXPECT_CALL(*mock_ts, mark_host_down(reader_a_host)).Times(1); - EXPECT_CALL(*mock_ts, mark_host_down(reader_b_host)).Times(1); - EXPECT_CALL(*mock_ts, mark_host_down(reader_c_host)).Times(1); - EXPECT_CALL(*mock_ts, mark_host_down(writer_host)).Times(1); + EXPECT_CALL(*mock_ts, mark_host_down(reader_a_host)).Times(AtLeast(1)); + EXPECT_CALL(*mock_ts, mark_host_down(reader_b_host)).Times(AtLeast(1)); + EXPECT_CALL(*mock_ts, mark_host_down(reader_c_host)).Times(AtLeast(1)); + EXPECT_CALL(*mock_ts, mark_host_down(writer_host)).Times(AtLeast(1)); FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, 3000, 1000, false, 0); - READER_FAILOVER_RESULT result = reader_handler.failover(topology); + auto result = reader_handler.failover(topology); EXPECT_FALSE(result->connected); EXPECT_THAT(result->new_connection, nullptr); @@ -390,6 +393,8 @@ TEST_F(FailoverReaderHandlerTest, Failover_Success_Reader) { // Cannot delete at the end as it may cause double delete Mock::AllowLeak(mock_reader_b_proxy); Mock::AllowLeak(mock_reader_b_proxy->get_ds()); + Mock::AllowLeak(mock_ts.get()); + Mock::AllowLeak(mock_connection_handler.get()); auto current_topology = std::make_shared(); current_topology->add_host(writer_host); @@ -405,13 +410,13 @@ TEST_F(FailoverReaderHandlerTest, Failover_Success_Reader) { EXPECT_CALL(*mock_connection_handler, connect(_, nullptr)).WillRepeatedly(Return(nullptr)); EXPECT_CALL(*mock_connection_handler, connect(reader_a_host, nullptr)).WillRepeatedly( Return(mock_reader_a_proxy)); - EXPECT_CALL(*mock_connection_handler, connect(reader_b_host, nullptr)).WillRepeatedly(Invoke([&]() { + EXPECT_CALL(*mock_connection_handler, connect(reader_b_host, nullptr)).WillRepeatedly(Invoke([=]() { std::this_thread::sleep_for(std::chrono::milliseconds(5000)); return mock_reader_b_proxy; })); FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, 60000, 30000, false, 0); - READER_FAILOVER_RESULT result = reader_handler.failover(current_topology); + auto result = reader_handler.failover(current_topology); EXPECT_TRUE(result->connected); EXPECT_THAT(result->new_connection, mock_reader_a_proxy); diff --git a/unit_testing/failover_writer_handler_test.cc b/unit_testing/failover_writer_handler_test.cc index 431b9b988..7206b2552 100644 --- a/unit_testing/failover_writer_handler_test.cc +++ b/unit_testing/failover_writer_handler_test.cc @@ -124,7 +124,7 @@ TEST_F(FailoverWriterHandlerTest, ReconnectToWriter_TaskBEmptyReaderResult) { EXPECT_CALL(*mock_ts, mark_host_up(writer_host)).InSequence(s); EXPECT_CALL(*mock_reader_handler, get_reader_connection(_, _)) - .WillRepeatedly(Return(READER_FAILOVER_RESULT(false, nullptr, nullptr))); + .WillRepeatedly(Return(std::make_shared(false, nullptr, nullptr))); EXPECT_CALL(*mock_connection_handler, connect(writer_host, nullptr)) .WillRepeatedly(Return(mock_writer_proxy)); @@ -186,12 +186,12 @@ TEST_F(FailoverWriterHandlerTest, ReconnectToWriter_SlowReaderA) { EXPECT_CALL(*mock_reader_handler, get_reader_connection(_, _)) .WillRepeatedly(DoAll( - Invoke([](Unused, FAILOVER_SYNC& f_sync) { - for (int i = 0; i <= 50 && !f_sync.is_completed(); i++) { + Invoke([](Unused, std::shared_ptr f_sync) { + for (int i = 0; i <= 50 && !f_sync->is_completed(); i++) { std::this_thread::sleep_for(std::chrono::milliseconds(100)); } }), - Return(READER_FAILOVER_RESULT(true, reader_a_host, + Return(std::make_shared(true, reader_a_host, mock_reader_a_proxy)))); FAILOVER_WRITER_HANDLER writer_handler( @@ -236,7 +236,7 @@ TEST_F(FailoverWriterHandlerTest, ReconnectToWriter_TaskBDefers) { EXPECT_CALL(*mock_ts, mark_host_up(writer_host)).InSequence(s); EXPECT_CALL(*mock_reader_handler, get_reader_connection(_, _)) - .WillRepeatedly(Return(READER_FAILOVER_RESULT(true, reader_a_host, + .WillRepeatedly(Return(std::make_shared(true, reader_a_host, mock_reader_a_proxy))); FAILOVER_WRITER_HANDLER writer_handler( @@ -299,7 +299,7 @@ TEST_F(FailoverWriterHandlerTest, ConnectToReaderA_SlowWriter) { EXPECT_CALL(*mock_ts, mark_host_up(new_writer_host)).Times(1); EXPECT_CALL(*mock_reader_handler, get_reader_connection(_, _)) - .WillRepeatedly(Return(READER_FAILOVER_RESULT(true, reader_a_host, + .WillRepeatedly(Return(std::make_shared(true, reader_a_host, mock_reader_a_proxy))); FAILOVER_WRITER_HANDLER writer_handler( @@ -358,7 +358,7 @@ TEST_F(FailoverWriterHandlerTest, ConnectToReaderA_TaskADefers) { EXPECT_CALL(*mock_ts, mark_host_up(new_writer_host)).Times(1); EXPECT_CALL(*mock_reader_handler, get_reader_connection(_, _)) - .WillRepeatedly(Return(READER_FAILOVER_RESULT(true, reader_a_host, + .WillRepeatedly(Return(std::make_shared(true, reader_a_host, mock_reader_a_proxy))); FAILOVER_WRITER_HANDLER writer_handler( @@ -427,7 +427,7 @@ TEST_F(FailoverWriterHandlerTest, FailedToConnect_FailoverTimeout) { EXPECT_CALL(*mock_ts, mark_host_up(new_writer_host)).Times(1); EXPECT_CALL(*mock_reader_handler, get_reader_connection(_, _)) - .WillRepeatedly(Return(READER_FAILOVER_RESULT(true, reader_a_host, + .WillRepeatedly(Return(std::make_shared(true, reader_a_host, mock_reader_a_proxy))); FAILOVER_WRITER_HANDLER writer_handler( @@ -476,7 +476,7 @@ TEST_F(FailoverWriterHandlerTest, FailedToConnect_TaskAFailed_TaskBWriterFailed) EXPECT_CALL(*mock_ts, mark_host_down(new_writer_host)).Times(AtLeast(1)); EXPECT_CALL(*mock_reader_handler, get_reader_connection(_, _)) - .WillRepeatedly(Return(READER_FAILOVER_RESULT(true, reader_a_host, + .WillRepeatedly(Return(std::make_shared(true, reader_a_host, mock_reader_a_proxy))); FAILOVER_WRITER_HANDLER writer_handler( diff --git a/unit_testing/mock_objects.h b/unit_testing/mock_objects.h index e234b633d..4065518bd 100644 --- a/unit_testing/mock_objects.h +++ b/unit_testing/mock_objects.h @@ -109,14 +109,14 @@ class MOCK_TOPOLOGY_SERVICE : public TOPOLOGY_SERVICE { class MOCK_READER_HANDLER : public FAILOVER_READER_HANDLER { public: MOCK_READER_HANDLER() : FAILOVER_READER_HANDLER(nullptr, nullptr, 0, 0, false, 0) {} - MOCK_METHOD(READER_FAILOVER_RESULT, get_reader_connection, - (std::shared_ptr, FAILOVER_SYNC&)); + MOCK_METHOD(std::shared_ptr, get_reader_connection, + (std::shared_ptr, std::shared_ptr)); }; class MOCK_CONNECTION_HANDLER : public CONNECTION_HANDLER { public: MOCK_CONNECTION_HANDLER() : CONNECTION_HANDLER(nullptr) {} - MOCK_METHOD(CONNECTION_PROXY*, connect, (const std::shared_ptr&, DataSource*)); + MOCK_METHOD(CONNECTION_PROXY*, connect, (std::shared_ptr, DataSource*)); MOCK_METHOD(SQLRETURN, do_connect, (DBC*, DataSource*, bool)); }; From 75cefd94bd1af6f9a36c3be486da6df6e4f9d79e Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Tue, 18 Apr 2023 14:18:42 -0700 Subject: [PATCH 03/33] add template arguments for std::packaged_task --- driver/failover_reader_handler.cc | 4 ++-- driver/failover_writer_handler.cc | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/driver/failover_reader_handler.cc b/driver/failover_reader_handler.cc index 66b8bbf5c..ce2c4b51c 100644 --- a/driver/failover_reader_handler.cc +++ b/driver/failover_reader_handler.cc @@ -197,8 +197,8 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ std::shared_ptr first_reader_host = hosts_list.at(i); - std::packaged_task first_reader_task(first_connection_handler); - std::packaged_task second_reader_task(second_connection_handler); + std::packaged_task, std::shared_ptr, std::shared_ptr)> first_reader_task(first_connection_handler); + std::packaged_task, std::shared_ptr, std::shared_ptr)> second_reader_task(second_connection_handler); auto result1 = first_reader_task.get_future(); auto result2 = second_reader_task.get_future(); diff --git a/driver/failover_writer_handler.cc b/driver/failover_writer_handler.cc index edeeec7bc..7116c4812 100644 --- a/driver/failover_writer_handler.cc +++ b/driver/failover_writer_handler.cc @@ -322,8 +322,8 @@ std::shared_ptr FAILOVER_WRITER_HANDLER::failover( auto reconnect_result = std::make_shared(false, false, nullptr, nullptr); auto new_writer_result = std::make_shared(false, false, nullptr, nullptr); - std::packaged_task reconnect_task(reconnect_handler); - std::packaged_task wait_new_writer_task(new_writer_handler); + std::packaged_task, std::shared_ptr, std::shared_ptr)> reconnect_task(reconnect_handler); + std::packaged_task, std::shared_ptr, std::shared_ptr)> wait_new_writer_task(new_writer_handler); auto reconnect_future = reconnect_task.get_future(); auto wait_new_writer_future = wait_new_writer_task.get_future(); From 8ead08468e37f729bd09d6f4aed7634ce1d00cf3 Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Tue, 18 Apr 2023 14:24:30 -0700 Subject: [PATCH 04/33] missing template arguments --- driver/failover_reader_handler.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver/failover_reader_handler.cc b/driver/failover_reader_handler.cc index ce2c4b51c..d0f210ee5 100644 --- a/driver/failover_reader_handler.cc +++ b/driver/failover_reader_handler.cc @@ -68,7 +68,7 @@ std::shared_ptr FAILOVER_READER_HANDLER::failover( const auto start = std::chrono::steady_clock::now(); auto global_sync = std::make_shared(1); - std::packaged_task reader_failover_task ([=] { + std::packaged_task()> reader_failover_task([=] { while (!global_sync->is_completed()) { auto hosts_list = build_hosts_list(current_topology, !enable_strict_reader_failover); auto reader_result = get_connection_from_hosts(hosts_list, global_sync); From 91ddb4088bd900dda87abac855bab8c4eb5c93af Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Tue, 18 Apr 2023 20:44:21 -0700 Subject: [PATCH 05/33] use thread pool to run failover and wait for all when driver unloads, fix logger --- driver/cluster_aware_metrics_container.cc | 2 +- driver/cluster_aware_metrics_container.h | 2 +- driver/connect.cc | 8 +-- driver/failover.h | 6 ++ driver/failover_handler.cc | 5 +- driver/failover_reader_handler.cc | 75 ++++++++++++-------- driver/failover_writer_handler.cc | 60 +++++++++------- driver/handle.cc | 3 + driver/monitor.cc | 2 +- driver/monitor_connection_context.cc | 6 +- driver/monitor_service.cc | 12 ++-- driver/mylog.cc | 8 ++- driver/mylog.h | 8 +-- driver/topology_service.cc | 6 +- unit_testing/failover_reader_handler_test.cc | 6 +- unit_testing/failover_writer_handler_test.cc | 2 +- 16 files changed, 122 insertions(+), 89 deletions(-) diff --git a/driver/cluster_aware_metrics_container.cc b/driver/cluster_aware_metrics_container.cc index ce51ff6c9..7dc470f33 100644 --- a/driver/cluster_aware_metrics_container.cc +++ b/driver/cluster_aware_metrics_container.cc @@ -79,7 +79,7 @@ void CLUSTER_AWARE_METRICS_CONTAINER::set_gather_metric(bool gather) { this->can_gather = gather; } -void CLUSTER_AWARE_METRICS_CONTAINER::report_metrics(std::string conn_url, bool for_instances, FILE* log, unsigned long dbc_id) { +void CLUSTER_AWARE_METRICS_CONTAINER::report_metrics(std::string conn_url, bool for_instances, std::shared_ptr log, unsigned long dbc_id) { if (!log) { return; } diff --git a/driver/cluster_aware_metrics_container.h b/driver/cluster_aware_metrics_container.h index 72086afcb..405df8e3b 100644 --- a/driver/cluster_aware_metrics_container.h +++ b/driver/cluster_aware_metrics_container.h @@ -60,7 +60,7 @@ class CLUSTER_AWARE_METRICS_CONTAINER { void set_gather_metric(bool gather); - static void report_metrics(std::string conn_url, bool for_instances, FILE* log, unsigned long dbc_id); + static void report_metrics(std::string conn_url, bool for_instances, std::shared_ptr log, unsigned long dbc_id); static std::string report_metrics(std::string conn_url, bool for_instances); static void reset_metrics(); diff --git a/driver/connect.cc b/driver/connect.cc index 9951f9490..86dfb3f1d 100644 --- a/driver/connect.cc +++ b/driver/connect.cc @@ -282,7 +282,7 @@ std::shared_ptr get_host_info_from_ds(DataSource* ds) { } catch (std::string &) { err << "Invalid server '" << ds->server8 << "'."; if (ds->save_queries) { - MYLOG_TRACE(init_log_file().get(), 0, err.str().c_str()); + MYLOG_TRACE(init_log_file(), 0, err.str().c_str()); } throw std::runtime_error(err.str()); } @@ -290,7 +290,7 @@ std::shared_ptr get_host_info_from_ds(DataSource* ds) { if (hosts.size() == 0) { err << "No host was retrieved from the data source."; if (ds->save_queries) { - MYLOG_TRACE(init_log_file().get(), 0, err.str().c_str()); + MYLOG_TRACE(init_log_file(), 0, err.str().c_str()); } throw std::runtime_error(err.str()); } @@ -1448,9 +1448,7 @@ SQLRETURN SQL_API SQLDisconnect(SQLHDBC hdbc) cluster_id_str.append(":").append(std::to_string(dbc->connection_proxy->get_port())); } - CLUSTER_AWARE_METRICS_CONTAINER::report_metrics( - cluster_id_str, dbc->ds->gather_metrics_per_instance, - dbc->log_file ? dbc->log_file.get() : nullptr, dbc->id); + CLUSTER_AWARE_METRICS_CONTAINER::report_metrics(cluster_id_str, dbc->ds->gather_metrics_per_instance, dbc->log_file, dbc->id); } CHECK_HANDLE(hdbc); diff --git a/driver/failover.h b/driver/failover.h index 9cd967967..2b1c0957a 100644 --- a/driver/failover.h +++ b/driver/failover.h @@ -36,6 +36,9 @@ #include "mylog.h" #include +#include + +extern ctpl::thread_pool failover_thread_pool; struct READER_FAILOVER_RESULT { bool connected = false; @@ -241,6 +244,7 @@ class CONNECT_TO_READER_HANDLER : public FAILOVER { ~CONNECT_TO_READER_HANDLER(); void operator()( + int id, std::shared_ptr reader, std::shared_ptr f_sync, std::shared_ptr result); @@ -255,6 +259,7 @@ class RECONNECT_TO_WRITER_HANDLER : public FAILOVER { ~RECONNECT_TO_WRITER_HANDLER(); void operator()( + int id, std::shared_ptr original_writer, std::shared_ptr f_sync, std::shared_ptr result); @@ -278,6 +283,7 @@ class WAIT_NEW_WRITER_HANDLER : public FAILOVER { ~WAIT_NEW_WRITER_HANDLER(); void operator()( + int id, std::shared_ptr original_writer, std::shared_ptr f_sync, std::shared_ptr result); diff --git a/driver/failover_handler.cc b/driver/failover_handler.cc index 02319b1b5..059a50e13 100644 --- a/driver/failover_handler.cc +++ b/driver/failover_handler.cc @@ -128,7 +128,7 @@ SQLRETURN FAILOVER_HANDLER::init_cluster_info() { host_patterns = parse_host_list(hp_str.c_str(), port); } catch (std::string&) { err << "Invalid host pattern: '" << hp_str << "' - the value could not be parsed"; - MYLOG_TRACE(dbc->log_file.get(), dbc->id, err.str().c_str()); + MYLOG_TRACE(dbc->log_file, dbc->id, err.str().c_str()); throw std::runtime_error(err.str()); } @@ -388,6 +388,9 @@ SQLRETURN FAILOVER_HANDLER::create_connection_and_initialize_topology() { network_timeout != ds->write_timeout)) { rc = reconnect(true); } + if (is_failover_enabled()) { + failover_thread_pool.resize(4); + } } return rc; diff --git a/driver/failover_reader_handler.cc b/driver/failover_reader_handler.cc index d0f210ee5..b97b2499f 100644 --- a/driver/failover_reader_handler.cc +++ b/driver/failover_reader_handler.cc @@ -68,7 +68,7 @@ std::shared_ptr FAILOVER_READER_HANDLER::failover( const auto start = std::chrono::steady_clock::now(); auto global_sync = std::make_shared(1); - std::packaged_task()> reader_failover_task([=] { + auto reader_result_future = failover_thread_pool.push([=](int id) { while (!global_sync->is_completed()) { auto hosts_list = build_hosts_list(current_topology, !enable_strict_reader_failover); auto reader_result = get_connection_from_hosts(hosts_list, global_sync); @@ -81,18 +81,33 @@ std::shared_ptr FAILOVER_READER_HANDLER::failover( std::this_thread::sleep_for(std::chrono::seconds(READER_CONNECT_INTERVAL_SEC)); } return empty_result; - }); - - auto reader_result_future = reader_failover_task.get_future(); - std::thread reader_failover_thread(std::move(reader_failover_task)); + }); + + //std::packaged_task()> reader_failover_task([=] { + // while (!global_sync->is_completed()) { + // auto hosts_list = build_hosts_list(current_topology, !enable_strict_reader_failover); + // auto reader_result = get_connection_from_hosts(hosts_list, global_sync); + // if (reader_result->connected) { + // global_sync->mark_as_complete(true); + // return reader_result; + // } + // // TODO Think of changes to the strategy if it went + // // through all the hosts and did not connect. + // std::this_thread::sleep_for(std::chrono::seconds(READER_CONNECT_INTERVAL_SEC)); + // } + // return empty_result; + // }); + + //auto reader_result_future = reader_failover_task.get_future(); + //std::thread reader_failover_thread(std::move(reader_failover_task)); // Wait for task complete signal with specified timeout global_sync->wait_and_complete(max_failover_timeout_ms); // Constantly polling for results until timeout while (true) { if (reader_result_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { - reader_failover_thread.join(); - MYLOG_TRACE(logger.get(), dbc_id, "[FAILOVER_READER_HANDLER] Successfully connected to the reader instance."); + //reader_failover_thread.join(); + MYLOG_TRACE(logger, dbc_id, "[FAILOVER_READER_HANDLER] Successfully connected to the reader instance."); return reader_result_future.get(); } @@ -101,8 +116,8 @@ std::shared_ptr FAILOVER_READER_HANDLER::failover( const auto remaining_wait_ms = max_failover_timeout_ms - duration.count(); if (remaining_wait_ms <= 0) { // Reader failover timed out - reader_failover_thread.detach(); - MYLOG_TRACE(logger.get(), dbc_id, "[FAILOVER_READER_HANDLER] Failed to connect to the reader instance."); + //reader_failover_thread.detach(); + MYLOG_TRACE(logger, dbc_id, "[FAILOVER_READER_HANDLER] Failed to connect to the reader instance."); return empty_result; } } @@ -197,18 +212,21 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ std::shared_ptr first_reader_host = hosts_list.at(i); - std::packaged_task, std::shared_ptr, std::shared_ptr)> first_reader_task(first_connection_handler); - std::packaged_task, std::shared_ptr, std::shared_ptr)> second_reader_task(second_connection_handler); + //std::packaged_task, std::shared_ptr, std::shared_ptr)> first_reader_task(first_connection_handler); + //std::packaged_task, std::shared_ptr, std::shared_ptr)> second_reader_task(second_connection_handler); - auto result1 = first_reader_task.get_future(); - auto result2 = second_reader_task.get_future(); + //auto result1 = first_reader_task.get_future(); + //auto result2 = second_reader_task.get_future(); - std::thread task_thread1(std::move(first_reader_task), first_reader_host,local_sync, first_connection_result); - - std::thread task_thread2; + auto result1 = failover_thread_pool.push(std::move(first_connection_handler), first_reader_host, local_sync, first_connection_result); + + //std::thread task_thread1(std::move(first_reader_task), first_reader_host,local_sync, first_connection_result); + //std::thread task_thread2; + std::future result2; if (!odd_hosts_number) { auto second_reader_host = hosts_list.at(i + 1); - task_thread2 = std::thread(std::move(second_reader_task), second_reader_host, local_sync,second_connection_result); + //task_thread2 = std::thread(std::move(second_reader_task), second_reader_host, local_sync,second_connection_result); + result2 = failover_thread_pool.push(std::move(second_connection_handler), second_reader_host, local_sync, second_connection_result); } // Wait for task complete signal with specified timeout @@ -218,13 +236,13 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ while (true) { // Check if first reader task result is ready if (result1.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { - task_thread1.join(); + //task_thread1.join(); if (!odd_hosts_number) { - task_thread2.detach(); + //task_thread2.detach(); } result1.get(); if (first_connection_result->connected) { - MYLOG_TRACE(logger.get(), dbc_id, + MYLOG_TRACE(logger, dbc_id, "[FAILOVER_READER_HANDLER] Connected to reader: %s", first_connection_result->new_host->get_host_port_pair().c_str()); @@ -236,11 +254,11 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ // Check if second reader task result is ready if there is one if (!odd_hosts_number && result2.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { - task_thread1.detach(); - task_thread2.join(); + //task_thread1.detach(); + //task_thread2.join(); result2.get(); if (second_connection_result->connected) { - MYLOG_TRACE(logger.get(), dbc_id, + MYLOG_TRACE(logger, dbc_id, "[FAILOVER_READER_HANDLER] Connected to reader: %s", second_connection_result->new_host->get_host_port_pair().c_str()); @@ -254,8 +272,8 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ const auto remaining_wait_ms = reader_connect_timeout_ms - duration.count(); if (remaining_wait_ms <= 0) { // None has connected. We move on and try new hosts. - task_thread1.detach(); - task_thread2.detach(); + //task_thread1.detach(); + //task_thread2.detach(); std::this_thread::sleep_for(std::chrono::seconds(READER_CONNECT_INTERVAL_SEC)); break; } @@ -278,13 +296,14 @@ CONNECT_TO_READER_HANDLER::CONNECT_TO_READER_HANDLER( CONNECT_TO_READER_HANDLER::~CONNECT_TO_READER_HANDLER() {} void CONNECT_TO_READER_HANDLER::operator()( + int id, std::shared_ptr reader, std::shared_ptr f_sync, std::shared_ptr result) { if (reader && !f_sync->is_completed()) { - MYLOG_TRACE(logger.get(), dbc_id, + MYLOG_TRACE(logger, dbc_id, "[CONNECT_TO_READER_HANDLER] Trying to connect to reader: %s", reader->get_host_port_pair().c_str()); @@ -299,7 +318,7 @@ void CONNECT_TO_READER_HANDLER::operator()( result->new_connection = std::move(this->new_connection); f_sync->mark_as_complete(true); MYLOG_TRACE( - logger.get(), dbc_id, + logger, dbc_id, "[CONNECT_TO_READER_HANDLER] Connected to reader: %s", reader->get_host_port_pair().c_str()); return; @@ -307,7 +326,7 @@ void CONNECT_TO_READER_HANDLER::operator()( } else { topology_service->mark_host_down(reader); MYLOG_TRACE( - logger.get(), dbc_id, + logger, dbc_id, "[CONNECT_TO_READER_HANDLER] Failed to connect to reader: %s", reader->get_host_port_pair().c_str()); if (!f_sync->is_completed()) { diff --git a/driver/failover_writer_handler.cc b/driver/failover_writer_handler.cc index 7116c4812..6784e5f1a 100644 --- a/driver/failover_writer_handler.cc +++ b/driver/failover_writer_handler.cc @@ -116,12 +116,13 @@ RECONNECT_TO_WRITER_HANDLER::RECONNECT_TO_WRITER_HANDLER( RECONNECT_TO_WRITER_HANDLER::~RECONNECT_TO_WRITER_HANDLER() {} void RECONNECT_TO_WRITER_HANDLER::operator()( + int id, std::shared_ptr original_writer, std::shared_ptr f_sync, std::shared_ptr result) { if (original_writer) { - MYLOG_TRACE(logger.get(), dbc_id, + MYLOG_TRACE(logger, dbc_id, "[RECONNECT_TO_WRITER_HANDLER] [TaskA] Attempting to " "re-connect to the current writer instance: %s", original_writer->get_host_port_pair().c_str()); @@ -142,18 +143,18 @@ void RECONNECT_TO_WRITER_HANDLER::operator()( result->new_topology = latest_topology; result->new_connection = std::move(new_connection); f_sync->mark_as_complete(true); - MYLOG_TRACE(logger.get(), dbc_id, "[RECONNECT_TO_WRITER_HANDLER] [TaskA] Finished"); + MYLOG_TRACE(logger, dbc_id, "[RECONNECT_TO_WRITER_HANDLER] [TaskA] Finished"); return; } release_new_connection(); } sleep(reconnect_interval_ms); } - MYLOG_TRACE(logger.get(), dbc_id, "[RECONNECT_TO_WRITER_HANDLER] [TaskA] Cancelled"); + MYLOG_TRACE(logger, dbc_id, "[RECONNECT_TO_WRITER_HANDLER] [TaskA] Cancelled"); } // Another thread finishes or both timeout, this thread is canceled release_new_connection(); - MYLOG_TRACE(logger.get(), dbc_id, "[RECONNECT_TO_WRITER_HANDLER] [TaskA] Finished"); + MYLOG_TRACE(logger, dbc_id, "[RECONNECT_TO_WRITER_HANDLER] [TaskA] Finished"); } bool RECONNECT_TO_WRITER_HANDLER::is_current_host_writer( @@ -183,11 +184,12 @@ WAIT_NEW_WRITER_HANDLER::WAIT_NEW_WRITER_HANDLER( WAIT_NEW_WRITER_HANDLER::~WAIT_NEW_WRITER_HANDLER() {} void WAIT_NEW_WRITER_HANDLER::operator()( + int id, std::shared_ptr original_writer, std::shared_ptr f_sync, std::shared_ptr result) { - MYLOG_TRACE(logger.get(), dbc_id, "[WAIT_NEW_WRITER_HANDLER] [TaskB] Attempting to connect to a new writer instance"); + MYLOG_TRACE(logger, dbc_id, "[WAIT_NEW_WRITER_HANDLER] [TaskB] Attempting to connect to a new writer instance"); while (!f_sync->is_completed()) { if (!is_writer_connected()) { @@ -200,16 +202,16 @@ void WAIT_NEW_WRITER_HANDLER::operator()( result->new_topology = current_topology; result->new_connection = std::move(new_connection); f_sync->mark_as_complete(true); - MYLOG_TRACE(logger.get(), dbc_id, "[WAIT_NEW_WRITER_HANDLER] [TaskB] Finished"); + MYLOG_TRACE(logger, dbc_id, "[WAIT_NEW_WRITER_HANDLER] [TaskB] Finished"); return; } } - MYLOG_TRACE(logger.get(), dbc_id, "[WAIT_NEW_WRITER_HANDLER] [TaskB] Cancelled"); + MYLOG_TRACE(logger, dbc_id, "[WAIT_NEW_WRITER_HANDLER] [TaskB] Cancelled"); // Another thread finishes or both timeout, this thread is canceled clean_up_reader_connection(); release_new_connection(); - MYLOG_TRACE(logger.get(), dbc_id, "[WAIT_NEW_WRITER_HANDLER] [TaskB] Finished"); + MYLOG_TRACE(logger, dbc_id, "[WAIT_NEW_WRITER_HANDLER] [TaskB] Finished"); } // Connect to a reader to later retrieve the latest topology @@ -220,12 +222,12 @@ void WAIT_NEW_WRITER_HANDLER::connect_to_reader(std::shared_ptr f reader_connection = connection_result->new_connection; current_reader_host = connection_result->new_host; MYLOG_TRACE( - logger.get(), dbc_id, + logger, dbc_id, "[WAIT_NEW_WRITER_HANDLER] [TaskB] Connected to reader: %s", connection_result->new_host->get_host_port_pair().c_str()); break; } - MYLOG_TRACE(logger.get(), dbc_id, "[WAIT_NEW_WRITER_HANDLER] [TaskB] Failed to connect to any reader."); + MYLOG_TRACE(logger, dbc_id, "[WAIT_NEW_WRITER_HANDLER] [TaskB] Failed to connect to any reader."); } } @@ -250,7 +252,7 @@ void WAIT_NEW_WRITER_HANDLER::refresh_topology_and_connect_to_new_writer( bool WAIT_NEW_WRITER_HANDLER::connect_to_writer( std::shared_ptr writer_candidate) { - MYLOG_TRACE(logger.get(), dbc_id, + MYLOG_TRACE(logger, dbc_id, "[WAIT_NEW_WRITER_HANDLER] [TaskB] Trying to connect to a new writer: %s", writer_candidate->get_host_port_pair().c_str()); @@ -299,7 +301,7 @@ std::shared_ptr FAILOVER_WRITER_HANDLER::failover( std::shared_ptr current_topology) { if (!current_topology || current_topology->total_hosts() == 0) { - MYLOG_TRACE(logger.get(), dbc_id, + MYLOG_TRACE(logger, dbc_id, "[FAILOVER_WRITER_HANDLER] Failover was called with " "an invalid (null or empty) topology"); return std::make_shared(false, false, nullptr, nullptr); @@ -322,14 +324,18 @@ std::shared_ptr FAILOVER_WRITER_HANDLER::failover( auto reconnect_result = std::make_shared(false, false, nullptr, nullptr); auto new_writer_result = std::make_shared(false, false, nullptr, nullptr); - std::packaged_task, std::shared_ptr, std::shared_ptr)> reconnect_task(reconnect_handler); - std::packaged_task, std::shared_ptr, std::shared_ptr)> wait_new_writer_task(new_writer_handler); + //std::packaged_task, std::shared_ptr, std::shared_ptr)> reconnect_task(reconnect_handler); + //std::packaged_task, std::shared_ptr, std::shared_ptr)> wait_new_writer_task(new_writer_handler); - auto reconnect_future = reconnect_task.get_future(); - auto wait_new_writer_future = wait_new_writer_task.get_future(); + //auto reconnect_future = reconnect_task.get_future(); + //auto wait_new_writer_future = wait_new_writer_task.get_future(); - std::thread reconnect_thread(std::move(reconnect_task), original_writer, failover_sync, reconnect_result); - std::thread wait_new_writer_thread(std::move(wait_new_writer_task), original_writer, failover_sync, new_writer_result); + //std::thread reconnect_thread(std::move(reconnect_task), original_writer, failover_sync, reconnect_result); + //std::thread wait_new_writer_thread(std::move((wait_new_writer_task), original_writer, failover_sync, new_writer_result); + + + auto reconnect_future = failover_thread_pool.push(std::move(reconnect_handler), original_writer, failover_sync, reconnect_result); + auto wait_new_writer_future = failover_thread_pool.push(std::move(new_writer_handler), original_writer, failover_sync, new_writer_result); // Wait for task complete signal with specified timeout failover_sync->wait_and_complete(writer_failover_timeout_ms); @@ -338,11 +344,11 @@ std::shared_ptr FAILOVER_WRITER_HANDLER::failover( while (true) { // Check if reconnect task result is ready if (reconnect_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { - reconnect_thread.join(); - wait_new_writer_thread.detach(); + //reconnect_thread.join(); + //wait_new_writer_thread.detach(); reconnect_future.get(); if (reconnect_result->connected) { - MYLOG_TRACE(logger.get(), dbc_id, + MYLOG_TRACE(logger, dbc_id, "[FAILOVER_WRITER_HANDLER] Successfully re-connected to the current writer instance: %s", reconnect_result->new_topology->get_writer()->get_host_port_pair().c_str()); return reconnect_result; @@ -352,11 +358,11 @@ std::shared_ptr FAILOVER_WRITER_HANDLER::failover( // Check if wait new writer task result is ready if (wait_new_writer_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { - reconnect_thread.detach(); - wait_new_writer_thread.join(); + //reconnect_thread.detach(); + //wait_new_writer_thread.join(); wait_new_writer_future.get(); if (new_writer_result->connected) { - MYLOG_TRACE(logger.get(), dbc_id, + MYLOG_TRACE(logger, dbc_id, "[FAILOVER_WRITER_HANDLER] Successfully connected to the new writer instance: %s", new_writer_result->new_topology->get_writer()->get_host_port_pair().c_str()); return new_writer_result; @@ -369,9 +375,9 @@ std::shared_ptr FAILOVER_WRITER_HANDLER::failover( const auto remaining_wait_ms = writer_failover_timeout_ms - duration.count(); if (remaining_wait_ms <= 0) { // Writer failover timed out - reconnect_thread.detach(); - wait_new_writer_thread.detach(); - MYLOG_TRACE(logger.get(), dbc_id, "[FAILOVER_WRITER_HANDLER] Failed to connect to the writer instance."); + //reconnect_thread.detach(); + //wait_new_writer_thread.detach(); + MYLOG_TRACE(logger, dbc_id, "[FAILOVER_WRITER_HANDLER] Failed to connect to the writer instance."); return std::make_shared(false, false, nullptr, nullptr); } } diff --git a/driver/handle.cc b/driver/handle.cc index 8b680255b..06b69c1f9 100644 --- a/driver/handle.cc +++ b/driver/handle.cc @@ -55,6 +55,8 @@ #include +ctpl::thread_pool failover_thread_pool; + thread_local long thread_count = 0; std::mutex g_lock; @@ -224,6 +226,7 @@ SQLRETURN SQL_API SQLAllocEnv(SQLHENV *phenv) SQLRETURN SQL_API my_SQLFreeEnv(SQLHENV henv) { MONITOR_THREAD_CONTAINER::release_instance(); + failover_thread_pool.stop(true); ENV *env= (ENV *) henv; delete env; diff --git a/driver/monitor.cc b/driver/monitor.cc index 6bba5a17e..2f261aa86 100644 --- a/driver/monitor.cc +++ b/driver/monitor.cc @@ -104,7 +104,7 @@ void MONITOR::start_monitoring(std::shared_ptr conte void MONITOR::stop_monitoring(std::shared_ptr context) { if (context == nullptr) { MYLOG_TRACE( - this->logger.get(), 0, + this->logger, 0, "[MONITOR] Invalid context passed into stop_monitoring()"); return; } diff --git a/driver/monitor_connection_context.cc b/driver/monitor_connection_context.cc index 1fe5e8aeb..ae846d828 100644 --- a/driver/monitor_connection_context.cc +++ b/driver/monitor_connection_context.cc @@ -165,14 +165,14 @@ void MONITOR_CONNECTION_CONTEXT::set_connection_valid( const auto max_invalid_node_duration = get_failure_detection_interval() * (std::max)(0, get_failure_detection_count()); if (invalid_node_duration_ms >= max_invalid_node_duration) { - MYLOG_TRACE(logger.get(), get_dbc_id(), "[MONITOR_CONNECTION_CONTEXT] Node '%s' is *dead*.", node_keys_str.c_str()); + MYLOG_TRACE(logger, get_dbc_id(), "[MONITOR_CONNECTION_CONTEXT] Node '%s' is *dead*.", node_keys_str.c_str()); set_node_unhealthy(true); abort_connection(); return; } MYLOG_TRACE( - logger.get(), get_dbc_id(), + logger, get_dbc_id(), "[MONITOR_CONNECTION_CONTEXT] Node '%s' is *not responding* (%d).", node_keys_str.c_str(), get_failure_count()); return; } @@ -180,7 +180,7 @@ void MONITOR_CONNECTION_CONTEXT::set_connection_valid( set_failure_count(0); reset_invalid_node_start_time(); set_node_unhealthy(false); - MYLOG_TRACE(logger.get(), get_dbc_id(), "[MONITOR_CONNECTION_CONTEXT] Node '%s' is *alive*.", node_keys_str.c_str()); + MYLOG_TRACE(logger, get_dbc_id(), "[MONITOR_CONNECTION_CONTEXT] Node '%s' is *alive*.", node_keys_str.c_str()); } void MONITOR_CONNECTION_CONTEXT::abort_connection() { diff --git a/driver/monitor_service.cc b/driver/monitor_service.cc index bf2e0a2e4..0643710a9 100644 --- a/driver/monitor_service.cc +++ b/driver/monitor_service.cc @@ -58,13 +58,13 @@ std::shared_ptr MONITOR_SERVICE::start_monitoring( if (!dbc || !ds) { auto msg = "[MONITOR_SERVICE] Parameter dbc or ds cannot be null"; - MYLOG_TRACE(this->logger.get(), dbc ? dbc->id : 0, msg); + MYLOG_TRACE(this->logger, dbc ? dbc->id : 0, msg); throw std::invalid_argument(msg); } if (node_keys.empty()) { auto msg = "[MONITOR_SERVICE] Parameter node_keys cannot be empty"; - MYLOG_TRACE(this->logger.get(), dbc ? dbc->id : 0, msg); + MYLOG_TRACE(this->logger, dbc ? dbc->id : 0, msg); throw std::invalid_argument(msg); } @@ -96,7 +96,7 @@ std::shared_ptr MONITOR_SERVICE::start_monitoring( void MONITOR_SERVICE::stop_monitoring(std::shared_ptr context) { if (context == nullptr) { MYLOG_TRACE( - this->logger.get(), 0, + this->logger, 0, "[MONITOR_SERVICE] Invalid context passed into stop_monitoring()"); return; } @@ -106,7 +106,7 @@ void MONITOR_SERVICE::stop_monitoring(std::shared_ptrthread_container->get_node(context->get_node_keys()); if (node.empty()) { MYLOG_TRACE( - this->logger.get(), context->get_dbc_id(), + this->logger, context->get_dbc_id(), "[MONITOR_SERVICE] Can not find node key from context"); return; } @@ -121,7 +121,7 @@ void MONITOR_SERVICE::stop_monitoring_for_all_connections(std::set std::string node = this->thread_container->get_node(node_keys); if (node.empty()) { MYLOG_TRACE( - this->logger.get(), 0, + this->logger, 0, "[MONITOR_SERVICE] Invalid node keys passed into stop_monitoring_for_all_connections(). " "No existing monitor for the given set of node keys"); return; @@ -137,7 +137,7 @@ void MONITOR_SERVICE::stop_monitoring_for_all_connections(std::set void MONITOR_SERVICE::notify_unused(const std::shared_ptr& monitor) const { if (monitor == nullptr) { MYLOG_TRACE( - this->logger.get(), 0, + this->logger, 0, "[MONITOR_SERVICE] Invalid monitor passed into notify_unused()"); return; } diff --git a/driver/mylog.cc b/driver/mylog.cc index e81eae3be..b2ef1679f 100644 --- a/driver/mylog.cc +++ b/driver/mylog.cc @@ -41,7 +41,9 @@ #include #endif -void trace_print(FILE *file, unsigned long dbc_id, const char *fmt, ...) { +std::mutex log_file_mutex; + +void trace_print(std::shared_ptr file, unsigned long dbc_id, const char *fmt, ...) { if (file && fmt) { time_t now = time(nullptr); char time_buf[256]; @@ -65,9 +67,9 @@ void trace_print(FILE *file, unsigned long dbc_id, const char *fmt, ...) { #endif { std::lock_guard guard(log_file_mutex); - fprintf(file, "%s - Process ID %ld - DBC ID %lu - %s\n", time_buf, pid, + fprintf(file.get(), "%s - Process ID %ld - DBC ID %lu - %s\n", time_buf, pid, dbc_id, buf.data()); - fflush(file); + fflush(file.get()); } } } diff --git a/driver/mylog.h b/driver/mylog.h index b352e7724..775e0996d 100644 --- a/driver/mylog.h +++ b/driver/mylog.h @@ -38,11 +38,11 @@ #define MYLOG_STMT_TRACE(A, B) \ { \ if ((A)->dbc->ds->save_queries) \ - trace_print((A)->dbc->log_file.get(), (A)->dbc->id, (const char *)B); \ + trace_print((A)->dbc->log_file, (A)->dbc->id, (const char *)B); \ } #define MYLOG_DBC_TRACE(A, ...) \ - { trace_print((A)->log_file.get(), (A)->id, __VA_ARGS__); } + { trace_print((A)->log_file, (A)->id, __VA_ARGS__); } #define MYLOG_TRACE(A, B, ...) \ { \ @@ -60,11 +60,11 @@ struct FILEDeleter { }; static std::shared_ptr log_file; -static std::mutex log_file_mutex; +extern std::mutex log_file_mutex; /* Functions used when debugging */ std::shared_ptr init_log_file(); void end_log_file(); -void trace_print(FILE *file, unsigned long dbc_id, const char *fmt, ...); +void trace_print(std::shared_ptr file, unsigned long dbc_id, const char *fmt, ...); #endif /* __MYLOG_H__ */ diff --git a/driver/topology_service.cc b/driver/topology_service.cc index a92632d3e..4ef132b8e 100644 --- a/driver/topology_service.cc +++ b/driver/topology_service.cc @@ -58,7 +58,7 @@ TOPOLOGY_SERVICE::~TOPOLOGY_SERVICE() { } void TOPOLOGY_SERVICE::set_cluster_id(std::string cid) { - MYLOG_TRACE(logger.get(), dbc_id, "[TOPOLOGY_SERVICE] cluster ID=%s", cid.c_str()); + MYLOG_TRACE(logger, dbc_id, "[TOPOLOGY_SERVICE] cluster ID=%s", cid.c_str()); this->cluster_id = cid; metrics_container->set_cluster_id(this->cluster_id); } @@ -71,7 +71,7 @@ void TOPOLOGY_SERVICE::set_cluster_instance_template(std::shared_ptr if (cluster_instance_host) cluster_instance_host.reset(); - MYLOG_TRACE(logger.get(), dbc_id, + MYLOG_TRACE(logger, dbc_id, "[TOPOLOGY_SERVICE] cluster instance host=%s, port=%d", host_template->get_host().c_str(), host_template->get_port()); cluster_instance_host = host_template; @@ -300,7 +300,7 @@ std::shared_ptr TOPOLOGY_SERVICE::query_for_topology(CONN topology_info->is_multi_writer_cluster = writer_count > 1; if (writer_count == 0) { - MYLOG_TRACE(logger.get(), dbc_id, + MYLOG_TRACE(logger, dbc_id, "[TOPOLOGY_SERVICE] The topology query returned an " "invalid topology - no writer instance detected"); } diff --git a/unit_testing/failover_reader_handler_test.cc b/unit_testing/failover_reader_handler_test.cc index c8aa96b5b..8f3c635b8 100644 --- a/unit_testing/failover_reader_handler_test.cc +++ b/unit_testing/failover_reader_handler_test.cc @@ -92,9 +92,7 @@ class FailoverReaderHandlerTest : public testing::Test { void SetUp() override { allocate_odbc_handles(env, dbc, ds); - ds->save_queries = true; - ds->allow_reader_connections = true; - + failover_thread_pool.resize(3); reader_a_host->set_host_state(UP); reader_b_host->set_host_state(UP); reader_c_host->set_host_state(DOWN); @@ -393,8 +391,6 @@ TEST_F(FailoverReaderHandlerTest, Failover_Success_Reader) { // Cannot delete at the end as it may cause double delete Mock::AllowLeak(mock_reader_b_proxy); Mock::AllowLeak(mock_reader_b_proxy->get_ds()); - Mock::AllowLeak(mock_ts.get()); - Mock::AllowLeak(mock_connection_handler.get()); auto current_topology = std::make_shared(); current_topology->add_host(writer_host); diff --git a/unit_testing/failover_writer_handler_test.cc b/unit_testing/failover_writer_handler_test.cc index 7206b2552..743419ed2 100644 --- a/unit_testing/failover_writer_handler_test.cc +++ b/unit_testing/failover_writer_handler_test.cc @@ -76,7 +76,7 @@ class FailoverWriterHandlerTest : public testing::Test { void SetUp() override { allocate_odbc_handles(env, dbc, ds); - + failover_thread_pool.resize(3); writer_instance_name = "writer-host"; new_writer_instance_name = "new-writer-host"; From 07405ead889f79fc8b22ff98d25d75a29643e107 Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Wed, 19 Apr 2023 10:45:43 -0700 Subject: [PATCH 06/33] increase thread pool size --- driver/failover_reader_handler.cc | 11 +++++++++++ driver/failover_writer_handler.cc | 7 +++++++ 2 files changed, 18 insertions(+) diff --git a/driver/failover_reader_handler.cc b/driver/failover_reader_handler.cc index b97b2499f..43874c1f3 100644 --- a/driver/failover_reader_handler.cc +++ b/driver/failover_reader_handler.cc @@ -68,6 +68,9 @@ std::shared_ptr FAILOVER_READER_HANDLER::failover( const auto start = std::chrono::steady_clock::now(); auto global_sync = std::make_shared(1); + if (failover_thread_pool.n_idle() == 0) { + failover_thread_pool.resize(failover_thread_pool.size() + 1); + } auto reader_result_future = failover_thread_pool.push([=](int id) { while (!global_sync->is_completed()) { auto hosts_list = build_hosts_list(current_topology, !enable_strict_reader_failover); @@ -218,6 +221,14 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ //auto result1 = first_reader_task.get_future(); //auto result2 = second_reader_task.get_future(); + if (failover_thread_pool.n_idle() == 0) { + failover_thread_pool.resize(failover_thread_pool.size() + 2); + } + + if (failover_thread_pool.n_idle() == 1) { + failover_thread_pool.resize(failover_thread_pool.size() + 1); + } + auto result1 = failover_thread_pool.push(std::move(first_connection_handler), first_reader_host, local_sync, first_connection_result); //std::thread task_thread1(std::move(first_reader_task), first_reader_host,local_sync, first_connection_result); diff --git a/driver/failover_writer_handler.cc b/driver/failover_writer_handler.cc index 6784e5f1a..920ac853f 100644 --- a/driver/failover_writer_handler.cc +++ b/driver/failover_writer_handler.cc @@ -333,6 +333,13 @@ std::shared_ptr FAILOVER_WRITER_HANDLER::failover( //std::thread reconnect_thread(std::move(reconnect_task), original_writer, failover_sync, reconnect_result); //std::thread wait_new_writer_thread(std::move((wait_new_writer_task), original_writer, failover_sync, new_writer_result); + if (failover_thread_pool.n_idle() == 0) { + failover_thread_pool.resize(failover_thread_pool.size() + 2); + } + + if (failover_thread_pool.n_idle() == 1) { + failover_thread_pool.resize(failover_thread_pool.size() + 1); + } auto reconnect_future = failover_thread_pool.push(std::move(reconnect_handler), original_writer, failover_sync, reconnect_result); auto wait_new_writer_future = failover_thread_pool.push(std::move(new_writer_handler), original_writer, failover_sync, new_writer_result); From 3f69598f08908211dd87b1c9e98aecfb162d183d Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Wed, 19 Apr 2023 15:32:26 -0700 Subject: [PATCH 07/33] fix reader failover where failed result overwrite successful result --- driver/failover_handler.cc | 2 +- driver/failover_reader_handler.cc | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/driver/failover_handler.cc b/driver/failover_handler.cc index 059a50e13..c7a4dd4a5 100644 --- a/driver/failover_handler.cc +++ b/driver/failover_handler.cc @@ -389,7 +389,7 @@ SQLRETURN FAILOVER_HANDLER::create_connection_and_initialize_topology() { rc = reconnect(true); } if (is_failover_enabled()) { - failover_thread_pool.resize(4); + failover_thread_pool.resize(current_topology->total_hosts()); } } diff --git a/driver/failover_reader_handler.cc b/driver/failover_reader_handler.cc index 43874c1f3..a46f6431c 100644 --- a/driver/failover_reader_handler.cc +++ b/driver/failover_reader_handler.cc @@ -246,7 +246,7 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ // Constantly polling for results until timeout while (true) { // Check if first reader task result is ready - if (result1.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { + if (result1.wait_for(std::chrono::seconds(0)) == std::future_status::ready && result1.valid()) { //task_thread1.join(); if (!odd_hosts_number) { //task_thread2.detach(); @@ -259,12 +259,11 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ return first_connection_result; } - break; } // Check if second reader task result is ready if there is one if (!odd_hosts_number && - result2.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { + result2.wait_for(std::chrono::seconds(0)) == std::future_status::ready && result2.valid()) { //task_thread1.detach(); //task_thread2.join(); result2.get(); @@ -275,6 +274,10 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ return second_connection_result; } + } + + // Results are ready but non has valid connection + if (!result1.valid() && (odd_hosts_number || !result2.valid())) { break; } From c870e5603122b79f0ec8970adaaa7f8561844d81 Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Wed, 19 Apr 2023 17:25:17 -0700 Subject: [PATCH 08/33] not stop thread pool on free env handle --- driver/failover_reader_handler.cc | 20 ++++++++++---------- driver/failover_writer_handler.cc | 12 ++++++++---- driver/handle.cc | 1 - 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/driver/failover_reader_handler.cc b/driver/failover_reader_handler.cc index a46f6431c..0d43639f3 100644 --- a/driver/failover_reader_handler.cc +++ b/driver/failover_reader_handler.cc @@ -110,7 +110,7 @@ std::shared_ptr FAILOVER_READER_HANDLER::failover( while (true) { if (reader_result_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { //reader_failover_thread.join(); - MYLOG_TRACE(logger, dbc_id, "[FAILOVER_READER_HANDLER] Successfully connected to the reader instance."); + MYLOG_TRACE(logger, dbc_id, "[FAILOVER_READER_HANDLER] Reader failover finished."); return reader_result_future.get(); } @@ -120,7 +120,7 @@ std::shared_ptr FAILOVER_READER_HANDLER::failover( if (remaining_wait_ms <= 0) { // Reader failover timed out //reader_failover_thread.detach(); - MYLOG_TRACE(logger, dbc_id, "[FAILOVER_READER_HANDLER] Failed to connect to the reader instance."); + MYLOG_TRACE(logger, dbc_id, "[FAILOVER_READER_HANDLER] Reader failover timed out. Failed to connect to the reader instance."); return empty_result; } } @@ -229,15 +229,15 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ failover_thread_pool.resize(failover_thread_pool.size() + 1); } - auto result1 = failover_thread_pool.push(std::move(first_connection_handler), first_reader_host, local_sync, first_connection_result); + auto first_result = failover_thread_pool.push(std::move(first_connection_handler), first_reader_host, local_sync, first_connection_result); //std::thread task_thread1(std::move(first_reader_task), first_reader_host,local_sync, first_connection_result); //std::thread task_thread2; - std::future result2; + std::future second_future; if (!odd_hosts_number) { auto second_reader_host = hosts_list.at(i + 1); //task_thread2 = std::thread(std::move(second_reader_task), second_reader_host, local_sync,second_connection_result); - result2 = failover_thread_pool.push(std::move(second_connection_handler), second_reader_host, local_sync, second_connection_result); + second_future = failover_thread_pool.push(std::move(second_connection_handler), second_reader_host, local_sync, second_connection_result); } // Wait for task complete signal with specified timeout @@ -246,12 +246,12 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ // Constantly polling for results until timeout while (true) { // Check if first reader task result is ready - if (result1.wait_for(std::chrono::seconds(0)) == std::future_status::ready && result1.valid()) { + if (first_result.wait_for(std::chrono::seconds(0)) == std::future_status::ready && first_result.valid()) { //task_thread1.join(); if (!odd_hosts_number) { //task_thread2.detach(); } - result1.get(); + first_result.get(); if (first_connection_result->connected) { MYLOG_TRACE(logger, dbc_id, "[FAILOVER_READER_HANDLER] Connected to reader: %s", @@ -263,10 +263,10 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ // Check if second reader task result is ready if there is one if (!odd_hosts_number && - result2.wait_for(std::chrono::seconds(0)) == std::future_status::ready && result2.valid()) { + second_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready && second_future.valid()) { //task_thread1.detach(); //task_thread2.join(); - result2.get(); + second_future.get(); if (second_connection_result->connected) { MYLOG_TRACE(logger, dbc_id, "[FAILOVER_READER_HANDLER] Connected to reader: %s", @@ -277,7 +277,7 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ } // Results are ready but non has valid connection - if (!result1.valid() && (odd_hosts_number || !result2.valid())) { + if (!first_result.valid() && (odd_hosts_number || !second_future.valid())) { break; } diff --git a/driver/failover_writer_handler.cc b/driver/failover_writer_handler.cc index 920ac853f..0a9e3ed29 100644 --- a/driver/failover_writer_handler.cc +++ b/driver/failover_writer_handler.cc @@ -350,7 +350,7 @@ std::shared_ptr FAILOVER_WRITER_HANDLER::failover( // Constantly polling for results until timeout while (true) { // Check if reconnect task result is ready - if (reconnect_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { + if (reconnect_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready && reconnect_future.valid()) { //reconnect_thread.join(); //wait_new_writer_thread.detach(); reconnect_future.get(); @@ -360,11 +360,10 @@ std::shared_ptr FAILOVER_WRITER_HANDLER::failover( reconnect_result->new_topology->get_writer()->get_host_port_pair().c_str()); return reconnect_result; } - break; } // Check if wait new writer task result is ready - if (wait_new_writer_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { + if (wait_new_writer_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready && wait_new_writer_future.valid()) { //reconnect_thread.detach(); //wait_new_writer_thread.join(); wait_new_writer_future.get(); @@ -374,6 +373,10 @@ std::shared_ptr FAILOVER_WRITER_HANDLER::failover( new_writer_result->new_topology->get_writer()->get_host_port_pair().c_str()); return new_writer_result; } + } + + // Results are ready but non has valid connection + if (!reconnect_future.valid() && !wait_new_writer_future.valid()) { break; } @@ -384,11 +387,12 @@ std::shared_ptr FAILOVER_WRITER_HANDLER::failover( // Writer failover timed out //reconnect_thread.detach(); //wait_new_writer_thread.detach(); - MYLOG_TRACE(logger, dbc_id, "[FAILOVER_WRITER_HANDLER] Failed to connect to the writer instance."); + MYLOG_TRACE(logger, dbc_id, "[FAILOVER_WRITER_HANDLER] Writer failover timed out. Failed to connect to the writer instance."); return std::make_shared(false, false, nullptr, nullptr); } } // Writer failover finished but not connected + MYLOG_TRACE(logger, dbc_id, "[FAILOVER_WRITER_HANDLER] Failed to connect to the writer instance."); return std::make_shared(false, false, nullptr, nullptr); } diff --git a/driver/handle.cc b/driver/handle.cc index 06b69c1f9..83f541b7f 100644 --- a/driver/handle.cc +++ b/driver/handle.cc @@ -226,7 +226,6 @@ SQLRETURN SQL_API SQLAllocEnv(SQLHENV *phenv) SQLRETURN SQL_API my_SQLFreeEnv(SQLHENV henv) { MONITOR_THREAD_CONTAINER::release_instance(); - failover_thread_pool.stop(true); ENV *env= (ENV *) henv; delete env; From 479a1c5d280ffdcb1ad203c75414047eccc8d320 Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Wed, 19 Apr 2023 17:53:24 -0700 Subject: [PATCH 09/33] redirect maven central and stop thread pool in teardown --- testframework/build.gradle.kts | 4 +++- unit_testing/failover_reader_handler_test.cc | 1 + unit_testing/failover_writer_handler_test.cc | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/testframework/build.gradle.kts b/testframework/build.gradle.kts index c10816d24..a7dfcc94e 100644 --- a/testframework/build.gradle.kts +++ b/testframework/build.gradle.kts @@ -6,7 +6,9 @@ group = "software.aws.rds" version = "1.0-SNAPSHOT" repositories { - mavenCentral() + maven { + url = uri("https://repo.maven.apache.org/maven2/") + } } dependencies { diff --git a/unit_testing/failover_reader_handler_test.cc b/unit_testing/failover_reader_handler_test.cc index 8f3c635b8..6153ba399 100644 --- a/unit_testing/failover_reader_handler_test.cc +++ b/unit_testing/failover_reader_handler_test.cc @@ -106,6 +106,7 @@ class FailoverReaderHandlerTest : public testing::Test { void TearDown() override { cleanup_odbc_handles(env, dbc, ds); + failover_thread_pool.stop(true); } }; diff --git a/unit_testing/failover_writer_handler_test.cc b/unit_testing/failover_writer_handler_test.cc index 743419ed2..367b5202f 100644 --- a/unit_testing/failover_writer_handler_test.cc +++ b/unit_testing/failover_writer_handler_test.cc @@ -104,6 +104,7 @@ class FailoverWriterHandlerTest : public testing::Test { void TearDown() override { cleanup_odbc_handles(env, dbc, ds); + failover_thread_pool.stop(true); } }; From e0017bff38046fc8a45a420a0a608f7443c630a2 Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Wed, 19 Apr 2023 18:13:42 -0700 Subject: [PATCH 10/33] maven central --- testframework/build.gradle.kts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testframework/build.gradle.kts b/testframework/build.gradle.kts index a7dfcc94e..d2a75cf7c 100644 --- a/testframework/build.gradle.kts +++ b/testframework/build.gradle.kts @@ -7,7 +7,7 @@ version = "1.0-SNAPSHOT" repositories { maven { - url = uri("https://repo.maven.apache.org/maven2/") + url = uri("https://repo1.maven.org/maven2") } } From e35602f66633556dccf9cd4456463cc8fc3a6aff Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Wed, 19 Apr 2023 20:21:24 -0700 Subject: [PATCH 11/33] add thread id to logs --- driver/failover_reader_handler.cc | 26 +++++++++++++------------- driver/failover_writer_handler.cc | 29 ++++++++++++++--------------- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/driver/failover_reader_handler.cc b/driver/failover_reader_handler.cc index 0d43639f3..668ccff42 100644 --- a/driver/failover_reader_handler.cc +++ b/driver/failover_reader_handler.cc @@ -69,6 +69,7 @@ std::shared_ptr FAILOVER_READER_HANDLER::failover( auto global_sync = std::make_shared(1); if (failover_thread_pool.n_idle() == 0) { + MYLOG_TRACE(logger, dbc_id, "[FAILOVER_READER_HANDLER] Resizing thread pool to %d", failover_thread_pool.size() + 1); failover_thread_pool.resize(failover_thread_pool.size() + 1); } auto reader_result_future = failover_thread_pool.push([=](int id) { @@ -221,13 +222,12 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ //auto result1 = first_reader_task.get_future(); //auto result2 = second_reader_task.get_future(); - if (failover_thread_pool.n_idle() == 0) { - failover_thread_pool.resize(failover_thread_pool.size() + 2); - } - - if (failover_thread_pool.n_idle() == 1) { - failover_thread_pool.resize(failover_thread_pool.size() + 1); - } + if (failover_thread_pool.n_idle() <= 1) { + int size = failover_thread_pool.size() + 2 - failover_thread_pool.n_idle(); + MYLOG_TRACE(logger, dbc_id, + "[FAILOVER_READER_HANDLER] Resizing thread pool to %d", size); + failover_thread_pool.resize(size); + } auto first_result = failover_thread_pool.push(std::move(first_connection_handler), first_reader_host, local_sync, first_connection_result); @@ -318,8 +318,8 @@ void CONNECT_TO_READER_HANDLER::operator()( if (reader && !f_sync->is_completed()) { MYLOG_TRACE(logger, dbc_id, - "[CONNECT_TO_READER_HANDLER] Trying to connect to reader: %s", - reader->get_host_port_pair().c_str()); + "Thread ID %d - [CONNECT_TO_READER_HANDLER] Trying to connect to reader: %s", + id, reader->get_host_port_pair().c_str()); if (connect(reader)) { topology_service->mark_host_up(reader); @@ -333,16 +333,16 @@ void CONNECT_TO_READER_HANDLER::operator()( f_sync->mark_as_complete(true); MYLOG_TRACE( logger, dbc_id, - "[CONNECT_TO_READER_HANDLER] Connected to reader: %s", - reader->get_host_port_pair().c_str()); + "Thread ID %d - [CONNECT_TO_READER_HANDLER] Connected to reader: %s", + id, reader->get_host_port_pair().c_str()); return; } } else { topology_service->mark_host_down(reader); MYLOG_TRACE( logger, dbc_id, - "[CONNECT_TO_READER_HANDLER] Failed to connect to reader: %s", - reader->get_host_port_pair().c_str()); + "Thread ID %d - [CONNECT_TO_READER_HANDLER] Failed to connect to reader: %s", + id, reader->get_host_port_pair().c_str()); if (!f_sync->is_completed()) { f_sync->mark_as_complete(false); } diff --git a/driver/failover_writer_handler.cc b/driver/failover_writer_handler.cc index 0a9e3ed29..759a4c37a 100644 --- a/driver/failover_writer_handler.cc +++ b/driver/failover_writer_handler.cc @@ -123,9 +123,9 @@ void RECONNECT_TO_WRITER_HANDLER::operator()( if (original_writer) { MYLOG_TRACE(logger, dbc_id, - "[RECONNECT_TO_WRITER_HANDLER] [TaskA] Attempting to " + "Thread ID %d - [RECONNECT_TO_WRITER_HANDLER] [TaskA] Attempting to " "re-connect to the current writer instance: %s", - original_writer->get_host_port_pair().c_str()); + id, original_writer->get_host_port_pair().c_str()); while (!f_sync->is_completed()) { if (connect(original_writer)) { @@ -143,18 +143,18 @@ void RECONNECT_TO_WRITER_HANDLER::operator()( result->new_topology = latest_topology; result->new_connection = std::move(new_connection); f_sync->mark_as_complete(true); - MYLOG_TRACE(logger, dbc_id, "[RECONNECT_TO_WRITER_HANDLER] [TaskA] Finished"); + MYLOG_TRACE(logger, dbc_id, "Thread ID %d - [RECONNECT_TO_WRITER_HANDLER] [TaskA] Finished", id); return; } release_new_connection(); } sleep(reconnect_interval_ms); } - MYLOG_TRACE(logger, dbc_id, "[RECONNECT_TO_WRITER_HANDLER] [TaskA] Cancelled"); + MYLOG_TRACE(logger, dbc_id, "Thread ID %d - [RECONNECT_TO_WRITER_HANDLER] [TaskA] Cancelled", id); } // Another thread finishes or both timeout, this thread is canceled release_new_connection(); - MYLOG_TRACE(logger, dbc_id, "[RECONNECT_TO_WRITER_HANDLER] [TaskA] Finished"); + MYLOG_TRACE(logger, dbc_id, "Thread ID %d - [RECONNECT_TO_WRITER_HANDLER] [TaskA] Finished", id); } bool RECONNECT_TO_WRITER_HANDLER::is_current_host_writer( @@ -189,7 +189,7 @@ void WAIT_NEW_WRITER_HANDLER::operator()( std::shared_ptr f_sync, std::shared_ptr result) { - MYLOG_TRACE(logger, dbc_id, "[WAIT_NEW_WRITER_HANDLER] [TaskB] Attempting to connect to a new writer instance"); + MYLOG_TRACE(logger, dbc_id, "Thread ID %d - [WAIT_NEW_WRITER_HANDLER] [TaskB] Attempting to connect to a new writer instance", id); while (!f_sync->is_completed()) { if (!is_writer_connected()) { @@ -202,16 +202,16 @@ void WAIT_NEW_WRITER_HANDLER::operator()( result->new_topology = current_topology; result->new_connection = std::move(new_connection); f_sync->mark_as_complete(true); - MYLOG_TRACE(logger, dbc_id, "[WAIT_NEW_WRITER_HANDLER] [TaskB] Finished"); + MYLOG_TRACE(logger, dbc_id, "Thread ID %d - [WAIT_NEW_WRITER_HANDLER] [TaskB] Finished", id); return; } } - MYLOG_TRACE(logger, dbc_id, "[WAIT_NEW_WRITER_HANDLER] [TaskB] Cancelled"); + MYLOG_TRACE(logger, dbc_id, "Thread ID %d - [WAIT_NEW_WRITER_HANDLER] [TaskB] Cancelled", id); // Another thread finishes or both timeout, this thread is canceled clean_up_reader_connection(); release_new_connection(); - MYLOG_TRACE(logger, dbc_id, "[WAIT_NEW_WRITER_HANDLER] [TaskB] Finished"); + MYLOG_TRACE(logger, dbc_id, "Thread ID %d - [WAIT_NEW_WRITER_HANDLER] [TaskB] Finished", id); } // Connect to a reader to later retrieve the latest topology @@ -333,12 +333,11 @@ std::shared_ptr FAILOVER_WRITER_HANDLER::failover( //std::thread reconnect_thread(std::move(reconnect_task), original_writer, failover_sync, reconnect_result); //std::thread wait_new_writer_thread(std::move((wait_new_writer_task), original_writer, failover_sync, new_writer_result); - if (failover_thread_pool.n_idle() == 0) { - failover_thread_pool.resize(failover_thread_pool.size() + 2); - } - - if (failover_thread_pool.n_idle() == 1) { - failover_thread_pool.resize(failover_thread_pool.size() + 1); + if (failover_thread_pool.n_idle() <= 1) { + int size = failover_thread_pool.size() + 2 - failover_thread_pool.n_idle(); + MYLOG_TRACE(logger, dbc_id, + "[FAILOVER_WRITER_HANDLER] Resizing thread pool to %d", size); + failover_thread_pool.resize(size); } auto reconnect_future = failover_thread_pool.push(std::move(reconnect_handler), original_writer, failover_sync, reconnect_result); From f4e4db48bb953d22d99e00941367f14c64dfa1b0 Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Wed, 19 Apr 2023 21:15:17 -0700 Subject: [PATCH 12/33] move future valid check before wait for --- driver/failover_reader_handler.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/driver/failover_reader_handler.cc b/driver/failover_reader_handler.cc index 668ccff42..a5a59b2ba 100644 --- a/driver/failover_reader_handler.cc +++ b/driver/failover_reader_handler.cc @@ -246,7 +246,7 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ // Constantly polling for results until timeout while (true) { // Check if first reader task result is ready - if (first_result.wait_for(std::chrono::seconds(0)) == std::future_status::ready && first_result.valid()) { + if (first_result.valid() && first_result.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { //task_thread1.join(); if (!odd_hosts_number) { //task_thread2.detach(); @@ -262,8 +262,8 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ } // Check if second reader task result is ready if there is one - if (!odd_hosts_number && - second_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready && second_future.valid()) { + if (!odd_hosts_number && second_future.valid() && + second_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { //task_thread1.detach(); //task_thread2.join(); second_future.get(); From d222b091efc4aa7e4630aa5742636646b17672b6 Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Wed, 19 Apr 2023 22:47:10 -0700 Subject: [PATCH 13/33] move future valid check before wait_for() for writer failover --- driver/failover_writer_handler.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/driver/failover_writer_handler.cc b/driver/failover_writer_handler.cc index 759a4c37a..bcdaea29b 100644 --- a/driver/failover_writer_handler.cc +++ b/driver/failover_writer_handler.cc @@ -349,7 +349,7 @@ std::shared_ptr FAILOVER_WRITER_HANDLER::failover( // Constantly polling for results until timeout while (true) { // Check if reconnect task result is ready - if (reconnect_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready && reconnect_future.valid()) { + if (reconnect_future.valid() && reconnect_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { //reconnect_thread.join(); //wait_new_writer_thread.detach(); reconnect_future.get(); @@ -362,7 +362,7 @@ std::shared_ptr FAILOVER_WRITER_HANDLER::failover( } // Check if wait new writer task result is ready - if (wait_new_writer_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready && wait_new_writer_future.valid()) { + if (wait_new_writer_future.valid() && wait_new_writer_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { //reconnect_thread.detach(); //wait_new_writer_thread.join(); wait_new_writer_future.get(); From b0c6955add190f3e6c07f3e384cb12fbdf156584 Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Thu, 20 Apr 2023 11:36:15 -0700 Subject: [PATCH 14/33] mark logger as extern --- driver/mylog.cc | 1 + driver/mylog.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/driver/mylog.cc b/driver/mylog.cc index b2ef1679f..fc9cbb38c 100644 --- a/driver/mylog.cc +++ b/driver/mylog.cc @@ -42,6 +42,7 @@ #endif std::mutex log_file_mutex; +std::shared_ptr log_file; void trace_print(std::shared_ptr file, unsigned long dbc_id, const char *fmt, ...) { if (file && fmt) { diff --git a/driver/mylog.h b/driver/mylog.h index 775e0996d..e496738f2 100644 --- a/driver/mylog.h +++ b/driver/mylog.h @@ -59,7 +59,7 @@ struct FILEDeleter { } }; -static std::shared_ptr log_file; +extern std::shared_ptr log_file; extern std::mutex log_file_mutex; /* Functions used when debugging */ From 9f79d3ad5dca209aced61988d07d1c78b150b35a Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Thu, 20 Apr 2023 12:43:54 -0700 Subject: [PATCH 15/33] test diable logs --- integration/network_failover_integration_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration/network_failover_integration_test.cc b/integration/network_failover_integration_test.cc index 2c3630e57..3acac7270 100644 --- a/integration/network_failover_integration_test.cc +++ b/integration/network_failover_integration_test.cc @@ -77,7 +77,7 @@ class NetworkFailoverIntegrationTest : public BaseFailoverIntegrationTest { .withNetworkTimeout(10); builder.withPort(MYSQL_PROXY_PORT) .withHostPattern(PROXIED_CLUSTER_TEMPLATE) - .withLogQuery(true) + //.withLogQuery(true) .withEnableFailureDetection(true); } From a14e2e50fca822f6b92f9f9f62e8ad9038a919bd Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Thu, 20 Apr 2023 13:48:24 -0700 Subject: [PATCH 16/33] test failover_integration_test first --- integration/CMakeLists.txt | 3 ++- integration/network_failover_integration_test.cc | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/integration/CMakeLists.txt b/integration/CMakeLists.txt index 83e0d4b68..9f0823404 100644 --- a/integration/CMakeLists.txt +++ b/integration/CMakeLists.txt @@ -101,8 +101,9 @@ set(TEST_SOURCES set(INTEGRATION_TESTS iam_authentication_integration_test.cc secrets_manager_integration_test.cc + failover_integration_test.cc network_failover_integration_test.cc - failover_integration_test.cc) + ) if(NOT ENABLE_PERFORMANCE_TESTS) set(TEST_SOURCES ${TEST_SOURCES} ${INTEGRATION_TESTS}) diff --git a/integration/network_failover_integration_test.cc b/integration/network_failover_integration_test.cc index 3acac7270..2c3630e57 100644 --- a/integration/network_failover_integration_test.cc +++ b/integration/network_failover_integration_test.cc @@ -77,7 +77,7 @@ class NetworkFailoverIntegrationTest : public BaseFailoverIntegrationTest { .withNetworkTimeout(10); builder.withPort(MYSQL_PROXY_PORT) .withHostPattern(PROXIED_CLUSTER_TEMPLATE) - //.withLogQuery(true) + .withLogQuery(true) .withEnableFailureDetection(true); } From 0c98d9c87708668157331e80a72a64a2ff7a188e Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Thu, 20 Apr 2023 15:11:27 -0700 Subject: [PATCH 17/33] test --- driver/failover_reader_handler.cc | 2 +- integration/CMakeLists.txt | 2 +- integration/network_failover_integration_test.cc | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/driver/failover_reader_handler.cc b/driver/failover_reader_handler.cc index a5a59b2ba..30d45ac31 100644 --- a/driver/failover_reader_handler.cc +++ b/driver/failover_reader_handler.cc @@ -237,7 +237,7 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ if (!odd_hosts_number) { auto second_reader_host = hosts_list.at(i + 1); //task_thread2 = std::thread(std::move(second_reader_task), second_reader_host, local_sync,second_connection_result); - second_future = failover_thread_pool.push(std::move(second_connection_handler), second_reader_host, local_sync, second_connection_result); + second_future = std::move(failover_thread_pool.push(std::move(second_connection_handler), second_reader_host, local_sync, second_connection_result)); } // Wait for task complete signal with specified timeout diff --git a/integration/CMakeLists.txt b/integration/CMakeLists.txt index 9f0823404..232df2180 100644 --- a/integration/CMakeLists.txt +++ b/integration/CMakeLists.txt @@ -101,7 +101,7 @@ set(TEST_SOURCES set(INTEGRATION_TESTS iam_authentication_integration_test.cc secrets_manager_integration_test.cc - failover_integration_test.cc + #failover_integration_test.cc network_failover_integration_test.cc ) diff --git a/integration/network_failover_integration_test.cc b/integration/network_failover_integration_test.cc index 2c3630e57..7999cf2d7 100644 --- a/integration/network_failover_integration_test.cc +++ b/integration/network_failover_integration_test.cc @@ -91,7 +91,7 @@ class NetworkFailoverIntegrationTest : public BaseFailoverIntegrationTest { } }; -TEST_F(NetworkFailoverIntegrationTest, connection_test) { +TEST_F(NetworkFailoverIntegrationTest, DISABLED_connection_test) { test_connection(dbc, MYSQL_INSTANCE_1_URL, MYSQL_PORT); test_connection(dbc, MYSQL_INSTANCE_1_URL + PROXIED_DOMAIN_NAME_SUFFIX, MYSQL_PROXY_PORT); test_connection(dbc, MYSQL_CLUSTER_URL, MYSQL_PORT); @@ -100,7 +100,7 @@ TEST_F(NetworkFailoverIntegrationTest, connection_test) { test_connection(dbc, MYSQL_RO_CLUSTER_URL + PROXIED_DOMAIN_NAME_SUFFIX, MYSQL_PROXY_PORT); } -TEST_F(NetworkFailoverIntegrationTest, lost_connection_to_writer) { +TEST_F(NetworkFailoverIntegrationTest, DISABLED_lost_connection_to_writer) { const std::string server = get_proxied_endpoint(writer_id); connection_string = builder.withServer(server).withFailoverTimeout(GLOBAL_FAILOVER_TIMEOUT).build(); EXPECT_EQ(SQL_SUCCESS, SQLDriverConnect(dbc, nullptr, AS_SQLCHAR(connection_string.c_str()), SQL_NTS, conn_out, MAX_NAME_LEN, &len, SQL_DRIVER_NOPROMPT)); @@ -121,7 +121,7 @@ TEST_F(NetworkFailoverIntegrationTest, lost_connection_to_writer) { EXPECT_EQ(SQL_SUCCESS, SQLDisconnect(dbc)); } -TEST_F(NetworkFailoverIntegrationTest, use_same_connection_after_failing_failover) { +TEST_F(NetworkFailoverIntegrationTest, DISABLED_use_same_connection_after_failing_failover) { const std::string server = get_proxied_endpoint(writer_id); connection_string = builder.withServer(server).withFailoverTimeout(GLOBAL_FAILOVER_TIMEOUT).build(); EXPECT_EQ(SQL_SUCCESS, SQLDriverConnect(dbc, nullptr, AS_SQLCHAR(connection_string.c_str()), SQL_NTS, conn_out, MAX_NAME_LEN, &len, SQL_DRIVER_NOPROMPT)); @@ -148,7 +148,7 @@ TEST_F(NetworkFailoverIntegrationTest, use_same_connection_after_failing_failove EXPECT_EQ(SQL_SUCCESS, SQLDisconnect(dbc)); } -TEST_F(NetworkFailoverIntegrationTest, lost_connection_to_all_readers) { +TEST_F(NetworkFailoverIntegrationTest, DISABLED_lost_connection_to_all_readers) { connection_string = builder.withServer(reader_endpoint).build(); EXPECT_EQ(SQL_SUCCESS, SQLDriverConnect(dbc, nullptr, AS_SQLCHAR(connection_string.c_str()), SQL_NTS, conn_out, MAX_NAME_LEN, &len, SQL_DRIVER_NOPROMPT)); From 1aa0cad42d2b8595040f7102b8a5e64c1d56cd75 Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Thu, 20 Apr 2023 15:51:04 -0700 Subject: [PATCH 18/33] debug gh action --- .github/workflows/dockerized.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/dockerized.yml b/.github/workflows/dockerized.yml index 8f8a8a1da..3246c69e8 100644 --- a/.github/workflows/dockerized.yml +++ b/.github/workflows/dockerized.yml @@ -11,6 +11,7 @@ on: - '**/*.md' - '**/*.jpg' - '**/README.txt' + - '**/LICENSE.txt' - 'docs/**' - 'ISSUE_TEMPLATE/**' - '**/remove-old-artifacts.yml' @@ -180,6 +181,9 @@ jobs: echo "TEMP_AWS_SECRET_ACCESS_KEY=${creds[1]}" >> $GITHUB_ENV echo "TEMP_AWS_SESSION_TOKEN=${creds[2]}" >> $GITHUB_ENV + - name: Setup upterm session + uses: lhotari/action-upterm@v1 + - name: 'Run Integration Tests' working-directory: ${{ github.workspace }}/testframework run: | From d4ecb665d4f1d635e6eb4fc95d6a4615f1f1ab2c Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Thu, 20 Apr 2023 15:57:12 -0700 Subject: [PATCH 19/33] make env var available for debug session --- .github/workflows/dockerized.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/dockerized.yml b/.github/workflows/dockerized.yml index 3246c69e8..7f1ab8b41 100644 --- a/.github/workflows/dockerized.yml +++ b/.github/workflows/dockerized.yml @@ -183,6 +183,15 @@ jobs: - name: Setup upterm session uses: lhotari/action-upterm@v1 + env: + TEST_DSN: atlas + TEST_USERNAME: ${{ secrets.TEST_USERNAME }} + TEST_PASSWORD: ${{ secrets.TEST_PASSWORD }} + TEST_DB_CLUSTER_IDENTIFIER: ${{ secrets.TEST_DB_CLUSTER_IDENTIFIER }}-${{ github.run_id }}-${{ github.run_attempt }} + AWS_ACCESS_KEY_ID: ${{ env.TEMP_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ env.TEMP_AWS_SECRET_ACCESS_KEY }} + AWS_SESSION_TOKEN: ${{ env.TEMP_AWS_SESSION_TOKEN }} + DRIVER_PATH: ${{ github.workspace }} - name: 'Run Integration Tests' working-directory: ${{ github.workspace }}/testframework From 254cb1aa09508afd4697a0fc1e3f2e82b98b3024 Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Thu, 20 Apr 2023 17:08:17 -0700 Subject: [PATCH 20/33] debug in docker --- .github/workflows/dockerized.yml | 2 +- testframework/src/test/java/host/IntegrationContainerTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/dockerized.yml b/.github/workflows/dockerized.yml index 7f1ab8b41..f86b426aa 100644 --- a/.github/workflows/dockerized.yml +++ b/.github/workflows/dockerized.yml @@ -170,7 +170,7 @@ jobs: - name: 'Set up Temp AWS Credentials' run: | creds=($(aws sts get-session-token \ - --duration-seconds 7200 \ + --duration-seconds 17200 \ --query 'Credentials.[AccessKeyId, SecretAccessKey, SessionToken]' \ --output text \ | xargs)); diff --git a/testframework/src/test/java/host/IntegrationContainerTest.java b/testframework/src/test/java/host/IntegrationContainerTest.java index 5c7090d7b..bbc544bf9 100644 --- a/testframework/src/test/java/host/IntegrationContainerTest.java +++ b/testframework/src/test/java/host/IntegrationContainerTest.java @@ -133,7 +133,7 @@ public void testRunCommunityTestInContainer() public void testRunFailoverTestInContainer() throws UnsupportedOperationException, IOException, InterruptedException { setupFailoverIntegrationTests(NETWORK); - + Thread.sleep(Long.MAX_VALUE); containerHelper.runExecutable(testContainer, "build/integration/bin", "integration"); } From 2ec1ae5e95f2257166836b439e9ae4d189084317 Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Thu, 20 Apr 2023 18:16:24 -0700 Subject: [PATCH 21/33] refactor integration tests to use shared pointer for rds client --- integration/base_failover_integration_test.cc | 32 ++++++++++--------- integration/failover_integration_test.cc | 4 +-- integration/failover_performance_test.cc | 3 +- .../network_failover_integration_test.cc | 12 +++---- testframework/build.gradle.kts | 4 +-- 5 files changed, 28 insertions(+), 27 deletions(-) diff --git a/integration/base_failover_integration_test.cc b/integration/base_failover_integration_test.cc index e1b1bf25e..fe51b36a0 100644 --- a/integration/base_failover_integration_test.cc +++ b/integration/base_failover_integration_test.cc @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -65,6 +66,7 @@ static std::string DOWN_STREAM_STR = "DOWNSTREAM"; static std::string UP_STREAM_STR = "UPSTREAM"; static Aws::SDKOptions options; +static std::shared_ptr rds_client; class BaseFailoverIntegrationTest : public testing::Test { protected: @@ -219,7 +221,7 @@ class BaseFailoverIntegrationTest : public testing::Test { // Helper functions from integration tests - static std::vector retrieve_topology_via_SDK(const Aws::RDS::RDSClient& client, const Aws::String& cluster_id) { + static std::vector retrieve_topology_via_SDK(std::shared_ptr client, const Aws::String& cluster_id) { std::vector instances; std::string writer; @@ -227,7 +229,7 @@ class BaseFailoverIntegrationTest : public testing::Test { Aws::RDS::Model::DescribeDBClustersRequest rds_req; rds_req.WithDBClusterIdentifier(cluster_id); - auto outcome = client.DescribeDBClusters(rds_req); + auto outcome = client->DescribeDBClusters(rds_req); if (!outcome.IsSuccess()) { throw std::runtime_error("Unable to get cluster topology using SDK."); @@ -252,15 +254,15 @@ class BaseFailoverIntegrationTest : public testing::Test { return instances; } - static Aws::RDS::Model::DBCluster get_DB_cluster(const Aws::RDS::RDSClient& client, const Aws::String& cluster_id) { + static Aws::RDS::Model::DBCluster get_DB_cluster(std::shared_ptr client, const Aws::String& cluster_id) { Aws::RDS::Model::DescribeDBClustersRequest rds_req; rds_req.WithDBClusterIdentifier(cluster_id); - auto outcome = client.DescribeDBClusters(rds_req); + auto outcome = client->DescribeDBClusters(rds_req); const auto result = outcome.GetResult(); return result.GetDBClusters().at(0); } - static void wait_until_cluster_has_right_state(const Aws::RDS::RDSClient& client, const Aws::String& cluster_id) { + static void wait_until_cluster_has_right_state(std::shared_ptr client, const Aws::String& cluster_id) { Aws::String status = get_DB_cluster(client, cluster_id).GetStatus(); while (status != "available") { @@ -269,7 +271,7 @@ class BaseFailoverIntegrationTest : public testing::Test { } } - static Aws::RDS::Model::DBClusterMember get_DB_cluster_writer_instance(const Aws::RDS::RDSClient& client, const Aws::String& cluster_id) { + static Aws::RDS::Model::DBClusterMember get_DB_cluster_writer_instance(std::shared_ptr client, const Aws::String& cluster_id) { Aws::RDS::Model::DBClusterMember instance; const Aws::RDS::Model::DBCluster cluster = get_DB_cluster(client, cluster_id); for (const auto& member : cluster.GetDBClusterMembers()) { @@ -280,11 +282,11 @@ class BaseFailoverIntegrationTest : public testing::Test { return instance; } - static Aws::String get_DB_cluster_writer_instance_id(const Aws::RDS::RDSClient& client, const Aws::String& cluster_id) { + static Aws::String get_DB_cluster_writer_instance_id(std::shared_ptr client, const Aws::String& cluster_id) { return get_DB_cluster_writer_instance(client, cluster_id).GetDBInstanceIdentifier(); } - static void wait_until_writer_instance_changed(const Aws::RDS::RDSClient& client, const Aws::String& cluster_id, + static void wait_until_writer_instance_changed(std::shared_ptr client, const Aws::String& cluster_id, const Aws::String& initial_writer_instance_id) { Aws::String next_cluster_writer_id = get_DB_cluster_writer_instance_id(client, cluster_id); while (initial_writer_instance_id == next_cluster_writer_id) { @@ -293,14 +295,14 @@ class BaseFailoverIntegrationTest : public testing::Test { } } - static void failover_cluster(const Aws::RDS::RDSClient& client, const Aws::String& cluster_id, const Aws::String& target_instance_id = "") { + static void failover_cluster(std::shared_ptr client, const Aws::String& cluster_id, const Aws::String& target_instance_id = "") { wait_until_cluster_has_right_state(client, cluster_id); Aws::RDS::Model::FailoverDBClusterRequest rds_req; rds_req.WithDBClusterIdentifier(cluster_id); if (!target_instance_id.empty()) { rds_req.WithTargetDBInstanceIdentifier(target_instance_id); } - auto outcome = client.FailoverDBCluster(rds_req); + auto outcome = client->FailoverDBCluster(rds_req); } static Aws::String get_random_DB_cluster_reader_instance_id(std::vector readers) { @@ -310,7 +312,7 @@ class BaseFailoverIntegrationTest : public testing::Test { return readers.at(distribution(generator)); } - static bool has_writer_changed(const Aws::RDS::RDSClient& client, const Aws::String& cluster_id, std::string initial_writer_id, std::chrono::nanoseconds timeout) { + static bool has_writer_changed(std::shared_ptr client, const Aws::String& cluster_id, std::string initial_writer_id, std::chrono::nanoseconds timeout) { auto start = std::chrono::high_resolution_clock::now(); std::string current_writer_id = get_DB_cluster_writer_instance_id(client, cluster_id); @@ -325,7 +327,7 @@ class BaseFailoverIntegrationTest : public testing::Test { return true; } - static void failover_cluster_and_wait_until_writer_changed(const Aws::RDS::RDSClient& client, const Aws::String& cluster_id, + static void failover_cluster_and_wait_until_writer_changed(std::shared_ptr client, const Aws::String& cluster_id, const Aws::String& initial_writer_id, const Aws::String& target_writer_id = "") { @@ -357,7 +359,7 @@ class BaseFailoverIntegrationTest : public testing::Test { } } - static Aws::RDS::Model::DBClusterMember get_matched_DBClusterMember(const Aws::RDS::RDSClient& client, const Aws::String& cluster_id, + static Aws::RDS::Model::DBClusterMember get_matched_DBClusterMember(std::shared_ptr client, const Aws::String& cluster_id, const Aws::String& instance_id) { Aws::RDS::Model::DBClusterMember instance; const Aws::RDS::Model::DBCluster cluster = get_DB_cluster(client, cluster_id); @@ -370,11 +372,11 @@ class BaseFailoverIntegrationTest : public testing::Test { return instance; } - static bool is_DB_instance_writer(const Aws::RDS::RDSClient& client, const Aws::String& cluster_id, const Aws::String& instance_id) { + static bool is_DB_instance_writer(std::shared_ptr client, const Aws::String& cluster_id, const Aws::String& instance_id) { return get_matched_DBClusterMember(client, cluster_id, instance_id).GetIsClusterWriter(); } - static bool is_DB_instance_reader(const Aws::RDS::RDSClient& client, const Aws::String& cluster_id, const Aws::String& instance_id) { + static bool is_DB_instance_reader(std::shared_ptr client, const Aws::String& cluster_id, const Aws::String& instance_id) { return !get_matched_DBClusterMember(client, cluster_id, instance_id).GetIsClusterWriter(); } diff --git a/integration/failover_integration_test.cc b/integration/failover_integration_test.cc index 20fb3be0d..a99daf49d 100644 --- a/integration/failover_integration_test.cc +++ b/integration/failover_integration_test.cc @@ -38,15 +38,16 @@ class FailoverIntegrationTest : public BaseFailoverIntegrationTest { Aws::String(SECRET_ACCESS_KEY), Aws::String(SESSION_TOKEN)); Aws::Client::ClientConfiguration client_config; - Aws::RDS::RDSClient rds_client; SQLHENV env = nullptr; SQLHDBC dbc = nullptr; static void SetUpTestSuite() { Aws::InitAPI(options); + rds_client = std::make_shared(credentials, client_config); } static void TearDownTestSuite() { + rds_client->reset(); Aws::ShutdownAPI(options); } @@ -55,7 +56,6 @@ class FailoverIntegrationTest : public BaseFailoverIntegrationTest { SQLSetEnvAttr(env, SQL_ATTR_ODBC_VERSION, reinterpret_cast(SQL_OV_ODBC3), 0); SQLAllocHandle(SQL_HANDLE_DBC, env, &dbc); client_config.region = "us-east-2"; - rds_client = Aws::RDS::RDSClient(credentials, client_config); cluster_instances = retrieve_topology_via_SDK(rds_client, cluster_id); writer_id = get_writer_id(cluster_instances); diff --git a/integration/failover_performance_test.cc b/integration/failover_performance_test.cc index 31c74a036..3ddfbccfa 100644 --- a/integration/failover_performance_test.cc +++ b/integration/failover_performance_test.cc @@ -151,7 +151,6 @@ class FailoverPerformanceTest : Aws::String(SECRET_ACCESS_KEY), Aws::String(SESSION_TOKEN)); Aws::Client::ClientConfiguration client_config; - Aws::RDS::RDSClient rds_client; SQLHENV env = nullptr; SQLHDBC dbc = nullptr; @@ -161,9 +160,11 @@ class FailoverPerformanceTest : static void SetUpTestSuite() { Aws::InitAPI(options); + rds_client = std::make_shared(credentials, client_config); } static void TearDownTestSuite() { + rds_client->reset(); Aws::ShutdownAPI(options); // Save results to spreadsheet diff --git a/integration/network_failover_integration_test.cc b/integration/network_failover_integration_test.cc index 7999cf2d7..490be5933 100644 --- a/integration/network_failover_integration_test.cc +++ b/integration/network_failover_integration_test.cc @@ -38,15 +38,16 @@ class NetworkFailoverIntegrationTest : public BaseFailoverIntegrationTest { Aws::String(SECRET_ACCESS_KEY), Aws::String(SESSION_TOKEN)); Aws::Client::ClientConfiguration client_config; - Aws::RDS::RDSClient rds_client; SQLHENV env = nullptr; SQLHDBC dbc = nullptr; static void SetUpTestSuite() { Aws::InitAPI(options); + rds_client = std::make_shared(credentials, client_config); } static void TearDownTestSuite() { + rds_client->reset(); Aws::ShutdownAPI(options); } @@ -56,7 +57,6 @@ class NetworkFailoverIntegrationTest : public BaseFailoverIntegrationTest { SQLAllocHandle(SQL_HANDLE_DBC, env, &dbc); client_config.region = "us-east-2"; - rds_client = Aws::RDS::RDSClient(credentials, client_config); for (const auto& x : proxy_map) { enable_connectivity(x.second); @@ -91,7 +91,7 @@ class NetworkFailoverIntegrationTest : public BaseFailoverIntegrationTest { } }; -TEST_F(NetworkFailoverIntegrationTest, DISABLED_connection_test) { +TEST_F(NetworkFailoverIntegrationTest, connection_test) { test_connection(dbc, MYSQL_INSTANCE_1_URL, MYSQL_PORT); test_connection(dbc, MYSQL_INSTANCE_1_URL + PROXIED_DOMAIN_NAME_SUFFIX, MYSQL_PROXY_PORT); test_connection(dbc, MYSQL_CLUSTER_URL, MYSQL_PORT); @@ -100,7 +100,7 @@ TEST_F(NetworkFailoverIntegrationTest, DISABLED_connection_test) { test_connection(dbc, MYSQL_RO_CLUSTER_URL + PROXIED_DOMAIN_NAME_SUFFIX, MYSQL_PROXY_PORT); } -TEST_F(NetworkFailoverIntegrationTest, DISABLED_lost_connection_to_writer) { +TEST_F(NetworkFailoverIntegrationTest, lost_connection_to_writer) { const std::string server = get_proxied_endpoint(writer_id); connection_string = builder.withServer(server).withFailoverTimeout(GLOBAL_FAILOVER_TIMEOUT).build(); EXPECT_EQ(SQL_SUCCESS, SQLDriverConnect(dbc, nullptr, AS_SQLCHAR(connection_string.c_str()), SQL_NTS, conn_out, MAX_NAME_LEN, &len, SQL_DRIVER_NOPROMPT)); @@ -121,7 +121,7 @@ TEST_F(NetworkFailoverIntegrationTest, DISABLED_lost_connection_to_writer) { EXPECT_EQ(SQL_SUCCESS, SQLDisconnect(dbc)); } -TEST_F(NetworkFailoverIntegrationTest, DISABLED_use_same_connection_after_failing_failover) { +TEST_F(NetworkFailoverIntegrationTest, use_same_connection_after_failing_failover) { const std::string server = get_proxied_endpoint(writer_id); connection_string = builder.withServer(server).withFailoverTimeout(GLOBAL_FAILOVER_TIMEOUT).build(); EXPECT_EQ(SQL_SUCCESS, SQLDriverConnect(dbc, nullptr, AS_SQLCHAR(connection_string.c_str()), SQL_NTS, conn_out, MAX_NAME_LEN, &len, SQL_DRIVER_NOPROMPT)); @@ -148,7 +148,7 @@ TEST_F(NetworkFailoverIntegrationTest, DISABLED_use_same_connection_after_failin EXPECT_EQ(SQL_SUCCESS, SQLDisconnect(dbc)); } -TEST_F(NetworkFailoverIntegrationTest, DISABLED_lost_connection_to_all_readers) { +TEST_F(NetworkFailoverIntegrationTest, lost_connection_to_all_readers) { connection_string = builder.withServer(reader_endpoint).build(); EXPECT_EQ(SQL_SUCCESS, SQLDriverConnect(dbc, nullptr, AS_SQLCHAR(connection_string.c_str()), SQL_NTS, conn_out, MAX_NAME_LEN, &len, SQL_DRIVER_NOPROMPT)); diff --git a/testframework/build.gradle.kts b/testframework/build.gradle.kts index d2a75cf7c..c10816d24 100644 --- a/testframework/build.gradle.kts +++ b/testframework/build.gradle.kts @@ -6,9 +6,7 @@ group = "software.aws.rds" version = "1.0-SNAPSHOT" repositories { - maven { - url = uri("https://repo1.maven.org/maven2") - } + mavenCentral() } dependencies { From 410964c99f5ba30f347f11f78cb71b9e3c359a11 Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Thu, 20 Apr 2023 20:38:23 -0700 Subject: [PATCH 22/33] make rds client back to non static --- integration/base_failover_integration_test.cc | 1 - integration/failover_integration_test.cc | 4 ++-- integration/failover_performance_test.cc | 4 ++-- integration/network_failover_integration_test.cc | 4 ++-- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/integration/base_failover_integration_test.cc b/integration/base_failover_integration_test.cc index fe51b36a0..3c3eb357f 100644 --- a/integration/base_failover_integration_test.cc +++ b/integration/base_failover_integration_test.cc @@ -66,7 +66,6 @@ static std::string DOWN_STREAM_STR = "DOWNSTREAM"; static std::string UP_STREAM_STR = "UPSTREAM"; static Aws::SDKOptions options; -static std::shared_ptr rds_client; class BaseFailoverIntegrationTest : public testing::Test { protected: diff --git a/integration/failover_integration_test.cc b/integration/failover_integration_test.cc index a99daf49d..5f8b42a0a 100644 --- a/integration/failover_integration_test.cc +++ b/integration/failover_integration_test.cc @@ -38,20 +38,20 @@ class FailoverIntegrationTest : public BaseFailoverIntegrationTest { Aws::String(SECRET_ACCESS_KEY), Aws::String(SESSION_TOKEN)); Aws::Client::ClientConfiguration client_config; + Aws::RDS::RDSClient rds_client; SQLHENV env = nullptr; SQLHDBC dbc = nullptr; static void SetUpTestSuite() { Aws::InitAPI(options); - rds_client = std::make_shared(credentials, client_config); } static void TearDownTestSuite() { - rds_client->reset(); Aws::ShutdownAPI(options); } void SetUp() override { + rds_client = std::make_shared(credentials, client_config); SQLAllocHandle(SQL_HANDLE_ENV, nullptr, &env); SQLSetEnvAttr(env, SQL_ATTR_ODBC_VERSION, reinterpret_cast(SQL_OV_ODBC3), 0); SQLAllocHandle(SQL_HANDLE_DBC, env, &dbc); diff --git a/integration/failover_performance_test.cc b/integration/failover_performance_test.cc index 3ddfbccfa..0af0af3d9 100644 --- a/integration/failover_performance_test.cc +++ b/integration/failover_performance_test.cc @@ -153,6 +153,7 @@ class FailoverPerformanceTest : Aws::Client::ClientConfiguration client_config; SQLHENV env = nullptr; SQLHDBC dbc = nullptr; + Aws::RDS::RDSClient rds_client; SQLCHAR* LONG_QUERY = AS_SQLCHAR("SELECT SLEEP(600)"); // 600s -> 10m const size_t NB_OF_RUNS = 6; @@ -160,11 +161,9 @@ class FailoverPerformanceTest : static void SetUpTestSuite() { Aws::InitAPI(options); - rds_client = std::make_shared(credentials, client_config); } static void TearDownTestSuite() { - rds_client->reset(); Aws::ShutdownAPI(options); // Save results to spreadsheet @@ -178,6 +177,7 @@ class FailoverPerformanceTest : } void SetUp() override { + rds_client = std::make_shared(credentials, client_config); SQLAllocHandle(SQL_HANDLE_ENV, nullptr, &env); SQLSetEnvAttr(env, SQL_ATTR_ODBC_VERSION, reinterpret_cast(SQL_OV_ODBC3), 0); SQLAllocHandle(SQL_HANDLE_DBC, env, &dbc); diff --git a/integration/network_failover_integration_test.cc b/integration/network_failover_integration_test.cc index 490be5933..1f2d80e73 100644 --- a/integration/network_failover_integration_test.cc +++ b/integration/network_failover_integration_test.cc @@ -40,18 +40,18 @@ class NetworkFailoverIntegrationTest : public BaseFailoverIntegrationTest { Aws::Client::ClientConfiguration client_config; SQLHENV env = nullptr; SQLHDBC dbc = nullptr; + Aws::RDS::RDSClient rds_client; static void SetUpTestSuite() { Aws::InitAPI(options); - rds_client = std::make_shared(credentials, client_config); } static void TearDownTestSuite() { - rds_client->reset(); Aws::ShutdownAPI(options); } void SetUp() override { + rds_client = std::make_shared(credentials, client_config); SQLAllocHandle(SQL_HANDLE_ENV, nullptr, &env); SQLSetEnvAttr(env, SQL_ATTR_ODBC_VERSION, reinterpret_cast(SQL_OV_ODBC3), 0); SQLAllocHandle(SQL_HANDLE_DBC, env, &dbc); From 5e30e6af38d30de715cd4553c1894567237f5dda Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Thu, 20 Apr 2023 20:48:48 -0700 Subject: [PATCH 23/33] fix build --- integration/failover_integration_test.cc | 2 +- integration/failover_performance_test.cc | 2 +- integration/network_failover_integration_test.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/integration/failover_integration_test.cc b/integration/failover_integration_test.cc index 5f8b42a0a..5cef7ca39 100644 --- a/integration/failover_integration_test.cc +++ b/integration/failover_integration_test.cc @@ -38,7 +38,7 @@ class FailoverIntegrationTest : public BaseFailoverIntegrationTest { Aws::String(SECRET_ACCESS_KEY), Aws::String(SESSION_TOKEN)); Aws::Client::ClientConfiguration client_config; - Aws::RDS::RDSClient rds_client; + std::shared_ptr rds_client; SQLHENV env = nullptr; SQLHDBC dbc = nullptr; diff --git a/integration/failover_performance_test.cc b/integration/failover_performance_test.cc index 0af0af3d9..6ad72bb62 100644 --- a/integration/failover_performance_test.cc +++ b/integration/failover_performance_test.cc @@ -153,7 +153,7 @@ class FailoverPerformanceTest : Aws::Client::ClientConfiguration client_config; SQLHENV env = nullptr; SQLHDBC dbc = nullptr; - Aws::RDS::RDSClient rds_client; + std::shared_ptr rds_client; SQLCHAR* LONG_QUERY = AS_SQLCHAR("SELECT SLEEP(600)"); // 600s -> 10m const size_t NB_OF_RUNS = 6; diff --git a/integration/network_failover_integration_test.cc b/integration/network_failover_integration_test.cc index 1f2d80e73..9cc65f810 100644 --- a/integration/network_failover_integration_test.cc +++ b/integration/network_failover_integration_test.cc @@ -38,9 +38,9 @@ class NetworkFailoverIntegrationTest : public BaseFailoverIntegrationTest { Aws::String(SECRET_ACCESS_KEY), Aws::String(SESSION_TOKEN)); Aws::Client::ClientConfiguration client_config; + std::shared_ptr rds_client; SQLHENV env = nullptr; SQLHDBC dbc = nullptr; - Aws::RDS::RDSClient rds_client; static void SetUpTestSuite() { Aws::InitAPI(options); From 473d90f346d3ab44972157dc649f6efa0ddc134a Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Thu, 20 Apr 2023 21:49:07 -0700 Subject: [PATCH 24/33] reset rds client in teardown --- integration/failover_integration_test.cc | 5 +++-- integration/failover_performance_test.cc | 7 ++++--- integration/network_failover_integration_test.cc | 5 +++-- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/integration/failover_integration_test.cc b/integration/failover_integration_test.cc index 5cef7ca39..036d5d6ed 100644 --- a/integration/failover_integration_test.cc +++ b/integration/failover_integration_test.cc @@ -37,7 +37,7 @@ class FailoverIntegrationTest : public BaseFailoverIntegrationTest { Aws::Auth::AWSCredentials credentials = Aws::Auth::AWSCredentials(Aws::String(ACCESS_KEY), Aws::String(SECRET_ACCESS_KEY), Aws::String(SESSION_TOKEN)); - Aws::Client::ClientConfiguration client_config; + Aws::RDS::RDSClientConfiguration client_config; std::shared_ptr rds_client; SQLHENV env = nullptr; SQLHDBC dbc = nullptr; @@ -51,7 +51,7 @@ class FailoverIntegrationTest : public BaseFailoverIntegrationTest { } void SetUp() override { - rds_client = std::make_shared(credentials, client_config); + rds_client = std::make_shared(client_config); SQLAllocHandle(SQL_HANDLE_ENV, nullptr, &env); SQLSetEnvAttr(env, SQL_ATTR_ODBC_VERSION, reinterpret_cast(SQL_OV_ODBC3), 0); SQLAllocHandle(SQL_HANDLE_DBC, env, &dbc); @@ -78,6 +78,7 @@ class FailoverIntegrationTest : public BaseFailoverIntegrationTest { if (nullptr != env) { SQLFreeHandle(SQL_HANDLE_ENV, env); } + rds_client.reset(); } }; diff --git a/integration/failover_performance_test.cc b/integration/failover_performance_test.cc index 6ad72bb62..ed139e84d 100644 --- a/integration/failover_performance_test.cc +++ b/integration/failover_performance_test.cc @@ -150,10 +150,10 @@ class FailoverPerformanceTest : Aws::Auth::AWSCredentials credentials = Aws::Auth::AWSCredentials(Aws::String(ACCESS_KEY), Aws::String(SECRET_ACCESS_KEY), Aws::String(SESSION_TOKEN)); - Aws::Client::ClientConfiguration client_config; + Aws::RDS::RDSClientConfiguration client_config; + std::shared_ptr rds_client; SQLHENV env = nullptr; SQLHDBC dbc = nullptr; - std::shared_ptr rds_client; SQLCHAR* LONG_QUERY = AS_SQLCHAR("SELECT SLEEP(600)"); // 600s -> 10m const size_t NB_OF_RUNS = 6; @@ -177,7 +177,7 @@ class FailoverPerformanceTest : } void SetUp() override { - rds_client = std::make_shared(credentials, client_config); + rds_client = std::make_shared(client_config); SQLAllocHandle(SQL_HANDLE_ENV, nullptr, &env); SQLSetEnvAttr(env, SQL_ATTR_ODBC_VERSION, reinterpret_cast(SQL_OV_ODBC3), 0); SQLAllocHandle(SQL_HANDLE_DBC, env, &dbc); @@ -195,6 +195,7 @@ class FailoverPerformanceTest : if (nullptr != env) { SQLFreeHandle(SQL_HANDLE_ENV, env); } + rds_client.reset(); } // Only run if passed in parameter: data, is a type of BASE_PERFORMANCE_DATA diff --git a/integration/network_failover_integration_test.cc b/integration/network_failover_integration_test.cc index 9cc65f810..4d580af19 100644 --- a/integration/network_failover_integration_test.cc +++ b/integration/network_failover_integration_test.cc @@ -37,7 +37,7 @@ class NetworkFailoverIntegrationTest : public BaseFailoverIntegrationTest { Aws::Auth::AWSCredentials credentials = Aws::Auth::AWSCredentials(Aws::String(ACCESS_KEY), Aws::String(SECRET_ACCESS_KEY), Aws::String(SESSION_TOKEN)); - Aws::Client::ClientConfiguration client_config; + Aws::RDS::RDSClientConfiguration client_config; std::shared_ptr rds_client; SQLHENV env = nullptr; SQLHDBC dbc = nullptr; @@ -51,7 +51,7 @@ class NetworkFailoverIntegrationTest : public BaseFailoverIntegrationTest { } void SetUp() override { - rds_client = std::make_shared(credentials, client_config); + rds_client = std::make_shared(client_config); SQLAllocHandle(SQL_HANDLE_ENV, nullptr, &env); SQLSetEnvAttr(env, SQL_ATTR_ODBC_VERSION, reinterpret_cast(SQL_OV_ODBC3), 0); SQLAllocHandle(SQL_HANDLE_DBC, env, &dbc); @@ -88,6 +88,7 @@ class NetworkFailoverIntegrationTest : public BaseFailoverIntegrationTest { if (nullptr != env) { SQLFreeHandle(SQL_HANDLE_ENV, env); } + rds_client.reset(); } }; From 08eed0a0c9d7e98f2870e4a7679beb7532f25afb Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Thu, 20 Apr 2023 22:23:34 -0700 Subject: [PATCH 25/33] Revert "refactor integration tests to use shared pointer for rds client" This reverts commit 2ec1ae5e95f2257166836b439e9ae4d189084317. --- integration/base_failover_integration_test.cc | 31 +++++++++---------- integration/failover_integration_test.cc | 4 +-- integration/failover_performance_test.cc | 3 +- .../network_failover_integration_test.cc | 12 +++---- testframework/build.gradle.kts | 4 ++- 5 files changed, 27 insertions(+), 27 deletions(-) diff --git a/integration/base_failover_integration_test.cc b/integration/base_failover_integration_test.cc index 3c3eb357f..e1b1bf25e 100644 --- a/integration/base_failover_integration_test.cc +++ b/integration/base_failover_integration_test.cc @@ -47,7 +47,6 @@ #include #include #include -#include #include #include #include @@ -220,7 +219,7 @@ class BaseFailoverIntegrationTest : public testing::Test { // Helper functions from integration tests - static std::vector retrieve_topology_via_SDK(std::shared_ptr client, const Aws::String& cluster_id) { + static std::vector retrieve_topology_via_SDK(const Aws::RDS::RDSClient& client, const Aws::String& cluster_id) { std::vector instances; std::string writer; @@ -228,7 +227,7 @@ class BaseFailoverIntegrationTest : public testing::Test { Aws::RDS::Model::DescribeDBClustersRequest rds_req; rds_req.WithDBClusterIdentifier(cluster_id); - auto outcome = client->DescribeDBClusters(rds_req); + auto outcome = client.DescribeDBClusters(rds_req); if (!outcome.IsSuccess()) { throw std::runtime_error("Unable to get cluster topology using SDK."); @@ -253,15 +252,15 @@ class BaseFailoverIntegrationTest : public testing::Test { return instances; } - static Aws::RDS::Model::DBCluster get_DB_cluster(std::shared_ptr client, const Aws::String& cluster_id) { + static Aws::RDS::Model::DBCluster get_DB_cluster(const Aws::RDS::RDSClient& client, const Aws::String& cluster_id) { Aws::RDS::Model::DescribeDBClustersRequest rds_req; rds_req.WithDBClusterIdentifier(cluster_id); - auto outcome = client->DescribeDBClusters(rds_req); + auto outcome = client.DescribeDBClusters(rds_req); const auto result = outcome.GetResult(); return result.GetDBClusters().at(0); } - static void wait_until_cluster_has_right_state(std::shared_ptr client, const Aws::String& cluster_id) { + static void wait_until_cluster_has_right_state(const Aws::RDS::RDSClient& client, const Aws::String& cluster_id) { Aws::String status = get_DB_cluster(client, cluster_id).GetStatus(); while (status != "available") { @@ -270,7 +269,7 @@ class BaseFailoverIntegrationTest : public testing::Test { } } - static Aws::RDS::Model::DBClusterMember get_DB_cluster_writer_instance(std::shared_ptr client, const Aws::String& cluster_id) { + static Aws::RDS::Model::DBClusterMember get_DB_cluster_writer_instance(const Aws::RDS::RDSClient& client, const Aws::String& cluster_id) { Aws::RDS::Model::DBClusterMember instance; const Aws::RDS::Model::DBCluster cluster = get_DB_cluster(client, cluster_id); for (const auto& member : cluster.GetDBClusterMembers()) { @@ -281,11 +280,11 @@ class BaseFailoverIntegrationTest : public testing::Test { return instance; } - static Aws::String get_DB_cluster_writer_instance_id(std::shared_ptr client, const Aws::String& cluster_id) { + static Aws::String get_DB_cluster_writer_instance_id(const Aws::RDS::RDSClient& client, const Aws::String& cluster_id) { return get_DB_cluster_writer_instance(client, cluster_id).GetDBInstanceIdentifier(); } - static void wait_until_writer_instance_changed(std::shared_ptr client, const Aws::String& cluster_id, + static void wait_until_writer_instance_changed(const Aws::RDS::RDSClient& client, const Aws::String& cluster_id, const Aws::String& initial_writer_instance_id) { Aws::String next_cluster_writer_id = get_DB_cluster_writer_instance_id(client, cluster_id); while (initial_writer_instance_id == next_cluster_writer_id) { @@ -294,14 +293,14 @@ class BaseFailoverIntegrationTest : public testing::Test { } } - static void failover_cluster(std::shared_ptr client, const Aws::String& cluster_id, const Aws::String& target_instance_id = "") { + static void failover_cluster(const Aws::RDS::RDSClient& client, const Aws::String& cluster_id, const Aws::String& target_instance_id = "") { wait_until_cluster_has_right_state(client, cluster_id); Aws::RDS::Model::FailoverDBClusterRequest rds_req; rds_req.WithDBClusterIdentifier(cluster_id); if (!target_instance_id.empty()) { rds_req.WithTargetDBInstanceIdentifier(target_instance_id); } - auto outcome = client->FailoverDBCluster(rds_req); + auto outcome = client.FailoverDBCluster(rds_req); } static Aws::String get_random_DB_cluster_reader_instance_id(std::vector readers) { @@ -311,7 +310,7 @@ class BaseFailoverIntegrationTest : public testing::Test { return readers.at(distribution(generator)); } - static bool has_writer_changed(std::shared_ptr client, const Aws::String& cluster_id, std::string initial_writer_id, std::chrono::nanoseconds timeout) { + static bool has_writer_changed(const Aws::RDS::RDSClient& client, const Aws::String& cluster_id, std::string initial_writer_id, std::chrono::nanoseconds timeout) { auto start = std::chrono::high_resolution_clock::now(); std::string current_writer_id = get_DB_cluster_writer_instance_id(client, cluster_id); @@ -326,7 +325,7 @@ class BaseFailoverIntegrationTest : public testing::Test { return true; } - static void failover_cluster_and_wait_until_writer_changed(std::shared_ptr client, const Aws::String& cluster_id, + static void failover_cluster_and_wait_until_writer_changed(const Aws::RDS::RDSClient& client, const Aws::String& cluster_id, const Aws::String& initial_writer_id, const Aws::String& target_writer_id = "") { @@ -358,7 +357,7 @@ class BaseFailoverIntegrationTest : public testing::Test { } } - static Aws::RDS::Model::DBClusterMember get_matched_DBClusterMember(std::shared_ptr client, const Aws::String& cluster_id, + static Aws::RDS::Model::DBClusterMember get_matched_DBClusterMember(const Aws::RDS::RDSClient& client, const Aws::String& cluster_id, const Aws::String& instance_id) { Aws::RDS::Model::DBClusterMember instance; const Aws::RDS::Model::DBCluster cluster = get_DB_cluster(client, cluster_id); @@ -371,11 +370,11 @@ class BaseFailoverIntegrationTest : public testing::Test { return instance; } - static bool is_DB_instance_writer(std::shared_ptr client, const Aws::String& cluster_id, const Aws::String& instance_id) { + static bool is_DB_instance_writer(const Aws::RDS::RDSClient& client, const Aws::String& cluster_id, const Aws::String& instance_id) { return get_matched_DBClusterMember(client, cluster_id, instance_id).GetIsClusterWriter(); } - static bool is_DB_instance_reader(std::shared_ptr client, const Aws::String& cluster_id, const Aws::String& instance_id) { + static bool is_DB_instance_reader(const Aws::RDS::RDSClient& client, const Aws::String& cluster_id, const Aws::String& instance_id) { return !get_matched_DBClusterMember(client, cluster_id, instance_id).GetIsClusterWriter(); } diff --git a/integration/failover_integration_test.cc b/integration/failover_integration_test.cc index 036d5d6ed..0f8226fb3 100644 --- a/integration/failover_integration_test.cc +++ b/integration/failover_integration_test.cc @@ -38,7 +38,7 @@ class FailoverIntegrationTest : public BaseFailoverIntegrationTest { Aws::String(SECRET_ACCESS_KEY), Aws::String(SESSION_TOKEN)); Aws::RDS::RDSClientConfiguration client_config; - std::shared_ptr rds_client; + Aws::RDS::RDSClient rds_client; SQLHENV env = nullptr; SQLHDBC dbc = nullptr; @@ -51,11 +51,11 @@ class FailoverIntegrationTest : public BaseFailoverIntegrationTest { } void SetUp() override { - rds_client = std::make_shared(client_config); SQLAllocHandle(SQL_HANDLE_ENV, nullptr, &env); SQLSetEnvAttr(env, SQL_ATTR_ODBC_VERSION, reinterpret_cast(SQL_OV_ODBC3), 0); SQLAllocHandle(SQL_HANDLE_DBC, env, &dbc); client_config.region = "us-east-2"; + rds_client = Aws::RDS::RDSClient(credentials, client_config); cluster_instances = retrieve_topology_via_SDK(rds_client, cluster_id); writer_id = get_writer_id(cluster_instances); diff --git a/integration/failover_performance_test.cc b/integration/failover_performance_test.cc index ed139e84d..801a87456 100644 --- a/integration/failover_performance_test.cc +++ b/integration/failover_performance_test.cc @@ -151,7 +151,7 @@ class FailoverPerformanceTest : Aws::String(SECRET_ACCESS_KEY), Aws::String(SESSION_TOKEN)); Aws::RDS::RDSClientConfiguration client_config; - std::shared_ptr rds_client; + Aws::RDS::RDSClient rds_client; SQLHENV env = nullptr; SQLHDBC dbc = nullptr; @@ -177,7 +177,6 @@ class FailoverPerformanceTest : } void SetUp() override { - rds_client = std::make_shared(client_config); SQLAllocHandle(SQL_HANDLE_ENV, nullptr, &env); SQLSetEnvAttr(env, SQL_ATTR_ODBC_VERSION, reinterpret_cast(SQL_OV_ODBC3), 0); SQLAllocHandle(SQL_HANDLE_DBC, env, &dbc); diff --git a/integration/network_failover_integration_test.cc b/integration/network_failover_integration_test.cc index 4d580af19..6fbcb6bae 100644 --- a/integration/network_failover_integration_test.cc +++ b/integration/network_failover_integration_test.cc @@ -38,7 +38,7 @@ class NetworkFailoverIntegrationTest : public BaseFailoverIntegrationTest { Aws::String(SECRET_ACCESS_KEY), Aws::String(SESSION_TOKEN)); Aws::RDS::RDSClientConfiguration client_config; - std::shared_ptr rds_client; + Aws::RDS::RDSClient rds_client; SQLHENV env = nullptr; SQLHDBC dbc = nullptr; @@ -51,12 +51,12 @@ class NetworkFailoverIntegrationTest : public BaseFailoverIntegrationTest { } void SetUp() override { - rds_client = std::make_shared(client_config); SQLAllocHandle(SQL_HANDLE_ENV, nullptr, &env); SQLSetEnvAttr(env, SQL_ATTR_ODBC_VERSION, reinterpret_cast(SQL_OV_ODBC3), 0); SQLAllocHandle(SQL_HANDLE_DBC, env, &dbc); client_config.region = "us-east-2"; + rds_client = Aws::RDS::RDSClient(credentials, client_config); for (const auto& x : proxy_map) { enable_connectivity(x.second); @@ -92,7 +92,7 @@ class NetworkFailoverIntegrationTest : public BaseFailoverIntegrationTest { } }; -TEST_F(NetworkFailoverIntegrationTest, connection_test) { +TEST_F(NetworkFailoverIntegrationTest, DISABLED_connection_test) { test_connection(dbc, MYSQL_INSTANCE_1_URL, MYSQL_PORT); test_connection(dbc, MYSQL_INSTANCE_1_URL + PROXIED_DOMAIN_NAME_SUFFIX, MYSQL_PROXY_PORT); test_connection(dbc, MYSQL_CLUSTER_URL, MYSQL_PORT); @@ -101,7 +101,7 @@ TEST_F(NetworkFailoverIntegrationTest, connection_test) { test_connection(dbc, MYSQL_RO_CLUSTER_URL + PROXIED_DOMAIN_NAME_SUFFIX, MYSQL_PROXY_PORT); } -TEST_F(NetworkFailoverIntegrationTest, lost_connection_to_writer) { +TEST_F(NetworkFailoverIntegrationTest, DISABLED_lost_connection_to_writer) { const std::string server = get_proxied_endpoint(writer_id); connection_string = builder.withServer(server).withFailoverTimeout(GLOBAL_FAILOVER_TIMEOUT).build(); EXPECT_EQ(SQL_SUCCESS, SQLDriverConnect(dbc, nullptr, AS_SQLCHAR(connection_string.c_str()), SQL_NTS, conn_out, MAX_NAME_LEN, &len, SQL_DRIVER_NOPROMPT)); @@ -122,7 +122,7 @@ TEST_F(NetworkFailoverIntegrationTest, lost_connection_to_writer) { EXPECT_EQ(SQL_SUCCESS, SQLDisconnect(dbc)); } -TEST_F(NetworkFailoverIntegrationTest, use_same_connection_after_failing_failover) { +TEST_F(NetworkFailoverIntegrationTest, DISABLED_use_same_connection_after_failing_failover) { const std::string server = get_proxied_endpoint(writer_id); connection_string = builder.withServer(server).withFailoverTimeout(GLOBAL_FAILOVER_TIMEOUT).build(); EXPECT_EQ(SQL_SUCCESS, SQLDriverConnect(dbc, nullptr, AS_SQLCHAR(connection_string.c_str()), SQL_NTS, conn_out, MAX_NAME_LEN, &len, SQL_DRIVER_NOPROMPT)); @@ -149,7 +149,7 @@ TEST_F(NetworkFailoverIntegrationTest, use_same_connection_after_failing_failove EXPECT_EQ(SQL_SUCCESS, SQLDisconnect(dbc)); } -TEST_F(NetworkFailoverIntegrationTest, lost_connection_to_all_readers) { +TEST_F(NetworkFailoverIntegrationTest, DISABLED_lost_connection_to_all_readers) { connection_string = builder.withServer(reader_endpoint).build(); EXPECT_EQ(SQL_SUCCESS, SQLDriverConnect(dbc, nullptr, AS_SQLCHAR(connection_string.c_str()), SQL_NTS, conn_out, MAX_NAME_LEN, &len, SQL_DRIVER_NOPROMPT)); diff --git a/testframework/build.gradle.kts b/testframework/build.gradle.kts index c10816d24..d2a75cf7c 100644 --- a/testframework/build.gradle.kts +++ b/testframework/build.gradle.kts @@ -6,7 +6,9 @@ group = "software.aws.rds" version = "1.0-SNAPSHOT" repositories { - mavenCentral() + maven { + url = uri("https://repo1.maven.org/maven2") + } } dependencies { From 6b30d0d04d79460116a84cfb801d5872a37cff19 Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Thu, 20 Apr 2023 22:24:05 -0700 Subject: [PATCH 26/33] disable remote debug --- .github/workflows/dockerized.yml | 22 +++++++++---------- .../java/host/IntegrationContainerTest.java | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/dockerized.yml b/.github/workflows/dockerized.yml index f86b426aa..3d88e2415 100644 --- a/.github/workflows/dockerized.yml +++ b/.github/workflows/dockerized.yml @@ -181,17 +181,17 @@ jobs: echo "TEMP_AWS_SECRET_ACCESS_KEY=${creds[1]}" >> $GITHUB_ENV echo "TEMP_AWS_SESSION_TOKEN=${creds[2]}" >> $GITHUB_ENV - - name: Setup upterm session - uses: lhotari/action-upterm@v1 - env: - TEST_DSN: atlas - TEST_USERNAME: ${{ secrets.TEST_USERNAME }} - TEST_PASSWORD: ${{ secrets.TEST_PASSWORD }} - TEST_DB_CLUSTER_IDENTIFIER: ${{ secrets.TEST_DB_CLUSTER_IDENTIFIER }}-${{ github.run_id }}-${{ github.run_attempt }} - AWS_ACCESS_KEY_ID: ${{ env.TEMP_AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ env.TEMP_AWS_SECRET_ACCESS_KEY }} - AWS_SESSION_TOKEN: ${{ env.TEMP_AWS_SESSION_TOKEN }} - DRIVER_PATH: ${{ github.workspace }} + # - name: Setup upterm session + # uses: lhotari/action-upterm@v1 + # env: + # TEST_DSN: atlas + # TEST_USERNAME: ${{ secrets.TEST_USERNAME }} + # TEST_PASSWORD: ${{ secrets.TEST_PASSWORD }} + # TEST_DB_CLUSTER_IDENTIFIER: ${{ secrets.TEST_DB_CLUSTER_IDENTIFIER }}-${{ github.run_id }}-${{ github.run_attempt }} + # AWS_ACCESS_KEY_ID: ${{ env.TEMP_AWS_ACCESS_KEY_ID }} + # AWS_SECRET_ACCESS_KEY: ${{ env.TEMP_AWS_SECRET_ACCESS_KEY }} + # AWS_SESSION_TOKEN: ${{ env.TEMP_AWS_SESSION_TOKEN }} + # DRIVER_PATH: ${{ github.workspace }} - name: 'Run Integration Tests' working-directory: ${{ github.workspace }}/testframework diff --git a/testframework/src/test/java/host/IntegrationContainerTest.java b/testframework/src/test/java/host/IntegrationContainerTest.java index bbc544bf9..059a9fcd2 100644 --- a/testframework/src/test/java/host/IntegrationContainerTest.java +++ b/testframework/src/test/java/host/IntegrationContainerTest.java @@ -133,7 +133,7 @@ public void testRunCommunityTestInContainer() public void testRunFailoverTestInContainer() throws UnsupportedOperationException, IOException, InterruptedException { setupFailoverIntegrationTests(NETWORK); - Thread.sleep(Long.MAX_VALUE); + //Thread.sleep(Long.MAX_VALUE); containerHelper.runExecutable(testContainer, "build/integration/bin", "integration"); } From 158c5d6d8e8ce3a6db381b19d6add14269307bdd Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Thu, 20 Apr 2023 22:32:30 -0700 Subject: [PATCH 27/33] fix build --- integration/failover_integration_test.cc | 1 - integration/failover_performance_test.cc | 1 - integration/network_failover_integration_test.cc | 1 - 3 files changed, 3 deletions(-) diff --git a/integration/failover_integration_test.cc b/integration/failover_integration_test.cc index 0f8226fb3..246eb90fc 100644 --- a/integration/failover_integration_test.cc +++ b/integration/failover_integration_test.cc @@ -78,7 +78,6 @@ class FailoverIntegrationTest : public BaseFailoverIntegrationTest { if (nullptr != env) { SQLFreeHandle(SQL_HANDLE_ENV, env); } - rds_client.reset(); } }; diff --git a/integration/failover_performance_test.cc b/integration/failover_performance_test.cc index 801a87456..939162a94 100644 --- a/integration/failover_performance_test.cc +++ b/integration/failover_performance_test.cc @@ -194,7 +194,6 @@ class FailoverPerformanceTest : if (nullptr != env) { SQLFreeHandle(SQL_HANDLE_ENV, env); } - rds_client.reset(); } // Only run if passed in parameter: data, is a type of BASE_PERFORMANCE_DATA diff --git a/integration/network_failover_integration_test.cc b/integration/network_failover_integration_test.cc index 6fbcb6bae..9beae774b 100644 --- a/integration/network_failover_integration_test.cc +++ b/integration/network_failover_integration_test.cc @@ -88,7 +88,6 @@ class NetworkFailoverIntegrationTest : public BaseFailoverIntegrationTest { if (nullptr != env) { SQLFreeHandle(SQL_HANDLE_ENV, env); } - rds_client.reset(); } }; From 8fd5078f10f7a628d378861c71ed8406b1268f4a Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Fri, 21 Apr 2023 10:50:01 -0700 Subject: [PATCH 28/33] debug --- .github/workflows/dockerized.yml | 22 +++++++++---------- testframework/build.gradle.kts | 4 +--- .../java/host/IntegrationContainerTest.java | 2 +- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/.github/workflows/dockerized.yml b/.github/workflows/dockerized.yml index 3d88e2415..f86b426aa 100644 --- a/.github/workflows/dockerized.yml +++ b/.github/workflows/dockerized.yml @@ -181,17 +181,17 @@ jobs: echo "TEMP_AWS_SECRET_ACCESS_KEY=${creds[1]}" >> $GITHUB_ENV echo "TEMP_AWS_SESSION_TOKEN=${creds[2]}" >> $GITHUB_ENV - # - name: Setup upterm session - # uses: lhotari/action-upterm@v1 - # env: - # TEST_DSN: atlas - # TEST_USERNAME: ${{ secrets.TEST_USERNAME }} - # TEST_PASSWORD: ${{ secrets.TEST_PASSWORD }} - # TEST_DB_CLUSTER_IDENTIFIER: ${{ secrets.TEST_DB_CLUSTER_IDENTIFIER }}-${{ github.run_id }}-${{ github.run_attempt }} - # AWS_ACCESS_KEY_ID: ${{ env.TEMP_AWS_ACCESS_KEY_ID }} - # AWS_SECRET_ACCESS_KEY: ${{ env.TEMP_AWS_SECRET_ACCESS_KEY }} - # AWS_SESSION_TOKEN: ${{ env.TEMP_AWS_SESSION_TOKEN }} - # DRIVER_PATH: ${{ github.workspace }} + - name: Setup upterm session + uses: lhotari/action-upterm@v1 + env: + TEST_DSN: atlas + TEST_USERNAME: ${{ secrets.TEST_USERNAME }} + TEST_PASSWORD: ${{ secrets.TEST_PASSWORD }} + TEST_DB_CLUSTER_IDENTIFIER: ${{ secrets.TEST_DB_CLUSTER_IDENTIFIER }}-${{ github.run_id }}-${{ github.run_attempt }} + AWS_ACCESS_KEY_ID: ${{ env.TEMP_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ env.TEMP_AWS_SECRET_ACCESS_KEY }} + AWS_SESSION_TOKEN: ${{ env.TEMP_AWS_SESSION_TOKEN }} + DRIVER_PATH: ${{ github.workspace }} - name: 'Run Integration Tests' working-directory: ${{ github.workspace }}/testframework diff --git a/testframework/build.gradle.kts b/testframework/build.gradle.kts index d2a75cf7c..c10816d24 100644 --- a/testframework/build.gradle.kts +++ b/testframework/build.gradle.kts @@ -6,9 +6,7 @@ group = "software.aws.rds" version = "1.0-SNAPSHOT" repositories { - maven { - url = uri("https://repo1.maven.org/maven2") - } + mavenCentral() } dependencies { diff --git a/testframework/src/test/java/host/IntegrationContainerTest.java b/testframework/src/test/java/host/IntegrationContainerTest.java index 059a9fcd2..bbc544bf9 100644 --- a/testframework/src/test/java/host/IntegrationContainerTest.java +++ b/testframework/src/test/java/host/IntegrationContainerTest.java @@ -133,7 +133,7 @@ public void testRunCommunityTestInContainer() public void testRunFailoverTestInContainer() throws UnsupportedOperationException, IOException, InterruptedException { setupFailoverIntegrationTests(NETWORK); - //Thread.sleep(Long.MAX_VALUE); + Thread.sleep(Long.MAX_VALUE); containerHelper.runExecutable(testContainer, "build/integration/bin", "integration"); } From 403054eac270bba8d5c43b1603189ab67c764427 Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Fri, 21 Apr 2023 11:52:11 -0700 Subject: [PATCH 29/33] specifically call thread pool stop in myodbc_end --- driver/dll.cc | 2 +- integration/network_failover_integration_test.cc | 8 ++++---- testframework/src/test/java/utility/ContainerHelper.java | 3 ++- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/driver/dll.cc b/driver/dll.cc index 0fb572a1a..958319cd6 100644 --- a/driver/dll.cc +++ b/driver/dll.cc @@ -153,7 +153,7 @@ void myodbc_end() */ my_thread_end_wait_time= 0; #endif - + failover_thread_pool.stop(true); mysql_library_end(); } } diff --git a/integration/network_failover_integration_test.cc b/integration/network_failover_integration_test.cc index 9beae774b..e9a7cde8c 100644 --- a/integration/network_failover_integration_test.cc +++ b/integration/network_failover_integration_test.cc @@ -91,7 +91,7 @@ class NetworkFailoverIntegrationTest : public BaseFailoverIntegrationTest { } }; -TEST_F(NetworkFailoverIntegrationTest, DISABLED_connection_test) { +TEST_F(NetworkFailoverIntegrationTest, connection_test) { test_connection(dbc, MYSQL_INSTANCE_1_URL, MYSQL_PORT); test_connection(dbc, MYSQL_INSTANCE_1_URL + PROXIED_DOMAIN_NAME_SUFFIX, MYSQL_PROXY_PORT); test_connection(dbc, MYSQL_CLUSTER_URL, MYSQL_PORT); @@ -100,7 +100,7 @@ TEST_F(NetworkFailoverIntegrationTest, DISABLED_connection_test) { test_connection(dbc, MYSQL_RO_CLUSTER_URL + PROXIED_DOMAIN_NAME_SUFFIX, MYSQL_PROXY_PORT); } -TEST_F(NetworkFailoverIntegrationTest, DISABLED_lost_connection_to_writer) { +TEST_F(NetworkFailoverIntegrationTest, lost_connection_to_writer) { const std::string server = get_proxied_endpoint(writer_id); connection_string = builder.withServer(server).withFailoverTimeout(GLOBAL_FAILOVER_TIMEOUT).build(); EXPECT_EQ(SQL_SUCCESS, SQLDriverConnect(dbc, nullptr, AS_SQLCHAR(connection_string.c_str()), SQL_NTS, conn_out, MAX_NAME_LEN, &len, SQL_DRIVER_NOPROMPT)); @@ -121,7 +121,7 @@ TEST_F(NetworkFailoverIntegrationTest, DISABLED_lost_connection_to_writer) { EXPECT_EQ(SQL_SUCCESS, SQLDisconnect(dbc)); } -TEST_F(NetworkFailoverIntegrationTest, DISABLED_use_same_connection_after_failing_failover) { +TEST_F(NetworkFailoverIntegrationTest, use_same_connection_after_failing_failover) { const std::string server = get_proxied_endpoint(writer_id); connection_string = builder.withServer(server).withFailoverTimeout(GLOBAL_FAILOVER_TIMEOUT).build(); EXPECT_EQ(SQL_SUCCESS, SQLDriverConnect(dbc, nullptr, AS_SQLCHAR(connection_string.c_str()), SQL_NTS, conn_out, MAX_NAME_LEN, &len, SQL_DRIVER_NOPROMPT)); @@ -148,7 +148,7 @@ TEST_F(NetworkFailoverIntegrationTest, DISABLED_use_same_connection_after_failin EXPECT_EQ(SQL_SUCCESS, SQLDisconnect(dbc)); } -TEST_F(NetworkFailoverIntegrationTest, DISABLED_lost_connection_to_all_readers) { +TEST_F(NetworkFailoverIntegrationTest, lost_connection_to_all_readers) { connection_string = builder.withServer(reader_endpoint).build(); EXPECT_EQ(SQL_SUCCESS, SQLDriverConnect(dbc, nullptr, AS_SQLCHAR(connection_string.c_str()), SQL_NTS, conn_out, MAX_NAME_LEN, &len, SQL_DRIVER_NOPROMPT)); diff --git a/testframework/src/test/java/utility/ContainerHelper.java b/testframework/src/test/java/utility/ContainerHelper.java index 1c3eee3aa..3007d6903 100644 --- a/testframework/src/test/java/utility/ContainerHelper.java +++ b/testframework/src/test/java/utility/ContainerHelper.java @@ -66,7 +66,7 @@ public void runExecutable(GenericContainer container, String testDir, String throws IOException, InterruptedException { System.out.println("==== Container console feed ==== >>>>"); Consumer consumer = new ConsoleConsumer(); - Long exitCode = execInContainer(container, consumer, String.format("./%s/%s", testDir, testExecutable)); + Long exitCode = execInContainer(container, consumer, String.format("./%s/%s --", testDir, testExecutable)); System.out.println("==== Container console feed ==== <<<<"); assertEquals(0, exitCode, "Some tests failed."); } @@ -93,6 +93,7 @@ public GenericContainer createTestContainer( .withDockerfileFromBuilder(builder -> builder .from(testContainerImageName) + .run("yum", "-y", "install", "gdb") .run("mkdir", "app") .workDir("/app") .entryPoint("/bin/sh -c \"while true; do sleep 30; done;\"") From e8397970330229d00275f0b05396cca6e4a09bf0 Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Fri, 21 Apr 2023 13:27:17 -0700 Subject: [PATCH 30/33] move failover thread pool inside env --- .github/workflows/dockerized.yml | 22 +++++++++---------- driver/dll.cc | 1 - driver/driver.h | 2 ++ driver/failover.h | 7 +++--- driver/failover_handler.cc | 6 ++--- driver/failover_reader_handler.cc | 20 +++++++++-------- driver/failover_writer_handler.cc | 12 +++++----- driver/handle.cc | 2 -- .../java/host/IntegrationContainerTest.java | 2 +- unit_testing/failover_reader_handler_test.cc | 22 +++++++++---------- unit_testing/failover_writer_handler_test.cc | 17 +++++++------- unit_testing/mock_objects.h | 2 +- 12 files changed, 59 insertions(+), 56 deletions(-) diff --git a/.github/workflows/dockerized.yml b/.github/workflows/dockerized.yml index f86b426aa..3d88e2415 100644 --- a/.github/workflows/dockerized.yml +++ b/.github/workflows/dockerized.yml @@ -181,17 +181,17 @@ jobs: echo "TEMP_AWS_SECRET_ACCESS_KEY=${creds[1]}" >> $GITHUB_ENV echo "TEMP_AWS_SESSION_TOKEN=${creds[2]}" >> $GITHUB_ENV - - name: Setup upterm session - uses: lhotari/action-upterm@v1 - env: - TEST_DSN: atlas - TEST_USERNAME: ${{ secrets.TEST_USERNAME }} - TEST_PASSWORD: ${{ secrets.TEST_PASSWORD }} - TEST_DB_CLUSTER_IDENTIFIER: ${{ secrets.TEST_DB_CLUSTER_IDENTIFIER }}-${{ github.run_id }}-${{ github.run_attempt }} - AWS_ACCESS_KEY_ID: ${{ env.TEMP_AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ env.TEMP_AWS_SECRET_ACCESS_KEY }} - AWS_SESSION_TOKEN: ${{ env.TEMP_AWS_SESSION_TOKEN }} - DRIVER_PATH: ${{ github.workspace }} + # - name: Setup upterm session + # uses: lhotari/action-upterm@v1 + # env: + # TEST_DSN: atlas + # TEST_USERNAME: ${{ secrets.TEST_USERNAME }} + # TEST_PASSWORD: ${{ secrets.TEST_PASSWORD }} + # TEST_DB_CLUSTER_IDENTIFIER: ${{ secrets.TEST_DB_CLUSTER_IDENTIFIER }}-${{ github.run_id }}-${{ github.run_attempt }} + # AWS_ACCESS_KEY_ID: ${{ env.TEMP_AWS_ACCESS_KEY_ID }} + # AWS_SECRET_ACCESS_KEY: ${{ env.TEMP_AWS_SECRET_ACCESS_KEY }} + # AWS_SESSION_TOKEN: ${{ env.TEMP_AWS_SESSION_TOKEN }} + # DRIVER_PATH: ${{ github.workspace }} - name: 'Run Integration Tests' working-directory: ${{ github.workspace }}/testframework diff --git a/driver/dll.cc b/driver/dll.cc index 958319cd6..d452bb15a 100644 --- a/driver/dll.cc +++ b/driver/dll.cc @@ -153,7 +153,6 @@ void myodbc_end() */ my_thread_end_wait_time= 0; #endif - failover_thread_pool.stop(true); mysql_library_end(); } } diff --git a/driver/driver.h b/driver/driver.h index d25adb780..cb528e10e 100644 --- a/driver/driver.h +++ b/driver/driver.h @@ -37,6 +37,7 @@ #define __DRIVER_H__ #include +#include #include "../MYODBC_MYSQL.h" #include "../MYODBC_CONF.h" @@ -593,6 +594,7 @@ struct ENV std::list conn_list; MYERROR error; std::mutex lock; + ctpl::thread_pool failover_thread_pool; ENV(SQLINTEGER ver) : odbc_ver(ver) {} diff --git a/driver/failover.h b/driver/failover.h index 2b1c0957a..248ed1084 100644 --- a/driver/failover.h +++ b/driver/failover.h @@ -36,9 +36,6 @@ #include "mylog.h" #include -#include - -extern ctpl::thread_pool failover_thread_pool; struct READER_FAILOVER_RESULT { bool connected = false; @@ -75,6 +72,7 @@ class FAILOVER_READER_HANDLER { FAILOVER_READER_HANDLER( std::shared_ptr topology_service, std::shared_ptr connection_handler, + ctpl::thread_pool& thread_pool, int failover_timeout_ms, int failover_reader_connect_timeout, bool enable_strict_reader_failover, unsigned long dbc_id, bool enable_logging = false); @@ -107,6 +105,7 @@ class FAILOVER_READER_HANDLER { bool enable_strict_reader_failover = false; std::shared_ptr logger = nullptr; unsigned long dbc_id = 0; + ctpl::thread_pool& thread_pool; }; // This struct holds results of Writer Failover Process. @@ -138,6 +137,7 @@ class FAILOVER_WRITER_HANDLER { std::shared_ptr topology_service, std::shared_ptr reader_handler, std::shared_ptr connection_handler, + ctpl::thread_pool& thread_pool, int writer_failover_timeout_ms, int read_topology_interval_ms, int reconnect_writer_interval_ms, unsigned long dbc_id, bool enable_logging = false); ~FAILOVER_WRITER_HANDLER(); @@ -155,6 +155,7 @@ class FAILOVER_WRITER_HANDLER { std::shared_ptr reader_handler; std::shared_ptr logger = nullptr; unsigned long dbc_id = 0; + ctpl::thread_pool& thread_pool; }; class FAILOVER_HANDLER { diff --git a/driver/failover_handler.cc b/driver/failover_handler.cc index c7a4dd4a5..dc6bf757c 100644 --- a/driver/failover_handler.cc +++ b/driver/failover_handler.cc @@ -80,12 +80,12 @@ FAILOVER_HANDLER::FAILOVER_HANDLER(DBC* dbc, DataSource* ds, this->connection_handler = connection_handler; this->failover_reader_handler = std::make_shared( - this->topology_service, this->connection_handler, ds->failover_timeout, + this->topology_service, this->connection_handler, dbc->env->failover_thread_pool, ds->failover_timeout, ds->failover_reader_connect_timeout, ds->enable_strict_reader_failover, dbc->id, ds->save_queries); this->failover_writer_handler = std::make_shared( this->topology_service, this->failover_reader_handler, - this->connection_handler, ds->failover_timeout, + this->connection_handler, dbc->env->failover_thread_pool, ds->failover_timeout, ds->failover_topology_refresh_rate, ds->failover_writer_reconnect_interval, dbc->id, ds->save_queries); this->metrics_container = metrics_container; @@ -389,7 +389,7 @@ SQLRETURN FAILOVER_HANDLER::create_connection_and_initialize_topology() { rc = reconnect(true); } if (is_failover_enabled()) { - failover_thread_pool.resize(current_topology->total_hosts()); + this->dbc->env->failover_thread_pool.resize(current_topology->total_hosts()); } } diff --git a/driver/failover_reader_handler.cc b/driver/failover_reader_handler.cc index 30d45ac31..13ea6dcb0 100644 --- a/driver/failover_reader_handler.cc +++ b/driver/failover_reader_handler.cc @@ -39,11 +39,13 @@ FAILOVER_READER_HANDLER::FAILOVER_READER_HANDLER( std::shared_ptr topology_service, std::shared_ptr connection_handler, + ctpl::thread_pool& thread_pool, int failover_timeout_ms, int failover_reader_connect_timeout, bool enable_strict_reader_failover, unsigned long dbc_id, bool enable_logging) : topology_service{topology_service}, connection_handler{connection_handler}, + thread_pool{thread_pool}, max_failover_timeout_ms{failover_timeout_ms}, reader_connect_timeout_ms{failover_reader_connect_timeout}, enable_strict_reader_failover{enable_strict_reader_failover}, @@ -68,11 +70,11 @@ std::shared_ptr FAILOVER_READER_HANDLER::failover( const auto start = std::chrono::steady_clock::now(); auto global_sync = std::make_shared(1); - if (failover_thread_pool.n_idle() == 0) { - MYLOG_TRACE(logger, dbc_id, "[FAILOVER_READER_HANDLER] Resizing thread pool to %d", failover_thread_pool.size() + 1); - failover_thread_pool.resize(failover_thread_pool.size() + 1); + if (thread_pool.n_idle() == 0) { + MYLOG_TRACE(logger, dbc_id, "[FAILOVER_READER_HANDLER] Resizing thread pool to %d", thread_pool.size() + 1); + thread_pool.resize(thread_pool.size() + 1); } - auto reader_result_future = failover_thread_pool.push([=](int id) { + auto reader_result_future = thread_pool.push([=](int id) { while (!global_sync->is_completed()) { auto hosts_list = build_hosts_list(current_topology, !enable_strict_reader_failover); auto reader_result = get_connection_from_hosts(hosts_list, global_sync); @@ -222,14 +224,14 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ //auto result1 = first_reader_task.get_future(); //auto result2 = second_reader_task.get_future(); - if (failover_thread_pool.n_idle() <= 1) { - int size = failover_thread_pool.size() + 2 - failover_thread_pool.n_idle(); + if (thread_pool.n_idle() <= 1) { + int size = thread_pool.size() + 2 - thread_pool.n_idle(); MYLOG_TRACE(logger, dbc_id, "[FAILOVER_READER_HANDLER] Resizing thread pool to %d", size); - failover_thread_pool.resize(size); + thread_pool.resize(size); } - auto first_result = failover_thread_pool.push(std::move(first_connection_handler), first_reader_host, local_sync, first_connection_result); + auto first_result = thread_pool.push(std::move(first_connection_handler), first_reader_host, local_sync, first_connection_result); //std::thread task_thread1(std::move(first_reader_task), first_reader_host,local_sync, first_connection_result); //std::thread task_thread2; @@ -237,7 +239,7 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ if (!odd_hosts_number) { auto second_reader_host = hosts_list.at(i + 1); //task_thread2 = std::thread(std::move(second_reader_task), second_reader_host, local_sync,second_connection_result); - second_future = std::move(failover_thread_pool.push(std::move(second_connection_handler), second_reader_host, local_sync, second_connection_result)); + second_future = std::move(thread_pool.push(std::move(second_connection_handler), second_reader_host, local_sync, second_connection_result)); } // Wait for task complete signal with specified timeout diff --git a/driver/failover_writer_handler.cc b/driver/failover_writer_handler.cc index bcdaea29b..13b704cfb 100644 --- a/driver/failover_writer_handler.cc +++ b/driver/failover_writer_handler.cc @@ -281,11 +281,13 @@ FAILOVER_WRITER_HANDLER::FAILOVER_WRITER_HANDLER( std::shared_ptr topology_service, std::shared_ptr reader_handler, std::shared_ptr connection_handler, + ctpl::thread_pool& thread_pool, int writer_failover_timeout_ms, int read_topology_interval_ms, int reconnect_writer_interval_ms, unsigned long dbc_id, bool enable_logging) : connection_handler{connection_handler}, topology_service{topology_service}, reader_handler{reader_handler}, + thread_pool{thread_pool}, writer_failover_timeout_ms{writer_failover_timeout_ms}, read_topology_interval_ms{read_topology_interval_ms}, reconnect_writer_interval_ms{reconnect_writer_interval_ms}, @@ -333,15 +335,15 @@ std::shared_ptr FAILOVER_WRITER_HANDLER::failover( //std::thread reconnect_thread(std::move(reconnect_task), original_writer, failover_sync, reconnect_result); //std::thread wait_new_writer_thread(std::move((wait_new_writer_task), original_writer, failover_sync, new_writer_result); - if (failover_thread_pool.n_idle() <= 1) { - int size = failover_thread_pool.size() + 2 - failover_thread_pool.n_idle(); + if (thread_pool.n_idle() <= 1) { + int size = thread_pool.size() + 2 - thread_pool.n_idle(); MYLOG_TRACE(logger, dbc_id, "[FAILOVER_WRITER_HANDLER] Resizing thread pool to %d", size); - failover_thread_pool.resize(size); + thread_pool.resize(size); } - auto reconnect_future = failover_thread_pool.push(std::move(reconnect_handler), original_writer, failover_sync, reconnect_result); - auto wait_new_writer_future = failover_thread_pool.push(std::move(new_writer_handler), original_writer, failover_sync, new_writer_result); + auto reconnect_future = thread_pool.push(std::move(reconnect_handler), original_writer, failover_sync, reconnect_result); + auto wait_new_writer_future = thread_pool.push(std::move(new_writer_handler), original_writer, failover_sync, new_writer_result); // Wait for task complete signal with specified timeout failover_sync->wait_and_complete(writer_failover_timeout_ms); diff --git a/driver/handle.cc b/driver/handle.cc index 83f541b7f..8b680255b 100644 --- a/driver/handle.cc +++ b/driver/handle.cc @@ -55,8 +55,6 @@ #include -ctpl::thread_pool failover_thread_pool; - thread_local long thread_count = 0; std::mutex g_lock; diff --git a/testframework/src/test/java/host/IntegrationContainerTest.java b/testframework/src/test/java/host/IntegrationContainerTest.java index bbc544bf9..059a9fcd2 100644 --- a/testframework/src/test/java/host/IntegrationContainerTest.java +++ b/testframework/src/test/java/host/IntegrationContainerTest.java @@ -133,7 +133,7 @@ public void testRunCommunityTestInContainer() public void testRunFailoverTestInContainer() throws UnsupportedOperationException, IOException, InterruptedException { setupFailoverIntegrationTests(NETWORK); - Thread.sleep(Long.MAX_VALUE); + //Thread.sleep(Long.MAX_VALUE); containerHelper.runExecutable(testContainer, "build/integration/bin", "integration"); } diff --git a/unit_testing/failover_reader_handler_test.cc b/unit_testing/failover_reader_handler_test.cc index 6153ba399..5b34e2a40 100644 --- a/unit_testing/failover_reader_handler_test.cc +++ b/unit_testing/failover_reader_handler_test.cc @@ -61,12 +61,14 @@ class FailoverReaderHandlerTest : public testing::Test { MOCK_CONNECTION_PROXY* mock_reader_a_proxy; MOCK_CONNECTION_PROXY* mock_reader_b_proxy; MOCK_CONNECTION_PROXY* mock_writer_proxy; + ctpl::thread_pool failover_thread_pool; static std::shared_ptr topology; std::shared_ptr mock_ts; std::shared_ptr mock_connection_handler; - std::shared_ptr mock_sync; + std::shared_ptr mock_sync; + static void SetUpTestSuite() { reader_a_host = std::make_shared("reader-a-host" + HOST_SUFFIX, 1234, UP, false); @@ -92,7 +94,6 @@ class FailoverReaderHandlerTest : public testing::Test { void SetUp() override { allocate_odbc_handles(env, dbc, ds); - failover_thread_pool.resize(3); reader_a_host->set_host_state(UP); reader_b_host->set_host_state(UP); reader_c_host->set_host_state(DOWN); @@ -106,7 +107,6 @@ class FailoverReaderHandlerTest : public testing::Test { void TearDown() override { cleanup_odbc_handles(env, dbc, ds); - failover_thread_pool.stop(true); } }; @@ -158,7 +158,7 @@ TEST_F(FailoverReaderHandlerTest, GenerateTopology) { } TEST_F(FailoverReaderHandlerTest, BuildHostsList) { - FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, 60000, 30000, false, 0); + FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, failover_thread_pool, 60000, 30000, false, 0); std::shared_ptr topology_info; std::vector> hosts_list; @@ -230,7 +230,7 @@ TEST_F(FailoverReaderHandlerTest, GetConnectionFromHosts_Failure) { EXPECT_CALL(*mock_ts, mark_host_down(reader_c_host)).Times(1); EXPECT_CALL(*mock_ts, mark_host_down(writer_host)).Times(1); - FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, 60000, 30000, false, 0); + FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, failover_thread_pool, 60000, 30000, false, 0); auto hosts_list = reader_handler.build_hosts_list(topology, true); auto result = reader_handler.get_connection_from_hosts(hosts_list, mock_sync); @@ -253,7 +253,7 @@ TEST_F(FailoverReaderHandlerTest, GetConnectionFromHosts_Success_Reader) { // Reader C will not be used as it is put at the end. Will only try to connect to A and B EXPECT_CALL(*mock_ts, mark_host_up(reader_a_host)).Times(1); - FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, 60000, 30000, false, 0); + FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, failover_thread_pool, 60000, 30000, false, 0); auto hosts_list = reader_handler.build_hosts_list(topology, true); auto result = reader_handler.get_connection_from_hosts(hosts_list, mock_sync); @@ -279,7 +279,7 @@ TEST_F(FailoverReaderHandlerTest, GetConnectionFromHosts_Success_Writer) { EXPECT_CALL(*mock_ts, mark_host_up(writer_host)).Times(1); - FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, 60000, 30000, false, 0); + FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, failover_thread_pool, 60000, 30000, false, 0); auto hosts_list = reader_handler.build_hosts_list(topology, true); auto result = reader_handler.get_connection_from_hosts(hosts_list, mock_sync); @@ -316,7 +316,7 @@ TEST_F(FailoverReaderHandlerTest, GetConnectionFromHosts_FastestHost) { return mock_reader_b_proxy; })); - FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, 60000, 30000, false, 0); + FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, failover_thread_pool, 60000, 30000, false, 0); auto hosts_list = reader_handler.build_hosts_list(topology, true); auto result = reader_handler.get_connection_from_hosts(hosts_list, mock_sync); @@ -355,7 +355,7 @@ TEST_F(FailoverReaderHandlerTest, GetConnectionFromHosts_Timeout) { EXPECT_CALL(*mock_ts, mark_host_down(_)).Times(AnyNumber()); EXPECT_CALL(*mock_ts, mark_host_down(writer_host)).Times(1); - FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, 60000, 1000, false, 0); + FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, failover_thread_pool, 60000, 1000, false, 0); auto hosts_list = reader_handler.build_hosts_list(topology, true); auto result = reader_handler.get_connection_from_hosts(hosts_list, mock_sync); @@ -375,7 +375,7 @@ TEST_F(FailoverReaderHandlerTest, Failover_Failure) { EXPECT_CALL(*mock_ts, mark_host_down(reader_c_host)).Times(AtLeast(1)); EXPECT_CALL(*mock_ts, mark_host_down(writer_host)).Times(AtLeast(1)); - FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, 3000, 1000, false, 0); + FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, failover_thread_pool, 3000, 1000, false, 0); auto result = reader_handler.failover(topology); EXPECT_FALSE(result->connected); @@ -412,7 +412,7 @@ TEST_F(FailoverReaderHandlerTest, Failover_Success_Reader) { return mock_reader_b_proxy; })); - FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, 60000, 30000, false, 0); + FAILOVER_READER_HANDLER reader_handler(mock_ts, mock_connection_handler, failover_thread_pool, 60000, 30000, false, 0); auto result = reader_handler.failover(current_topology); EXPECT_TRUE(result->connected); diff --git a/unit_testing/failover_writer_handler_test.cc b/unit_testing/failover_writer_handler_test.cc index 367b5202f..77ecbfb7f 100644 --- a/unit_testing/failover_writer_handler_test.cc +++ b/unit_testing/failover_writer_handler_test.cc @@ -69,6 +69,7 @@ class FailoverWriterHandlerTest : public testing::Test { MOCK_CONNECTION_PROXY* mock_reader_b_proxy; MOCK_CONNECTION_PROXY* mock_writer_proxy; MOCK_CONNECTION_PROXY* mock_new_writer_proxy; + ctpl::thread_pool failover_thread_pool; static void SetUpTestSuite() {} @@ -76,7 +77,6 @@ class FailoverWriterHandlerTest : public testing::Test { void SetUp() override { allocate_odbc_handles(env, dbc, ds); - failover_thread_pool.resize(3); writer_instance_name = "writer-host"; new_writer_instance_name = "new-writer-host"; @@ -104,7 +104,6 @@ class FailoverWriterHandlerTest : public testing::Test { void TearDown() override { cleanup_odbc_handles(env, dbc, ds); - failover_thread_pool.stop(true); } }; @@ -135,7 +134,7 @@ TEST_F(FailoverWriterHandlerTest, ReconnectToWriter_TaskBEmptyReaderResult) { .WillRepeatedly(Return(nullptr)); FAILOVER_WRITER_HANDLER writer_handler( - mock_ts, mock_reader_handler, mock_connection_handler, 5000, 2000, 2000, 0); + mock_ts, mock_reader_handler, mock_connection_handler, failover_thread_pool, 5000, 2000, 2000, 0); auto result = writer_handler.failover(current_topology); EXPECT_TRUE(result->connected); @@ -196,7 +195,7 @@ TEST_F(FailoverWriterHandlerTest, ReconnectToWriter_SlowReaderA) { mock_reader_a_proxy)))); FAILOVER_WRITER_HANDLER writer_handler( - mock_ts, mock_reader_handler, mock_connection_handler, 60000, 5000, 5000, 0); + mock_ts, mock_reader_handler, mock_connection_handler, failover_thread_pool, 60000, 5000, 5000, 0); const auto result = writer_handler.failover(current_topology); EXPECT_TRUE(result->connected); @@ -241,7 +240,7 @@ TEST_F(FailoverWriterHandlerTest, ReconnectToWriter_TaskBDefers) { mock_reader_a_proxy))); FAILOVER_WRITER_HANDLER writer_handler( - mock_ts, mock_reader_handler, mock_connection_handler, 60000, 2000, 2000, 0); + mock_ts, mock_reader_handler, mock_connection_handler, failover_thread_pool, 60000, 2000, 2000, 0); auto result = writer_handler.failover(current_topology); EXPECT_TRUE(result->connected); @@ -304,7 +303,7 @@ TEST_F(FailoverWriterHandlerTest, ConnectToReaderA_SlowWriter) { mock_reader_a_proxy))); FAILOVER_WRITER_HANDLER writer_handler( - mock_ts, mock_reader_handler, mock_connection_handler, 60000, 5000, 5000, 0); + mock_ts, mock_reader_handler, mock_connection_handler, failover_thread_pool, 60000, 5000, 5000, 0); auto result = writer_handler.failover(current_topology); EXPECT_TRUE(result->connected); @@ -363,7 +362,7 @@ TEST_F(FailoverWriterHandlerTest, ConnectToReaderA_TaskADefers) { mock_reader_a_proxy))); FAILOVER_WRITER_HANDLER writer_handler( - mock_ts, mock_reader_handler, mock_connection_handler, 60000, 5000, 5000, 0); + mock_ts, mock_reader_handler, mock_connection_handler, failover_thread_pool, 60000, 5000, 5000, 0); auto result = writer_handler.failover(current_topology); EXPECT_TRUE(result->connected); @@ -432,7 +431,7 @@ TEST_F(FailoverWriterHandlerTest, FailedToConnect_FailoverTimeout) { mock_reader_a_proxy))); FAILOVER_WRITER_HANDLER writer_handler( - mock_ts, mock_reader_handler, mock_connection_handler, 1000, 2000, 2000, 0); + mock_ts, mock_reader_handler, mock_connection_handler, failover_thread_pool, 1000, 2000, 2000, 0); auto result = writer_handler.failover(current_topology); EXPECT_FALSE(result->connected); @@ -481,7 +480,7 @@ TEST_F(FailoverWriterHandlerTest, FailedToConnect_TaskAFailed_TaskBWriterFailed) mock_reader_a_proxy))); FAILOVER_WRITER_HANDLER writer_handler( - mock_ts, mock_reader_handler, mock_connection_handler, 5000, 2000, 2000, 0); + mock_ts, mock_reader_handler, mock_connection_handler, failover_thread_pool, 5000, 2000, 2000, 0); auto result = writer_handler.failover(current_topology); EXPECT_FALSE(result->connected); diff --git a/unit_testing/mock_objects.h b/unit_testing/mock_objects.h index 4065518bd..47b36ac80 100644 --- a/unit_testing/mock_objects.h +++ b/unit_testing/mock_objects.h @@ -108,7 +108,7 @@ class MOCK_TOPOLOGY_SERVICE : public TOPOLOGY_SERVICE { class MOCK_READER_HANDLER : public FAILOVER_READER_HANDLER { public: - MOCK_READER_HANDLER() : FAILOVER_READER_HANDLER(nullptr, nullptr, 0, 0, false, 0) {} + MOCK_READER_HANDLER() : FAILOVER_READER_HANDLER(nullptr, nullptr, ctpl::thread_pool(), 0, 0, false, 0) {} MOCK_METHOD(std::shared_ptr, get_reader_connection, (std::shared_ptr, std::shared_ptr)); }; From 8853c626848aa95a699329f8ad74fc34442831dc Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Fri, 21 Apr 2023 15:30:02 -0700 Subject: [PATCH 31/33] fix unit test --- unit_testing/mock_objects.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/unit_testing/mock_objects.h b/unit_testing/mock_objects.h index 47b36ac80..156b18e15 100644 --- a/unit_testing/mock_objects.h +++ b/unit_testing/mock_objects.h @@ -108,9 +108,11 @@ class MOCK_TOPOLOGY_SERVICE : public TOPOLOGY_SERVICE { class MOCK_READER_HANDLER : public FAILOVER_READER_HANDLER { public: - MOCK_READER_HANDLER() : FAILOVER_READER_HANDLER(nullptr, nullptr, ctpl::thread_pool(), 0, 0, false, 0) {} + MOCK_READER_HANDLER() : FAILOVER_READER_HANDLER(nullptr, nullptr, thread_pool, 0, 0, false, 0) {} MOCK_METHOD(std::shared_ptr, get_reader_connection, (std::shared_ptr, std::shared_ptr)); + + ctpl::thread_pool thread_pool; }; class MOCK_CONNECTION_HANDLER : public CONNECTION_HANDLER { From 646ee2b9382582d3668ebbf01cad4010823b2ee1 Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Fri, 21 Apr 2023 16:01:57 -0700 Subject: [PATCH 32/33] fix build --- testframework/src/test/java/utility/ContainerHelper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testframework/src/test/java/utility/ContainerHelper.java b/testframework/src/test/java/utility/ContainerHelper.java index 3007d6903..26c96f613 100644 --- a/testframework/src/test/java/utility/ContainerHelper.java +++ b/testframework/src/test/java/utility/ContainerHelper.java @@ -66,7 +66,7 @@ public void runExecutable(GenericContainer container, String testDir, String throws IOException, InterruptedException { System.out.println("==== Container console feed ==== >>>>"); Consumer consumer = new ConsoleConsumer(); - Long exitCode = execInContainer(container, consumer, String.format("./%s/%s --", testDir, testExecutable)); + Long exitCode = execInContainer(container, consumer, String.format("./%s/%s", testDir, testExecutable)); System.out.println("==== Container console feed ==== <<<<"); assertEquals(0, exitCode, "Some tests failed."); } From a1d50d3243db5b81937b3cdc197661c754cf35be Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Fri, 21 Apr 2023 16:54:57 -0700 Subject: [PATCH 33/33] cleanup --- .github/workflows/dockerized.yml | 14 +------ driver/dll.cc | 1 + driver/failover_reader_handler.cc | 40 +------------------ driver/failover_writer_handler.cc | 15 ------- integration/CMakeLists.txt | 2 +- .../java/host/IntegrationContainerTest.java | 1 - .../test/java/utility/ContainerHelper.java | 1 - 7 files changed, 4 insertions(+), 70 deletions(-) diff --git a/.github/workflows/dockerized.yml b/.github/workflows/dockerized.yml index 3d88e2415..a046b959e 100644 --- a/.github/workflows/dockerized.yml +++ b/.github/workflows/dockerized.yml @@ -170,7 +170,7 @@ jobs: - name: 'Set up Temp AWS Credentials' run: | creds=($(aws sts get-session-token \ - --duration-seconds 17200 \ + --duration-seconds 7200 \ --query 'Credentials.[AccessKeyId, SecretAccessKey, SessionToken]' \ --output text \ | xargs)); @@ -180,18 +180,6 @@ jobs: echo "TEMP_AWS_ACCESS_KEY_ID=${creds[0]}" >> $GITHUB_ENV echo "TEMP_AWS_SECRET_ACCESS_KEY=${creds[1]}" >> $GITHUB_ENV echo "TEMP_AWS_SESSION_TOKEN=${creds[2]}" >> $GITHUB_ENV - - # - name: Setup upterm session - # uses: lhotari/action-upterm@v1 - # env: - # TEST_DSN: atlas - # TEST_USERNAME: ${{ secrets.TEST_USERNAME }} - # TEST_PASSWORD: ${{ secrets.TEST_PASSWORD }} - # TEST_DB_CLUSTER_IDENTIFIER: ${{ secrets.TEST_DB_CLUSTER_IDENTIFIER }}-${{ github.run_id }}-${{ github.run_attempt }} - # AWS_ACCESS_KEY_ID: ${{ env.TEMP_AWS_ACCESS_KEY_ID }} - # AWS_SECRET_ACCESS_KEY: ${{ env.TEMP_AWS_SECRET_ACCESS_KEY }} - # AWS_SESSION_TOKEN: ${{ env.TEMP_AWS_SESSION_TOKEN }} - # DRIVER_PATH: ${{ github.workspace }} - name: 'Run Integration Tests' working-directory: ${{ github.workspace }}/testframework diff --git a/driver/dll.cc b/driver/dll.cc index d452bb15a..0fb572a1a 100644 --- a/driver/dll.cc +++ b/driver/dll.cc @@ -153,6 +153,7 @@ void myodbc_end() */ my_thread_end_wait_time= 0; #endif + mysql_library_end(); } } diff --git a/driver/failover_reader_handler.cc b/driver/failover_reader_handler.cc index 13ea6dcb0..2ec2f5ddc 100644 --- a/driver/failover_reader_handler.cc +++ b/driver/failover_reader_handler.cc @@ -89,30 +89,11 @@ std::shared_ptr FAILOVER_READER_HANDLER::failover( return empty_result; }); - //std::packaged_task()> reader_failover_task([=] { - // while (!global_sync->is_completed()) { - // auto hosts_list = build_hosts_list(current_topology, !enable_strict_reader_failover); - // auto reader_result = get_connection_from_hosts(hosts_list, global_sync); - // if (reader_result->connected) { - // global_sync->mark_as_complete(true); - // return reader_result; - // } - // // TODO Think of changes to the strategy if it went - // // through all the hosts and did not connect. - // std::this_thread::sleep_for(std::chrono::seconds(READER_CONNECT_INTERVAL_SEC)); - // } - // return empty_result; - // }); - - //auto reader_result_future = reader_failover_task.get_future(); - //std::thread reader_failover_thread(std::move(reader_failover_task)); - // Wait for task complete signal with specified timeout global_sync->wait_and_complete(max_failover_timeout_ms); // Constantly polling for results until timeout while (true) { if (reader_result_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { - //reader_failover_thread.join(); MYLOG_TRACE(logger, dbc_id, "[FAILOVER_READER_HANDLER] Reader failover finished."); return reader_result_future.get(); } @@ -122,7 +103,6 @@ std::shared_ptr FAILOVER_READER_HANDLER::failover( const auto remaining_wait_ms = max_failover_timeout_ms - duration.count(); if (remaining_wait_ms <= 0) { // Reader failover timed out - //reader_failover_thread.detach(); MYLOG_TRACE(logger, dbc_id, "[FAILOVER_READER_HANDLER] Reader failover timed out. Failed to connect to the reader instance."); return empty_result; } @@ -218,12 +198,6 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ std::shared_ptr first_reader_host = hosts_list.at(i); - //std::packaged_task, std::shared_ptr, std::shared_ptr)> first_reader_task(first_connection_handler); - //std::packaged_task, std::shared_ptr, std::shared_ptr)> second_reader_task(second_connection_handler); - - //auto result1 = first_reader_task.get_future(); - //auto result2 = second_reader_task.get_future(); - if (thread_pool.n_idle() <= 1) { int size = thread_pool.size() + 2 - thread_pool.n_idle(); MYLOG_TRACE(logger, dbc_id, @@ -232,14 +206,10 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ } auto first_result = thread_pool.push(std::move(first_connection_handler), first_reader_host, local_sync, first_connection_result); - - //std::thread task_thread1(std::move(first_reader_task), first_reader_host,local_sync, first_connection_result); - //std::thread task_thread2; std::future second_future; if (!odd_hosts_number) { auto second_reader_host = hosts_list.at(i + 1); - //task_thread2 = std::thread(std::move(second_reader_task), second_reader_host, local_sync,second_connection_result); - second_future = std::move(thread_pool.push(std::move(second_connection_handler), second_reader_host, local_sync, second_connection_result)); + second_future = thread_pool.push(std::move(second_connection_handler), second_reader_host, local_sync, second_connection_result); } // Wait for task complete signal with specified timeout @@ -249,10 +219,6 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ while (true) { // Check if first reader task result is ready if (first_result.valid() && first_result.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { - //task_thread1.join(); - if (!odd_hosts_number) { - //task_thread2.detach(); - } first_result.get(); if (first_connection_result->connected) { MYLOG_TRACE(logger, dbc_id, @@ -266,8 +232,6 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ // Check if second reader task result is ready if there is one if (!odd_hosts_number && second_future.valid() && second_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { - //task_thread1.detach(); - //task_thread2.join(); second_future.get(); if (second_connection_result->connected) { MYLOG_TRACE(logger, dbc_id, @@ -288,8 +252,6 @@ std::shared_ptr FAILOVER_READER_HANDLER::get_connection_ const auto remaining_wait_ms = reader_connect_timeout_ms - duration.count(); if (remaining_wait_ms <= 0) { // None has connected. We move on and try new hosts. - //task_thread1.detach(); - //task_thread2.detach(); std::this_thread::sleep_for(std::chrono::seconds(READER_CONNECT_INTERVAL_SEC)); break; } diff --git a/driver/failover_writer_handler.cc b/driver/failover_writer_handler.cc index 13b704cfb..23d83d56d 100644 --- a/driver/failover_writer_handler.cc +++ b/driver/failover_writer_handler.cc @@ -326,15 +326,6 @@ std::shared_ptr FAILOVER_WRITER_HANDLER::failover( auto reconnect_result = std::make_shared(false, false, nullptr, nullptr); auto new_writer_result = std::make_shared(false, false, nullptr, nullptr); - //std::packaged_task, std::shared_ptr, std::shared_ptr)> reconnect_task(reconnect_handler); - //std::packaged_task, std::shared_ptr, std::shared_ptr)> wait_new_writer_task(new_writer_handler); - - //auto reconnect_future = reconnect_task.get_future(); - //auto wait_new_writer_future = wait_new_writer_task.get_future(); - - //std::thread reconnect_thread(std::move(reconnect_task), original_writer, failover_sync, reconnect_result); - //std::thread wait_new_writer_thread(std::move((wait_new_writer_task), original_writer, failover_sync, new_writer_result); - if (thread_pool.n_idle() <= 1) { int size = thread_pool.size() + 2 - thread_pool.n_idle(); MYLOG_TRACE(logger, dbc_id, @@ -352,8 +343,6 @@ std::shared_ptr FAILOVER_WRITER_HANDLER::failover( while (true) { // Check if reconnect task result is ready if (reconnect_future.valid() && reconnect_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { - //reconnect_thread.join(); - //wait_new_writer_thread.detach(); reconnect_future.get(); if (reconnect_result->connected) { MYLOG_TRACE(logger, dbc_id, @@ -365,8 +354,6 @@ std::shared_ptr FAILOVER_WRITER_HANDLER::failover( // Check if wait new writer task result is ready if (wait_new_writer_future.valid() && wait_new_writer_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { - //reconnect_thread.detach(); - //wait_new_writer_thread.join(); wait_new_writer_future.get(); if (new_writer_result->connected) { MYLOG_TRACE(logger, dbc_id, @@ -386,8 +373,6 @@ std::shared_ptr FAILOVER_WRITER_HANDLER::failover( const auto remaining_wait_ms = writer_failover_timeout_ms - duration.count(); if (remaining_wait_ms <= 0) { // Writer failover timed out - //reconnect_thread.detach(); - //wait_new_writer_thread.detach(); MYLOG_TRACE(logger, dbc_id, "[FAILOVER_WRITER_HANDLER] Writer failover timed out. Failed to connect to the writer instance."); return std::make_shared(false, false, nullptr, nullptr); } diff --git a/integration/CMakeLists.txt b/integration/CMakeLists.txt index 232df2180..fe716112a 100644 --- a/integration/CMakeLists.txt +++ b/integration/CMakeLists.txt @@ -101,8 +101,8 @@ set(TEST_SOURCES set(INTEGRATION_TESTS iam_authentication_integration_test.cc secrets_manager_integration_test.cc - #failover_integration_test.cc network_failover_integration_test.cc + failover_integration_test.cc ) if(NOT ENABLE_PERFORMANCE_TESTS) diff --git a/testframework/src/test/java/host/IntegrationContainerTest.java b/testframework/src/test/java/host/IntegrationContainerTest.java index 059a9fcd2..1f093428c 100644 --- a/testframework/src/test/java/host/IntegrationContainerTest.java +++ b/testframework/src/test/java/host/IntegrationContainerTest.java @@ -133,7 +133,6 @@ public void testRunCommunityTestInContainer() public void testRunFailoverTestInContainer() throws UnsupportedOperationException, IOException, InterruptedException { setupFailoverIntegrationTests(NETWORK); - //Thread.sleep(Long.MAX_VALUE); containerHelper.runExecutable(testContainer, "build/integration/bin", "integration"); } diff --git a/testframework/src/test/java/utility/ContainerHelper.java b/testframework/src/test/java/utility/ContainerHelper.java index 26c96f613..1c3eee3aa 100644 --- a/testframework/src/test/java/utility/ContainerHelper.java +++ b/testframework/src/test/java/utility/ContainerHelper.java @@ -93,7 +93,6 @@ public GenericContainer createTestContainer( .withDockerfileFromBuilder(builder -> builder .from(testContainerImageName) - .run("yum", "-y", "install", "gdb") .run("mkdir", "app") .workDir("/app") .entryPoint("/bin/sh -c \"while true; do sleep 30; done;\"")