-
Notifications
You must be signed in to change notification settings - Fork 409
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix mpp hang error if some error happens during compile of mpp plan in TiFlash #1533
Changes from 5 commits
25fc741
ff68e3e
b0abeb3
ee75903
6e89bad
84ac264
15773a7
8db7a7e
42cb7c4
2748388
3499d4e
14752c0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -126,6 +126,18 @@ struct MPPTunnel | |
cv_for_finished.notify_all(); | ||
} | ||
|
||
/// Finish the tunnel without checking the connect status. This function | ||
/// should only be used when handling the error if DispatchMPPTask fails for | ||
/// the root task: in that case TiDB does not send the establish MPP | ||
/// connection request at all, so checking the connect status is | ||
/// meaningless; just finish the tunnel. | ||
void finish() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about use a more critical word like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK, I will use |
||
{ | ||
// `mu` is held for the rest of this scope, covering both the flag update and the notification. | ||
std::unique_lock<std::mutex> lk(mu); | ||
finished = true; | ||
// Wake every thread currently blocked on cv_for_finished. | ||
cv_for_finished.notify_all(); | ||
} | ||
|
||
// An MPPConn request has arrived; it will build the connection through this tunnel. | ||
void connect(::grpc::ServerWriter<::mpp::MPPDataPacket> * writer_) | ||
{ | ||
|
@@ -256,7 +268,7 @@ struct MPPTask : std::enable_shared_from_this<MPPTask>, private boost::noncopyab | |
// Maps each target task to the tunnel through which we send data to it. | ||
std::map<MPPTaskId, MPPTunnelPtr> tunnel_map; | ||
|
||
MPPTaskManager * manager; | ||
MPPTaskManager * manager = nullptr; | ||
|
||
Logger * log; | ||
|
||
|
@@ -279,6 +291,20 @@ struct MPPTask : std::enable_shared_from_this<MPPTask>, private boost::noncopyab | |
|
||
void cancel(); | ||
|
||
/// Calls finish() on every tunnel stored in tunnel_map. Any exception thrown | ||
/// while doing so is caught here and reported as a warning instead of | ||
/// propagating to the caller. | ||
void finishAllTunnel() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto. |
||
{ | ||
// The try wraps the whole loop, so an exception from one tunnel skips the remaining tunnels. | ||
try | ||
{ | ||
for (auto & it : tunnel_map) | ||
{ | ||
it.second->finish(); | ||
} | ||
} | ||
catch (...) | ||
{ | ||
// NOTE(review): only this fixed message is logged; the details of the caught exception are not recorded. | ||
LOG_WARNING(log, "Failed to finish all tunnels"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could leverage the follow method for logging the exception either. Otherwise we know only it failed but lost the reason. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good idea! |
||
} | ||
} | ||
void writeErrToAllTunnel(const String & e) | ||
{ | ||
try | ||
|
@@ -516,6 +542,7 @@ class MPPHandler | |
public: | ||
MPPHandler(const mpp::DispatchTaskRequest & task_request_) : task_request(task_request_), log(&Logger::get("MPPHandler")) {} | ||
grpc::Status execute(Context & context, mpp::DispatchTaskResponse * response); | ||
void handleError(MPPTaskPtr task, String error); | ||
}; | ||
|
||
} // namespace DB |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
# Preparation. | ||
=> DBGInvoke __init_fail_point() | ||
|
||
mysql> drop table if exists test.t | ||
mysql> create table test.t (id int, value varchar(64)) | ||
mysql> insert into test.t values(1,'a'),(2,'b'),(3,'c') | ||
mysql> alter table test.t set tiflash replica 1 | ||
|
||
func> wait_table test t | ||
|
||
|
||
# Data. | ||
|
||
## exception before mpp register non root mpp task | ||
=> DBGInvoke __enable_fail_point(exception_before_mpp_register_non_root_mpp_task) | ||
mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; | ||
ERROR 1105 (HY000) at line 1: DB::Exception: Fail point FailPoints::exception_before_mpp_register_non_root_mpp_task is triggered. | ||
=> DBGInvoke __disable_fail_point(exception_before_mpp_register_non_root_mpp_task) | ||
|
||
## exception before mpp register root mpp task | ||
=> DBGInvoke __enable_fail_point(exception_before_mpp_register_root_mpp_task) | ||
mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; | ||
ERROR 1105 (HY000) at line 1: DB::Exception: Fail point FailPoints::exception_before_mpp_register_root_mpp_task is triggered. | ||
=> DBGInvoke __disable_fail_point(exception_before_mpp_register_root_mpp_task) | ||
|
||
## exception before mpp register tunnel for non root mpp task | ||
=> DBGInvoke __enable_fail_point(exception_before_mpp_register_tunnel_for_non_root_mpp_task) | ||
mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; | ||
ERROR 1105 (HY000) at line 1: DB::Exception: Fail point FailPoints::exception_before_mpp_register_tunnel_for_non_root_mpp_task is triggered. | ||
=> DBGInvoke __disable_fail_point(exception_before_mpp_register_tunnel_for_non_root_mpp_task) | ||
|
||
## exception before mpp register tunnel for root mpp task | ||
=> DBGInvoke __enable_fail_point(exception_before_mpp_register_tunnel_for_root_mpp_task) | ||
mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; | ||
ERROR 1105 (HY000) at line 1: DB::Exception: Fail point FailPoints::exception_before_mpp_register_tunnel_for_root_mpp_task is triggered. | ||
=> DBGInvoke __disable_fail_point(exception_before_mpp_register_tunnel_for_root_mpp_task) | ||
|
||
## exception during mpp register tunnel for non root mpp task | ||
=> DBGInvoke __enable_fail_point(exception_during_mpp_register_tunnel_for_non_root_mpp_task) | ||
mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; | ||
ERROR 1105 (HY000) at line 1: DB::Exception: Fail point FailPoints::exception_during_mpp_register_tunnel_for_non_root_mpp_task is triggered. | ||
=> DBGInvoke __disable_fail_point(exception_during_mpp_register_tunnel_for_non_root_mpp_task) | ||
|
||
## exception before mpp run non root task | ||
=> DBGInvoke __enable_fail_point(exception_before_mpp_non_root_task_run) | ||
mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; | ||
ERROR 1105 (HY000) at line 1: DB::Exception: Fail point FailPoints::exception_before_mpp_non_root_task_run is triggered. | ||
=> DBGInvoke __disable_fail_point(exception_before_mpp_non_root_task_run) | ||
|
||
## exception before mpp run root task | ||
=> DBGInvoke __enable_fail_point(exception_before_mpp_root_task_run) | ||
mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; | ||
ERROR 1105 (HY000) at line 1: DB::Exception: Fail point FailPoints::exception_before_mpp_root_task_run is triggered. | ||
=> DBGInvoke __disable_fail_point(exception_before_mpp_root_task_run) | ||
|
||
## exception during mpp run non root task | ||
=> DBGInvoke __enable_fail_point(exception_during_mpp_non_root_task_run) | ||
mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; | ||
ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: exchange receiver meet error : DB::Exception: Fail point FailPoints::exception_during_mpp_non_root_task_run is triggered. | ||
=> DBGInvoke __disable_fail_point(exception_during_mpp_non_root_task_run) | ||
|
||
## exception during mpp run root task | ||
=> DBGInvoke __enable_fail_point(exception_during_mpp_root_task_run) | ||
mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; | ||
ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Fail point FailPoints::exception_during_mpp_root_task_run is triggered. | ||
=> DBGInvoke __disable_fail_point(exception_during_mpp_root_task_run) | ||
|
||
# Clean up. | ||
mysql> drop table if exists test.t |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ditto