Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(system_error_monitor): add heartbeat timeout for subscribed data (backport #1501) #217

Merged
merged 2 commits into from
Dec 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions system/system_error_monitor/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,13 @@ endif

### Node Parameters

| Name | Type | Default Value | Explanation |
| ---------------------------- | ------ | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `ignore_missing_diagnostics` | bool | `false` | If this parameter is turned off, it will be ignored if required modules have not been received. |
| `add_leaf_diagnostics` | bool | `true` | Required to use children diagnostics. |
| `diag_timeout_sec` | double | `1.0` (sec) | If required diagnostic is not received for a `diag_timeout_sec`, the diagnostic state become STALE state. |
| `data_ready_timeout` | double | `30.0` | If input topics required for system_error_monitor are not available for `data_ready_timeout` seconds, autoware_state will translate to emergency state. |
| Name | Type | Default Value | Explanation |
| ---------------------------- | ------ | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `ignore_missing_diagnostics` | bool | `false` | If this parameter is turned off, it will be ignored if required modules have not been received. |
| `add_leaf_diagnostics` | bool | `true` | Required to use children diagnostics. |
| `diag_timeout_sec` | double | `1.0` (sec) | If required diagnostic is not received for a `diag_timeout_sec`, the diagnostic state become STALE state. |
| `data_ready_timeout` | double | `30.0` | If input topics required for system_error_monitor are not available for `data_ready_timeout` seconds, autoware_state will translate to emergency state. |
| `data_heartbeat_timeout` | double | `1.0` | If input topics required for system_error_monitor are not no longer subscribed for `data_heartbeat_timeout` seconds, autoware_state will translate to emergency state. |

### Core Parameters

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ class AutowareErrorMonitor : public rclcpp::Node
bool ignore_missing_diagnostics;
bool add_leaf_diagnostics;
double data_ready_timeout;
double data_heartbeat_timeout;
double diag_timeout_sec;
double hazard_recovery_timeout;
int emergency_hazard_level;
Expand All @@ -92,6 +93,7 @@ class AutowareErrorMonitor : public rclcpp::Node
rclcpp::TimerBase::SharedPtr timer_;

bool isDataReady();
bool isDataHeartbeatTimeout();
void onTimer();

// Subscriber
Expand Down Expand Up @@ -125,6 +127,12 @@ class AutowareErrorMonitor : public rclcpp::Node
[[maybe_unused]] std_srvs::srv::Trigger::Request::SharedPtr request,
std_srvs::srv::Trigger::Response::SharedPtr response);

// for Heartbeat
rclcpp::Time diag_array_stamp_;
rclcpp::Time autoware_state_stamp_;
rclcpp::Time current_gate_mode_stamp_;
rclcpp::Time control_mode_stamp_;

// Algorithm
boost::optional<DiagStamped> getLatestDiag(const std::string & diag_name) const;
uint8_t getHazardLevel(const DiagConfig & required_module, const int diag_level) const;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
<arg name="ignore_missing_diagnostics" default="false"/>
<arg name="add_leaf_diagnostics" default="true"/>
<arg name="data_ready_timeout" default="30.0"/>
<arg name="data_heartbeat_timeout" default="1.0"/>
<arg name="diag_timeout_sec" default="1.0"/>
<arg name="hazard_recovery_timeout" default="5.0"/>
<arg name="use_emergency_hold" default="false"/>
Expand Down Expand Up @@ -45,6 +46,7 @@
<arg name="ignore_missing_diagnostics" value="$(var ignore_missing_diagnostics)"/>
<arg name="add_leaf_diagnostics" value="$(var add_leaf_diagnostics)"/>
<arg name="data_ready_timeout" value="$(var data_ready_timeout)"/>
<arg name="data_heartbeat_timeout" value="$(var data_heartbeat_timeout)"/>
<arg name="diag_timeout_sec" value="$(var diag_timeout_sec)"/>
<arg name="hazard_recovery_timeout" value="$(var hazard_recovery_timeout)"/>
<arg name="use_emergency_hold" value="$(var use_emergency_hold)"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
<arg name="ignore_missing_diagnostics"/>
<arg name="add_leaf_diagnostics"/>
<arg name="data_ready_timeout"/>
<arg name="data_heartbeat_timeout"/>
<arg name="diag_timeout_sec"/>
<arg name="hazard_recovery_timeout"/>
<arg name="use_emergency_hold"/>
Expand All @@ -23,6 +24,7 @@
<param name="ignore_missing_diagnostics" value="$(var ignore_missing_diagnostics)"/>
<param name="add_leaf_diagnostics" value="$(var add_leaf_diagnostics)"/>
<param name="data_ready_timeout" value="$(var data_ready_timeout)"/>
<param name="data_heartbeat_timeout" value="$(var data_heartbeat_timeout)"/>
<param name="diag_timeout_sec" value="$(var diag_timeout_sec)"/>
<param name="hazard_recovery_timeout" value="$(var hazard_recovery_timeout)"/>
<param name="use_emergency_hold" value="$(var use_emergency_hold)"/>
Expand Down
49 changes: 49 additions & 0 deletions system/system_error_monitor/src/system_error_monitor_core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ AutowareErrorMonitor::AutowareErrorMonitor()
get_parameter_or<bool>("ignore_missing_diagnostics", params_.ignore_missing_diagnostics, false);
get_parameter_or<bool>("add_leaf_diagnostics", params_.add_leaf_diagnostics, true);
get_parameter_or<double>("data_ready_timeout", params_.data_ready_timeout, 30.0);
get_parameter_or<double>("data_heartbeat_timeout", params_.data_heartbeat_timeout, 1.0);
get_parameter_or<double>("diag_timeout_sec", params_.diag_timeout_sec, 1.0);
get_parameter_or<double>("hazard_recovery_timeout", params_.hazard_recovery_timeout, 5.0);
get_parameter_or<int>(
Expand Down Expand Up @@ -363,24 +364,36 @@ void AutowareErrorMonitor::onDiagArray(
diag_buffer.pop_front();
}
}

// for Heartbeat
diag_array_stamp_ = this->now();
}

void AutowareErrorMonitor::onCurrentGateMode(
const tier4_control_msgs::msg::GateMode::ConstSharedPtr msg)
{
current_gate_mode_ = msg;

// for Heartbeat
current_gate_mode_stamp_ = this->now();
}

void AutowareErrorMonitor::onAutowareState(
const autoware_auto_system_msgs::msg::AutowareState::ConstSharedPtr msg)
{
autoware_state_ = msg;

// for Heartbeat
autoware_state_stamp_ = this->now();
}

void AutowareErrorMonitor::onControlMode(
const autoware_auto_vehicle_msgs::msg::ControlModeReport::ConstSharedPtr msg)
{
control_mode_ = msg;

// for Heartbeat
control_mode_stamp_ = this->now();
}

bool AutowareErrorMonitor::isDataReady()
Expand Down Expand Up @@ -408,6 +421,37 @@ bool AutowareErrorMonitor::isDataReady()
return true;
}

bool AutowareErrorMonitor::isDataHeartbeatTimeout()
{
auto isTimeout = [this](const rclcpp::Time & last_stamp, const double threshold) {
const auto time_diff = this->now() - last_stamp;
return time_diff.seconds() > threshold;
};

if (isTimeout(diag_array_stamp_, params_.data_heartbeat_timeout)) {
RCLCPP_ERROR_THROTTLE(get_logger(), *get_clock(), 5000, "diag_array msg is timeout...");
return true;
}

if (isTimeout(current_gate_mode_stamp_, params_.data_heartbeat_timeout)) {
RCLCPP_ERROR_THROTTLE(get_logger(), *get_clock(), 5000, "current_gate_mode msg is timeout...");
return true;
}

if (isTimeout(autoware_state_stamp_, params_.data_heartbeat_timeout)) {
RCLCPP_ERROR_THROTTLE(get_logger(), *get_clock(), 5000, "autoware_state msg is timeout...");
return true;
}

if (isTimeout(control_mode_stamp_, params_.data_heartbeat_timeout)) {
RCLCPP_ERROR_THROTTLE(
get_logger(), *get_clock(), 5000, "vehicle_state_report msg is timeout...");
return true;
}

return false;
}

void AutowareErrorMonitor::onTimer()
{
if (!isDataReady()) {
Expand All @@ -420,6 +464,11 @@ void AutowareErrorMonitor::onTimer()
return;
}

if (isDataHeartbeatTimeout()) {
publishHazardStatus(createTimeoutHazardStatus());
return;
}

current_mode_ = current_gate_mode_->data == tier4_control_msgs::msg::GateMode::AUTO
? KeyName::autonomous_driving
: KeyName::external_control;
Expand Down