diff --git a/airflow/dag_processing/processor.py b/airflow/dag_processing/processor.py index c11243e76e6f..d8a1f69850b8 100644 --- a/airflow/dag_processing/processor.py +++ b/airflow/dag_processing/processor.py @@ -435,6 +435,7 @@ def manage_slas(self, dag: DAG, session: Session = None) -> None: timestamp=ts, ) sla_misses.append(sla_miss) + Stats.incr('sla_missed') if sla_misses: session.add_all(sla_misses) session.commit() diff --git a/docs/apache-airflow/logging-monitoring/metrics.rst b/docs/apache-airflow/logging-monitoring/metrics.rst index 454f08507f6f..4ca6c985a510 100644 --- a/docs/apache-airflow/logging-monitoring/metrics.rst +++ b/docs/apache-airflow/logging-monitoring/metrics.rst @@ -101,6 +101,7 @@ Name Description ``scheduler.critical_section_busy`` Count of times a scheduler process tried to get a lock on the critical section (needed to send tasks to the executor) and found it locked by another process. +``sla_missed`` Number of SLA misses ``sla_callback_notification_failure`` Number of failed SLA miss callback notification attempts ``sla_email_notification_failure`` Number of failed SLA miss email notification attempts ``ti.start..`` Number of started task in a given dag. 
Similar to <job_name>_start but for task diff --git a/tests/dag_processing/test_processor.py b/tests/dag_processing/test_processor.py index e2f9165131f2..eb122698f4d0 100644 --- a/tests/dag_processing/test_processor.py +++ b/tests/dag_processing/test_processor.py @@ -196,7 +196,8 @@ def test_dag_file_processor_sla_miss_callback_sent_notification(self, create_dum sla_callback.assert_not_called() - def test_dag_file_processor_sla_miss_doesnot_raise_integrity_error(self, dag_maker): + @mock.patch('airflow.dag_processing.processor.Stats.incr') + def test_dag_file_processor_sla_miss_doesnot_raise_integrity_error(self, mock_stats_incr, dag_maker): """ Test that the dag file processor does not try to insert already existing item into the database """ @@ -229,6 +230,7 @@ def test_dag_file_processor_sla_miss_doesnot_raise_integrity_error(self, dag_mak .count() ) assert sla_miss_count == 1 + mock_stats_incr.assert_called_with('sla_missed') # Now call manage_slas and see that it runs without errors # because of existing SlaMiss above. # Since this is run often, it's possible that it runs before another