From 0d83731fa462c2a1bc37af4320e10207cea39441 Mon Sep 17 00:00:00 2001 From: Najmudheen <45681499+NajmudheenCT@users.noreply.github.com> Date: Thu, 23 Sep 2021 11:16:18 +0530 Subject: [PATCH] Improving missed task recovery logic (#704) Co-authored-by: Erik Co-authored-by: Ashit Kumar --- delfin/common/constants.py | 2 +- .../scheduler/schedulers/telemetry/job_handler.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/delfin/common/constants.py b/delfin/common/constants.py index 061ae2923..31a93bdfc 100644 --- a/delfin/common/constants.py +++ b/delfin/common/constants.py @@ -403,7 +403,7 @@ class TelemetryCollection(object): MAX_FAILED_JOB_RETRY_COUNT = 5 """Default performance collection interval""" DEF_PERFORMANCE_COLLECTION_INTERVAL = 900 - DEF_PERFORMANCE_HISTORY_ON_RESCHEDULE = 300 + DEF_PERFORMANCE_HISTORY_ON_RESCHEDULE = 1800 class TelemetryTaskStatus(object): diff --git a/delfin/task_manager/scheduler/schedulers/telemetry/job_handler.py b/delfin/task_manager/scheduler/schedulers/telemetry/job_handler.py index 6e8b145c4..d5aa5b5d4 100644 --- a/delfin/task_manager/scheduler/schedulers/telemetry/job_handler.py +++ b/delfin/task_manager/scheduler/schedulers/telemetry/job_handler.py @@ -115,10 +115,15 @@ def schedule_job(self, task_id): # miss any Data points due to reschedule LOG.debug('Triggering one historic collection for job %s', job['id']) + # Maximum supported history duration on restart history_on_reschedule = CONF.telemetry. \ performance_history_on_reschedule + # Adjust start_time and end_time based on last_run_time end_time = current_time * 1000 - start_time = end_time - (history_on_reschedule * 1000) + start_time = job['last_run_time'] * 1000 \ + if current_time - job['last_run_time'] < \ + history_on_reschedule \ + else (end_time - history_on_reschedule * 1000) telemetry = PerformanceCollectionTask() telemetry.collect(self.ctx, self.storage_id, self.args,