Improving missed task recovery logic (sodafoundation#704)

Co-authored-by: Erik <lynheell@gmail.com>
Co-authored-by: Ashit Kumar <akopensrc@gmail.com>
gh-ca · Sep 23, 2021 · 0d83731 · 0d83731
1 parent 3634b0b
commit 0d83731
Show file tree

Hide file tree

Showing 2 changed files with 7 additions and 2 deletions.
diff --git a/delfin/common/constants.py b/delfin/common/constants.py
@@ -403,7 +403,7 @@ class TelemetryCollection(object):
     MAX_FAILED_JOB_RETRY_COUNT = 5
     """Default performance collection interval"""
     DEF_PERFORMANCE_COLLECTION_INTERVAL = 900
-    DEF_PERFORMANCE_HISTORY_ON_RESCHEDULE = 300
+    DEF_PERFORMANCE_HISTORY_ON_RESCHEDULE = 1800
 
 
 class TelemetryTaskStatus(object):

diff --git a/delfin/task_manager/scheduler/schedulers/telemetry/job_handler.py b/delfin/task_manager/scheduler/schedulers/telemetry/job_handler.py
@@ -115,10 +115,15 @@ def schedule_job(self, task_id):
                 # miss any Data points due to reschedule
                 LOG.debug('Triggering one historic collection for job %s',
                           job['id'])
+                # Maximum supported history duration on restart
                 history_on_reschedule = CONF.telemetry. \
                     performance_history_on_reschedule
+                # Adjust start_time and end_time based on last_run_time
                 end_time = current_time * 1000
-                start_time = end_time - (history_on_reschedule * 1000)
+                start_time = job['last_run_time'] * 1000 \
+                    if current_time - job['last_run_time'] < \
+                    history_on_reschedule \
+                    else (end_time - history_on_reschedule * 1000)
                 telemetry = PerformanceCollectionTask()
                 telemetry.collect(self.ctx, self.storage_id,
                                   self.args,