Skip to content

Commit

Permalink
Fix Google Batch hang when internal error during scheduling
Browse files Browse the repository at this point in the history
Signed-off-by: jorgee <jorge.ejarque@seqera.io>
  • Loading branch information
jorgee committed Dec 4, 2024
1 parent 51aea8f commit 95f5a75
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -444,11 +444,27 @@ class GoogleBatchTaskHandler extends TaskHandler implements FusionAwareTask {
* @return Retrieve the submitted task state
*/
protected String getTaskState() {
final tasks = client.listTasks(jobId)
if( !tasks.iterator().hasNext() )
return 'PENDING'

final now = System.currentTimeMillis()
final tasks = client.listTasks(jobId)
if( !tasks.iterator().hasNext() ) {
// if there are no tasks, check the job status instead
final jobStatus = client.getJobStatus(jobId);
final newState = jobStatus?.state as String
if (newState) {
taskState = newState
timestamp = now
if (newState == "FAILED"){
final eventsCount = jobStatus.getStatusEventsCount()
final lastEvent = eventsCount > 0 ? jobStatus.getStatusEvents(eventsCount - 1) : null
if (lastEvent){
log.warn1 "Batch job failure: ${lastEvent.getDescription()}"
}
}
return taskState
} else {
return "PENDING"
}
}
final delta = now - timestamp;
if( !taskState || delta >= 1_000) {
final status = client.getTaskStatus(jobId, taskId)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import com.google.cloud.batch.v1.BatchServiceClient
import com.google.cloud.batch.v1.BatchServiceSettings
import com.google.cloud.batch.v1.Job
import com.google.cloud.batch.v1.JobName
import com.google.cloud.batch.v1.JobStatus
import com.google.cloud.batch.v1.LocationName
import com.google.cloud.batch.v1.Task
import com.google.cloud.batch.v1.TaskGroupName
Expand Down Expand Up @@ -123,6 +124,10 @@ class BatchClient {
return describeTask(jobId, taskId).getStatus()
}

/**
 * Look up a job and return its status.
 *
 * @param jobId The job identifier, forwarded unchanged to {@code describeJob}
 * @return The {@link JobStatus} obtained from the described job's {@code getStatus()}
 */
JobStatus getJobStatus(String jobId) {
    // NOTE(review): assumes describeJob never returns null -- confirm against its definition
    final job = describeJob(jobId)
    return job.getStatus()
}

String getTaskState(String jobId, String taskId) {
final status = getTaskStatus(jobId, taskId)
return status ? status.getState().toString() : null
Expand Down

0 comments on commit 95f5a75

Please sign in to comment.