Skip to content

Commit

Permalink
fix: wait for some time when che-operator updated status with an error (#1763)
Browse files Browse the repository at this point in the history

* fix: wait for some time when che-operator updated status with an error

Signed-off-by: Anatolii Bazko <abazko@redhat.com>
  • Loading branch information
tolusha authored Oct 19, 2021
1 parent 400c7cd commit 603df9d
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 23 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -575,7 +575,7 @@ OPTIONS
[default: 600000] Waiting time for Pod downloading image (in milliseconds)
--k8spoderrorrechecktimeout=k8spoderrorrechecktimeout
- [default: 15000] Waiting time for Pod rechecking error (in milliseconds)
+ [default: 60000] Waiting time for Pod rechecking error (in milliseconds)
--k8spodreadytimeout=k8spodreadytimeout
[default: 600000] Waiting time for Pod Ready condition (in milliseconds)
Expand Down Expand Up @@ -738,7 +738,7 @@ OPTIONS
--k8spoddownloadimagetimeout=k8spoddownloadimagetimeout [default: 600000] Waiting time for Pod downloading image (in
milliseconds)
- --k8spoderrorrechecktimeout=k8spoderrorrechecktimeout [default: 15000] Waiting time for Pod rechecking error (in
+ --k8spoderrorrechecktimeout=k8spoderrorrechecktimeout [default: 60000] Waiting time for Pod rechecking error (in
milliseconds)
--k8spodreadytimeout=k8spodreadytimeout [default: 600000] Waiting time for Pod Ready condition (in
Expand Down
2 changes: 1 addition & 1 deletion src/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ export const DEFAULT_ANALYTIC_HOOK_NAME = 'analytics'

// Timeouts
export const DEFAULT_K8S_POD_WAIT_TIMEOUT = 600000
- export const DEFAULT_K8S_POD_ERROR_RECHECK_TIMEOUT = 15000
+ export const DEFAULT_K8S_POD_ERROR_RECHECK_TIMEOUT = 60000

// Custom Resources names
export const CHE_CLUSTER_CRD = 'checlusters.org.eclipse.che'
Expand Down
73 changes: 53 additions & 20 deletions src/tasks/kube.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
*/
import { cli } from 'cli-ux'
import * as Listr from 'listr'

import { KubeHelper } from '../api/kube'

interface FailState {
Expand All @@ -37,18 +36,30 @@ export class KubeTasks {
for (let i = 1; i <= iterations; i++) {
// check cheCluster status
const cheClusterFailState = await this.getCheClusterFailState(namespace)
if (cheClusterFailState) {
task.title = `${task.title}...failed`
throw new Error(`Eclipse Che operator failed, reason: ${cheClusterFailState.reason}, message: ${cheClusterFailState.message}. Consider increasing error recheck timeout with --k8spoderrorrechecktimeout flag.`)
}

// check 'PodScheduled' condition
const podFailState = await this.getPodFailState(namespace, selector, 'PodScheduled')
if (podFailState) {
// for instance we need some time for pvc provisioning...
await cli.wait(this.kubeHelper.podErrorRecheckTimeout)

const podFailState = await this.getPodFailState(namespace, selector, 'PodScheduled')
if (cheClusterFailState || podFailState) {
const iterations = this.kubeHelper.podErrorRecheckTimeout / 1000
let cheClusterFailState: FailState | undefined
let podFailState: FailState | undefined

for (let j = 0; j < iterations; j++) {
await cli.wait(1000)

cheClusterFailState = await this.getCheClusterFailState(namespace)
podFailState = await this.getPodFailState(namespace, selector, 'PodScheduled')

if (!cheClusterFailState && !podFailState) {
break
}
}

if (cheClusterFailState) {
task.title = `${task.title}...failed`
throw new Error(`Eclipse Che operator failed, reason: ${cheClusterFailState.reason}, message: ${cheClusterFailState.message}. Consider increasing error recheck timeout with --k8spoderrorrechecktimeout flag.`)
}

if (podFailState) {
task.title = `${task.title}...failed`
throw new Error(`Failed to schedule a pod, reason: ${podFailState.reason}, message: ${podFailState.message}. Consider increasing error recheck timeout with --k8spoderrorrechecktimeout flag.`)
Expand All @@ -74,9 +85,19 @@ export class KubeTasks {
for (let i = 1; i <= iterations; i++) {
const failedState = await this.getContainerFailState(namespace, selector, 'Pending')
if (failedState) {
await cli.wait(this.kubeHelper.podErrorRecheckTimeout)
const iterations = this.kubeHelper.podErrorRecheckTimeout / 1000
let failedState: FailState | undefined

for (let j = 0; j < iterations; j++) {
await cli.wait(1000)

failedState = await this.getContainerFailState(namespace, selector, 'Pending')

if (!failedState) {
break
}
}

const failedState = await this.getContainerFailState(namespace, selector, 'Pending')
if (failedState) {
task.title = `${task.title}...failed`
throw new Error(`Failed to download image, reason: ${failedState.reason}, message: ${failedState.message}.`)
Expand All @@ -103,16 +124,28 @@ export class KubeTasks {
for (let i = 1; i <= iterations; i++) {
// check cheCluster status
const cheClusterFailState = await this.getCheClusterFailState(namespace)
if (cheClusterFailState) {
task.title = `${task.title}...failed`
throw new Error(`Eclipse Che operator failed, reason: ${cheClusterFailState.reason}, message: ${cheClusterFailState.message}. Consider increasing error recheck timeout with --k8spoderrorrechecktimeout flag.`)
}

const failedState = await this.getContainerFailState(namespace, selector, 'Running')
if (failedState) {
await cli.wait(this.kubeHelper.podErrorRecheckTimeout)
if (cheClusterFailState || failedState) {
const iterations = this.kubeHelper.podErrorRecheckTimeout / 1000
let cheClusterFailState: FailState | undefined
let failedState: FailState | undefined

for (let j = 0; j < iterations; j++) {
await cli.wait(1000)

cheClusterFailState = await this.getCheClusterFailState(namespace)
failedState = await this.getContainerFailState(namespace, selector, 'Running')

if (!cheClusterFailState && !failedState) {
break
}
}

if (cheClusterFailState) {
task.title = `${task.title}...failed`
throw new Error(`Eclipse Che operator failed, reason: ${cheClusterFailState.reason}, message: ${cheClusterFailState.message}. Consider increasing error recheck timeout with --k8spoderrorrechecktimeout flag.`)
}

const failedState = await this.getContainerFailState(namespace, selector, 'Running')
if (failedState) {
task.title = `${task.title}...failed`
throw new Error(`Failed to start a pod, reason: ${failedState.reason}, message: ${failedState.message}`)
Expand Down

0 comments on commit 603df9d

Please sign in to comment.