Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: retry timeout of 60s for reconnection #3270

Merged
merged 2 commits into from
Sep 18, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 26 additions & 20 deletions packages/hms-video-store/src/transport/RetryScheduler.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { Dependencies as TFCDependencies, TransportFailureCategory as TFC } from './models/TransportFailureCategory';
import { TransportState } from './models/TransportState';
import { HMSException } from '../error/HMSException';
import { MAX_TRANSPORT_RETRIES, MAX_TRANSPORT_RETRY_DELAY } from '../utils/constants';
import { MAX_TRANSPORT_RETRY_TIME } from '../utils/constants';
import HMSLogger from '../utils/logger';
import { PromiseWithCallbacks } from '../utils/promise';

Expand All @@ -23,7 +23,7 @@ interface ScheduleTaskParams {
error: HMSException;
task: RetryTask;
originalState: TransportState;
maxFailedRetries?: number;
maxRetryTime?: number;
changeState?: boolean;
}

Expand All @@ -42,10 +42,10 @@ export class RetryScheduler {
error,
task,
originalState,
maxFailedRetries = MAX_TRANSPORT_RETRIES,
maxRetryTime = MAX_TRANSPORT_RETRY_TIME,
changeState = true,
}: ScheduleTaskParams) {
await this.scheduleTask({ category, error, changeState, task, originalState, maxFailedRetries });
await this.scheduleTask({ category, error, changeState, task, originalState, maxRetryTime, failedAt: Date.now() });
}

reset() {
Expand All @@ -65,9 +65,10 @@ export class RetryScheduler {
changeState,
task,
originalState,
maxFailedRetries = MAX_TRANSPORT_RETRIES,
failedAt,
maxRetryTime = MAX_TRANSPORT_RETRY_TIME,
failedRetryCount = 0,
}: ScheduleTaskParams & { failedRetryCount?: number }): Promise<void> {
}: ScheduleTaskParams & { failedAt: number; failedRetryCount?: number }): Promise<void> {
raviteja83 marked this conversation as resolved.
Show resolved Hide resolved
HMSLogger.d(this.TAG, 'schedule: ', { category: TFC[category], error });

// First schedule call
Expand Down Expand Up @@ -113,8 +114,9 @@ export class RetryScheduler {
}
}

if (failedRetryCount >= maxFailedRetries || hasFailedDependency) {
error.description += `. [${TFC[category]}] Could not recover after ${failedRetryCount} tries`;
const timeElapsedSinceError = Date.now() - failedAt;
if (timeElapsedSinceError >= maxRetryTime || hasFailedDependency) {
error.description += `. [${TFC[category]}] Could not recover after ${timeElapsedSinceError} milliseconds`;

if (hasFailedDependency) {
error.description += ` Could not recover all of it's required dependencies - [${(dependencies as Array<TFC>)
Expand Down Expand Up @@ -144,7 +146,7 @@ export class RetryScheduler {
this.onStateChange(TransportState.Reconnecting, error);
}

const delay = this.getDelayForRetryCount(category, failedRetryCount);
const delay = this.getDelayForRetryCount(category);

HMSLogger.d(
this.TAG,
Expand All @@ -171,33 +173,37 @@ export class RetryScheduler {
if (changeState && this.inProgress.size === 0) {
this.onStateChange(originalState);
}
HMSLogger.d(this.TAG, `schedule: [${TFC[category]}] [failedRetryCount=${failedRetryCount}] Recovered ♻️`);
HMSLogger.d(
this.TAG,
`schedule: [${TFC[category]}] [failedRetryCount=${failedRetryCount}] Recovered ♻️ after ${timeElapsedSinceError}ms`,
);
} else {
await this.scheduleTask({
category,
error,
changeState,
task,
originalState,
maxFailedRetries,
maxRetryTime,
failedAt,
failedRetryCount: failedRetryCount + 1,
});
}
}

private getBaseDelayForTask(category: TFC, n: number) {
private getDelayForRetryCount(category: TFC) {
const jitter = category === TFC.JoinWSMessageFailed ? Math.random() * 2 : Math.random();
let delaySeconds = 0;
if (category === TFC.JoinWSMessageFailed) {
// linear backoff(2 + jitter for every retry)
return 2;
delaySeconds = 2 + jitter;
} else if (category === TFC.SignalDisconnect) {
delaySeconds = 1;
raviteja83 marked this conversation as resolved.
Show resolved Hide resolved
} else {
delaySeconds = 0;
raviteja83 marked this conversation as resolved.
Show resolved Hide resolved
}
// exponential backoff
return Math.pow(2, n);
}

private getDelayForRetryCount(category: TFC, n: number) {
const delay = this.getBaseDelayForTask(category, n);
const jitter = category === TFC.JoinWSMessageFailed ? Math.random() * 2 : Math.random();
return Math.round(Math.min(delay + jitter, MAX_TRANSPORT_RETRY_DELAY) * 1000);
return delaySeconds * 1000;
}

private async setTimeoutPromise<T>(task: () => Promise<T>, delay: number): Promise<T> {
Expand Down
4 changes: 0 additions & 4 deletions packages/hms-video-store/src/transport/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ import { ISignalEventsObserver } from '../signal/ISignalEventsObserver';
import JsonRpcSignal from '../signal/jsonrpc';
import {
ICE_DISCONNECTION_TIMEOUT,
MAX_TRANSPORT_RETRIES,
PROTOCOL_SPEC,
PROTOCOL_VERSION,
PUBLISH_STATS_PUSH_INTERVAL,
Expand Down Expand Up @@ -352,7 +351,6 @@ export default class HMSTransport {
error,
task,
originalState: this.state,
maxFailedRetries: MAX_TRANSPORT_RETRIES,
changeState: false,
});
} else {
Expand Down Expand Up @@ -923,7 +921,6 @@ export default class HMSTransport {
error: hmsError,
task,
originalState: TransportState.Joined,
maxFailedRetries: 3,
changeState: false,
});
} else {
Expand Down Expand Up @@ -1087,7 +1084,6 @@ export default class HMSTransport {
error,
task: this.retrySubscribeIceFailedTask,
originalState: TransportState.Joined,
maxFailedRetries: 1,
});
}
}
Expand Down
5 changes: 2 additions & 3 deletions packages/hms-video-store/src/utils/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,12 @@ export const API_DATA_CHANNEL = 'ion-sfu';
export const ANALYTICS_BUFFER_SIZE = 100;

/**
* Maximum number of retries that transport-layer will try
* Maximum time that transport-layer will try
* before giving up on the connection and returning a failure
*
* Refer https://100ms.atlassian.net/browse/HMS-2369
*/
export const MAX_TRANSPORT_RETRIES = 5;
export const MAX_TRANSPORT_RETRY_DELAY = 60;
export const MAX_TRANSPORT_RETRY_TIME = 60_000;

export const DEFAULT_SIGNAL_PING_TIMEOUT = 12_000;
export const DEFAULT_SIGNAL_PING_INTERVAL = 3_000;
Expand Down
Loading