From e6dacf7ca5c88ffa41680007d663767496547870 Mon Sep 17 00:00:00 2001 From: Alex Gherghisan Date: Thu, 2 May 2024 06:07:17 +0000 Subject: [PATCH] feat: add proving retries --- .../prover-pool/memory-proving-queue.test.ts | 17 +++++++++++++-- .../src/prover-pool/memory-proving-queue.ts | 21 +++++++++++++++++-- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/yarn-project/prover-client/src/prover-pool/memory-proving-queue.test.ts b/yarn-project/prover-client/src/prover-pool/memory-proving-queue.test.ts index b9520f333a5..dc3d77ab7dc 100644 --- a/yarn-project/prover-client/src/prover-pool/memory-proving-queue.test.ts +++ b/yarn-project/prover-client/src/prover-pool/memory-proving-queue.test.ts @@ -45,14 +45,27 @@ describe('MemoryProvingQueue', () => { await expect(promise).resolves.toEqual(new RootParityInput(proof, vk, publicInputs)); }); - it('notifies of errors', async () => { + it('retries failed jobs', async () => { const inputs = makeBaseParityInputs(); - const promise = queue.getBaseParityProof(inputs); + void queue.getBaseParityProof(inputs); + const job = await queue.getProvingJob(); expect(job?.request.inputs).toEqual(inputs); const error = new Error('test error'); + await queue.rejectProvingJob(job!.id, error); + await expect(queue.getProvingJob()).resolves.toEqual(job); + }); + + it('notifies errors', async () => { + const promise = queue.getBaseParityProof(makeBaseParityInputs()); + + const error = new Error('test error'); + await queue.rejectProvingJob((await queue.getProvingJob())!.id, error); + await queue.rejectProvingJob((await queue.getProvingJob())!.id, error); + await queue.rejectProvingJob((await queue.getProvingJob())!.id, error); + await expect(promise).rejects.toEqual(error); }); }); diff --git a/yarn-project/prover-client/src/prover-pool/memory-proving-queue.ts b/yarn-project/prover-client/src/prover-pool/memory-proving-queue.ts index 88afa9859ed..c3e5194f027 100644 --- a/yarn-project/prover-client/src/prover-pool/memory-proving-queue.ts +++ b/yarn-project/prover-client/src/prover-pool/memory-proving-queue.ts @@ -33,8 +33,11 @@ type ProvingJobWithResolvers = { id: string; request: T; signal?: AbortSignal; + attempts: number; } & PromiseWithResolvers>; +const MAX_RETRIES = 3; + export class MemoryProvingQueue implements CircuitProver, ProvingJobSource { private jobId = 0; private log = createDebugLogger('aztec:prover-client:prover-pool:queue'); @@ -95,7 +98,18 @@ export class MemoryProvingQueue implements CircuitProver, ProvingJobSource { return Promise.resolve(); } - job.reject(err); + if (job.attempts < MAX_RETRIES) { + job.attempts++; + this.log.warn( + `Job id=${job.id} type=${ProvingRequestType[job.request.type]} failed with error: ${err}. Retry ${ + job.attempts + }/${MAX_RETRIES}`, + ); + this.queue.put(job); + } else { + this.log.error(`Job id=${job.id} type=${ProvingRequestType[job.request.type]} failed with error: ${err}`); + job.reject(err); + } return Promise.resolve(); } @@ -111,13 +125,16 @@ export class MemoryProvingQueue implements CircuitProver, ProvingJobSource { promise, resolve, reject, + attempts: 1, }; if (signal) { signal.addEventListener('abort', () => reject(new AbortedError('Operation has been aborted'))); } - this.log.info(`Adding ${ProvingRequestType[request.type]} proving job to queue`); + this.log.info( + `Adding id=${item.id} type=${ProvingRequestType[request.type]} proving job to queue depth=${this.queue.length()}`, + ); // TODO (alexg) remove the `any` if (!this.queue.put(item as any)) { throw new Error();