From e6d8d3e1a763071bf2685750e1d04200daf528aa Mon Sep 17 00:00:00 2001
From: ben
Date: Fri, 10 Nov 2023 12:02:11 +0200
Subject: [PATCH] fix(sdk-trace-base): Export processed spans while exporter failed

While the exporter deals with a batch of spans, new spans may come in
and wait to be exported. As previously implemented, a successful export
would notice these waiting spans and trigger a renewed timer check, but
an unsuccessful export would not. As a result, prior to this commit, a
failing export could end up in a state where no further spans were ever
exported.

This is due to the behaviour of `_addToBuffer` when the queue is full:
imagine an export that fails after a long timeout (because of, for
instance, network troubles). While the connection waits to time out,
the span queue fills up. Once it is completely full, no further calls
to recheck the timer are made. On its own, this behaviour is fine;
combined with the bug patched here, it leads to a rather confusing
situation in which the exporter never attempts another export.
---
 .../src/export/BatchSpanProcessorBase.ts      |  2 +-
 .../export/BatchSpanProcessorBase.test.ts     | 51 +++++++++++++++++++
 2 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/packages/opentelemetry-sdk-trace-base/src/export/BatchSpanProcessorBase.ts b/packages/opentelemetry-sdk-trace-base/src/export/BatchSpanProcessorBase.ts
index fef5c80dc0f..f069aac0c7d 100644
--- a/packages/opentelemetry-sdk-trace-base/src/export/BatchSpanProcessorBase.ts
+++ b/packages/opentelemetry-sdk-trace-base/src/export/BatchSpanProcessorBase.ts
@@ -221,7 +221,7 @@ export abstract class BatchSpanProcessorBase
     const flush = () => {
       this._isExporting = true;
       this._flushOneBatch()
-        .then(() => {
+        .finally(() => {
           this._isExporting = false;
           if (this._finishedSpans.length > 0) {
             this._clearTimer();
diff --git a/packages/opentelemetry-sdk-trace-base/test/common/export/BatchSpanProcessorBase.test.ts b/packages/opentelemetry-sdk-trace-base/test/common/export/BatchSpanProcessorBase.test.ts
index 83fb3ebe44f..251971385c1 100644
--- a/packages/opentelemetry-sdk-trace-base/test/common/export/BatchSpanProcessorBase.test.ts
+++ b/packages/opentelemetry-sdk-trace-base/test/common/export/BatchSpanProcessorBase.test.ts
@@ -387,6 +387,57 @@ describe('BatchSpanProcessorBase', () => {
     });
   });
 
+  it('should still export when previously failed', async () => {
+    // The scenario is made of several parts:
+    // 1. The exporter tries to export some spans
+    // 2. While it does so, more spans are processed
+    // 3. The exporter fails
+    // 4. Spans arriving during step 2 should be exported
+
+    let firstCall = true;
+    const fillingExportStub = sinon
+      .stub(exporter, 'export')
+      .callsFake((spans, cb) => {
+        // The first time export is called, add some spans to the processor.
+        // Any other time, call through. We don't simply restore the stub
+        // so we can count the calls with `sinon.assert`.
+        if (!firstCall) {
+          return fillingExportStub.wrappedMethod.call(exporter, spans, cb);
+        }
+
+        // Step 2: During export, add another span
+        firstCall = false;
+        processSpan();
+
+        return fillingExportStub.wrappedMethod.call(exporter, spans, () => {
+          // Step 3: Mock failure
+          cb({
+            code: ExportResultCode.FAILED,
+          });
+        });
+      });
+
+    const clock = sinon.useFakeTimers();
+
+    // Step 1: Export a span
+    processSpan();
+    await clock.runAllAsync();
+
+    clock.restore();
+    fillingExportStub.restore();
+
+    // Step 4: Make sure all spans were processed
+    assert.equal(exporter['_finishedSpans'].length, 2);
+    assert.equal(processor['_finishedSpans'].length, 0);
+    sinon.assert.calledTwice(fillingExportStub);
+
+    function processSpan() {
+      const span = createSampledSpan('test');
+      processor.onStart(span, ROOT_CONTEXT);
+      processor.onEnd(span);
+    }
+  });
+
   it('should wait for pending resource on flush', async () => {
     const tracer = new BasicTracerProvider({
       resource: new Resource(
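
For readers less familiar with the Promise semantics behind the one-line change above, here is a minimal, self-contained TypeScript sketch. It is illustration only: it is not part of the patch and not the SDK's API. `FakeProcessor`, `flushOneBatch`, `pending`, `rechecks`, and `demo` are invented names, and the real `BatchSpanProcessorBase` re-arms an export timer rather than counting rechecks. The sketch only shows why a callback attached with `.then` never runs after a failed export, while one attached with `.finally` runs either way.

// Minimal sketch (illustration only). `FakeProcessor` mimics the relevant
// shape of the batch processor: an export that may fail, a span queuing up
// while the export is in flight, and a "recheck" step that should re-arm
// the export timer once the export settles.

class FakeProcessor {
  pending: string[] = [];
  rechecks = 0;

  constructor(
    private shouldFail: boolean,
    private useFinally: boolean
  ) {}

  // Stand-in for a flush-one-batch step: resolves on success, rejects on failure.
  private flushOneBatch(): Promise<void> {
    this.pending = [];
    return this.shouldFail
      ? Promise.reject(new Error('export failed'))
      : Promise.resolve();
  }

  flush(): Promise<void> {
    const recheck = () => {
      // In the real processor this is where the timer would be cleared and
      // restarted when spans arrived during the export.
      if (this.pending.length > 0) {
        this.rechecks++;
      }
    };

    const exportPromise = this.flushOneBatch();
    // A span arrives while the export is still in flight:
    this.pending.push('span-arrived-during-export');

    const chained = this.useFinally
      ? exportPromise.finally(recheck) // runs on success AND failure
      : exportPromise.then(recheck);   // skipped entirely on failure
    // Swallow the rejection so the demo does not crash; real code still
    // needs its own error handling downstream.
    return chained.catch(() => {});
  }
}

async function demo(): Promise<void> {
  const withThen = new FakeProcessor(true, false);
  await withThen.flush();
  // Prints 0: the waiting span never triggers a recheck.
  console.log('then    + failed export -> rechecks:', withThen.rechecks);

  const withFinally = new FakeProcessor(true, true);
  await withFinally.flush();
  // Prints 1: the recheck happens despite the failure.
  console.log('finally + failed export -> rechecks:', withFinally.rechecks);
}

demo();

One caveat baked into the sketch: `.finally` passes the original rejection through, so something downstream still has to handle it; here it is swallowed with `.catch(() => {})` purely to keep the demo from crashing.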