Skip to content

Commit

Permalink
Merge pull request #30066: [flink] Fix watermark generation for empty…
Browse files Browse the repository at this point in the history
… UnboundedSource
  • Loading branch information
je-ik authored Jan 23, 2024
2 parents cffeff7 + 5865037 commit 90e7c9e
Show file tree
Hide file tree
Showing 5 changed files with 154 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ public CompletableFuture<Void> isAvailable() {
.thenAccept(ignored -> {});
} else if (noMoreSplits) {
// All the splits have been read, wait for idle timeout.
LOG.debug("All splits have been read, waiting for shutdown timeout {}", idleTimeoutMs);
checkIdleTimeoutAndMaybeStartCountdown();
return idleTimeoutFuture;
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.apache.beam.sdk.io.Source;
import org.apache.beam.sdk.io.UnboundedSource;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.transforms.windowing.PaneInfo;
import org.apache.beam.sdk.util.WindowedValue;
Expand Down Expand Up @@ -131,15 +132,23 @@ public InputStatus pollNext(ReaderOutput<WindowedValue<ValueWithRecordId<T>>> ou
if (reader != null) {
emitRecord(reader, output);
return InputStatus.MORE_AVAILABLE;
} else if (noMoreSplits()) {
LOG.trace("No more splits.");
} else if (noMoreSplits() && isEndOfAllReaders()) {
LOG.info("No more splits and no reader available. Terminating consumption.");
return InputStatus.END_OF_INPUT;
} else {
LOG.trace("No data available for now.");
return InputStatus.NOTHING_AVAILABLE;
}
}

/**
 * Tells whether the lowest watermark among all current readers has reached {@link
 * BoundedWindow#TIMESTAMP_MAX_VALUE}.
 *
 * <p>Every reader's watermark is queried. When there is no reader at all, the minimum is taken to
 * be {@link BoundedWindow#TIMESTAMP_MIN_VALUE}, so the method returns {@code false}.
 *
 * @return {@code true} iff at least one reader exists and no reader reports a watermark below the
 *     maximum timestamp
 */
private boolean isEndOfAllReaders() {
  final long lowestWatermarkMillis =
      allReaders().values().stream()
          .map(readerHolder -> asUnbounded(readerHolder.reader).getWatermark())
          .mapToLong(readerWatermark -> readerWatermark.getMillis())
          .min()
          // No readers: pretend the watermark has not moved at all.
          .orElse(BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis());
  final long endOfTimeMillis = BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis();
  return lowestWatermarkMillis >= endOfTimeMillis;
}

/**
* Check whether there are data available from alive readers. If not, set a future and wait for
* the periodically running wake-up task to complete that future when the check interval passes.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.runners.flink.translation.wrappers.streaming.io.source;

import java.io.IOException;
import java.io.Serializable;
import java.util.Collections;
import java.util.List;
import java.util.NoSuchElementException;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.SerializableCoder;
import org.apache.beam.sdk.io.UnboundedSource;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.checkerframework.checker.nullness.qual.Nullable;
import org.joda.time.Instant;

/**
 * An {@link UnboundedSource} whose readers never produce an element.
 *
 * <p>The watermark reported by the readers starts at {@link BoundedWindow#TIMESTAMP_MIN_VALUE} and
 * can be replaced at any time with {@link #setWatermark(Instant)}; readers look the value up on
 * the source each time they are asked, so already created readers observe the change.
 *
 * @param <T> element type the source is declared to produce
 */
public class EmptyUnboundedSource<T>
    extends UnboundedSource<T, EmptyUnboundedSource.DummyCheckpointMark> {

  /** Watermark returned by the readers of this source. */
  Instant watermark = BoundedWindow.TIMESTAMP_MIN_VALUE;

  /** Checkpoint mark without any state; finalizing it does nothing. */
  public static class DummyCheckpointMark implements UnboundedSource.CheckpointMark, Serializable {
    @Override
    public void finalizeCheckpoint() {
      // Nothing to finalize.
    }
  }

  /**
   * Reader that never has a current element. It is deliberately an inner (non-static) class: the
   * watermark and the current source are taken from the enclosing source instance.
   */
  private class EmptyReader extends UnboundedReader<T> {

    @Override
    public boolean start() throws IOException {
      return advance();
    }

    @Override
    public boolean advance() throws IOException {
      // There is never a next element.
      return false;
    }

    @Override
    public T getCurrent() throws NoSuchElementException {
      throw new NoSuchElementException();
    }

    @Override
    public Instant getCurrentTimestamp() throws NoSuchElementException {
      throw new NoSuchElementException();
    }

    @Override
    public Instant getWatermark() {
      // Read from the enclosing source on every call.
      return watermark;
    }

    @Override
    public CheckpointMark getCheckpointMark() {
      return new DummyCheckpointMark();
    }

    @Override
    public UnboundedSource<T, ?> getCurrentSource() {
      return EmptyUnboundedSource.this;
    }

    @Override
    public void close() {
      // No resources are held.
    }
  }

  /**
   * Replaces the watermark reported by the readers of this source.
   *
   * @param watermark the new watermark
   */
  public void setWatermark(Instant watermark) {
    this.watermark = watermark;
  }

  /**
   * Returns this source as the only split, regardless of {@code desiredNumSplits}.
   *
   * @param desiredNumSplits ignored
   * @param options ignored
   * @return a single-element list containing this instance
   */
  @Override
  public List<? extends EmptyUnboundedSource<T>> split(
      int desiredNumSplits, PipelineOptions options) throws Exception {
    return Collections.singletonList(this);
  }

  /**
   * Creates a reader that never returns data; both arguments are ignored.
   *
   * @param options ignored
   * @param checkpointMark ignored
   * @return a new empty reader bound to this source
   */
  @Override
  public UnboundedReader<T> createReader(
      PipelineOptions options, @Nullable DummyCheckpointMark checkpointMark) throws IOException {
    return new EmptyReader();
  }

  /** Returns a {@link SerializableCoder} for {@link DummyCheckpointMark}. */
  @Override
  public Coder<DummyCheckpointMark> getCheckpointMarkCoder() {
    return SerializableCoder.of(DummyCheckpointMark.class);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ScheduledExecutorService;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import javax.annotation.Nullable;
import org.apache.beam.runners.flink.translation.wrappers.streaming.io.TestCountingSource;
import org.apache.beam.sdk.io.Source;
Expand Down Expand Up @@ -294,6 +296,12 @@ protected List<FlinkSourceSplit<KV<Integer, Integer>>> createSplits(
return splitList;
}

/**
 * Builds {@code numSplits} splits, numbered from {@code 0} to {@code numSplits - 1}, each wrapping
 * its own new {@link EmptyUnboundedSource}.
 *
 * @param numSplits number of splits to create
 * @param <T> element type of the created splits
 * @return the list of created splits, in ascending number order
 */
protected <T> List<FlinkSourceSplit<T>> createEmptySplits(int numSplits) {
  final List<FlinkSourceSplit<T>> emptySplits =
      IntStream.range(0, numSplits)
          .mapToObj(
              splitNumber -> {
                // A separate source instance per split.
                final EmptyUnboundedSource<T> emptySource = new EmptyUnboundedSource<T>();
                return new FlinkSourceSplit<T>(splitNumber, emptySource);
              })
          .collect(Collectors.toList());
  return emptySplits;
}

protected void verifyBeamReaderClosed(List<FlinkSourceSplit<KV<Integer, Integer>>> splits) {
splits.forEach(
split -> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,13 @@
import java.util.function.Function;
import org.apache.beam.runners.flink.FlinkPipelineOptions;
import org.apache.beam.runners.flink.translation.wrappers.streaming.io.TestCountingSource;
import org.apache.beam.runners.flink.translation.wrappers.streaming.io.source.EmptyUnboundedSource;
import org.apache.beam.runners.flink.translation.wrappers.streaming.io.source.FlinkSourceReaderTestBase;
import org.apache.beam.runners.flink.translation.wrappers.streaming.io.source.FlinkSourceSplit;
import org.apache.beam.runners.flink.translation.wrappers.streaming.io.source.SourceTestCompat.TestMetricGroup;
import org.apache.beam.sdk.io.Source;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.ValueWithRecordId;
Expand Down Expand Up @@ -222,6 +224,37 @@ public void testWatermark() throws Exception {
}
}

/**
 * Checks that a reader over two empty sources keeps reporting {@code NOTHING_AVAILABLE} after all
 * splits were assigned, and reports {@code END_OF_INPUT} only once both sources have their
 * watermark at {@link BoundedWindow#TIMESTAMP_MAX_VALUE}.
 */
@Test
public void testWatermarkOnEmptySource() throws Exception {
  final int pollsPerPhase = 4;
  ManuallyTriggeredScheduledExecutorService scheduler =
      new ManuallyTriggeredScheduledExecutorService();
  try (FlinkUnboundedSourceReader<KV<Integer, Integer>> sourceReader =
      (FlinkUnboundedSourceReader<KV<Integer, Integer>>) createReader(scheduler, -1L)) {
    List<FlinkSourceSplit<KV<Integer, Integer>>> emptySplits = createEmptySplits(2);
    sourceReader.start();
    sourceReader.addSplits(emptySplits);
    sourceReader.notifyNoMoreSplits();

    // Neither source has moved its watermark yet: nothing to read, but not finished either.
    for (int poll = 0; poll < pollsPerPhase; poll++) {
      assertEquals(InputStatus.NOTHING_AVAILABLE, sourceReader.pollNext(null));
    }

    // Advance only the first source to the end of time.
    EmptyUnboundedSource<KV<Integer, Integer>> firstSource =
        (EmptyUnboundedSource<KV<Integer, Integer>>) emptySplits.get(0).getBeamSplitSource();
    firstSource.setWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);

    // The second source still holds the reader back.
    for (int poll = 0; poll < pollsPerPhase; poll++) {
      assertEquals(InputStatus.NOTHING_AVAILABLE, sourceReader.pollNext(null));
    }

    // Advance the second source to the end of time as well.
    EmptyUnboundedSource<KV<Integer, Integer>> secondSource =
        (EmptyUnboundedSource<KV<Integer, Integer>>) emptySplits.get(1).getBeamSplitSource();
    secondSource.setWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);

    assertEquals(InputStatus.END_OF_INPUT, sourceReader.pollNext(null));
  }
}

@Test
public void testPendingBytesMetric() throws Exception {
ManuallyTriggeredScheduledExecutorService executor =
Expand Down

0 comments on commit 90e7c9e

Please sign in to comment.