Skip to content

Commit

Permalink
Add seeking support to the SubtitleExtractor
Browse files Browse the repository at this point in the history
SubtitleExtractor now uses an IndexSeekMap with a single seek point to
indicate that its output is seekable. More seek points are not needed
because SubtitleExtractor keeps all Cues in memory anyway. The extractor
is notified of a seek through its seek() method. Inside that method the
extractor saves seekTimeUs, and on the next call to read() it outputs
all cues that should be displayed at that time and later.

PiperOrigin-RevId: 395477127
  • Loading branch information
apodob authored and icbaker committed Sep 8, 2021
1 parent 87d2054 commit 837667d
Show file tree
Hide file tree
Showing 2 changed files with 182 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
import com.google.android.exoplayer2.extractor.Extractor;
import com.google.android.exoplayer2.extractor.ExtractorInput;
import com.google.android.exoplayer2.extractor.ExtractorOutput;
import com.google.android.exoplayer2.extractor.IndexSeekMap;
import com.google.android.exoplayer2.extractor.PositionHolder;
import com.google.android.exoplayer2.extractor.SeekMap;
import com.google.android.exoplayer2.extractor.TrackOutput;
import com.google.android.exoplayer2.text.Cue;
import com.google.android.exoplayer2.text.CueEncoder;
Expand All @@ -37,42 +37,56 @@
import com.google.android.exoplayer2.text.SubtitleOutputBuffer;
import com.google.android.exoplayer2.util.MimeTypes;
import com.google.android.exoplayer2.util.ParsableByteArray;
import com.google.android.exoplayer2.util.Util;
import com.google.common.primitives.Ints;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.util.ArrayList;
import java.util.List;
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;

/** Generic extractor for extracting subtitles from various subtitle formats. */
public class SubtitleExtractor implements Extractor {
@Retention(RetentionPolicy.SOURCE)
@IntDef({STATE_CREATED, STATE_INITIALIZED, STATE_EXTRACTING, STATE_FINISHED, STATE_RELEASED})
@IntDef({
STATE_CREATED,
STATE_INITIALIZED,
STATE_EXTRACTING,
STATE_SEEKING,
STATE_FINISHED,
STATE_RELEASED
})
private @interface State {}

/** The extractor has been created. */
private static final int STATE_CREATED = 0;
/** The extractor has been initialized. */
private static final int STATE_INITIALIZED = 1;
/** The extractor is reading from input and writing to output. */
/** The extractor is reading from the input and writing to the output. */
private static final int STATE_EXTRACTING = 2;
/** The extractor has finished. */
private static final int STATE_FINISHED = 3;
/** The extractor has received a seek() operation after it has already finished extracting. */
private static final int STATE_SEEKING = 3;
/** The extractor has finished extracting the input. */
private static final int STATE_FINISHED = 4;
/** The extractor has been released. */
private static final int STATE_RELEASED = 4;
private static final int STATE_RELEASED = 5;

/** Number of bytes requested per skip when the input length is unknown (see skipInput). */
private static final int DEFAULT_BUFFER_SIZE = 1024;

/** Decoder that turns the raw subtitle data into cues. */
private final SubtitleDecoder subtitleDecoder;
/** Encoder that serializes each list of decoded cues into the bytes of one output sample. */
private final CueEncoder cueEncoder;
/** Buffer whose capacity is checked against bytesRead while reading from the input. */
private final ParsableByteArray subtitleData;
/** Format passed to the track output in init(). */
private final Format format;
/** Event times of the decoded samples; element i belongs to samples.get(i). */
private final List<Long> timestamps;
/** Encoded cue samples kept in memory so they can be written again after a seek. */
private final List<ParsableByteArray> samples;

/** Output received in init(). */
private @MonotonicNonNull ExtractorOutput extractorOutput;
/** Track obtained from extractorOutput in init(); receives the cue samples. */
private @MonotonicNonNull TrackOutput trackOutput;
/** Compared with subtitleData.capacity() in readFromInput; presumably the bytes read so far. */
private int bytesRead;
/** Current position in the extractor's state machine (one of the STATE_* constants). */
@State private int state;
/** Time passed to the latest seek(), or C.TIME_UNSET if seek() has not been called. */
private long seekTimeUs;

/**
* @param subtitleDecoder The decoder used for decoding the subtitle data. The extractor will
Expand All @@ -89,7 +103,10 @@ public SubtitleExtractor(SubtitleDecoder subtitleDecoder, Format format) {
.setSampleMimeType(MimeTypes.TEXT_EXOPLAYER_CUES)
.setCodecs(format.sampleMimeType)
.build();
timestamps = new ArrayList<>();
samples = new ArrayList<>();
state = STATE_CREATED;
seekTimeUs = C.TIME_UNSET;
}

@Override
Expand All @@ -106,7 +123,11 @@ public void init(ExtractorOutput output) {
extractorOutput = output;
trackOutput = extractorOutput.track(/* id= */ 0, C.TRACK_TYPE_TEXT);
extractorOutput.endTracks();
extractorOutput.seekMap(new SeekMap.Unseekable(C.TIME_UNSET));
extractorOutput.seekMap(
new IndexSeekMap(
/* positions= */ new long[] {0},
/* timesUs= */ new long[] {0},
/* durationUs= */ C.TIME_UNSET));
trackOutput.format(format);
state = STATE_INITIALIZED;
}
Expand All @@ -125,7 +146,15 @@ public int read(ExtractorInput input, PositionHolder seekPosition) throws IOExce
if (state == STATE_EXTRACTING) {
boolean inputFinished = readFromInput(input);
if (inputFinished) {
decodeAndWriteToOutput();
decode();
writeToOutput();
state = STATE_FINISHED;
}
}
if (state == STATE_SEEKING) {
boolean inputFinished = skipInput(input);
if (inputFinished) {
writeToOutput();
state = STATE_FINISHED;
}
}
Expand All @@ -138,6 +167,13 @@ public int read(ExtractorInput input, PositionHolder seekPosition) throws IOExce
/**
 * Remembers the requested seek time and updates the extractor state accordingly.
 *
 * <p>If the extractor is still extracting, it returns to the initialized state. If it has
 * already finished, it enters the seeking state. Any other permitted state is left unchanged.
 *
 * @param position The seek position in the input. Not used by this method.
 * @param timeUs The seek time in microseconds, stored in {@code seekTimeUs}.
 */
@Override
public void seek(long position, long timeUs) {
  boolean createdOrReleased = state == STATE_CREATED || state == STATE_RELEASED;
  checkState(!createdOrReleased);
  seekTimeUs = timeUs;
  if (state == STATE_EXTRACTING) {
    state = STATE_INITIALIZED;
  } else if (state == STATE_FINISHED) {
    state = STATE_SEEKING;
  }
}

/** Releases the extractor's resources, including the {@link SubtitleDecoder}. */
Expand All @@ -150,6 +186,15 @@ public void release() {
state = STATE_RELEASED;
}

/**
 * Skips input data and returns whether the input has been fully skipped.
 *
 * <p>When the input length is known, a skip of that many bytes is requested (the length is
 * converted with {@code Ints.checkedCast}). Otherwise {@code DEFAULT_BUFFER_SIZE} bytes are
 * requested.
 *
 * @param input The input to skip.
 * @return Whether the skip returned {@code C.RESULT_END_OF_INPUT}.
 * @throws IOException If skipping the input fails.
 */
private boolean skipInput(ExtractorInput input) throws IOException {
  long inputLength = input.getLength();
  int bytesToSkip =
      inputLength == C.LENGTH_UNSET ? DEFAULT_BUFFER_SIZE : Ints.checkedCast(inputLength);
  int skipResult = input.skip(bytesToSkip);
  return skipResult == C.RESULT_END_OF_INPUT;
}

/** Returns whether reading has been finished. */
private boolean readFromInput(ExtractorInput input) throws IOException {
if (subtitleData.capacity() == bytesRead) {
Expand All @@ -163,9 +208,8 @@ private boolean readFromInput(ExtractorInput input) throws IOException {
return readResult == C.RESULT_END_OF_INPUT;
}

/** Decodes subtitle data and writes samples to the output. */
private void decodeAndWriteToOutput() throws IOException {
checkStateNotNull(this.trackOutput);
/** Decodes the subtitle data and stores the samples in the memory of the extractor. */
private void decode() throws IOException {
try {
@Nullable SubtitleInputBuffer inputBuffer = subtitleDecoder.dequeueInputBuffer();
while (inputBuffer == null) {
Expand All @@ -183,13 +227,8 @@ private void decodeAndWriteToOutput() throws IOException {
for (int i = 0; i < outputBuffer.getEventTimeCount(); i++) {
List<Cue> cues = outputBuffer.getCues(outputBuffer.getEventTime(i));
byte[] cuesSample = cueEncoder.encode(cues);
trackOutput.sampleData(new ParsableByteArray(cuesSample), cuesSample.length);
trackOutput.sampleMetadata(
/* timeUs= */ outputBuffer.getEventTime(i),
/* flags= */ C.BUFFER_FLAG_KEY_FRAME,
/* size= */ cuesSample.length,
/* offset= */ 0,
/* cryptoData= */ null);
timestamps.add(outputBuffer.getEventTime(i));
samples.add(new ParsableByteArray(cuesSample));
}
outputBuffer.release();
} catch (InterruptedException e) {
Expand All @@ -199,4 +238,26 @@ private void decodeAndWriteToOutput() throws IOException {
throw ParserException.createForMalformedContainer("SubtitleDecoder failed.", e);
}
}

/**
 * Writes the samples held in memory to the track output.
 *
 * <p>If no seek time is set ({@code seekTimeUs == C.TIME_UNSET}), all samples are written.
 * Otherwise writing starts at the index that {@code Util.binarySearchFloor} returns for
 * {@code seekTimeUs} (inclusive, kept in bounds) and continues to the last sample. Every sample
 * is written as a key frame with its stored timestamp.
 */
private void writeToOutput() {
  checkStateNotNull(this.trackOutput);
  int sampleCount = samples.size();
  checkState(timestamps.size() == sampleCount);
  int startIndex = 0;
  if (seekTimeUs != C.TIME_UNSET) {
    startIndex =
        Util.binarySearchFloor(
            timestamps, seekTimeUs, /* inclusive= */ true, /* stayInBounds= */ true);
  }
  for (int sampleIndex = startIndex; sampleIndex < sampleCount; sampleIndex++) {
    ParsableByteArray cuesSample = samples.get(sampleIndex);
    // The same buffer can be written again after a later seek, so always start from its beginning.
    cuesSample.setPosition(0);
    int sampleSize = cuesSample.getData().length;
    trackOutput.sampleData(cuesSample, sampleSize);
    trackOutput.sampleMetadata(
        /* timeUs= */ timestamps.get(sampleIndex),
        /* flags= */ C.BUFFER_FLAG_KEY_FRAME,
        /* size= */ sampleSize,
        /* offset= */ 0,
        /* cryptoData= */ null);
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,24 +36,25 @@
/** Tests for {@link SubtitleExtractor}. */
@RunWith(AndroidJUnit4.class)
public class SubtitleExtractorTest {
private static final String TEST_DATA =
"WEBVTT\n"
+ "\n"
+ "00:00.000 --> 00:01.234\n"
+ "This is the first subtitle.\n"
+ "\n"
+ "00:02.345 --> 00:03.456\n"
+ "This is the second subtitle.\n"
+ "\n"
+ "00:02.600 --> 00:04.567\n"
+ "This is the third subtitle.";

@Test
public void extractor_outputsCues() throws Exception {
String testData =
"WEBVTT\n"
+ "\n"
+ "00:00.000 --> 00:01.234\n"
+ "This is the first subtitle.\n"
+ "\n"
+ "00:02.345 --> 00:03.456\n"
+ "This is the second subtitle.\n"
+ "\n"
+ "00:02.600 --> 00:04.567\n"
+ "This is the third subtitle.";
CueDecoder decoder = new CueDecoder();
FakeExtractorOutput output = new FakeExtractorOutput();
FakeExtractorInput input =
new FakeExtractorInput.Builder()
.setData(Util.getUtf8Bytes(testData))
.setData(Util.getUtf8Bytes(TEST_DATA))
.setSimulatePartialReads(true)
.build();
SubtitleExtractor extractor =
Expand Down Expand Up @@ -95,6 +96,96 @@ public void extractor_outputsCues() throws Exception {
assertThat(cues5).isEmpty();
}

/**
 * Extracts the whole input, seeks to 2.445 s, reads again, and verifies that the samples written
 * after the seek start at the 2.345 s event.
 */
@Test
public void extractor_seekAfterExtracting_outputsCues() throws Exception {
  long seekTimeUs = 2_445_000L;
  long[] expectedSampleTimesUs = {2_345_000L, 2_600_000L, 3_456_000L, 4_567_000L};
  CueDecoder cueDecoder = new CueDecoder();
  FakeExtractorOutput extractorOutput = new FakeExtractorOutput();
  byte[] subtitleBytes = Util.getUtf8Bytes(TEST_DATA);
  FakeExtractorInput extractorInput =
      new FakeExtractorInput.Builder()
          .setData(subtitleBytes)
          .setSimulatePartialReads(true)
          .build();
  Format vttFormat = new Format.Builder().setSampleMimeType(MimeTypes.TEXT_VTT).build();
  SubtitleExtractor extractor = new SubtitleExtractor(new WebvttDecoder(), vttFormat);
  extractor.init(extractorOutput);
  FakeTrackOutput track = extractorOutput.trackOutputs.get(0);

  // Extract everything first, so that the seek arrives after extraction has finished.
  int readResult;
  do {
    readResult = extractor.read(extractorInput, null);
  } while (readResult != Extractor.RESULT_END_OF_INPUT);
  int seekPosition = (int) extractorOutput.seekMap.getSeekPoints(seekTimeUs).first.position;
  extractor.seek(seekPosition, seekTimeUs);
  extractorInput.setPosition(seekPosition);
  track.clear();
  do {
    readResult = extractor.read(extractorInput, null);
  } while (readResult != Extractor.RESULT_END_OF_INPUT);

  assertThat(track.lastFormat.sampleMimeType).isEqualTo(MimeTypes.TEXT_EXOPLAYER_CUES);
  assertThat(track.lastFormat.codecs).isEqualTo(MimeTypes.TEXT_VTT);
  assertThat(track.getSampleCount()).isEqualTo(4);
  // Check sample timestamps.
  for (int i = 0; i < expectedSampleTimesUs.length; i++) {
    assertThat(track.getSampleTimeUs(i)).isEqualTo(expectedSampleTimesUs[i]);
  }
  // Check sample content.
  List<Cue> secondOnly = cueDecoder.decode(track.getSampleData(0));
  assertThat(secondOnly).hasSize(1);
  assertThat(secondOnly.get(0).text.toString()).isEqualTo("This is the second subtitle.");
  List<Cue> secondAndThird = cueDecoder.decode(track.getSampleData(1));
  assertThat(secondAndThird).hasSize(2);
  assertThat(secondAndThird.get(0).text.toString()).isEqualTo("This is the second subtitle.");
  assertThat(secondAndThird.get(1).text.toString()).isEqualTo("This is the third subtitle.");
  List<Cue> thirdOnly = cueDecoder.decode(track.getSampleData(2));
  assertThat(thirdOnly).hasSize(1);
  assertThat(thirdOnly.get(0).text.toString()).isEqualTo("This is the third subtitle.");
  List<Cue> noCues = cueDecoder.decode(track.getSampleData(3));
  assertThat(noCues).isEmpty();
}

/**
 * Performs a single read, seeks to 2.345 s before the input is exhausted, reads to the end, and
 * verifies that the samples written after the seek start at the 2.345 s event.
 */
@Test
public void extractor_seekBetweenReads_outputsCues() throws Exception {
  long seekTimeUs = 2_345_000L;
  long[] expectedSampleTimesUs = {2_345_000L, 2_600_000L, 3_456_000L, 4_567_000L};
  CueDecoder cueDecoder = new CueDecoder();
  FakeExtractorOutput extractorOutput = new FakeExtractorOutput();
  byte[] subtitleBytes = Util.getUtf8Bytes(TEST_DATA);
  FakeExtractorInput extractorInput =
      new FakeExtractorInput.Builder()
          .setData(subtitleBytes)
          .setSimulatePartialReads(true)
          .build();
  Format vttFormat = new Format.Builder().setSampleMimeType(MimeTypes.TEXT_VTT).build();
  SubtitleExtractor extractor = new SubtitleExtractor(new WebvttDecoder(), vttFormat);
  extractor.init(extractorOutput);
  FakeTrackOutput track = extractorOutput.trackOutputs.get(0);

  // A single partial read must not reach the end of the input, so the seek happens mid-extraction.
  int firstReadResult = extractor.read(extractorInput, null);
  assertThat(firstReadResult).isNotEqualTo(Extractor.RESULT_END_OF_INPUT);
  int seekPosition = (int) extractorOutput.seekMap.getSeekPoints(seekTimeUs).first.position;
  extractor.seek(seekPosition, seekTimeUs);
  extractorInput.setPosition(seekPosition);
  track.clear();
  int readResult;
  do {
    readResult = extractor.read(extractorInput, null);
  } while (readResult != Extractor.RESULT_END_OF_INPUT);

  assertThat(track.lastFormat.sampleMimeType).isEqualTo(MimeTypes.TEXT_EXOPLAYER_CUES);
  assertThat(track.lastFormat.codecs).isEqualTo(MimeTypes.TEXT_VTT);
  assertThat(track.getSampleCount()).isEqualTo(4);
  // Check sample timestamps.
  for (int i = 0; i < expectedSampleTimesUs.length; i++) {
    assertThat(track.getSampleTimeUs(i)).isEqualTo(expectedSampleTimesUs[i]);
  }
  // Check sample content.
  List<Cue> secondOnly = cueDecoder.decode(track.getSampleData(0));
  assertThat(secondOnly).hasSize(1);
  assertThat(secondOnly.get(0).text.toString()).isEqualTo("This is the second subtitle.");
  List<Cue> secondAndThird = cueDecoder.decode(track.getSampleData(1));
  assertThat(secondAndThird).hasSize(2);
  assertThat(secondAndThird.get(0).text.toString()).isEqualTo("This is the second subtitle.");
  assertThat(secondAndThird.get(1).text.toString()).isEqualTo("This is the third subtitle.");
  List<Cue> thirdOnly = cueDecoder.decode(track.getSampleData(2));
  assertThat(thirdOnly).hasSize(1);
  assertThat(thirdOnly.get(0).text.toString()).isEqualTo("This is the third subtitle.");
  List<Cue> noCues = cueDecoder.decode(track.getSampleData(3));
  assertThat(noCues).isEmpty();
}

@Test
public void read_withoutInit_fails() {
FakeExtractorInput input = new FakeExtractorInput.Builder().setData(new byte[0]).build();
Expand Down

0 comments on commit 837667d

Please sign in to comment.