From 4d6b008ef664b053133d6b22ce5bc6ff4f8dd19e Mon Sep 17 00:00:00 2001 From: ojw28 Date: Wed, 3 Oct 2018 22:15:11 +0100 Subject: [PATCH] Merge pull request #4582 from szaboa/feature/4306_srt_position_tags #4306 - Extract tags from SubRip subtitles, add support for alignment --- .../exoplayer2/text/subrip/SubripDecoder.java | 158 +++++++++++++++++- .../src/test/assets/subrip/typical_with_tags | 56 +++++++ .../text/subrip/SubripDecoderTest.java | 89 ++++++++++ 3 files changed, 300 insertions(+), 3 deletions(-) create mode 100644 library/core/src/test/assets/subrip/typical_with_tags diff --git a/library/core/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java b/library/core/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java index 5598e063a6c..182f1cf4b0b 100644 --- a/library/core/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java +++ b/library/core/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java @@ -15,7 +15,9 @@ */ package com.google.android.exoplayer2.text.subrip; +import android.support.annotation.StringDef; import android.text.Html; +import android.text.Layout; import android.text.Spanned; import android.text.TextUtils; import com.google.android.exoplayer2.text.Cue; @@ -23,6 +25,9 @@ import com.google.android.exoplayer2.util.Log; import com.google.android.exoplayer2.util.LongArray; import com.google.android.exoplayer2.util.ParsableByteArray; + +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; import java.util.ArrayList; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -38,6 +43,33 @@ public final class SubripDecoder extends SimpleSubtitleDecoder { private static final Pattern SUBRIP_TIMING_LINE = Pattern.compile("\\s*(" + SUBRIP_TIMECODE + ")\\s*-->\\s*(" + SUBRIP_TIMECODE + ")?\\s*"); + private static final Pattern SUBRIP_TAG_PATTERN = Pattern.compile("\\{\\\\.*?\\}"); + private static final String 
SUBRIP_ALIGNMENT_TAG = "\\{\\\\an[1-9]\\}"; + + private static final float DEFAULT_START_FRACTION = 0.08f; + private static final float DEFAULT_END_FRACTION = 1 - DEFAULT_START_FRACTION; + private static final float DEFAULT_MID_FRACTION = 0.5f; + + @Retention(RetentionPolicy.SOURCE) + @StringDef({ + ALIGN_BOTTOM_LEFT, ALIGN_BOTTOM_MID, ALIGN_BOTTOM_RIGHT, + ALIGN_MID_LEFT, ALIGN_MID_MID, ALIGN_MID_RIGHT, + ALIGN_TOP_LEFT, ALIGN_TOP_MID, ALIGN_TOP_RIGHT + }) + + private @interface SubRipTag {} + + // Possible valid alignment tags based on SSA v4+ specs + private static final String ALIGN_BOTTOM_LEFT = "{\\an1}"; + private static final String ALIGN_BOTTOM_MID = "{\\an2}"; + private static final String ALIGN_BOTTOM_RIGHT = "{\\an3}"; + private static final String ALIGN_MID_LEFT = "{\\an4}"; + private static final String ALIGN_MID_MID = "{\\an5}"; + private static final String ALIGN_MID_RIGHT = "{\\an6}"; + private static final String ALIGN_TOP_LEFT = "{\\an7}"; + private static final String ALIGN_TOP_MID = "{\\an8}"; + private static final String ALIGN_TOP_RIGHT = "{\\an9}"; + private final StringBuilder textBuilder; public SubripDecoder() { @@ -87,16 +119,32 @@ protected SubripSubtitle decode(byte[] bytes, int length, boolean reset) { } // Read and parse the text. + ArrayList tags = new ArrayList<>(); textBuilder.setLength(0); while (!TextUtils.isEmpty(currentLine = subripData.readLine())) { if (textBuilder.length() > 0) { textBuilder.append("
"); } - textBuilder.append(currentLine.trim()); + textBuilder.append(processLine(currentLine, tags)); } Spanned text = Html.fromHtml(textBuilder.toString()); - cues.add(new Cue(text)); + Cue cue = null; + + // At end of this loop the clue must be created with the applied tags + for (String tag : tags) { + + // Check if the tag is an alignment tag + if (tag.matches(SUBRIP_ALIGNMENT_TAG)) { + cue = buildCue(text, tag); + + // Based on the specs, in case of alignment tags only the first appearance counts, so break + break; + } + } + + cues.add(cue == null ? new Cue(text) : cue); + if (haveEndTimecode) { cues.add(null); } @@ -108,6 +156,111 @@ protected SubripSubtitle decode(byte[] bytes, int length, boolean reset) { return new SubripSubtitle(cuesArray, cueTimesUsArray); } + /** + * Process the given line by first trimming it then extracting the tags from it + *

+ * The pattern used to extract the tags is specified in the SSA v4+ specs and + * has the following form: "{\...}". + *

+ * "All override codes appear within braces {}" + * "All override codes are always preceded by a backslash \" + * + * @param currentLine Current line + * @param tags Extracted tags will be stored in this array list + * @return Processed line + */ + private String processLine(String currentLine, ArrayList tags) { + // Trim line + String trimmedLine = currentLine.trim(); + + // Extract tags + int replacedCharacters = 0; + StringBuilder processedLine = new StringBuilder(trimmedLine); + Matcher matcher = SUBRIP_TAG_PATTERN.matcher(trimmedLine); + + while (matcher.find()) { + String tag = matcher.group(); + tags.add(tag); + processedLine.replace(matcher.start() - replacedCharacters, matcher.end() - replacedCharacters, ""); + replacedCharacters += tag.length(); + } + + return processedLine.toString(); + } + + /** + * Build a {@link Cue} based on the given text and tag + *

+ * Matches the alignment tag and calculates the line, line anchor, position and position anchor accordingly + *

+ * Based on SSA v4+ specs the alignment tag can have the following form: {\an[1-9}, + * where the number specifies the direction (based on the numpad layout). + * Note. older SSA scripts may contain tags like {\a1[1-9]} but these are based on + * other direction rules, but multiple sources says that these are deprecated, so no support here either + * + * @param alignmentTag Alignment tag + * @return Built cue + */ + private Cue buildCue(Spanned text, String alignmentTag) { + float line, position; + @Cue.AnchorType int positionAnchor; + @Cue.AnchorType int lineAnchor; + + // Set position and position anchor (horizontal alignment) + switch (alignmentTag) { + case ALIGN_BOTTOM_LEFT: + case ALIGN_MID_LEFT: + case ALIGN_TOP_LEFT: + position = DEFAULT_START_FRACTION; + positionAnchor = Cue.ANCHOR_TYPE_START; + break; + case ALIGN_BOTTOM_MID: + case ALIGN_MID_MID: + case ALIGN_TOP_MID: + position = DEFAULT_MID_FRACTION; + positionAnchor = Cue.ANCHOR_TYPE_MIDDLE; + break; + case ALIGN_BOTTOM_RIGHT: + case ALIGN_MID_RIGHT: + case ALIGN_TOP_RIGHT: + position = DEFAULT_END_FRACTION; + positionAnchor = Cue.ANCHOR_TYPE_END; + break; + default: + position = DEFAULT_MID_FRACTION; + positionAnchor = Cue.ANCHOR_TYPE_MIDDLE; + break; + } + + // Set line and line anchor (vertical alignment) + switch (alignmentTag) { + case ALIGN_BOTTOM_LEFT: + case ALIGN_BOTTOM_MID: + case ALIGN_BOTTOM_RIGHT: + line = DEFAULT_END_FRACTION; + lineAnchor = Cue.ANCHOR_TYPE_END; + break; + case ALIGN_MID_LEFT: + case ALIGN_MID_MID: + case ALIGN_MID_RIGHT: + line = DEFAULT_MID_FRACTION; + lineAnchor = Cue.ANCHOR_TYPE_MIDDLE; + break; + case ALIGN_TOP_LEFT: + case ALIGN_TOP_MID: + case ALIGN_TOP_RIGHT: + line = DEFAULT_START_FRACTION; + lineAnchor = Cue.ANCHOR_TYPE_START; + break; + default: + line = DEFAULT_END_FRACTION; + lineAnchor = Cue.ANCHOR_TYPE_END; + break; + } + + return new Cue(text, null, line, Cue.LINE_TYPE_FRACTION, lineAnchor, position, positionAnchor, Cue.DIMEN_UNSET); + } + private static 
long parseTimecode(Matcher matcher, int groupOffset) { long timestampMs = Long.parseLong(matcher.group(groupOffset + 1)) * 60 * 60 * 1000; timestampMs += Long.parseLong(matcher.group(groupOffset + 2)) * 60 * 1000; @@ -115,5 +268,4 @@ private static long parseTimecode(Matcher matcher, int groupOffset) { timestampMs += Long.parseLong(matcher.group(groupOffset + 4)); return timestampMs * 1000; } - } diff --git a/library/core/src/test/assets/subrip/typical_with_tags b/library/core/src/test/assets/subrip/typical_with_tags new file mode 100644 index 00000000000..af196f8a042 --- /dev/null +++ b/library/core/src/test/assets/subrip/typical_with_tags @@ -0,0 +1,56 @@ +1 +00:00:00,000 --> 00:00:01,234 +This is {\an1} the first subtitle. + +2 +00:00:02,345 --> 00:00:03,456 +This is the second subtitle. +Second {\ an 2} subtitle with second line. + +3 +00:00:04,567 --> 00:00:08,901 +This {\an2} is the third {\ tag} subtitle. + +4 +00:00:09,567 --> 00:00:12,901 +This { \an2} is the fourth subtitle. + +5 +00:00:013,567 --> 00:00:14,901 +This {\an2} is the fifth subtitle with multiple {\xyz} valid {\qwe} tags. + +6 +00:00:015,567 --> 00:00:15,901 +This {\an1} is a lines. + +7 +00:00:016,567 --> 00:00:16,901 +This {\an2} is a line. + +8 +00:00:017,567 --> 00:00:17,901 +This {\an3} is a line. + +9 +00:00:018,567 --> 00:00:18,901 +This {\an4} is a line. + +10 +00:00:019,567 --> 00:00:19,901 +This {\an5} is a line. + +11 +00:00:020,567 --> 00:00:20,901 +This {\an6} is a line. + +12 +00:00:021,567 --> 00:00:22,901 +This {\an7} is a line. + +13 +00:00:023,567 --> 00:00:23,901 +This {\an8} is a line. + +14 +00:00:024,567 --> 00:00:24,901 +This {\an9} is a line. 
\ No newline at end of file diff --git a/library/core/src/test/java/com/google/android/exoplayer2/text/subrip/SubripDecoderTest.java b/library/core/src/test/java/com/google/android/exoplayer2/text/subrip/SubripDecoderTest.java index e9abaca0752..554184da5d4 100644 --- a/library/core/src/test/java/com/google/android/exoplayer2/text/subrip/SubripDecoderTest.java +++ b/library/core/src/test/java/com/google/android/exoplayer2/text/subrip/SubripDecoderTest.java @@ -18,6 +18,8 @@ import static com.google.common.truth.Truth.assertThat; import com.google.android.exoplayer2.testutil.TestUtil; +import com.google.android.exoplayer2.text.Cue; + import java.io.IOException; import org.junit.Test; import org.junit.runner.RunWith; @@ -36,6 +38,7 @@ public final class SubripDecoderTest { private static final String TYPICAL_MISSING_SEQUENCE = "subrip/typical_missing_sequence"; private static final String TYPICAL_NEGATIVE_TIMESTAMPS = "subrip/typical_negative_timestamps"; private static final String TYPICAL_UNEXPECTED_END = "subrip/typical_unexpected_end"; + private static final String TYPICAL_WITH_TAGS = "subrip/typical_with_tags"; private static final String NO_END_TIMECODES_FILE = "subrip/no_end_timecodes"; @Test @@ -154,6 +157,92 @@ public void testDecodeNoEndTimecodes() throws IOException { .isEqualTo("Or to the end of the media."); } + @Test + public void testDecodeCueWithTag() throws IOException{ + SubripDecoder decoder = new SubripDecoder(); + byte[] bytes = TestUtil.getByteArray(RuntimeEnvironment.application, TYPICAL_WITH_TAGS); + SubripSubtitle subtitle = decoder.decode(bytes, bytes.length, false); + assertThat(subtitle.getCues(subtitle.getEventTime(0)).get(0).text.toString()) + .isEqualTo("This is the first subtitle."); + assertThat(subtitle.getCues(subtitle.getEventTime(2)).get(0).text.toString()) + .isEqualTo("This is the second subtitle.\nSecond subtitle with second line."); + assertThat(subtitle.getCues(subtitle.getEventTime(4)).get(0).text.toString()) + 
.isEqualTo("This is the third subtitle."); + + // Based on the SSA v4+ specs the curly bracket must be followed by a backslash, so this is + // not a valid tag (won't be parsed / replaced) + assertThat(subtitle.getCues(subtitle.getEventTime(6)).get(0).text.toString()) + .isEqualTo("This { \\an2} is the fourth subtitle."); + + assertThat(subtitle.getCues(subtitle.getEventTime(8)).get(0).text.toString()) + .isEqualTo("This is the fifth subtitle with multiple valid tags."); + + // Verify positions + + // {/an1} + assertThat(subtitle.getCues(subtitle.getEventTime(10)).get(0).positionAnchor) + .isEqualTo(Cue.ANCHOR_TYPE_START); + + assertThat(subtitle.getCues(subtitle.getEventTime(10)).get(0).lineAnchor) + .isEqualTo(Cue.ANCHOR_TYPE_END); + + // {/an2} + assertThat(subtitle.getCues(subtitle.getEventTime(12)).get(0).positionAnchor) + .isEqualTo(Cue.ANCHOR_TYPE_MIDDLE); + + assertThat(subtitle.getCues(subtitle.getEventTime(12)).get(0).lineAnchor) + .isEqualTo(Cue.ANCHOR_TYPE_END); + + // {/an3} + assertThat(subtitle.getCues(subtitle.getEventTime(14)).get(0).positionAnchor) + .isEqualTo(Cue.ANCHOR_TYPE_END); + + assertThat(subtitle.getCues(subtitle.getEventTime(14)).get(0).lineAnchor) + .isEqualTo(Cue.ANCHOR_TYPE_END); + + // {/an4} + assertThat(subtitle.getCues(subtitle.getEventTime(16)).get(0).positionAnchor) + .isEqualTo(Cue.ANCHOR_TYPE_START); + + assertThat(subtitle.getCues(subtitle.getEventTime(16)).get(0).lineAnchor) + .isEqualTo(Cue.ANCHOR_TYPE_MIDDLE); + + // {/an5} + assertThat(subtitle.getCues(subtitle.getEventTime(18)).get(0).positionAnchor) + .isEqualTo(Cue.ANCHOR_TYPE_MIDDLE); + + assertThat(subtitle.getCues(subtitle.getEventTime(18)).get(0).lineAnchor) + .isEqualTo(Cue.ANCHOR_TYPE_MIDDLE); + + // {/an6} + assertThat(subtitle.getCues(subtitle.getEventTime(20)).get(0).positionAnchor) + .isEqualTo(Cue.ANCHOR_TYPE_END); + + assertThat(subtitle.getCues(subtitle.getEventTime(20)).get(0).lineAnchor) + .isEqualTo(Cue.ANCHOR_TYPE_MIDDLE); + + // {/an7} + 
assertThat(subtitle.getCues(subtitle.getEventTime(22)).get(0).positionAnchor) + .isEqualTo(Cue.ANCHOR_TYPE_START); + + assertThat(subtitle.getCues(subtitle.getEventTime(22)).get(0).lineAnchor) + .isEqualTo(Cue.ANCHOR_TYPE_START); + + // {/an8} + assertThat(subtitle.getCues(subtitle.getEventTime(24)).get(0).positionAnchor) + .isEqualTo(Cue.ANCHOR_TYPE_MIDDLE); + + assertThat(subtitle.getCues(subtitle.getEventTime(24)).get(0).lineAnchor) + .isEqualTo(Cue.ANCHOR_TYPE_START); + + // {/an9} + assertThat(subtitle.getCues(subtitle.getEventTime(26)).get(0).positionAnchor) + .isEqualTo(Cue.ANCHOR_TYPE_END); + + assertThat(subtitle.getCues(subtitle.getEventTime(26)).get(0).lineAnchor) + .isEqualTo(Cue.ANCHOR_TYPE_START); + } + private static void assertTypicalCue1(SubripSubtitle subtitle, int eventIndex) { assertThat(subtitle.getEventTime(eventIndex)).isEqualTo(0); assertThat(subtitle.getCues(subtitle.getEventTime(eventIndex)).get(0).text.toString())