diff --git a/RELEASENOTES.md b/RELEASENOTES.md index e1893650ce1..fb6bf0ab7ca 100644 --- a/RELEASENOTES.md +++ b/RELEASENOTES.md @@ -121,6 +121,9 @@ * IMA DAI extension: * Fix a bug where a new ad group is inserted in live streams because the calculated content position in consecutive timelines varies slightly. +* Text: + * SSA: Add support for UTF-16 files if they start with a byte order mark + ([#319](https://github.com/androidx/media/issues/319)). * Remove deprecated symbols: * Remove `DefaultAudioSink` constructors, use `DefaultAudioSink.Builder` instead. diff --git a/libraries/common/src/main/java/androidx/media3/common/util/ParsableByteArray.java b/libraries/common/src/main/java/androidx/media3/common/util/ParsableByteArray.java index bd1117bc785..9e9e6ead0ad 100644 --- a/libraries/common/src/main/java/androidx/media3/common/util/ParsableByteArray.java +++ b/libraries/common/src/main/java/androidx/media3/common/util/ParsableByteArray.java @@ -233,11 +233,28 @@ public int peekUnsignedByte() { return (data[position] & 0xFF); } - /** Peeks at the next char. */ + /** + * Peeks at the next char. + * + *

Equivalent to passing {@link Charsets#UTF_16} or {@link Charsets#UTF_16BE} to {@link + * #peekChar(Charset)}. + */ public char peekChar() { return (char) ((data[position] & 0xFF) << 8 | (data[position + 1] & 0xFF)); } + /** + * Peeks at the next char (as decoded by {@code charset}), or 0 if too few bytes are left. + * + * @throws IllegalArgumentException if charset is not supported. Only US_ASCII, UTF-8, UTF-16, + * UTF-16BE, and UTF-16LE are supported. + */ + public char peekChar(Charset charset) { + Assertions.checkArgument( + SUPPORTED_CHARSETS_FOR_READLINE.contains(charset), "Unsupported charset: " + charset); + return (char) (peekCharacterAndSize(charset) >> Short.SIZE); + } + /** Reads the next byte as an unsigned value. */ public int readUnsignedByte() { return (data[position++] & 0xFF); @@ -649,27 +666,42 @@ private void skipLineTerminator(Charset charset) { * UTF-8 and two bytes for UTF-16). */ private char readCharacterIfInList(Charset charset, char[] chars) { - char character; - int characterSize; + int characterAndSize = peekCharacterAndSize(charset); + + if (characterAndSize != 0 && Chars.contains(chars, (char) (characterAndSize >> Short.SIZE))) { + position += characterAndSize & 0xFFFF; + return (char) (characterAndSize >> Short.SIZE); + } else { + return 0; + } + } + + /** + * Peeks at the character at {@link #position} (as decoded by {@code charset}), returns it and the + * number of bytes the character takes up within the array packed into an int. The upper 16 bits + * are the character and the lower 16 bits are its size in bytes. Returns 0 if {@link + * #bytesLeft()} doesn't allow reading a whole character in {@code charset} or if the {@code + * charset} is not one of US_ASCII, UTF-8, UTF-16, UTF-16BE, or UTF-16LE. + * + *

Only supports characters that occupy a single code unit (i.e. one byte for UTF-8 and two + * bytes for UTF-16). + */ + private int peekCharacterAndSize(Charset charset) { + char character; + short characterSize; if ((charset.equals(Charsets.UTF_8) || charset.equals(Charsets.US_ASCII)) && bytesLeft() >= 1) { character = Chars.checkedCast(UnsignedBytes.toInt(data[position])); characterSize = 1; } else if ((charset.equals(Charsets.UTF_16) || charset.equals(Charsets.UTF_16BE)) && bytesLeft() >= 2) { character = Chars.fromBytes(data[position], data[position + 1]); characterSize = 2; } else if (charset.equals(Charsets.UTF_16LE) && bytesLeft() >= 2) { character = Chars.fromBytes(data[position + 1], data[position]); characterSize = 2; } else { return 0; } - - if (Chars.contains(chars, character)) { - position += characterSize; - return Chars.checkedCast(character); - } else { - return 0; - } + return (character << Short.SIZE) + characterSize; } } diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/text/ssa/SsaDecoder.java b/libraries/extractor/src/main/java/androidx/media3/extractor/text/ssa/SsaDecoder.java index a981193f992..30017383e9e 100644 --- a/libraries/extractor/src/main/java/androidx/media3/extractor/text/ssa/SsaDecoder.java +++ b/libraries/extractor/src/main/java/androidx/media3/extractor/text/ssa/SsaDecoder.java @@ -37,6 +37,8 @@ import androidx.media3.extractor.text.SimpleSubtitleDecoder; import androidx.media3.extractor.text.Subtitle; import com.google.common.base.Ascii; +import com.google.common.base.Charsets; +import java.nio.charset.Charset; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; @@ -98,11 +100,14 @@ public SsaDecoder(@Nullable List
initializationData) { if (initializationData != null && !initializationData.isEmpty()) { haveInitializationData = true; + // Currently, construction with initialization data is only relevant to SSA subtitles muxed + // in a MKV. According to https://www.matroska.org/technical/subtitles.html, these muxed + // subtitles are always encoded in UTF-8. String formatLine = Util.fromUtf8Bytes(initializationData.get(0)); Assertions.checkArgument(formatLine.startsWith(FORMAT_LINE_PREFIX)); dialogueFormatFromInitializationData = Assertions.checkNotNull(SsaDialogueFormat.fromFormatLine(formatLine)); - parseHeader(new ParsableByteArray(initializationData.get(1))); + parseHeader(new ParsableByteArray(initializationData.get(1)), Charsets.UTF_8); } else { haveInitializationData = false; dialogueFormatFromInitializationData = null; @@ -115,25 +120,37 @@ protected Subtitle decode(byte[] data, int length, boolean reset) { List cueTimesUs = new ArrayList<>(); ParsableByteArray parsableData = new ParsableByteArray(data, length); + Charset charset = detectUtfCharset(parsableData); + if (!haveInitializationData) { - parseHeader(parsableData); + parseHeader(parsableData, charset); } - parseEventBody(parsableData, cues, cueTimesUs); + parseEventBody(parsableData, cues, cueTimesUs, charset); return new SsaSubtitle(cues, cueTimesUs); } + /** + * Determine UTF encoding of the byte array from a byte order mark (BOM), defaulting to UTF-8 if + * no BOM is found. + */ + private Charset detectUtfCharset(ParsableByteArray data) { + @Nullable Charset charset = data.readUtfCharsetFromBom(); + return charset != null ? charset : Charsets.UTF_8; + } + /** * Parses the header of the subtitle. * * @param data A {@link ParsableByteArray} from which the header should be read. + * @param charset The {@code Charset} of the encoding of {@code data}. 
*/ - private void parseHeader(ParsableByteArray data) { + private void parseHeader(ParsableByteArray data, Charset charset) { @Nullable String currentLine; - while ((currentLine = data.readLine()) != null) { + while ((currentLine = data.readLine(charset)) != null) { if ("[Script Info]".equalsIgnoreCase(currentLine)) { - parseScriptInfo(data); + parseScriptInfo(data, charset); } else if ("[V4+ Styles]".equalsIgnoreCase(currentLine)) { - styles = parseStyles(data); + styles = parseStyles(data, charset); } else if ("[V4 Styles]".equalsIgnoreCase(currentLine)) { Log.i(TAG, "[V4 Styles] are not supported"); } else if ("[Events]".equalsIgnoreCase(currentLine)) { @@ -151,11 +168,12 @@ private void parseHeader(ParsableByteArray data) { * * @param data A {@link ParsableByteArray} with {@link ParsableByteArray#getPosition() position} * set to the beginning of the first line after {@code [Script Info]}. + * @param charset The {@code Charset} of the encoding of {@code data}. */ - private void parseScriptInfo(ParsableByteArray data) { + private void parseScriptInfo(ParsableByteArray data, Charset charset) { @Nullable String currentLine; - while ((currentLine = data.readLine()) != null - && (data.bytesLeft() == 0 || data.peekUnsignedByte() != '[')) { + while ((currentLine = data.readLine(charset)) != null + && (data.bytesLeft() == 0 || data.peekChar(charset) != '[')) { String[] infoNameAndValue = currentLine.split(":"); if (infoNameAndValue.length != 2) { continue; @@ -187,13 +205,14 @@ private void parseScriptInfo(ParsableByteArray data) { * * @param data A {@link ParsableByteArray} with {@link ParsableByteArray#getPosition()} pointing * at the beginning of the first line after {@code [V4+ Styles]}. + * @param charset The {@code Charset} of the encoding of {@code data}. 
*/ - private static Map parseStyles(ParsableByteArray data) { + private static Map parseStyles(ParsableByteArray data, Charset charset) { Map styles = new LinkedHashMap<>(); @Nullable SsaStyle.Format formatInfo = null; @Nullable String currentLine; - while ((currentLine = data.readLine()) != null - && (data.bytesLeft() == 0 || data.peekUnsignedByte() != '[')) { + while ((currentLine = data.readLine(charset)) != null + && (data.bytesLeft() == 0 || data.peekChar(charset) != '[')) { if (currentLine.startsWith(FORMAT_LINE_PREFIX)) { formatInfo = SsaStyle.Format.fromFormatLine(currentLine); } else if (currentLine.startsWith(STYLE_LINE_PREFIX)) { @@ -216,12 +235,14 @@ private static Map parseStyles(ParsableByteArray data) { * @param data A {@link ParsableByteArray} from which the body should be read. * @param cues A list to which parsed cues will be added. * @param cueTimesUs A sorted list to which parsed cue timestamps will be added. + * @param charset The {@code Charset} of the encoding of {@code data}. */ - private void parseEventBody(ParsableByteArray data, List> cues, List cueTimesUs) { + private void parseEventBody( + ParsableByteArray data, List> cues, List cueTimesUs, Charset charset) { @Nullable SsaDialogueFormat format = haveInitializationData ? 
dialogueFormatFromInitializationData : null; @Nullable String currentLine; - while ((currentLine = data.readLine()) != null) { + while ((currentLine = data.readLine(charset)) != null) { if (currentLine.startsWith(FORMAT_LINE_PREFIX)) { format = SsaDialogueFormat.fromFormatLine(currentLine); } else if (currentLine.startsWith(DIALOGUE_LINE_PREFIX)) { diff --git a/libraries/extractor/src/test/java/androidx/media3/extractor/text/ssa/SsaDecoderTest.java b/libraries/extractor/src/test/java/androidx/media3/extractor/text/ssa/SsaDecoderTest.java index 0c9cfff208f..e831a0460c0 100644 --- a/libraries/extractor/src/test/java/androidx/media3/extractor/text/ssa/SsaDecoderTest.java +++ b/libraries/extractor/src/test/java/androidx/media3/extractor/text/ssa/SsaDecoderTest.java @@ -30,6 +30,7 @@ import com.google.common.collect.Iterables; import java.io.IOException; import java.util.ArrayList; +import java.util.Objects; import org.junit.Test; import org.junit.runner.RunWith; @@ -43,6 +44,8 @@ public final class SsaDecoderTest { private static final String TYPICAL_HEADER_ONLY = "media/ssa/typical_header"; private static final String TYPICAL_DIALOGUE_ONLY = "media/ssa/typical_dialogue"; private static final String TYPICAL_FORMAT_ONLY = "media/ssa/typical_format"; + private static final String TYPICAL_UTF16LE = "media/ssa/typical_utf16le"; + private static final String TYPICAL_UTF16BE = "media/ssa/typical_utf16be"; private static final String OVERLAPPING_TIMECODES = "media/ssa/overlapping_timecodes"; private static final String POSITIONS = "media/ssa/positioning"; private static final String INVALID_TIMECODES = "media/ssa/invalid_timecodes"; @@ -130,6 +133,58 @@ public void decodeTypicalWithInitializationData() throws IOException { assertTypicalCue3(subtitle, 4); } + @Test + public void decodeTypicalUtf16le() throws IOException { + SsaDecoder decoder = new SsaDecoder(); + byte[] bytes = + TestUtil.getByteArray(ApplicationProvider.getApplicationContext(), TYPICAL_UTF16LE); + Subtitle 
subtitle = decoder.decode(bytes, bytes.length, false); + + assertThat(subtitle.getEventTimeCount()).isEqualTo(6); + // Check position, line, anchors & alignment are set from Alignment Style (2 - bottom-center). + Cue firstCue = subtitle.getCues(subtitle.getEventTime(0)).get(0); + assertWithMessage("Cue.textAlignment") + .that(firstCue.textAlignment) + .isEqualTo(Layout.Alignment.ALIGN_CENTER); + assertWithMessage("Cue.positionAnchor") + .that(firstCue.positionAnchor) + .isEqualTo(Cue.ANCHOR_TYPE_MIDDLE); + assertThat(firstCue.position).isEqualTo(0.5f); + assertThat(firstCue.lineAnchor).isEqualTo(Cue.ANCHOR_TYPE_END); + assertThat(firstCue.lineType).isEqualTo(Cue.LINE_TYPE_FRACTION); + assertThat(firstCue.line).isEqualTo(0.95f); + + assertTypicalCue1(subtitle, 0); + assertTypicalCue2(subtitle, 2); + assertTypicalCue3(subtitle, 4); + } + + @Test + public void decodeTypicalUtf16be() throws IOException { + SsaDecoder decoder = new SsaDecoder(); + byte[] bytes = + TestUtil.getByteArray(ApplicationProvider.getApplicationContext(), TYPICAL_UTF16BE); + Subtitle subtitle = decoder.decode(bytes, bytes.length, false); + + assertThat(subtitle.getEventTimeCount()).isEqualTo(6); + // Check position, line, anchors & alignment are set from Alignment Style (2 - bottom-center). 
+ Cue firstCue = subtitle.getCues(subtitle.getEventTime(0)).get(0); + assertWithMessage("Cue.textAlignment") + .that(firstCue.textAlignment) + .isEqualTo(Layout.Alignment.ALIGN_CENTER); + assertWithMessage("Cue.positionAnchor") + .that(firstCue.positionAnchor) + .isEqualTo(Cue.ANCHOR_TYPE_MIDDLE); + assertThat(firstCue.position).isEqualTo(0.5f); + assertThat(firstCue.lineAnchor).isEqualTo(Cue.ANCHOR_TYPE_END); + assertThat(firstCue.lineType).isEqualTo(Cue.LINE_TYPE_FRACTION); + assertThat(firstCue.line).isEqualTo(0.95f); + + assertTypicalCue1(subtitle, 0); + assertTypicalCue2(subtitle, 2); + assertTypicalCue3(subtitle, 4); + } + @Test public void decodeOverlappingTimecodes() throws IOException { SsaDecoder decoder = new SsaDecoder(); @@ -438,6 +493,10 @@ private static void assertTypicalCue1(Subtitle subtitle, int eventIndex) { assertThat(subtitle.getEventTime(eventIndex)).isEqualTo(0); assertThat(subtitle.getCues(subtitle.getEventTime(eventIndex)).get(0).text.toString()) .isEqualTo("This is the first subtitle."); + assertThat( + Objects.requireNonNull( + subtitle.getCues(subtitle.getEventTime(eventIndex)).get(0).textAlignment)) + .isEqualTo(Layout.Alignment.ALIGN_CENTER); assertThat(subtitle.getEventTime(eventIndex + 1)).isEqualTo(1230000); } diff --git a/libraries/test_data/src/test/assets/media/ssa/typical_utf16be b/libraries/test_data/src/test/assets/media/ssa/typical_utf16be new file mode 100644 index 00000000000..6b11ad0ed57 Binary files /dev/null and b/libraries/test_data/src/test/assets/media/ssa/typical_utf16be differ diff --git a/libraries/test_data/src/test/assets/media/ssa/typical_utf16le b/libraries/test_data/src/test/assets/media/ssa/typical_utf16le new file mode 100644 index 00000000000..da098604d0e Binary files /dev/null and b/libraries/test_data/src/test/assets/media/ssa/typical_utf16le differ