Skip to content

Commit

Permalink
feat: HLS / DASH support forced subtitle (shaka-project#1020)
Browse files Browse the repository at this point in the history
Closes shaka-project#988

---------

Co-authored-by: Cosmin Stejerean <cstejerean@meta.com>
  • Loading branch information
vish91 and cosmin authored Feb 15, 2024
1 parent e19d733 commit f73ad0d
Show file tree
Hide file tree
Showing 28 changed files with 236 additions and 12 deletions.
4 changes: 2 additions & 2 deletions docs/source/options/dash_stream_descriptors.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ DASH specific stream descriptor fields

Optional semicolon separated list of values for DASH Role element. The
value should be one of: **caption**, **subtitle**, **main**, **alternate**,
**supplementary**, **commentary**, **description** and **dub**. See
DASH (ISO/IEC 23009-1) specification for details.
**supplementary**, **commentary**, **description**, **dub** and **forced-subtitle**.
See DASH (ISO/IEC 23009-1) specification for details.
9 changes: 9 additions & 0 deletions docs/source/options/stream_descriptors.rst
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,15 @@ These are the available fields:
CEA allows specifying up to 4 streams within a single video stream. If not
specified, all subtitles will be merged together.

:forced_subtitle:

Optional boolean value (0|1). If set to 1, indicates that this stream is a
Forced Narrative subtitle that should be displayed when subtitles are otherwise
off, for example used to caption short portions of the audio that might be in
a foreign language. For DASH this will set role to **forced-subtitle**; for HLS
it will set FORCED=YES and AUTOSELECT=YES. Only valid for subtitles.


.. include:: /options/drm_stream_descriptors.rst
.. include:: /options/dash_stream_descriptors.rst
.. include:: /options/hls_stream_descriptors.rst
3 changes: 3 additions & 0 deletions include/packager/packager.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,9 @@ struct StreamDescriptor {
/// Set to true to indicate that the stream is for hls only.
bool hls_only = false;

/// Optional, indicates if this is a Forced Narrative subtitle stream.
bool forced_subtitle = false;

/// Optional for DASH output. It defines the Label element in Adaptation Set.
std::string dash_label;
};
Expand Down
12 changes: 10 additions & 2 deletions packager/app/packager_main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,16 @@ const char kUsage[] =
" in the format: scheme_id_uri=value.\n"
" - dash_roles (roles): Optional semicolon separated list of values for\n"
" DASH Role elements. The value should be one of: caption, subtitle,\n"
" main, alternate, supplementary, commentary, description and dub. See\n"
" DASH (ISO/IEC 23009-1) specification for details.\n";
" forced-subtitle, main, alternate, supplementary, commentary, \n"
" description and dub. See DASH\n"
" (ISO/IEC 23009-1) specification for details.\n"
" - forced_subtitle: Optional boolean value (0|1). If set to 1 \n"
" indicates that this stream is a Forced Narrative subtitle that \n"
" should be displayed when subtitles are otherwise off, for example \n"
" used to caption short portions of the audio that might be in a \n"
" foreign language. For DASH this will set role to forced_subtitle, \n"
" for HLS it will set FORCED=YES and AUTOSELECT=YES. \n"
" Only valid for subtitles.";

// Labels for parameters in RawKey key info.
const char kDrmLabelLabel[] = "label";
Expand Down
25 changes: 25 additions & 0 deletions packager/app/stream_descriptor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ enum FieldType {
kDashOnlyField,
kHlsOnlyField,
kDashLabelField,
kForcedSubtitleField,
};

struct FieldNameToTypeMapping {
Expand Down Expand Up @@ -88,6 +89,7 @@ const FieldNameToTypeMapping kFieldNameTypeMappings[] = {
{"dash_only", kDashOnlyField},
{"hls_only", kHlsOnlyField},
{"dash_label", kDashLabelField},
{"forced_subtitle", kForcedSubtitleField},
};

FieldType GetFieldType(const std::string& field_name) {
Expand Down Expand Up @@ -255,12 +257,35 @@ std::optional<StreamDescriptor> ParseStreamDescriptor(
case kDashLabelField:
descriptor.dash_label = pair.second;
break;
case kForcedSubtitleField:
unsigned forced_subtitle_value;
if (!absl::SimpleAtoi(pair.second, &forced_subtitle_value)) {
LOG(ERROR) << "Non-numeric option for forced field "
"specified ("
<< pair.second << ").";
return std::nullopt;
}
if (forced_subtitle_value > 1) {
LOG(ERROR) << "forced should be either 0 or 1.";
return std::nullopt;
}
descriptor.forced_subtitle = forced_subtitle_value > 0;
break;
default:
LOG(ERROR) << "Unknown field in stream descriptor (\"" << pair.first
<< "\").";
return std::nullopt;
}
}

if (descriptor.forced_subtitle) {
auto itr = std::find(descriptor.dash_roles.begin(),
descriptor.dash_roles.end(), "forced-subtitle");
if (itr == descriptor.dash_roles.end()) {
descriptor.dash_roles.push_back("forced-subtitle");
}
}

return descriptor;
}

Expand Down
26 changes: 23 additions & 3 deletions packager/app/test/packager_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,8 @@ def _GetStream(self,
skip_encryption=None,
bandwidth=None,
split_content_on_ad_cues=False,
test_file=None):
test_file=None,
forced_subtitle=None):
"""Get a stream descriptor as a string.
Expand Down Expand Up @@ -347,8 +348,9 @@ def _GetStream(self,
into multiple files, with a total of NumAdCues + 1 files.
test_file: The input file to use. If the input file is not specified, a
default file will be used.
forced_subtitle: If set to true, it marks this as a Forced Narrative
subtitle, marked in DASH using forced-subtitle role and
in HLS using FORCED=YES.
Returns:
A string that makes up a single stream descriptor for input to the
packager.
Expand Down Expand Up @@ -402,6 +404,9 @@ def _GetStream(self,
if dash_only:
stream.Append('dash_only', 1)

if forced_subtitle:
stream.Append('forced_subtitle', 1)

if dash_label:
stream.Append('dash_label', dash_label)

Expand Down Expand Up @@ -799,6 +804,21 @@ def testDashLabel(self):
self.assertPackageSuccess(streams, self._GetFlags(output_dash=True))
self._CheckTestResults('dash-label')

def testForcedSubtitle(self):
  """Packages audio/video plus a forced text stream for DASH and HLS.

  The audio and video streams are requested with hls=True; the text stream
  is built from 'bear-english.vtt' with forced_subtitle=True. The packaged
  output is then checked against the 'forced-subtitle' test results.
  """
  # Build the audio and video descriptors in the same order as before.
  all_streams = [
      self._GetStream(stream_kind, hls=True)
      for stream_kind in ('audio', 'video')
  ]

  # Append the forced subtitle text stream(s).
  all_streams += self._GetStreams(
      ['text'],
      test_files=['bear-english.vtt'],
      forced_subtitle=True)

  packager_flags = self._GetFlags(output_dash=True, output_hls=True)
  self.assertPackageSuccess(all_streams, packager_flags)
  self._CheckTestResults('forced-subtitle')

def testAudioVideoWithLanguageOverride(self):
self.assertPackageSuccess(
self._GetStreams(['audio', 'video'], language='por', hls=True),
Expand Down
16 changes: 16 additions & 0 deletions packager/app/test/testdata/forced-subtitle/bear-640x360-audio.m3u8
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#EXTM3U
#EXT-X-VERSION:6
## Generated with https://github.com/shaka-project/shaka-packager version <tag>-<hash>-<test>
#EXT-X-TARGETDURATION:5
#EXT-X-PLAYLIST-TYPE:VOD
#EXT-X-MAP:URI="bear-640x360-audio.mp4",BYTERANGE="804@0"
#EXTINF:1.022,
#EXT-X-BYTERANGE:17028@872
bear-640x360-audio.mp4
#EXTINF:0.998,
#EXT-X-BYTERANGE:16285
bear-640x360-audio.mp4
#EXTINF:0.720,
#EXT-X-BYTERANGE:9558
bear-640x360-audio.mp4
#EXT-X-ENDLIST
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#EXTM3U
#EXT-X-VERSION:6
## Generated with https://github.com/shaka-project/shaka-packager version <tag>-<hash>-<test>
#EXT-X-TARGETDURATION:5
#EXT-X-PLAYLIST-TYPE:VOD
#EXT-X-I-FRAMES-ONLY
#EXT-X-MAP:URI="bear-640x360-video.mp4",BYTERANGE="870@0"
#EXTINF:1.001,
#EXT-X-BYTERANGE:15581@938
bear-640x360-video.mp4
#EXTINF:1.001,
#EXT-X-BYTERANGE:18221@100251
bear-640x360-video.mp4
#EXTINF:0.734,
#EXT-X-BYTERANGE:19663@222058
bear-640x360-video.mp4
#EXT-X-ENDLIST
16 changes: 16 additions & 0 deletions packager/app/test/testdata/forced-subtitle/bear-640x360-video.m3u8
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#EXTM3U
#EXT-X-VERSION:6
## Generated with https://github.com/shaka-project/shaka-packager version <tag>-<hash>-<test>
#EXT-X-TARGETDURATION:5
#EXT-X-PLAYLIST-TYPE:VOD
#EXT-X-MAP:URI="bear-640x360-video.mp4",BYTERANGE="870@0"
#EXTINF:1.001,
#EXT-X-BYTERANGE:99313@938
bear-640x360-video.mp4
#EXTINF:1.001,
#EXT-X-BYTERANGE:121807
bear-640x360-video.mp4
#EXTINF:0.734,
#EXT-X-BYTERANGE:79662
bear-640x360-video.mp4
#EXT-X-ENDLIST
Binary file not shown.
11 changes: 11 additions & 0 deletions packager/app/test/testdata/forced-subtitle/bear-english-text.vtt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
WEBVTT
STYLE
::cue { color:lime }
00:00:00.000 --> 00:00:00.800 align:center
Yup, that's a bear, eh.

00:00:01.000 --> 00:00:04.700 align:center
He 's... um... doing bear-like stuff.

13 changes: 13 additions & 0 deletions packager/app/test/testdata/forced-subtitle/output.m3u8
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#EXTM3U
## Generated with https://github.com/shaka-project/shaka-packager version <tag>-<hash>-<test>

#EXT-X-INDEPENDENT-SEGMENTS

#EXT-X-MEDIA:TYPE=AUDIO,URI="bear-640x360-audio.m3u8",GROUP-ID="default-audio-group",NAME="stream_0",DEFAULT=NO,AUTOSELECT=YES,CHANNELS="2"

#EXT-X-MEDIA:TYPE=SUBTITLES,URI="stream_2.m3u8",GROUP-ID="default-text-group",NAME="stream_2",DEFAULT=NO,AUTOSELECT=YES,FORCED=YES

#EXT-X-STREAM-INF:BANDWIDTH=1106817,AVERAGE-BANDWIDTH=1004632,CODECS="avc1.64001e,mp4a.40.2",RESOLUTION=640x360,FRAME-RATE=29.970,AUDIO="default-audio-group",SUBTITLES="default-text-group",CLOSED-CAPTIONS=NONE
bear-640x360-video.m3u8

#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=214292,AVERAGE-BANDWIDTH=156327,CODECS="avc1.64001e",RESOLUTION=640x360,CLOSED-CAPTIONS=NONE,URI="bear-640x360-video-iframe.m3u8"
29 changes: 29 additions & 0 deletions packager/app/test/testdata/forced-subtitle/output.mpd
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Generated with https://github.com/shaka-project/shaka-packager version <tag>-<hash>-<test>-->
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 DASH-MPD.xsd" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" minBufferTime="PT2S" type="static" mediaPresentationDuration="PT2.736067S">
<Period id="0">
<AdaptationSet id="0" contentType="audio" subsegmentAlignment="true">
<Representation id="0" bandwidth="133334" codecs="mp4a.40.2" mimeType="audio/mp4" audioSamplingRate="44100">
<AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/>
<BaseURL>bear-640x360-audio.mp4</BaseURL>
<SegmentBase indexRange="804-871" timescale="44100">
<Initialization range="0-803"/>
</SegmentBase>
</Representation>
</AdaptationSet>
<AdaptationSet id="1" contentType="video" width="640" height="360" frameRate="30000/1001" subsegmentAlignment="true" par="16:9">
<Representation id="1" bandwidth="973483" codecs="avc1.64001e" mimeType="video/mp4" sar="1:1">
<BaseURL>bear-640x360-video.mp4</BaseURL>
<SegmentBase indexRange="870-937" timescale="30000">
<Initialization range="0-869"/>
</SegmentBase>
</Representation>
</AdaptationSet>
<AdaptationSet id="2" contentType="text" subsegmentAlignment="true">
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="forced-subtitle"/>
<Representation id="2" bandwidth="317" mimeType="text/vtt">
<BaseURL>bear-english-text.vtt</BaseURL>
</Representation>
</AdaptationSet>
</Period>
</MPD>
8 changes: 8 additions & 0 deletions packager/app/test/testdata/forced-subtitle/stream_2.m3u8
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#EXTM3U
#EXT-X-VERSION:6
## Generated with https://github.com/shaka-project/shaka-packager version <tag>-<hash>-<test>
#EXT-X-TARGETDURATION:5
#EXT-X-PLAYLIST-TYPE:VOD
#EXTINF:4.700,
bear-english-text.vtt
#EXT-X-ENDLIST
13 changes: 12 additions & 1 deletion packager/hls/base/master_playlist.cc
Original file line number Diff line number Diff line change
Expand Up @@ -318,11 +318,16 @@ void BuildMediaTag(const MediaPlaylist& playlist,
} else {
tag.AddString("DEFAULT", "NO");
}

if (is_autoselect) {
tag.AddString("AUTOSELECT", "YES");
}

if (playlist.stream_type() ==
MediaPlaylist::MediaPlaylistStreamType::kSubtitle &&
playlist.forced_subtitle()) {
tag.AddString("FORCED", "YES");
}

const std::vector<std::string>& characteristics = playlist.characteristics();
if (!characteristics.empty()) {
tag.AddQuotedString("CHARACTERISTICS", absl::StrJoin(characteristics, ","));
Expand Down Expand Up @@ -401,6 +406,12 @@ void BuildMediaTags(
}
}

if (playlist->stream_type() ==
MediaPlaylist::MediaPlaylistStreamType::kSubtitle &&
playlist->forced_subtitle()) {
is_autoselect = true;
}

BuildMediaTag(*playlist, group_id, is_default, is_autoselect, base_url,
out);
}
Expand Down
6 changes: 6 additions & 0 deletions packager/hls/base/media_playlist.cc
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,10 @@ void MediaPlaylist::SetCharacteristicsForTesting(
characteristics_ = characteristics;
}

void MediaPlaylist::SetForcedSubtitleForTesting(const bool forced_subtitle) {
forced_subtitle_ = forced_subtitle;
}

bool MediaPlaylist::SetMediaInfo(const MediaInfo& media_info) {
const int32_t time_scale = GetTimeScale(media_info);
if (time_scale == 0) {
Expand Down Expand Up @@ -400,6 +404,8 @@ bool MediaPlaylist::SetMediaInfo(const MediaInfo& media_info) {
std::vector<std::string>(media_info_.hls_characteristics().begin(),
media_info_.hls_characteristics().end());

forced_subtitle_ = media_info_.forced_subtitle();

return true;
}

Expand Down
6 changes: 6 additions & 0 deletions packager/hls/base/media_playlist.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ class MediaPlaylist {
/// For testing only.
void SetLanguageForTesting(const std::string& language);

/// For testing only.
void SetForcedSubtitleForTesting(const bool forced_subtitle);

/// For testing only.
void SetCharacteristicsForTesting(
const std::vector<std::string>& characteristics);
Expand Down Expand Up @@ -223,6 +226,8 @@ class MediaPlaylist {
return characteristics_;
}

/// @return the forced-subtitle flag currently stored on this playlist.
bool forced_subtitle() const {
  return forced_subtitle_;
}

bool is_dvs() const {
// HLS Authoring Specification for Apple Devices
// https://developer.apple.com/documentation/http_live_streaming/hls_authoring_specification_for_apple_devices#overview
Expand Down Expand Up @@ -262,6 +267,7 @@ class MediaPlaylist {
std::string codec_;
std::string language_;
std::vector<std::string> characteristics_;
bool forced_subtitle_ = false;
uint32_t media_sequence_number_ = 0;
bool inserted_discontinuity_tag_ = false;
int discontinuity_sequence_number_ = 0;
Expand Down
5 changes: 5 additions & 0 deletions packager/media/event/hls_notify_muxer_listener.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,15 @@ HlsNotifyMuxerListener::HlsNotifyMuxerListener(
const std::string& ext_x_media_name,
const std::string& ext_x_media_group_id,
const std::vector<std::string>& characteristics,
bool forced_subtitle,
hls::HlsNotifier* hls_notifier,
std::optional<uint32_t> index)
: playlist_name_(playlist_name),
iframes_only_(iframes_only),
ext_x_media_name_(ext_x_media_name),
ext_x_media_group_id_(ext_x_media_group_id),
characteristics_(characteristics),
forced_subtitle_(forced_subtitle),
hls_notifier_(hls_notifier),
index_(index) {
DCHECK(hls_notifier);
Expand Down Expand Up @@ -103,6 +105,9 @@ void HlsNotifyMuxerListener::OnMediaStart(const MuxerOptions& muxer_options,
for (const std::string& characteristic : characteristics_)
media_info->add_hls_characteristics(characteristic);
}
if (forced_subtitle_) {
media_info->set_forced_subtitle(forced_subtitle_);
}
if (index_.has_value())
media_info->set_index(index_.value());

Expand Down
Loading

0 comments on commit f73ad0d

Please sign in to comment.