Skip to content

Commit

Permalink
Use relative paths for DASH fragments (closes #12990)
Browse files Browse the repository at this point in the history
Reduces JSON size by 10x
refs #13810
  • Loading branch information
dstftw committed Aug 5, 2017
1 parent 8519b88 commit 1141e91
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 10 deletions.
14 changes: 10 additions & 4 deletions youtube_dl/downloader/dash.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from .fragment import FragmentFD
from ..compat import compat_urllib_error
from ..utils import urljoin


class DashSegmentsFD(FragmentFD):
Expand All @@ -12,12 +13,13 @@ class DashSegmentsFD(FragmentFD):
FD_NAME = 'dashsegments'

def real_download(self, filename, info_dict):
segments = info_dict['fragments'][:1] if self.params.get(
fragment_base_url = info_dict.get('fragment_base_url')
fragments = info_dict['fragments'][:1] if self.params.get(
'test', False) else info_dict['fragments']

ctx = {
'filename': filename,
'total_frags': len(segments),
'total_frags': len(fragments),
}

self._prepare_and_start_frag_download(ctx)
Expand All @@ -26,7 +28,7 @@ def real_download(self, filename, info_dict):
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)

frag_index = 0
for i, segment in enumerate(segments):
for i, fragment in enumerate(fragments):
frag_index += 1
if frag_index <= ctx['fragment_index']:
continue
Expand All @@ -36,7 +38,11 @@ def real_download(self, filename, info_dict):
count = 0
while count <= fragment_retries:
try:
success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
fragment_url = fragment.get('url')
if not fragment_url:
assert fragment_base_url
fragment_url = urljoin(fragment_base_url, fragment['path'])
success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
if not success:
return False
self._append_fragment(ctx, frag_content)
Expand Down
16 changes: 10 additions & 6 deletions youtube_dl/extractor/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1892,9 +1892,13 @@ def prepare_template(template_name, identifiers):
'Bandwidth': bandwidth,
}

def location_key(location):
    """Return the fragment dict key under which *location* is stored.

    Returns 'url' when *location* starts with an explicit ``http://`` or
    ``https://`` scheme, and 'path' for anything else (relative paths,
    scheme-relative ``//host/...`` references, other schemes, or an
    empty string).
    """
    # The scheme check is deliberately case-sensitive and anchored at the
    # start of the string, exactly as in the one-line original.
    if re.match(r'^https?://', location):
        return 'url'
    return 'path'

if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:

media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
media_location_key = location_key(media_template)

# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
# can't be used at the same time
Expand All @@ -1904,7 +1908,7 @@ def prepare_template(template_name, identifiers):
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
representation_ms_info['fragments'] = [{
'url': media_template % {
media_location_key: media_template % {
'Number': segment_number,
'Bandwidth': bandwidth,
},
Expand All @@ -1928,7 +1932,7 @@ def add_segment_url():
'Number': segment_number,
}
representation_ms_info['fragments'].append({
'url': segment_url,
media_location_key: segment_url,
'duration': float_or_none(segment_d, representation_ms_info['timescale']),
})

Expand All @@ -1952,8 +1956,9 @@ def add_segment_url():
for s in representation_ms_info['s']:
duration = float_or_none(s['d'], timescale)
for r in range(s.get('r', 0) + 1):
segment_uri = representation_ms_info['segment_urls'][segment_index]
fragments.append({
'url': representation_ms_info['segment_urls'][segment_index],
location_key(segment_uri): segment_uri,
'duration': duration,
})
segment_index += 1
Expand All @@ -1962,17 +1967,16 @@ def add_segment_url():
# No fragments key is present in this case.
if 'fragments' in representation_ms_info:
f.update({
'fragment_base_url': base_url,
'fragments': [],
'protocol': 'http_dash_segments',
})
if 'initialization_url' in representation_ms_info:
initialization_url = representation_ms_info['initialization_url']
if not f.get('url'):
f['url'] = initialization_url
f['fragments'].append({'url': initialization_url})
f['fragments'].append({location_key(initialization_url): initialization_url})
f['fragments'].extend(representation_ms_info['fragments'])
for fragment in f['fragments']:
fragment['url'] = urljoin(base_url, fragment['url'])
try:
existing_format = next(
fo for fo in formats
Expand Down

0 comments on commit 1141e91

Please sign in to comment.