Skip to content

Commit

Permalink
Merge branch 'ytdl-org:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
hellopony authored Jul 29, 2023
2 parents cfd88f2 + abef534 commit feccf27
Show file tree
Hide file tree
Showing 6 changed files with 203 additions and 85 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ jobs:
#-------- Jython ------
- name: Set up Java 8
if: ${{ matrix.python-impl == 'jython' }}
uses: actions/setup-java@v2
uses: actions/setup-java@v3
with:
java-version: 8
distribution: 'zulu'
Expand Down
30 changes: 17 additions & 13 deletions test/test_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,12 @@ def gzip_compress(p):
respond()
elif self.path == '/%c7%9f':
respond()
elif self.path == '/redirect_dotsegments':
self.send_response(301)
# redirect to /headers but with dot segments before
self.send_header('Location', '/a/b/./../../headers')
self.send_header('Content-Length', '0')
self.end_headers()
elif self.path.startswith('/redirect_'):
self._redirect()
elif self.path.startswith('/method'):
Expand Down Expand Up @@ -461,33 +467,23 @@ def __test_compression(self, encoding):
sanitized_Request(
self._test_url('content-encoding'),
headers={'ytdl-encoding': encoding}))
self.assertEqual(res.headers.get('Content-Encoding'), encoding)
# decoded encodings are removed: only check for valid decompressed data
self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')

@unittest.skipUnless(brotli, 'brotli support is not installed')
@unittest.expectedFailure
def test_brotli(self):
self.__test_compression('br')

@unittest.expectedFailure
def test_deflate(self):
self.__test_compression('deflate')

@unittest.expectedFailure
def test_gzip(self):
self.__test_compression('gzip')

@unittest.expectedFailure # not yet implemented
def test_multiple_encodings(self):
# https://www.rfc-editor.org/rfc/rfc9110.html#section-8.4
with FakeYDL() as ydl:
for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
res = ydl.urlopen(
sanitized_Request(
self._test_url('content-encoding'),
headers={'ytdl-encoding': pair}))
self.assertEqual(res.headers.get('Content-Encoding'), pair)
self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')
for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
self.__test_compression(pair)

def test_unsupported_encoding(self):
# it should return the raw content
Expand All @@ -499,6 +495,14 @@ def test_unsupported_encoding(self):
self.assertEqual(res.headers.get('Content-Encoding'), 'unsupported')
self.assertEqual(res.read(), b'raw')

def test_remove_dot_segments(self):
    """The final URL path is '/headers' whether the dot segments are in
    the requested URL itself or in the Location of a redirect."""
    with FakeYDL() as ydl:
        # the first path carries the dot segments directly; the second is
        # answered by the test server with a redirect to a dot-segment path
        for test_path in ('a/b/./../../headers', 'redirect_dotsegments'):
            request = sanitized_Request(self._test_url(test_path))
            response = ydl.urlopen(request)
            final_path = compat_urllib_parse.urlparse(response.geturl()).path
            self.assertEqual(final_path, '/headers')


def _build_proxy_handler(name):
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
Expand Down
29 changes: 28 additions & 1 deletion test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
parse_age_limit,
parse_duration,
parse_filesize,
parse_codecs,
parse_count,
parse_iso8601,
parse_resolution,
Expand Down Expand Up @@ -114,7 +115,7 @@
cli_option,
cli_valueless_option,
cli_bool_option,
parse_codecs,
YoutubeDLHandler,
)
from youtube_dl.compat import (
compat_chr,
Expand Down Expand Up @@ -905,6 +906,32 @@ def test_escape_url(self):
)
self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')

def test_remove_dot_segments(self):
    """Check the path produced by YoutubeDLHandler._fix_path for a range
    of absolute and relative paths containing '.' and '..' segments."""

    def remove_dot_segments(p):
        # _fix_path is handed a full URL, so the path is wrapped in one;
        # a path without a leading '/' gets one inserted for the URL and
        # stripped again from the result
        added_slash = '' if p.startswith('/') else '/'
        fixed_url = YoutubeDLHandler._fix_path(
            'http://example.com' + added_slash + p)
        fixed_path = compat_urlparse.urlsplit(fixed_url).path
        return fixed_path[1:] if added_slash else fixed_path

    # (input path, expected path), checked in this order
    expectations = (
        ('/a/b/c/./../../g', '/a/g'),
        ('mid/content=5/../6', 'mid/6'),
        ('/ad/../cd', '/cd'),
        ('/ad/../cd/', '/cd/'),
        ('/..', '/'),
        ('/./', '/'),
        ('/./a', '/a'),
        ('/abc/./.././d/././e/.././f/./../../ghi', '/ghi'),
        ('/', '/'),
        ('/t', '/t'),
        ('t', 't'),
        ('', ''),
        ('/../a/b/c', '/a/b/c'),
        ('../a', 'a'),
        ('./a', 'a'),
        ('.', ''),
        ('////', '////'),
    )
    for path, expected in expectations:
        self.assertEqual(remove_dot_segments(path), expected)

def test_js_to_json_vars_strings(self):
self.assertDictEqual(
json.loads(js_to_json(
Expand Down
23 changes: 0 additions & 23 deletions youtube_dl/YoutubeDL.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@
format_bytes,
formatSeconds,
GeoRestrictedError,
HEADRequest,
int_or_none,
ISO3166Utils,
join_nonempty,
Expand All @@ -88,7 +87,6 @@
preferredencoding,
prepend_extension,
process_communicate_or_kill,
PUTRequest,
register_socks_protocols,
render_table,
replace_extension,
Expand Down Expand Up @@ -2460,27 +2458,6 @@ def urlopen(self, req):
""" Start an HTTP download """
if isinstance(req, compat_basestring):
req = sanitized_Request(req)
# an embedded /../ sequence is not automatically handled by urllib2
# see https://github.com/yt-dlp/yt-dlp/issues/3355
url = req.get_full_url()
parts = url.partition('/../')
if parts[1]:
url = compat_urllib_parse.urljoin(parts[0] + parts[1][:1], parts[1][1:] + parts[2])
if url:
# worse, URL path may have initial /../ against RFCs: work-around
# by stripping such prefixes, like eg Firefox
parts = compat_urllib_parse.urlsplit(url)
path = parts.path
while path.startswith('/../'):
path = path[3:]
url = parts._replace(path=path).geturl()
# get a new Request with the munged URL
if url != req.get_full_url():
req_type = {'HEAD': HEADRequest, 'PUT': PUTRequest}.get(
req.get_method(), compat_urllib_request.Request)
req = req_type(
url, data=req.data, headers=dict(req.header_items()),
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
return self._opener.open(req, timeout=self._socket_timeout)

def print_debug_header(self):
Expand Down
14 changes: 14 additions & 0 deletions youtube_dl/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -3200,6 +3200,18 @@ def compat_register_utf8():
def compat_datetime_timedelta_total_seconds(td):
    """Return the duration of `td` in seconds.

    `td` must provide `days`, `seconds` and `microseconds` attributes;
    the three are combined into a microsecond count which is then
    divided by 10**6.
    """
    whole_seconds = td.days * 24 * 3600 + td.seconds
    total_microseconds = whole_seconds * 10**6 + td.microseconds
    return total_microseconds / 10**6

# optional decompression packages
# PyPI brotli package implements 'br' Content-Encoding
try:
import brotli as compat_brotli
except ImportError:
compat_brotli = None
# PyPI ncompress package implements 'compress' Content-Encoding
try:
import ncompress as compat_ncompress
except ImportError:
compat_ncompress = None


legacy = [
'compat_HTMLParseError',
Expand Down Expand Up @@ -3234,6 +3246,7 @@ def compat_datetime_timedelta_total_seconds(td):
'compat_Struct',
'compat_base64_b64decode',
'compat_basestring',
'compat_brotli',
'compat_casefold',
'compat_chr',
'compat_collections_abc',
Expand All @@ -3259,6 +3272,7 @@ def compat_datetime_timedelta_total_seconds(td):
'compat_itertools_zip_longest',
'compat_kwargs',
'compat_map',
'compat_ncompress',
'compat_numeric_types',
'compat_open',
'compat_ord',
Expand Down
Loading

0 comments on commit feccf27

Please sign in to comment.