Skip to content

Commit

Permalink
Streams publicly available tracks using a public client id to unload …
Browse files Browse the repository at this point in the history
…use of standard app client id
  • Loading branch information
laurent authored and laurent committed Apr 6, 2021
1 parent c23b993 commit 26f86b6
Show file tree
Hide file tree
Showing 7 changed files with 2,096 additions and 140 deletions.
123 changes: 104 additions & 19 deletions mopidy_soundcloud/soundcloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from contextlib import closing
from multiprocessing.pool import ThreadPool
from urllib.parse import quote_plus
from bs4 import BeautifulSoup

import requests
from requests.adapters import HTTPAdapter
Expand Down Expand Up @@ -36,7 +37,7 @@ def readable_url(uri):
).strip()


def streamable_url(url, client_id):
    """Return ``url`` with ``client_id`` appended as a query parameter.

    Args:
        url: The stream URL to extend.
        client_id: The client id value to append.

    Returns:
        ``url`` followed by ``client_id=<client_id>``. The parameter is
        introduced with ``?`` when ``url`` has no query string yet and with
        ``&`` when it already has one, so the result stays a well-formed URL.
    """
    if "?" not in url:
        separator = "?"
    elif url.endswith(("?", "&")):
        # The URL already ends with a delimiter; do not add a second one.
        separator = ""
    else:
        separator = "&"
    return f"{url}{separator}client_id={client_id}"


# Backward-compatible alias for the earlier, misspelled function name.
streamble_url = streamable_url


Expand All @@ -47,18 +48,31 @@ def get_user_url(user_id):
return f"users/{user_id}"


# Build a requests.Session whose http/https proxies are set from
# `proxy_config` (formatted via httpclient.format_proxy) and return it.
#
# NOTE(review): two signatures appear back to back, and the two header
# updates appear both before and after `if not public:`. This looks like a
# rendered diff in which the first signature and the first pair of header
# updates are the pre-change lines -- confirm against the real file.
def get_requests_session(proxy_config, user_agent, token):
def get_requests_session(proxy_config, user_agent, token, public=False):
proxy = httpclient.format_proxy(proxy_config)
full_user_agent = httpclient.format_user_agent(user_agent)

session = requests.Session()
# The same formatted proxy is used for both URL schemes.
session.proxies.update({"http": proxy, "https": proxy})
session.headers.update({"user-agent": full_user_agent})
session.headers.update({"Authorization": f"OAuth {token}"})
# Presumably both header updates below are nested under this check, so a
# session requested with public=True carries neither the custom user agent
# nor the OAuth token (indentation is not preserved here -- TODO confirm).
if not public:
session.headers.update({"user-agent": full_user_agent})
session.headers.update({"Authorization": f"OAuth {token}"})

return session


def get_mopidy_requests_session(config, public=False):
    """Create a requests session from the Mopidy ``config`` mapping.

    The proxy settings are read from ``config["proxy"]`` and the token from
    ``config["soundcloud"]["auth_token"]``; the user agent string is
    ``<extension dist name>/<extension version>``. ``public`` is forwarded
    unchanged to ``get_requests_session``.
    """
    proxy_settings = config["proxy"]
    agent = (
        f"{mopidy_soundcloud.Extension.dist_name}/"
        f"{mopidy_soundcloud.__version__}"
    )
    auth_token = config["soundcloud"]["auth_token"]
    return get_requests_session(
        proxy_config=proxy_settings,
        user_agent=agent,
        token=auth_token,
        public=public,
    )


class cache: # noqa
# TODO: merge this to util library

Expand Down Expand Up @@ -143,22 +157,21 @@ def send(self, request, **kwargs):
class SoundCloudClient:
CLIENT_ID = "93e33e327fd8a9b77becd179652272e2"

public_client_id = None

# Set up the client from the Mopidy `config` mapping: the number of explore
# songs, a throttled HTTP session for the SoundCloud API, and a second
# session created with public=True.
#
# NOTE(review): `self.http_client` is assigned twice in a row; the first,
# inline get_requests_session(...) call looks like the pre-change version
# kept by the diff rendering, superseded by get_mopidy_requests_session(config)
# -- confirm against the real file.
def __init__(self, config):
super().__init__()
# Falls back to 25 when "explore_songs" is not configured.
self.explore_songs = config["soundcloud"].get("explore_songs", 25)
self.http_client = get_requests_session(
proxy_config=config["proxy"],
user_agent=(
f"{mopidy_soundcloud.Extension.dist_name}/"
f"{mopidy_soundcloud.__version__}"
),
token=config["soundcloud"]["auth_token"],
)
self.http_client = get_mopidy_requests_session(config)
# Only requests to https://api.soundcloud.com/ go through the throttling
# adapter; other URLs use the session's default adapters.
adapter = ThrottlingHttpAdapter(
burst_length=3, burst_window=1, wait_window=10
)
self.http_client.mount("https://api.soundcloud.com/", adapter)

# Separate session requested with public=True; it is not mounted with the
# throttling adapter above.
self.public_stream_client = get_mopidy_requests_session(
config, public=True
)

@property
@cache()
def user(self):
Expand Down Expand Up @@ -236,7 +249,8 @@ def get_track(self, track_id, streamable=False):
except Exception:
return None

def parse_track_uri(self, track):
@staticmethod
def parse_track_uri(track):
logger.debug(f"Parsing track {track}")
if hasattr(track, "uri"):
track = track.uri
Expand Down Expand Up @@ -304,7 +318,7 @@ def parse_track(self, data, remote_url=False):
)
return None
if not data.get("kind") == "track":
logger.debug(f"{data.get('title')} is not track")
logger.debug(f"{data.get('title')} is not a track")
return None

track_kwargs = {}
Expand All @@ -326,7 +340,8 @@ def parse_track(self, data, remote_url=False):
track_kwargs["date"] = data["date"]

if remote_url:
track_kwargs["uri"] = self.get_streamble_url(data["stream_url"])
args = (data["sharing"], data["permalink_url"], data["stream_url"])
track_kwargs["uri"] = self.get_streamable_url(*args)
if track_kwargs["uri"] is None:
logger.info(
f"{data.get('title')} can't be streamed from SoundCloud"
Expand All @@ -348,14 +363,84 @@ def parse_track(self, data, remote_url=False):

return Track(**track_kwargs)

def _update_public_client_id(self):
    """Refresh ``self.public_client_id`` from the SoundCloud website.

    Downloads ``https://soundcloud.com/``, then fetches every
    ``<script src=...>`` the page references, in document order, and stores
    the first ``client_id=<value>`` occurrence (16 or more alphanumeric
    characters) found in one of those scripts.

    ``self.public_client_id`` is reset to ``None`` before scanning and stays
    ``None`` when no script contains a client id; that case is logged
    instead of passing silently.
    """

    def get_page(url):
        # Fetch ``url`` with the public stream session, decoded as UTF-8.
        return self.public_stream_client.get(url).content.decode("utf-8")

    public_page = get_page("https://soundcloud.com/")
    client_id_pattern = re.compile(r"client_id=([a-zA-Z0-9]{16,})")
    soundcloud_soup = BeautifulSoup(public_page, "html.parser")
    self.public_client_id = None
    for script in soundcloud_soup.find_all("script", attrs={"src": True}):
        # Only the first match matters, so a single search per script is
        # enough.
        match = client_id_pattern.search(get_page(script["src"]))
        if match is None:
            continue
        self.public_client_id = match.group(1)
        logger.debug(
            f"Updated SoundCloud public client id to: {self.public_client_id}"
        )
        return

    logger.info("No SoundCloud public client id could be found")

def _get_public_stream(self, progr_stream):
    """GET ``progr_stream`` with the public client id as a query parameter.

    Returns the response produced by ``self.public_stream_client.get``.
    """
    query = [("client_id", self.public_client_id)]
    response = self.public_stream_client.get(progr_stream, params=query)
    return response

@staticmethod
def parse_fail_reason(reason):
return "" if reason == "Unknown" else f"({reason})"

# Resolve a playable URL for a track. For tracks whose `sharing` is "public"
# the track's permalink page is scanned for "progressive" stream URLs, which
# are requested with the public client id; otherwise, or when that fails,
# `stream_url` is requested with the standard app CLIENT_ID.
#
# NOTE(review): this span interleaves pre-change and post-change lines
# (get_streamble_url(self, url) and its first `req = ...` line, plus the
# inline `reason = ...` / first logger.warning pair, look like the old
# version) and has lost its indentation, so the nesting described in the
# comments below is inferred -- confirm against the real file.
@cache()
def get_streamble_url(self, url):
req = self.http_client.head(streamble_url(url, self.CLIENT_ID))
def get_streamable_url(self, sharing, permalink_url, stream_url):

# Fetch a public client id on first use.
if self.public_client_id is None:
self._update_public_client_id()

# May receive the keys "preview" and "stream".
progressive_urls = {}
if sharing == "public" and self.public_client_id is not None:
res = self.public_stream_client.get(permalink_url)

# Split the page text on double quotes and pick out the pieces that end
# with the progressive preview / stream suffixes.
for html_substring in res.text.split('"'):
if html_substring.endswith("preview/progressive"):
progressive_urls["preview"] = html_substring
elif html_substring.endswith("stream/progressive"):
progressive_urls["stream"] = html_substring

# Stop scanning once both kinds of URL have been found.
if progressive_urls.get("preview"):
if progressive_urls.get("stream"):
break

if progressive_urls.get("stream"):
stream = self._get_public_stream(progressive_urls["stream"])
if stream.status_code in [401, 403, 429]:
self._update_public_client_id() # refresh public client id once
stream = self._get_public_stream(progressive_urls["stream"])

# The response body is parsed as JSON and its "url" entry returned; any
# exception raised here is logged and execution continues with the app
# client id request below.
try:
return stream.json().get("url")
except Exception as e:
logger.info(
"Streaming of public song using public client id failed, "
"trying with standard app client id.."
)
logger.debug(
f"Caught public client id stream fail:\n{str(e)}"
f"\n{self.parse_fail_reason(stream.reason)}"
)

# Request with the standard app client id; per the original note this path
# quickly yields rate limit errors.
req = self.http_client.head(streamable_url(stream_url, self.CLIENT_ID))
if req.status_code == 302:
# The redirect target is returned as the streamable URL.
return req.headers.get("Location", None)
elif req.status_code == 429:
reason = "" if req.reason == "Unknown" else f" ({req.reason})"
logger.warning(f"SoundCloud daily rate limit exceeded{reason}")
logger.warning(
"SoundCloud daily rate limit exceeded "
f"{self.parse_fail_reason(req.reason)}"
)
# Presumably nested under the 429 branch: fall back to the preview stream
# when one was found on the permalink page (TODO confirm nesting).
if progressive_urls.get("preview"):
logger.info("Playing public preview stream")
stream = self._get_public_stream(progressive_urls["preview"])
return stream.json().get("url")

def resolve_tracks(self, track_ids):
"""Resolve tracks concurrently emulating browser
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ install_requires =
Pykka >= 2.0.1
setuptools
requests >= 2.0.0
beautifulsoup4 >= 4.7.1


[options.extras_require]
Expand Down
Loading

0 comments on commit 26f86b6

Please sign in to comment.