Skip to content

Commit

Permalink
Fix provider TVChaosUK (#8737)
Browse files Browse the repository at this point in the history
* Fix login

* Fix searching

* Add freeleech

* Fix flake issues

* Missed space

* Update changelog

* Fix typos

* yarn dev
  • Loading branch information
p0psicles authored Nov 17, 2020
1 parent 32c2b78 commit 63f2873
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 164 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,17 @@
#### New Features

#### Improvements
- Vueified add existing shows ([8448](https://github.com/pymedusa/Medusa/pull/8448))
- Get real-time progress updates while shows are being added
- Add shows automatically when metadata is available
- Preset show options
- Vueified add show ([8448](https://github.com/pymedusa/Medusa/pull/8448))
- No more page redirects when you add a show that already exists
- Select show list while adding show
- Map show lists to anime, when configured in config -> anime

#### Fixes
- Fixed provider TVChaosUK ([8737](https://github.com/pymedusa/Medusa/pull/8737))

-----

Expand Down
152 changes: 68 additions & 84 deletions medusa/providers/torrent/html/tvchaosuk.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,11 @@
from medusa.bs4_parser import BS4Parser
from medusa.helper.common import (
convert_size,
try_int,
)
from medusa.helper.exceptions import AuthException
from medusa.logger.adapters.style import BraceAdapter
from medusa.providers.torrent.torrent_provider import TorrentProvider

from requests.compat import urljoin
from requests.compat import quote, urljoin

log = BraceAdapter(logging.getLogger(__name__))
log.logger.addHandler(logging.NullHandler())
Expand All @@ -35,12 +33,10 @@ def __init__(self):
self.password = None

# URLs
self.url = 'https://www.tvchaosuk.com'
self.url = 'https://tvchaosuk.com'
self.urls = {
'login': urljoin(self.url, 'takelogin.php'),
'index': urljoin(self.url, 'index.php'),
'search': urljoin(self.url, 'browse.php'),
'query': urljoin(self.url, 'scripts/autocomplete/query.php'),
'login': urljoin(self.url, 'login'),
'search': urljoin(self.url, 'torrents/filter')
}

# Proper Strings
Expand All @@ -53,6 +49,9 @@ def __init__(self):
# Cache
self.cache = tv.Cache(self)

# Store _token as it's needed for searches.
self._token = ''

def search(self, search_strings, age=0, ep_obj=None, **kwargs):
"""
Search a provider and parse the results.
Expand All @@ -63,32 +62,37 @@ def search(self, search_strings, age=0, ep_obj=None, **kwargs):
:returns: A list of search results (structure)
"""
results = []
if not self.login():
if not self.login() or not self._token:
return results

# Search Params
search_params = {
'do': 'search',
'search_type': 't_name',
'category': 0,
'include_dead_torrents': 'no',
'submit': 'search',
'_token': self._token,
'search': '',
'description': '',
'uploader': '',
'imdb': '',
'tvdb': '',
'view': 'list',
'tmdb': '',
'start_year': '',
'end_year': '',
'page': 0,
'qty': 100,
}

for mode in search_strings:
log.debug('Search mode: {0}', mode)

for search_string in search_strings[mode]:

if mode == 'Season':
search_string = re.sub(r'(.*)S0?', r'\1Series ', search_string)

elif mode != 'RSS':
if mode != 'RSS':
log.debug('Search string: {search}',
{'search': search_string})

search_params['keywords'] = search_string
response = self.session.post(self.urls['search'], data=search_params)
search_params['search'] = quote(search_string)

response = self.session.get(self.urls['search'], params='&'.join('{}={}'.format(k, v) for k, v in search_params.items()))
if not response or not response.text:
log.debug('No data returned from provider')
continue
Expand All @@ -107,46 +111,39 @@ def parse(self, data, mode, **kwargs):
:return: A list of items found
"""
# Units
units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
units = ['B', 'KB', 'MIB', 'GIB', 'TB', 'PB']

items = []

keywords = kwargs.pop('keywords', None)

with BS4Parser(data, 'html5lib') as html:
torrent_table = html.find(id='sortabletable')
torrent_table = html.find('table', class_='table')
torrent_rows = torrent_table('tr') if torrent_table else []

# Continue only if at least one release is found
if len(torrent_rows) < 2:
log.debug('Data returned from provider does not contain any torrents')
return items

labels = [label.img['title'] if label.img else label.get_text(strip=True) for label in
torrent_rows[0]('td')]
labels = [label.get_text(strip=True) for label in torrent_rows[0]('th')]

# Skip column headers
for row in torrent_rows[1:]:
try:
# Skip highlighted torrents
if mode == 'RSS' and row.get('class') == ['highlight']:
continue
cells = row('td')

if self.freeleech and not row.find('img', alt=re.compile('Free Torrent')):
continue
if self.freeleech:
badges = cells[labels.index('Name')]('span', class_='badge-extra')
if 'Freeleech' not in [badge.get_text(strip=True) for badge in badges]:
continue

title = cells[labels.index('Name')].find('a', class_='view-torrent').get_text(strip=True)
download_url = cells[labels.index('Name')].find('button').parent['href']

title = row.find(class_='tooltip-content')
title = title.div.get_text(strip=True) if title else None
download_url = row.find(title='Click to Download this Torrent!')
download_url = download_url.parent['href'] if download_url else None
if not all([title, download_url]):
continue

if title.endswith('...'):
title = self.get_full_title(title)

seeders = try_int(row.find(title='Seeders').get_text(strip=True))
leechers = try_int(row.find(title='Leechers').get_text(strip=True))
seeders = int(cells[labels.index('S')].get_text(strip=True))
leechers = int(cells[labels.index('L')].get_text(strip=True))

# Filter unseeded torrent
if seeders < self.minseed:
Expand All @@ -156,30 +153,19 @@ def parse(self, data, mode, **kwargs):
title, seeders)
continue

# Chop off tracker/channel prefix or we cant parse the result!
if mode != 'RSS' and keywords:
show_name_first_word = re.search(r'^[^ .]+', keywords).group()
if not title.startswith(show_name_first_word):
title = re.sub(r'.*(' + show_name_first_word + '.*)', r'\1', title)

# Change title from Series to Season, or we can't parse
if mode == 'Season':
title = re.sub(r'(.*)(?i)Series', r'\1Season', title)

# Strip year from the end or we can't parse it!
title = re.sub(r'(.*)[\. ]?\(\d{4}\)', r'\1', title)
title = re.sub(r'\s+', r' ', title)

torrent_size = row('td')[labels.index('Size')].get_text(strip=True)
torrent_size = cells[labels.index('Size')].get_text(strip=True)
size = convert_size(torrent_size, units=units) or -1

pubdate_raw = cells[labels.index('Created at')].get_text(strip=True)
pubdate = self.parse_pubdate(pubdate_raw, human_time=True)

item = {
'title': title + '.hdtv.x264',
'title': title,
'link': download_url,
'size': size,
'seeders': seeders,
'leechers': leechers,
'pubdate': None,
'pubdate': pubdate,
}
if mode != 'RSS':
log.debug('Found result: {0} with {1} seeders and {2} leechers',
Expand All @@ -196,47 +182,45 @@ def login(self):
if len(self.session.cookies) >= 4:
return True

# Get the _token
response_token = self.session.get(self.urls['login'])
if not response_token or not response_token.text:
log.warning('Provider not reachable')
return False

match_token = re.search(r'<meta name="csrf-token" content="([^"]+)">', response_token.text)
match_captcha = re.search(r'<input type="hidden" name="_captcha" value="([^"]+)" />', response_token.text)
match_hash = re.search(r'<input type="hidden".+name="([^"]+)".+value="(\d+)"', response_token.text)

if not match_token or not match_captcha or not match_hash:
log.warning('Could not get token or captcha')
return False

self._token = match_token.group(1)
captcha = match_captcha.group(1)
hash_key = match_hash.group(1)
hash_value = match_hash.group(2)

login_params = {
'_token': self._token,
'username': self.username,
'password': self.password,
'logout': 'no',
'submit': 'LOGIN',
'returnto': '/browse.php',
'remember': 'on',
'_captcha': captcha,
'_username': '',
hash_key: hash_value
}

response = self.session.post(self.urls['login'], data=login_params)
if not response or not response.text:
log.warning('Unable to connect to provider')
return False

if re.search('Error: Username or password incorrect!', response.text):
if re.search('These credentials do not match our records', response.text):
log.warning('Invalid username or password. Check your settings')
return False

return True

def _check_auth(self):
"""
Check that the login credentials are configured.

:returns: True when both the username and the password are set
:raises AuthException: when the username or the password is missing
"""
if self.username and self.password:
return True

raise AuthException('Your authentication credentials for {0} are missing,'
' check your config.'.format(self.name))

def get_full_title(self, title):
"""
Get the full title of a release, as the provider ends long titles with "..." in the HTML.

:param title: Title as shown in the HTML; its last three characters are dropped unconditionally,
so the caller is expected to pass a title that ends in "..."
:return: Text of the last `results` item found in the query response, or the title without its
last three characters when the request returns nothing or no item is found
"""
# Strip trailing 3 dots
title = title[:-3]
search_params = {'input': title}
response = self.session.get(self.urls['query'], params=search_params)
# Fall back to the shortened title when the lookup yields no usable response
if not response or not response.text:
log.debug("Couldn't retrieve the full release title")
return title

with BS4Parser(response.text, 'html5lib') as html:
# NOTE(review): presumably collects every <results> element of the response - confirm against BS4Parser
titles = html('results')
# Each item overwrites the previous value, so the last item wins
for item in titles:
title = item.text
return title


# Provider instance created when this module is loaded
provider = TVChaosUKProvider()
Loading

0 comments on commit 63f2873

Please sign in to comment.