subliminal-2.1.0/ 0000755 0001750 0001750 00000000000 13653235562 014141 5 ustar jones jones 0000000 0000000 subliminal-2.1.0/subliminal/ 0000755 0001750 0001750 00000000000 13653235562 016300 5 ustar jones jones 0000000 0000000 subliminal-2.1.0/subliminal/converters/ 0000755 0001750 0001750 00000000000 13653235562 020472 5 ustar jones jones 0000000 0000000 subliminal-2.1.0/subliminal/converters/__init__.py 0000664 0001750 0001750 00000000000 13420412567 022565 0 ustar jones jones 0000000 0000000 subliminal-2.1.0/subliminal/converters/addic7ed.py 0000664 0001750 0001750 00000003306 13420412567 022506 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
from babelfish import LanguageReverseConverter, language_converters
class Addic7edConverter(LanguageReverseConverter):
def __init__(self):
self.name_converter = language_converters['name']
self.from_addic7ed = {u'Català': ('cat',), 'Chinese (Simplified)': ('zho',), 'Chinese (Traditional)': ('zho',),
'Euskera': ('eus',), 'Galego': ('glg',), 'Greek': ('ell',), 'Malay': ('msa',),
'Portuguese (Brazilian)': ('por', 'BR'), 'Serbian (Cyrillic)': ('srp', None, 'Cyrl'),
'Serbian (Latin)': ('srp',), 'Spanish (Latin America)': ('spa',),
'Spanish (Spain)': ('spa',)}
self.to_addic7ed = {('cat',): 'Català', ('zho',): 'Chinese (Simplified)', ('eus',): 'Euskera',
('glg',): 'Galego', ('ell',): 'Greek', ('msa',): 'Malay',
('por', 'BR'): 'Portuguese (Brazilian)', ('srp', None, 'Cyrl'): 'Serbian (Cyrillic)'}
self.codes = self.name_converter.codes | set(self.from_addic7ed.keys())
def convert(self, alpha3, country=None, script=None):
if (alpha3, country, script) in self.to_addic7ed:
return self.to_addic7ed[(alpha3, country, script)]
if (alpha3, country) in self.to_addic7ed:
return self.to_addic7ed[(alpha3, country)]
if (alpha3,) in self.to_addic7ed:
return self.to_addic7ed[(alpha3,)]
return self.name_converter.convert(alpha3, country, script)
def reverse(self, addic7ed):
if addic7ed in self.from_addic7ed:
return self.from_addic7ed[addic7ed]
return self.name_converter.reverse(addic7ed)
subliminal-2.1.0/subliminal/converters/legendastv.py 0000664 0001750 0001750 00000002363 13420412567 023200 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
from babelfish import LanguageReverseConverter
from ..exceptions import ConfigurationError
class LegendasTVConverter(LanguageReverseConverter):
def __init__(self):
self.from_legendastv = {1: ('por', 'BR'), 2: ('eng',), 3: ('spa',), 4: ('fra',), 5: ('deu',), 6: ('jpn',),
7: ('dan',), 8: ('nor',), 9: ('swe',), 10: ('por',), 11: ('ara',), 12: ('ces',),
13: ('zho',), 14: ('kor',), 15: ('bul',), 16: ('ita',), 17: ('pol',)}
self.to_legendastv = {v: k for k, v in self.from_legendastv.items()}
self.codes = set(self.from_legendastv.keys())
def convert(self, alpha3, country=None, script=None):
if (alpha3, country) in self.to_legendastv:
return self.to_legendastv[(alpha3, country)]
if (alpha3,) in self.to_legendastv:
return self.to_legendastv[(alpha3,)]
raise ConfigurationError('Unsupported language code for legendastv: %s, %s, %s' % (alpha3, country, script))
def reverse(self, legendastv):
if legendastv in self.from_legendastv:
return self.from_legendastv[legendastv]
raise ConfigurationError('Unsupported language number for legendastv: %s' % legendastv)
subliminal-2.1.0/subliminal/converters/shooter.py 0000664 0001750 0001750 00000001512 13420412567 022522 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
from babelfish import LanguageReverseConverter
from ..exceptions import ConfigurationError
class ShooterConverter(LanguageReverseConverter):
def __init__(self):
self.from_shooter = {'chn': ('zho',), 'eng': ('eng',)}
self.to_shooter = {v: k for k, v in self.from_shooter.items()}
self.codes = set(self.from_shooter.keys())
def convert(self, alpha3, country=None, script=None):
if (alpha3,) in self.to_shooter:
return self.to_shooter[(alpha3,)]
raise ConfigurationError('Unsupported language for shooter: %s, %s, %s' % (alpha3, country, script))
def reverse(self, shooter):
if shooter in self.from_shooter:
return self.from_shooter[shooter]
raise ConfigurationError('Unsupported language code for shooter: %s' % shooter)
subliminal-2.1.0/subliminal/converters/thesubdb.py 0000664 0001750 0001750 00000002143 13420412567 022640 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
from babelfish import LanguageReverseConverter
from ..exceptions import ConfigurationError
class TheSubDBConverter(LanguageReverseConverter):
def __init__(self):
self.from_thesubdb = {'en': ('eng',), 'es': ('spa',), 'fr': ('fra',), 'it': ('ita',), 'nl': ('nld',),
'pl': ('pol',), 'pt': ('por', 'BR'), 'ro': ('ron',), 'sv': ('swe',), 'tr': ('tur',)}
self.to_thesubdb = {v: k for k, v in self.from_thesubdb.items()}
self.codes = set(self.from_thesubdb.keys())
def convert(self, alpha3, country=None, script=None):
if (alpha3, country) in self.to_thesubdb:
return self.to_thesubdb[(alpha3, country)]
if (alpha3,) in self.to_thesubdb:
return self.to_thesubdb[(alpha3,)]
raise ConfigurationError('Unsupported language for thesubdb: %s, %s, %s' % (alpha3, country, script))
def reverse(self, thesubdb):
if thesubdb in self.from_thesubdb:
return self.from_thesubdb[thesubdb]
raise ConfigurationError('Unsupported language code for thesubdb: %s' % thesubdb)
subliminal-2.1.0/subliminal/converters/tvsubtitles.py 0000664 0001750 0001750 00000002114 13420412567 023426 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
from babelfish import LanguageReverseConverter, language_converters
class TVsubtitlesConverter(LanguageReverseConverter):
def __init__(self):
self.alpha2_converter = language_converters['alpha2']
self.from_tvsubtitles = {'br': ('por', 'BR'), 'ua': ('ukr',), 'gr': ('ell',), 'cn': ('zho',), 'jp': ('jpn',),
'cz': ('ces',)}
self.to_tvsubtitles = {v: k for k, v in self.from_tvsubtitles.items()}
self.codes = self.alpha2_converter.codes | set(self.from_tvsubtitles.keys())
def convert(self, alpha3, country=None, script=None):
if (alpha3, country) in self.to_tvsubtitles:
return self.to_tvsubtitles[(alpha3, country)]
if (alpha3,) in self.to_tvsubtitles:
return self.to_tvsubtitles[(alpha3,)]
return self.alpha2_converter.convert(alpha3, country, script)
def reverse(self, tvsubtitles):
if tvsubtitles in self.from_tvsubtitles:
return self.from_tvsubtitles[tvsubtitles]
return self.alpha2_converter.reverse(tvsubtitles)
subliminal-2.1.0/subliminal/providers/ 0000755 0001750 0001750 00000000000 13653235562 020315 5 ustar jones jones 0000000 0000000 subliminal-2.1.0/subliminal/providers/__init__.py 0000644 0001750 0001750 00000014165 13653235327 022434 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
import logging
from bs4 import BeautifulSoup, FeatureNotFound
from six.moves.xmlrpc_client import SafeTransport
from .. import __short_version__
from ..video import Episode, Movie
logger = logging.getLogger(__name__)
class TimeoutSafeTransport(SafeTransport):
"""Timeout support for ``xmlrpc.client.SafeTransport``."""
def __init__(self, timeout, *args, **kwargs):
SafeTransport.__init__(self, *args, **kwargs)
self.timeout = timeout
def make_connection(self, host):
c = SafeTransport.make_connection(self, host)
c.timeout = self.timeout
return c
class ParserBeautifulSoup(BeautifulSoup):
"""A ``bs4.BeautifulSoup`` that picks the first parser available in `parsers`.
:param markup: markup for the ``bs4.BeautifulSoup``.
:param list parsers: parser names, in order of preference.
"""
def __init__(self, markup, parsers, **kwargs):
# reject features
if set(parsers).intersection({'fast', 'permissive', 'strict', 'xml', 'html', 'html5'}):
raise ValueError('Features not allowed, only parser names')
# reject some kwargs
if 'features' in kwargs:
raise ValueError('Cannot use features kwarg')
if 'builder' in kwargs:
raise ValueError('Cannot use builder kwarg')
# pick the first parser available
for parser in parsers:
try:
super(ParserBeautifulSoup, self).__init__(markup, parser, **kwargs)
return
except FeatureNotFound:
pass
raise FeatureNotFound
class Provider(object):
"""Base class for providers.
If any configuration is possible for the provider, like credentials, it must take place during instantiation.
:raise: :class:`~subliminal.exceptions.ConfigurationError` if there is a configuration error
"""
#: Supported set of :class:`~babelfish.language.Language`
languages = set()
#: Supported video types
video_types = (Episode, Movie)
#: Required hash, if any
required_hash = None
#: Subtitle class to use
subtitle_class = None
#: User Agent to use
user_agent = 'Subliminal/%s' % __short_version__
def __enter__(self):
self.initialize()
return self
def __exit__(self, exc_type, exc_value, traceback):
self.terminate()
def initialize(self):
"""Initialize the provider.
Must be called when starting to work with the provider. This is the place for network initialization
or login operations.
.. note::
This is called automatically when entering the `with` statement
"""
raise NotImplementedError
def terminate(self):
"""Terminate the provider.
Must be called when done with the provider. This is the place for network shutdown or logout operations.
.. note::
This is called automatically when exiting the `with` statement
"""
raise NotImplementedError
@classmethod
def check(cls, video):
"""Check if the `video` can be processed.
The `video` is considered invalid if not an instance of :attr:`video_types` or if the :attr:`required_hash` is
not present in :attr:`~subliminal.video.Video.hashes` attribute of the `video`.
:param video: the video to check.
:type video: :class:`~subliminal.video.Video`
:return: `True` if the `video` is valid, `False` otherwise.
:rtype: bool
"""
if not cls.check_types(video):
return False
if cls.required_hash is not None and cls.required_hash not in video.hashes:
return False
return True
@classmethod
def check_types(cls, video):
"""Check if the `video` type is supported by the provider.
The `video` is considered invalid if not an instance of :attr:`video_types`.
:param video: the video to check.
:type video: :class:`~subliminal.video.Video`
:return: `True` if the `video` is valid, `False` otherwise.
:rtype: bool
"""
return isinstance(video, cls.video_types)
@classmethod
def check_languages(cls, languages):
"""Check if the `languages` are supported by the provider.
A subset of the supported languages is returned.
:param languages: the languages to check.
:type languages: set of :class:`~babelfish.language.Language`
:return: subset of the supported languages.
:rtype: set of :class:`~babelfish.language.Language`
"""
return cls.languages & languages
def query(self, *args, **kwargs):
"""Query the provider for subtitles.
Arguments should match as much as possible the actual parameters for querying the provider
:return: found subtitles.
:rtype: list of :class:`~subliminal.subtitle.Subtitle`
:raise: :class:`~subliminal.exceptions.ProviderError`
"""
raise NotImplementedError
def list_subtitles(self, video, languages):
"""List subtitles for the `video` with the given `languages`.
This will call the :meth:`query` method internally. The parameters passed to the :meth:`query` method may
vary depending on the amount of information available in the `video`.
:param video: video to list subtitles for.
:type video: :class:`~subliminal.video.Video`
:param languages: languages to search for.
:type languages: set of :class:`~babelfish.language.Language`
:return: found subtitles.
:rtype: list of :class:`~subliminal.subtitle.Subtitle`
:raise: :class:`~subliminal.exceptions.ProviderError`
"""
raise NotImplementedError
def download_subtitle(self, subtitle):
"""Download `subtitle`'s :attr:`~subliminal.subtitle.Subtitle.content`.
:param subtitle: subtitle to download.
:type subtitle: :class:`~subliminal.subtitle.Subtitle`
:raise: :class:`~subliminal.exceptions.ProviderError`
"""
raise NotImplementedError
def __repr__(self):
return '<%s [%r]>' % (self.__class__.__name__, self.video_types)
subliminal-2.1.0/subliminal/providers/addic7ed.py 0000644 0001750 0001750 00000027437 13653235327 022347 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
import logging
import re
from babelfish import Language, language_converters
from guessit import guessit
from requests import Session
from . import ParserBeautifulSoup, Provider
from ..cache import SHOW_EXPIRATION_TIME, region
from ..exceptions import AuthenticationError, ConfigurationError, DownloadLimitExceeded
from ..matches import guess_matches
from ..subtitle import Subtitle, fix_line_ending
from ..utils import sanitize
from ..video import Episode
logger = logging.getLogger(__name__)
language_converters.register('addic7ed = subliminal.converters.addic7ed:Addic7edConverter')
# Series cell matching regex
show_cells_re = re.compile(b'
.*? | ', re.DOTALL)
#: Series header parsing regex
series_year_re = re.compile(r'^(?P[ \w\'.:(),*&!?-]+?)(?: \((?P\d{4})\))?$')
class Addic7edSubtitle(Subtitle):
"""Addic7ed Subtitle."""
provider_name = 'addic7ed'
def __init__(self, language, hearing_impaired, page_link, series, season, episode, title, year, version,
download_link):
super(Addic7edSubtitle, self).__init__(language, hearing_impaired=hearing_impaired, page_link=page_link)
self.series = series
self.season = season
self.episode = episode
self.title = title
self.year = year
self.version = version
self.download_link = download_link
@property
def id(self):
return self.download_link
@property
def info(self):
return '{series}{yopen}{year}{yclose} s{season:02d}e{episode:02d}{topen}{title}{tclose}{version}'.format(
series=self.series, season=self.season, episode=self.episode, title=self.title, year=self.year or '',
version=self.version, yopen=' (' if self.year else '', yclose=')' if self.year else '',
topen=' - ' if self.title else '', tclose=' - ' if self.version else ''
)
def get_matches(self, video):
# series name
matches = guess_matches(video, {
'title': self.series,
'season': self.season,
'episode': self.episode,
'episode_title': self.title,
'year': self.year,
'release_group': self.version,
})
# resolution
if video.resolution and self.version and video.resolution in self.version.lower():
matches.add('resolution')
# other properties
if self.version:
matches |= guess_matches(video, guessit(self.version, {'type': 'episode'}), partial=True)
return matches
class Addic7edProvider(Provider):
"""Addic7ed Provider."""
languages = {Language('por', 'BR')} | {Language(l) for l in [
'ara', 'aze', 'ben', 'bos', 'bul', 'cat', 'ces', 'dan', 'deu', 'ell', 'eng', 'eus', 'fas', 'fin', 'fra', 'glg',
'heb', 'hrv', 'hun', 'hye', 'ind', 'ita', 'jpn', 'kor', 'mkd', 'msa', 'nld', 'nor', 'pol', 'por', 'ron', 'rus',
'slk', 'slv', 'spa', 'sqi', 'srp', 'swe', 'tha', 'tur', 'ukr', 'vie', 'zho'
]}
video_types = (Episode,)
server_url = 'http://www.addic7ed.com/'
subtitle_class = Addic7edSubtitle
def __init__(self, username=None, password=None):
if any((username, password)) and not all((username, password)):
raise ConfigurationError('Username and password must be specified')
self.username = username
self.password = password
self.logged_in = False
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = self.user_agent
# login
if self.username and self.password:
logger.info('Logging in')
data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'}
r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10)
if r.status_code != 302:
raise AuthenticationError(self.username)
logger.debug('Logged in')
self.logged_in = True
def terminate(self):
# logout
if self.logged_in:
logger.info('Logging out')
r = self.session.get(self.server_url + 'logout.php', timeout=10)
r.raise_for_status()
logger.debug('Logged out')
self.logged_in = False
self.session.close()
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
def _get_show_ids(self):
"""Get the ``dict`` of show ids per series by querying the `shows.php` page.
:return: show id per series, lower case and without quotes.
:rtype: dict
"""
# get the show page
logger.info('Getting show ids')
r = self.session.get(self.server_url + 'shows.php', timeout=10)
r.raise_for_status()
# LXML parser seems to fail when parsing Addic7ed.com HTML markup.
# Last known version to work properly is 3.6.4 (next version, 3.7.0, fails)
# Assuming the site's markup is bad, and stripping it down to only contain what's needed.
show_cells = re.findall(show_cells_re, r.content)
if show_cells:
soup = ParserBeautifulSoup(b''.join(show_cells), ['lxml', 'html.parser'])
else:
# If RegEx fails, fall back to original r.content and use 'html.parser'
soup = ParserBeautifulSoup(r.content, ['html.parser'])
# populate the show ids
show_ids = {}
for show in soup.select('td.version > h3 > a[href^="/show/"]'):
show_ids[sanitize(show.text)] = int(show['href'][6:])
logger.debug('Found %d show ids', len(show_ids))
return show_ids
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
def _search_show_id(self, series, year=None):
"""Search the show id from the `series` and `year`.
:param str series: series of the episode.
:param year: year of the series, if any.
:type year: int
:return: the show id, if found.
:rtype: int
"""
# addic7ed doesn't support search with quotes
series = series.replace('\'', ' ')
# build the params
series_year = '%s %d' % (series, year) if year is not None else series
params = {'search': series_year, 'Submit': 'Search'}
# make the search
logger.info('Searching show ids with %r', params)
r = self.session.get(self.server_url + 'srch.php', params=params, timeout=10)
r.raise_for_status()
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
# get the suggestion
suggestion = soup.select('span.titulo > a[href^="/show/"]')
if not suggestion:
logger.warning('Show id not found: no suggestion')
return None
if not sanitize(suggestion[0].i.text.replace('\'', ' ')) == sanitize(series_year):
logger.warning('Show id not found: suggestion does not match')
return None
show_id = int(suggestion[0]['href'][6:])
logger.debug('Found show id %d', show_id)
return show_id
def get_show_id(self, series, year=None, country_code=None):
"""Get the best matching show id for `series`, `year` and `country_code`.
First search in the result of :meth:`_get_show_ids` and fallback on a search with :meth:`_search_show_id`.
:param str series: series of the episode.
:param year: year of the series, if any.
:type year: int
:param country_code: country code of the series, if any.
:type country_code: str
:return: the show id, if found.
:rtype: int
"""
series_sanitized = sanitize(series).lower()
show_ids = self._get_show_ids()
show_id = None
# attempt with country
if not show_id and country_code:
logger.debug('Getting show id with country')
show_id = show_ids.get('%s %s' % (series_sanitized, country_code.lower()))
# attempt with year
if not show_id and year:
logger.debug('Getting show id with year')
show_id = show_ids.get('%s %d' % (series_sanitized, year))
# attempt clean
if not show_id:
logger.debug('Getting show id')
show_id = show_ids.get(series_sanitized)
# search as last resort
if not show_id:
logger.warning('Series %s not found in show ids', series)
show_id = self._search_show_id(series)
return show_id
def query(self, show_id, series, season, year=None, country=None):
# get the page of the season of the show
logger.info('Getting the page of show id %d, season %d', show_id, season)
r = self.session.get(self.server_url + 'show/%d' % show_id, params={'season': season}, timeout=10)
r.raise_for_status()
if not r.content:
# Provider returns a status of 304 Not Modified with an empty content
# raise_for_status won't raise exception for that status code
logger.debug('No data returned from provider')
return []
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
# loop over subtitle rows
match = series_year_re.match(soup.select('#header font')[0].text.strip()[:-10])
series = match.group('series')
year = int(match.group('year')) if match.group('year') else None
subtitles = []
for row in soup.select('tr.epeven'):
cells = row('td')
# ignore incomplete subtitles
status = cells[5].text
if status != 'Completed':
logger.debug('Ignoring subtitle with status %s', status)
continue
# read the item
language = Language.fromaddic7ed(cells[3].text)
hearing_impaired = bool(cells[6].text)
page_link = self.server_url + cells[2].a['href'][1:]
season = int(cells[0].text)
episode = int(cells[1].text)
title = cells[2].text
version = cells[4].text
download_link = cells[9].a['href'][1:]
subtitle = self.subtitle_class(language, hearing_impaired, page_link, series, season, episode, title, year,
version, download_link)
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)
return subtitles
def list_subtitles(self, video, languages):
# lookup show_id
titles = [video.series] + video.alternative_series
show_id = None
for title in titles:
show_id = self.get_show_id(title, video.year)
if show_id is not None:
break
# query for subtitles with the show_id
if show_id is not None:
subtitles = [s for s in self.query(show_id, title, video.season, video.year)
if s.language in languages and s.episode == video.episode]
if subtitles:
return subtitles
else:
logger.error('No show id found for %r (%r)', video.series, {'year': video.year})
return []
def download_subtitle(self, subtitle):
# download the subtitle
logger.info('Downloading subtitle %r', subtitle)
r = self.session.get(self.server_url + subtitle.download_link, headers={'Referer': subtitle.page_link},
timeout=10)
r.raise_for_status()
if not r.content:
# Provider returns a status of 304 Not Modified with an empty content
# raise_for_status won't raise exception for that status code
logger.debug('Unable to download subtitle. No data returned from provider')
return
# detect download limit exceeded
if r.headers['Content-Type'] == 'text/html':
raise DownloadLimitExceeded
subtitle.content = fix_line_ending(r.content)
subliminal-2.1.0/subliminal/providers/argenteam.py 0000644 0001750 0001750 00000010516 13653235327 022634 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
import io
import json
import logging
from zipfile import ZipFile
from babelfish import Language
from guessit import guessit
from requests import Session
from six.moves import urllib
from . import Provider
from ..cache import EPISODE_EXPIRATION_TIME, region
from ..exceptions import ProviderError
from ..matches import guess_matches
from ..subtitle import Subtitle, fix_line_ending
from ..video import Episode
logger = logging.getLogger(__name__)
class ArgenteamSubtitle(Subtitle):
provider_name = 'argenteam'
def __init__(self, language, download_link, series, season, episode, release, version):
super(ArgenteamSubtitle, self).__init__(language, download_link)
self.download_link = download_link
self.series = series
self.season = season
self.episode = episode
self.release = release
self.version = version
@property
def id(self):
return self.download_link
@property
def info(self):
return urllib.parse.unquote(self.download_link.rsplit('/')[-1])
def get_matches(self, video):
matches = guess_matches(video, {
'title': self.series,
'season': self.season,
'episode': self.episode,
'release_group': self.version
})
# resolution
if video.resolution and self.version and video.resolution in self.version.lower():
matches.add('resolution')
matches |= guess_matches(video, guessit(self.version, {'type': 'episode'}), partial=True)
return matches
class ArgenteamProvider(Provider):
provider_name = 'argenteam'
language = Language.fromalpha2('es')
languages = {language}
video_types = (Episode,)
server_url = "http://argenteam.net/api/v1/"
subtitle_class = ArgenteamSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = self.user_agent
def terminate(self):
self.session.close()
@region.cache_on_arguments(expiration_time=EPISODE_EXPIRATION_TIME, should_cache_fn=lambda value: value)
def search_episode_id(self, series, season, episode):
"""Search the episode id from the `series`, `season` and `episode`.
:param str series: series of the episode.
:param int season: season of the episode.
:param int episode: episode number.
:return: the episode id, if any.
:rtype: int or None
"""
# make the search
query = '%s S%#02dE%#02d' % (series, season, episode)
logger.info('Searching episode id for %r', query)
r = self.session.get(self.server_url + 'search', params={'q': query}, timeout=10)
r.raise_for_status()
results = json.loads(r.text)
if results['total'] == 1:
return results['results'][0]['id']
logger.error('No episode id found for %r', series)
def query(self, series, season, episode):
episode_id = self.search_episode_id(series, season, episode)
if episode_id is None:
return []
response = self.session.get(self.server_url + 'episode', params={'id': episode_id}, timeout=10)
response.raise_for_status()
content = json.loads(response.text)
subtitles = []
for r in content['releases']:
for s in r['subtitles']:
subtitle = self.subtitle_class(self.language, s['uri'], series, season, episode, r['team'], r['tags'])
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)
return subtitles
def list_subtitles(self, video, languages):
titles = [video.series] + video.alternative_series
for title in titles:
subs = self.query(title, video.season, video.episode)
if subs:
return subs
return []
def download_subtitle(self, subtitle):
# download as a zip
logger.info('Downloading subtitle %r', subtitle)
r = self.session.get(subtitle.download_link, timeout=10)
r.raise_for_status()
# open the zip
with ZipFile(io.BytesIO(r.content)) as zf:
if len(zf.namelist()) > 1:
raise ProviderError('More than one file to unzip')
subtitle.content = fix_line_ending(zf.read(zf.namelist()[0]))
subliminal-2.1.0/subliminal/providers/legendastv.py 0000644 0001750 0001750 00000046052 13653235327 023031 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
import io
import json
import logging
import os
import re
from babelfish import Language, language_converters
from datetime import datetime, timedelta
from dogpile.cache.api import NO_VALUE
from guessit import guessit
import pytz
import rarfile
from rarfile import RarFile, is_rarfile
from rebulk.loose import ensure_list
from requests import Session
from zipfile import ZipFile, is_zipfile
from . import ParserBeautifulSoup, Provider
from ..cache import SHOW_EXPIRATION_TIME, region
from ..exceptions import AuthenticationError, ConfigurationError, ProviderError, ServiceUnavailable
from ..matches import guess_matches
from ..subtitle import SUBTITLE_EXTENSIONS, Subtitle, fix_line_ending
from ..utils import sanitize
from ..video import Episode, Movie
logger = logging.getLogger(__name__)
language_converters.register('legendastv = subliminal.converters.legendastv:LegendasTVConverter')
# Configure :mod:`rarfile` to use the same path separator as :mod:`zipfile`
rarfile.PATH_SEP = '/'
#: Conversion map for types
type_map = {'M': 'movie', 'S': 'episode', 'C': 'episode'}
#: BR title season parsing regex
season_re = re.compile(r' - (?P\d+)(\xaa|a|st|nd|rd|th) (temporada|season)', re.IGNORECASE)
#: Downloads parsing regex
downloads_re = re.compile(r'(?P\d+) downloads')
#: Rating parsing regex
rating_re = re.compile(r'nota (?P\d+)')
#: Timestamp parsing regex
timestamp_re = re.compile(r'(?P\d+)/(?P\d+)/(?P\d+) - (?P\d+):(?P\d+)')
#: Title with year/country regex
title_re = re.compile(r'^(?P.*?)(?: \((?:(?P\d{4})|(?P[A-Z]{2}))\))?$')
#: Cache key for releases
releases_key = __name__ + ':releases|{archive_id}|{archive_name}'
class LegendasTVArchive(object):
"""LegendasTV Archive.
:param str id: identifier.
:param str name: name.
:param bool pack: contains subtitles for multiple episodes.
:param bool pack: featured.
:param str link: link.
:param int downloads: download count.
:param int rating: rating (0-10).
:param timestamp: timestamp.
:type timestamp: datetime.datetime
"""
def __init__(self, id, name, pack, featured, link, downloads=0, rating=0, timestamp=None):
#: Identifier
self.id = id
#: Name
self.name = name
#: Pack
self.pack = pack
#: Featured
self.featured = featured
#: Link
self.link = link
#: Download count
self.downloads = downloads
#: Rating (0-10)
self.rating = rating
#: Timestamp
self.timestamp = timestamp
#: Compressed content as :class:`rarfile.RarFile` or :class:`zipfile.ZipFile`
self.content = None
def __repr__(self):
return '<%s [%s] %r>' % (self.__class__.__name__, self.id, self.name)
class LegendasTVSubtitle(Subtitle):
"""LegendasTV Subtitle."""
provider_name = 'legendastv'
def __init__(self, language, type, title, year, imdb_id, season, archive, name):
super(LegendasTVSubtitle, self).__init__(language, page_link=archive.link)
self.type = type
self.title = title
self.year = year
self.imdb_id = imdb_id
self.season = season
self.archive = archive
self.name = name
@property
def id(self):
return '%s-%s' % (self.archive.id, self.name.lower())
@property
def info(self):
return self.name
def get_matches(self, video, hearing_impaired=False):
matches = guess_matches(video, {
'title': self.title,
'year': self.year
})
# episode
if isinstance(video, Episode) and self.type == 'episode':
# imdb_id
if video.series_imdb_id and self.imdb_id == video.series_imdb_id:
matches.add('series_imdb_id')
# movie
elif isinstance(video, Movie) and self.type == 'movie':
# imdb_id
if video.imdb_id and self.imdb_id == video.imdb_id:
matches.add('imdb_id')
# name
matches |= guess_matches(video, guessit(self.name, {'type': self.type}))
return matches
class LegendasTVProvider(Provider):
"""LegendasTV Provider.
:param str username: username.
:param str password: password.
"""
languages = {Language.fromlegendastv(l) for l in language_converters['legendastv'].codes}
server_url = 'http://legendas.tv/'
subtitle_class = LegendasTVSubtitle
def __init__(self, username=None, password=None):
# Provider needs UNRAR installed. If not available raise ConfigurationError
try:
rarfile.custom_check([rarfile.UNRAR_TOOL], True)
except rarfile.RarExecError:
raise ConfigurationError('UNRAR tool not available')
if any((username, password)) and not all((username, password)):
raise ConfigurationError('Username and password must be specified')
self.username = username
self.password = password
self.logged_in = False
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = self.user_agent
# login
if self.username and self.password:
logger.info('Logging in')
data = {'_method': 'POST', 'data[User][username]': self.username, 'data[User][password]': self.password}
r = self.session.post(self.server_url + 'login', data, allow_redirects=False, timeout=10)
raise_for_status(r)
soup = ParserBeautifulSoup(r.content, ['html.parser'])
if soup.find('div', {'class': 'alert-error'}, string=re.compile(u'Usuário ou senha inválidos')):
raise AuthenticationError(self.username)
logger.debug('Logged in')
self.logged_in = True
def terminate(self):
# logout
if self.logged_in:
logger.info('Logging out')
r = self.session.get(self.server_url + 'users/logout', allow_redirects=False, timeout=10)
raise_for_status(r)
logger.debug('Logged out')
self.logged_in = False
self.session.close()
@staticmethod
def is_valid_title(title, title_id, sanitized_title, season, year):
"""Check if is a valid title."""
sanitized_result = sanitize(title['title'])
if sanitized_result != sanitized_title:
logger.debug("Mismatched title, discarding title %d (%s)",
title_id, sanitized_result)
return
# episode type
if season:
# discard mismatches on type
if title['type'] != 'episode':
logger.debug("Mismatched 'episode' type, discarding title %d (%s)", title_id, sanitized_result)
return
# discard mismatches on season
if 'season' not in title or title['season'] != season:
logger.debug('Mismatched season %s, discarding title %d (%s)',
title.get('season'), title_id, sanitized_result)
return
# movie type
else:
# discard mismatches on type
if title['type'] != 'movie':
logger.debug("Mismatched 'movie' type, discarding title %d (%s)", title_id, sanitized_result)
return
# discard mismatches on year
if year is not None and 'year' in title and title['year'] != year:
logger.debug("Mismatched movie year, discarding title %d (%s)", title_id, sanitized_result)
return
return True
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME, should_cache_fn=lambda value: value)
def search_titles(self, title, season, title_year):
"""Search for titles matching the `title`.
For episodes, each season has it own title
:param str title: the title to search for.
:param int season: season of the title
:param int title_year: year of the title
:return: found titles.
:rtype: dict
"""
titles = {}
sanitized_titles = [sanitize(title)]
ignore_characters = {'\'', '.'}
if any(c in title for c in ignore_characters):
sanitized_titles.append(sanitize(title, ignore_characters=ignore_characters))
for sanitized_title in sanitized_titles:
# make the query
if season:
logger.info('Searching episode title %r for season %r', sanitized_title, season)
else:
logger.info('Searching movie title %r', sanitized_title)
r = self.session.get(self.server_url + 'legenda/sugestao/{}'.format(sanitized_title), timeout=10)
raise_for_status(r)
results = json.loads(r.text)
# loop over results
for result in results:
source = result['_source']
# extract id
title_id = int(source['id_filme'])
# extract type
title = {'type': type_map[source['tipo']]}
# extract title, year and country
name, year, country = title_re.match(source['dsc_nome']).groups()
title['title'] = name
# extract imdb_id
if source['id_imdb'] != '0':
if not source['id_imdb'].startswith('tt'):
title['imdb_id'] = 'tt' + source['id_imdb'].zfill(7)
else:
title['imdb_id'] = source['id_imdb']
# extract season
if title['type'] == 'episode':
if source['temporada'] and source['temporada'].isdigit():
title['season'] = int(source['temporada'])
else:
match = season_re.search(source['dsc_nome_br'])
if match:
title['season'] = int(match.group('season'))
else:
logger.debug('No season detected for title %d (%s)', title_id, name)
# extract year
if year:
title['year'] = int(year)
elif source['dsc_data_lancamento'] and source['dsc_data_lancamento'].isdigit():
# year is based on season air date hence the adjustment
title['year'] = int(source['dsc_data_lancamento']) - title.get('season', 1) + 1
# add title only if is valid
# Check against title without ignored chars
if self.is_valid_title(title, title_id, sanitized_titles[0], season, title_year):
titles[title_id] = title
logger.debug('Found %d titles', len(titles))
return titles
@region.cache_on_arguments(expiration_time=timedelta(minutes=15).total_seconds())
def get_archives(self, title_id, language_code, title_type, season, episodes):
"""Get the archive list from a given `title_id`, `language_code`, `title_type`, `season` and `episode`.
:param int title_id: title id.
:param int language_code: language code.
:param str title_type: episode or movie
:param int season: season
:param list episodes: episodes
:return: the archives.
:rtype: list of :class:`LegendasTVArchive`
"""
archives = []
page = 0
while True:
# get the archive page
url = self.server_url + 'legenda/busca/-/{language}/-/{page}/{title}'.format(
language=language_code, page=page, title=title_id)
r = self.session.get(url)
raise_for_status(r)
# parse the results
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
for archive_soup in soup.select('div.list_element > article > div > div.f_left'):
# create archive
archive = LegendasTVArchive(archive_soup.a['href'].split('/')[2],
archive_soup.a.text,
'pack' in archive_soup.parent['class'],
'destaque' in archive_soup.parent['class'],
self.server_url + archive_soup.a['href'][1:])
# clean name of path separators and pack flags
clean_name = archive.name.replace('/', '-')
if archive.pack and clean_name.startswith('(p)'):
clean_name = clean_name[3:]
# guess from name
guess = guessit(clean_name, {'type': title_type})
# episode
if season and episodes:
# discard mismatches on episode in non-pack archives
# Guessit may return int for single episode or list for multi-episode
# Check if archive name has multiple episodes releases on it
if not archive.pack and 'episode' in guess:
wanted_episode = set(episodes)
archive_episode = set(ensure_list(guess['episode']))
if not wanted_episode.intersection(archive_episode):
logger.debug('Mismatched episode %s, discarding archive: %s', guess['episode'], clean_name)
continue
# extract text containing downloads, rating and timestamp
data_text = archive_soup.find('p', class_='data').text
# match downloads
archive.downloads = int(downloads_re.search(data_text).group('downloads'))
# match rating
match = rating_re.search(data_text)
if match:
archive.rating = int(match.group('rating'))
# match timestamp and validate it
time_data = {k: int(v) for k, v in timestamp_re.search(data_text).groupdict().items()}
archive.timestamp = pytz.timezone('America/Sao_Paulo').localize(datetime(**time_data))
if archive.timestamp > datetime.utcnow().replace(tzinfo=pytz.utc):
raise ProviderError('Archive timestamp is in the future')
# add archive
logger.info('Found archive for title %d and language %d at page %s: %s',
title_id, language_code, page, archive)
archives.append(archive)
# stop on last page
if soup.find('a', attrs={'class': 'load_more'}, string='carregar mais') is None:
break
# increment page count
page += 1
logger.debug('Found %d archives', len(archives))
return archives
def download_archive(self, archive):
"""Download an archive's :attr:`~LegendasTVArchive.content`.
:param archive: the archive to download :attr:`~LegendasTVArchive.content` of.
:type archive: :class:`LegendasTVArchive`
"""
logger.info('Downloading archive %s', archive.id)
r = self.session.get(self.server_url + 'downloadarquivo/{}'.format(archive.id))
raise_for_status(r)
# open the archive
archive_stream = io.BytesIO(r.content)
if is_rarfile(archive_stream):
logger.debug('Identified rar archive')
archive.content = RarFile(archive_stream)
elif is_zipfile(archive_stream):
logger.debug('Identified zip archive')
archive.content = ZipFile(archive_stream)
else:
raise ValueError('Not a valid archive')
def query(self, language, title, season=None, episodes=None, year=None):
# search for titles
titles = self.search_titles(title, season, year)
subtitles = []
# iterate over titles
for title_id, t in titles.items():
logger.info('Getting archives for title %d and language %d', title_id, language.legendastv)
archives = self.get_archives(title_id, language.legendastv, t['type'], season, episodes or [])
if not archives:
logger.info('No archives found for title %d and language %d', title_id, language.legendastv)
# iterate over title's archives
for a in archives:
# compute an expiration time based on the archive timestamp
expiration_time = (datetime.utcnow().replace(tzinfo=pytz.utc) - a.timestamp).total_seconds()
# attempt to get the releases from the cache
cache_key = releases_key.format(archive_id=a.id, archive_name=a.name)
releases = region.get(cache_key, expiration_time=expiration_time)
# the releases are not in cache or cache is expired
if releases == NO_VALUE:
logger.info('Releases not found in cache')
# download archive
self.download_archive(a)
# extract the releases
releases = []
for name in a.content.namelist():
# discard the legendastv file
if name.startswith('Legendas.tv'):
continue
# discard hidden files
if os.path.split(name)[-1].startswith('.'):
continue
# discard non-subtitle files
if not name.lower().endswith(SUBTITLE_EXTENSIONS):
continue
releases.append(name)
# cache the releases
region.set(cache_key, releases)
# iterate over releases
for r in releases:
subtitle = self.subtitle_class(language, t['type'], t['title'], t.get('year'), t.get('imdb_id'),
t.get('season'), a, r)
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)
return subtitles
def list_subtitles(self, video, languages):
season = None
episodes = []
if isinstance(video, Episode):
titles = [video.series] + video.alternative_series
season = video.season
episodes = video.episodes
else:
titles = [video.title] + video.alternative_titles
for title in titles:
subtitles = [s for l in languages for s in
self.query(l, title, season=season, episodes=episodes, year=video.year)]
if subtitles:
return subtitles
return []
def download_subtitle(self, subtitle):
# download archive in case we previously hit the releases cache and didn't download it
if subtitle.archive.content is None:
self.download_archive(subtitle.archive)
# extract subtitle's content
subtitle.content = fix_line_ending(subtitle.archive.content.read(subtitle.name))
def raise_for_status(r):
# When site is under maintaince and http status code 200.
if 'Em breve estaremos de volta' in r.text:
raise ServiceUnavailable
else:
r.raise_for_status()
subliminal-2.1.0/subliminal/providers/napiprojekt.py 0000644 0001750 0001750 00000005454 13653235327 023224 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
import logging
from babelfish import Language
from requests import Session
from . import Provider
from ..subtitle import Subtitle
logger = logging.getLogger(__name__)
def get_subhash(hash):
"""Get a second hash based on napiprojekt's hash.
:param str hash: napiprojekt's hash.
:return: the subhash.
:rtype: str
"""
idx = [0xe, 0x3, 0x6, 0x8, 0x2]
mul = [2, 2, 5, 4, 3]
add = [0, 0xd, 0x10, 0xb, 0x5]
b = []
for i in range(len(idx)):
a = add[i]
m = mul[i]
i = idx[i]
t = a + int(hash[i], 16)
v = int(hash[t:t + 2], 16)
b.append(('%x' % (v * m))[-1])
return ''.join(b)
class NapiProjektSubtitle(Subtitle):
"""NapiProjekt Subtitle."""
provider_name = 'napiprojekt'
def __init__(self, language, hash):
super(NapiProjektSubtitle, self).__init__(language)
self.hash = hash
self.content = None
@property
def id(self):
return self.hash
@property
def info(self):
return self.hash
def get_matches(self, video):
matches = set()
# hash
if 'napiprojekt' in video.hashes and video.hashes['napiprojekt'] == self.hash:
matches.add('hash')
return matches
class NapiProjektProvider(Provider):
"""NapiProjekt Provider."""
languages = {Language.fromalpha2(l) for l in ['pl']}
required_hash = 'napiprojekt'
server_url = 'http://napiprojekt.pl/unit_napisy/dl.php'
subtitle_class = NapiProjektSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = self.user_agent
def terminate(self):
self.session.close()
def query(self, language, hash):
params = {
'v': 'dreambox',
'kolejka': 'false',
'nick': '',
'pass': '',
'napios': 'Linux',
'l': language.alpha2.upper(),
'f': hash,
't': get_subhash(hash)}
logger.info('Searching subtitle %r', params)
r = self.session.get(self.server_url, params=params, timeout=10)
r.raise_for_status()
# handle subtitles not found and errors
if r.content[:4] == b'NPc0':
logger.debug('No subtitles found')
return None
subtitle = self.subtitle_class(language, hash)
subtitle.content = r.content
logger.debug('Found subtitle %r', subtitle)
return subtitle
def list_subtitles(self, video, languages):
return [s for s in [self.query(l, video.hashes['napiprojekt']) for l in languages] if s is not None]
def download_subtitle(self, subtitle):
# there is no download step, content is already filled from listing subtitles
pass
subliminal-2.1.0/subliminal/providers/opensubtitles.py 0000644 0001750 0001750 00000026024 13653235327 023572 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
import base64
import logging
import os
import re
import zlib
from babelfish import Language, language_converters
from guessit import guessit
from six.moves.xmlrpc_client import ServerProxy
from . import Provider, TimeoutSafeTransport
from .. import __short_version__
from ..exceptions import (AuthenticationError, ConfigurationError, DownloadLimitExceeded, ProviderError,
ServiceUnavailable)
from ..matches import guess_matches
from ..subtitle import Subtitle, fix_line_ending
from ..video import Episode, Movie
logger = logging.getLogger(__name__)
class OpenSubtitlesSubtitle(Subtitle):
"""OpenSubtitles Subtitle."""
provider_name = 'opensubtitles'
series_re = re.compile(r'^"(?P.*)" (?P.*)$')
def __init__(self, language, hearing_impaired, page_link, subtitle_id, matched_by, movie_kind, hash, movie_name,
movie_release_name, movie_year, movie_imdb_id, series_season, series_episode, filename, encoding):
super(OpenSubtitlesSubtitle, self).__init__(language, hearing_impaired=hearing_impaired,
page_link=page_link, encoding=encoding)
self.subtitle_id = subtitle_id
self.matched_by = matched_by
self.movie_kind = movie_kind
self.hash = hash
self.movie_name = movie_name
self.movie_release_name = movie_release_name
self.movie_year = movie_year
self.movie_imdb_id = movie_imdb_id
self.series_season = series_season
self.series_episode = series_episode
self.filename = filename
@property
def id(self):
return str(self.subtitle_id)
@property
def info(self):
if not self.filename and not self.movie_release_name:
return self.subtitle_id
if self.movie_release_name and len(self.movie_release_name) > len(self.filename):
return self.movie_release_name
return self.filename
@property
def series_name(self):
return self.series_re.match(self.movie_name).group('series_name')
@property
def series_title(self):
return self.series_re.match(self.movie_name).group('series_title')
def get_matches(self, video):
if (isinstance(video, Episode) and self.movie_kind != 'episode') or (
isinstance(video, Movie) and self.movie_kind != 'movie'):
logger.info('%r is not a valid movie_kind', self.movie_kind)
return set()
matches = guess_matches(video, {
'title': self.series_name if self.movie_kind == 'episode' else self.movie_name,
'episode_title': self.series_title if self.movie_kind == 'episode' else None,
'year': self.movie_year,
'season': self.series_season,
'episode': self.series_episode
})
# tag
if self.matched_by == 'tag':
if not video.imdb_id or self.movie_imdb_id == video.imdb_id:
if self.movie_kind == 'episode':
matches |= {'series', 'year', 'season', 'episode'}
elif self.movie_kind == 'movie':
matches |= {'title', 'year'}
# guess
matches |= guess_matches(video, guessit(self.movie_release_name, {'type': self.movie_kind}))
matches |= guess_matches(video, guessit(self.filename, {'type': self.movie_kind}))
# hash
if 'opensubtitles' in video.hashes and self.hash == video.hashes['opensubtitles']:
if self.movie_kind == 'movie' and 'title' in matches:
matches.add('hash')
elif self.movie_kind == 'episode' and 'series' in matches and 'season' in matches and 'episode' in matches:
matches.add('hash')
else:
logger.debug('Match on hash discarded')
# imdb_id
if video.imdb_id and self.movie_imdb_id == video.imdb_id:
matches.add('imdb_id')
return matches
class OpenSubtitlesProvider(Provider):
"""OpenSubtitles Provider.
:param str username: username.
:param str password: password.
"""
languages = {Language.fromopensubtitles(l) for l in language_converters['opensubtitles'].codes}
server_url = 'https://api.opensubtitles.org/xml-rpc'
subtitle_class = OpenSubtitlesSubtitle
user_agent = 'subliminal v%s' % __short_version__
def __init__(self, username=None, password=None):
self.server = ServerProxy(self.server_url, TimeoutSafeTransport(10))
if any((username, password)) and not all((username, password)):
raise ConfigurationError('Username and password must be specified')
# None values not allowed for logging in, so replace it by ''
self.username = username or ''
self.password = password or ''
self.token = None
def initialize(self):
logger.info('Logging in')
response = checked(self.server.LogIn(self.username, self.password, 'eng', self.user_agent))
self.token = response['token']
logger.debug('Logged in with token %r', self.token)
def terminate(self):
logger.info('Logging out')
checked(self.server.LogOut(self.token))
self.server.close()
self.token = None
logger.debug('Logged out')
def no_operation(self):
logger.debug('No operation')
checked(self.server.NoOperation(self.token))
def query(self, languages, hash=None, size=None, imdb_id=None, query=None, season=None, episode=None, tag=None):
# fill the search criteria
criteria = []
if hash and size:
criteria.append({'moviehash': hash, 'moviebytesize': str(size)})
if imdb_id:
if season and episode:
criteria.append({'imdbid': imdb_id[2:], 'season': season, 'episode': episode})
else:
criteria.append({'imdbid': imdb_id[2:]})
if tag:
criteria.append({'tag': tag})
if query and season and episode:
criteria.append({'query': query.replace('\'', ''), 'season': season, 'episode': episode})
elif query:
criteria.append({'query': query.replace('\'', '')})
if not criteria:
raise ValueError('Not enough information')
# add the language
for criterion in criteria:
criterion['sublanguageid'] = ','.join(sorted(l.opensubtitles for l in languages))
# query the server
logger.info('Searching subtitles %r', criteria)
response = checked(self.server.SearchSubtitles(self.token, criteria))
subtitles = []
# exit if no data
if not response['data']:
logger.debug('No subtitles found')
return subtitles
# loop over subtitle items
for subtitle_item in response['data']:
# read the item
language = Language.fromopensubtitles(subtitle_item['SubLanguageID'])
hearing_impaired = bool(int(subtitle_item['SubHearingImpaired']))
page_link = subtitle_item['SubtitlesLink']
subtitle_id = int(subtitle_item['IDSubtitleFile'])
matched_by = subtitle_item['MatchedBy']
movie_kind = subtitle_item['MovieKind']
hash = subtitle_item['MovieHash']
movie_name = subtitle_item['MovieName']
movie_release_name = subtitle_item['MovieReleaseName']
movie_year = int(subtitle_item['MovieYear']) if subtitle_item['MovieYear'] else None
movie_imdb_id = 'tt' + subtitle_item['IDMovieImdb']
series_season = int(subtitle_item['SeriesSeason']) if subtitle_item['SeriesSeason'] else None
series_episode = int(subtitle_item['SeriesEpisode']) if subtitle_item['SeriesEpisode'] else None
filename = subtitle_item['SubFileName']
encoding = subtitle_item.get('SubEncoding') or None
subtitle = self.subtitle_class(language, hearing_impaired, page_link, subtitle_id, matched_by, movie_kind,
hash, movie_name, movie_release_name, movie_year, movie_imdb_id,
series_season, series_episode, filename, encoding)
logger.debug('Found subtitle %r by %s', subtitle, matched_by)
subtitles.append(subtitle)
return subtitles
def list_subtitles(self, video, languages):
season = episode = None
if isinstance(video, Episode):
query = video.series
season = video.season
episode = video.episode
else:
query = video.title
return self.query(languages, hash=video.hashes.get('opensubtitles'), size=video.size, imdb_id=video.imdb_id,
query=query, season=season, episode=episode, tag=os.path.basename(video.name))
def download_subtitle(self, subtitle):
logger.info('Downloading subtitle %r', subtitle)
response = checked(self.server.DownloadSubtitles(self.token, [str(subtitle.subtitle_id)]))
subtitle.content = fix_line_ending(zlib.decompress(base64.b64decode(response['data'][0]['data']), 47))
class OpenSubtitlesVipSubtitle(OpenSubtitlesSubtitle):
"""OpenSubtitles Subtitle."""
provider_name = 'opensubtitlesvip'
class OpenSubtitlesVipProvider(OpenSubtitlesProvider):
"""OpenSubtitles Provider using VIP url."""
server_url = 'https://vip-api.opensubtitles.org/xml-rpc'
subtitle_class = OpenSubtitlesVipSubtitle
class OpenSubtitlesError(ProviderError):
"""Base class for non-generic :class:`OpenSubtitlesProvider` exceptions."""
pass
class Unauthorized(OpenSubtitlesError, AuthenticationError):
"""Exception raised when status is '401 Unauthorized'."""
pass
class NoSession(OpenSubtitlesError, AuthenticationError):
"""Exception raised when status is '406 No session'."""
pass
class DownloadLimitReached(OpenSubtitlesError, DownloadLimitExceeded):
"""Exception raised when status is '407 Download limit reached'."""
pass
class InvalidImdbid(OpenSubtitlesError):
"""Exception raised when status is '413 Invalid ImdbID'."""
pass
class UnknownUserAgent(OpenSubtitlesError, AuthenticationError):
"""Exception raised when status is '414 Unknown User Agent'."""
pass
class DisabledUserAgent(OpenSubtitlesError, AuthenticationError):
"""Exception raised when status is '415 Disabled user agent'."""
pass
def checked(response):
"""Check a response status before returning it.
:param response: a response from a XMLRPC call to OpenSubtitles.
:return: the response.
:raise: :class:`OpenSubtitlesError`
"""
status_code = int(response['status'][:3])
if status_code == 401:
raise Unauthorized
if status_code == 406:
raise NoSession
if status_code == 407:
raise DownloadLimitReached
if status_code == 413:
raise InvalidImdbid
if status_code == 414:
raise UnknownUserAgent
if status_code == 415:
raise DisabledUserAgent
if status_code == 503:
raise ServiceUnavailable
if status_code != 200:
raise OpenSubtitlesError(response['status'])
return response
subliminal-2.1.0/subliminal/providers/podnapisi.py 0000644 0001750 0001750 00000014546 13653235327 022666 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
import io
import logging
import re
from babelfish import Language, language_converters
from guessit import guessit
try:
from lxml import etree
except ImportError:
try:
import xml.etree.cElementTree as etree
except ImportError:
import xml.etree.ElementTree as etree
from requests import Session
from zipfile import ZipFile
from . import Provider
from ..exceptions import ProviderError
from ..matches import guess_matches
from ..subtitle import Subtitle, fix_line_ending
from ..video import Episode
logger = logging.getLogger(__name__)
class PodnapisiSubtitle(Subtitle):
"""Podnapisi Subtitle."""
provider_name = 'podnapisi'
def __init__(self, language, hearing_impaired, page_link, pid, releases, title, season=None, episode=None,
year=None):
super(PodnapisiSubtitle, self).__init__(language, hearing_impaired=hearing_impaired, page_link=page_link)
self.pid = pid
self.releases = releases
self.title = title
self.season = season
self.episode = episode
self.year = year
@property
def id(self):
return self.pid
@property
def info(self):
return ' '.join(self.releases) or self.pid
def get_matches(self, video):
matches = guess_matches(video, {
'title': self.title,
'year': self.year,
'season': self.season,
'episode': self.episode
})
video_type = 'episode' if isinstance(video, Episode) else 'movie'
for release in self.releases:
matches |= guess_matches(video, guessit(release, {'type': video_type}))
return matches
class PodnapisiProvider(Provider):
"""Podnapisi Provider."""
languages = ({Language('por', 'BR'), Language('srp', script='Latn')} |
{Language.fromalpha2(l) for l in language_converters['alpha2'].codes})
server_url = 'https://www.podnapisi.net/subtitles/'
subtitle_class = PodnapisiSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = self.user_agent
def terminate(self):
self.session.close()
def query(self, language, keyword, season=None, episode=None, year=None):
# set parameters, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164#p212652
params = {'sXML': 1, 'sL': str(language), 'sK': keyword}
is_episode = False
if season and episode:
is_episode = True
params['sTS'] = season
params['sTE'] = episode
if year:
params['sY'] = year
# loop over paginated results
logger.info('Searching subtitles %r', params)
subtitles = []
pids = set()
while True:
# query the server
r = self.session.get(self.server_url + 'search/old', params=params, timeout=10)
r.raise_for_status()
xml = etree.fromstring(r.content)
# exit if no results
if not int(xml.find('pagination/results').text):
logger.debug('No subtitles found')
break
# loop over subtitles
for subtitle_xml in xml.findall('subtitle'):
# read xml elements
pid = subtitle_xml.find('pid').text
# ignore duplicates, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164&start=10#p213321
if pid in pids:
continue
language = Language.fromietf(subtitle_xml.find('language').text)
hearing_impaired = 'n' in (subtitle_xml.find('flags').text or '')
page_link = subtitle_xml.find('url').text
releases = []
if subtitle_xml.find('release').text:
for release in subtitle_xml.find('release').text.split():
release = re.sub(r'\.+$', '', release) # remove trailing dots
release = ''.join(filter(lambda x: ord(x) < 128, release)) # remove non-ascii characters
releases.append(release)
title = subtitle_xml.find('title').text
season = int(subtitle_xml.find('tvSeason').text)
episode = int(subtitle_xml.find('tvEpisode').text)
year = int(subtitle_xml.find('year').text)
if is_episode:
subtitle = self.subtitle_class(language, hearing_impaired, page_link, pid, releases, title,
season=season, episode=episode, year=year)
else:
subtitle = self.subtitle_class(language, hearing_impaired, page_link, pid, releases, title,
year=year)
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)
pids.add(pid)
# stop on last page
if int(xml.find('pagination/current').text) >= int(xml.find('pagination/count').text):
break
# increment current page
params['page'] = int(xml.find('pagination/current').text) + 1
logger.debug('Getting page %d', params['page'])
return subtitles
def list_subtitles(self, video, languages):
season = episode = None
if isinstance(video, Episode):
titles = [video.series] + video.alternative_series
season = video.season
episode = video.episode
else:
titles = [video.title] + video.alternative_titles
for title in titles:
subtitles = [s for l in languages for s in
self.query(l, title, season=season, episode=episode, year=video.year)]
if subtitles:
return subtitles
return []
def download_subtitle(self, subtitle):
# download as a zip
logger.info('Downloading subtitle %r', subtitle)
r = self.session.get(self.server_url + subtitle.pid + '/download', params={'container': 'zip'}, timeout=10)
r.raise_for_status()
# open the zip
with ZipFile(io.BytesIO(r.content)) as zf:
if len(zf.namelist()) > 1:
raise ProviderError('More than one file to unzip')
subtitle.content = fix_line_ending(zf.read(zf.namelist()[0]))
subliminal-2.1.0/subliminal/providers/shooter.py 0000644 0001750 0001750 00000004675 13653235327 022365 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
import json
import logging
import os
from babelfish import Language, language_converters
from requests import Session
from . import Provider
from ..subtitle import Subtitle, fix_line_ending
logger = logging.getLogger(__name__)
language_converters.register('shooter = subliminal.converters.shooter:ShooterConverter')
class ShooterSubtitle(Subtitle):
"""Shooter Subtitle."""
provider_name = 'shooter'
def __init__(self, language, hash, download_link):
super(ShooterSubtitle, self).__init__(language)
self.hash = hash
self.download_link = download_link
@property
def id(self):
return self.download_link
@property
def info(self):
return self.hash
def get_matches(self, video):
matches = set()
# hash
if 'shooter' in video.hashes and video.hashes['shooter'] == self.hash:
matches.add('hash')
return matches
class ShooterProvider(Provider):
"""Shooter Provider."""
languages = {Language(l) for l in ['eng', 'zho']}
server_url = 'https://www.shooter.cn/api/subapi.php'
subtitle_class = ShooterSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = self.user_agent
def terminate(self):
self.session.close()
def query(self, language, filename, hash=None):
# query the server
params = {'filehash': hash, 'pathinfo': os.path.realpath(filename), 'format': 'json', 'lang': language.shooter}
logger.debug('Searching subtitles %r', params)
r = self.session.post(self.server_url, params=params, timeout=10)
r.raise_for_status()
# handle subtitles not found
if r.content == b'\xff':
logger.debug('No subtitles found')
return []
# parse the subtitles
results = json.loads(r.text)
subtitles = [self.subtitle_class(language, hash, t['Link']) for s in results for t in s['Files']]
return subtitles
def list_subtitles(self, video, languages):
return [s for l in languages for s in self.query(l, video.name, video.hashes.get('shooter'))]
def download_subtitle(self, subtitle):
logger.info('Downloading subtitle %r', subtitle)
r = self.session.get(subtitle.download_link, timeout=10)
r.raise_for_status()
subtitle.content = fix_line_ending(r.content)
subliminal-2.1.0/subliminal/providers/thesubdb.py 0000644 0001750 0001750 00000005345 13653235327 022475 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
import logging
from babelfish import Language, language_converters
from requests import Session
from . import Provider
from .. import __short_version__
from ..subtitle import Subtitle, fix_line_ending
logger = logging.getLogger(__name__)
language_converters.register('thesubdb = subliminal.converters.thesubdb:TheSubDBConverter')
class TheSubDBSubtitle(Subtitle):
"""TheSubDB Subtitle."""
provider_name = 'thesubdb'
def __init__(self, language, hash):
super(TheSubDBSubtitle, self).__init__(language)
self.hash = hash
@property
def id(self):
return self.hash + '-' + str(self.language)
@property
def info(self):
return self.hash
def get_matches(self, video):
matches = set()
# hash
if 'thesubdb' in video.hashes and video.hashes['thesubdb'] == self.hash:
matches.add('hash')
return matches
class TheSubDBProvider(Provider):
"""TheSubDB Provider."""
languages = {Language.fromthesubdb(l) for l in language_converters['thesubdb'].codes}
required_hash = 'thesubdb'
server_url = 'http://api.thesubdb.com/'
subtitle_class = TheSubDBSubtitle
user_agent = 'SubDB/1.0 (subliminal/%s; https://github.com/Diaoul/subliminal)' % __short_version__
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = self.user_agent
def terminate(self):
self.session.close()
def query(self, hash):
# make the query
params = {'action': 'search', 'hash': hash}
logger.info('Searching subtitles %r', params)
r = self.session.get(self.server_url, params=params, timeout=10)
# handle subtitles not found and errors
if r.status_code == 404:
logger.debug('No subtitles found')
return []
r.raise_for_status()
# loop over languages
subtitles = []
for language_code in r.text.split(','):
language = Language.fromthesubdb(language_code)
subtitle = self.subtitle_class(language, hash)
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)
return subtitles
def list_subtitles(self, video, languages):
return [s for s in self.query(video.hashes['thesubdb']) if s.language in languages]
def download_subtitle(self, subtitle):
logger.info('Downloading subtitle %r', subtitle)
params = {'action': 'download', 'hash': subtitle.hash, 'language': subtitle.language.alpha2}
r = self.session.get(self.server_url, params=params, timeout=10)
r.raise_for_status()
subtitle.content = fix_line_ending(r.content)
subliminal-2.1.0/subliminal/providers/tvsubtitles.py 0000644 0001750 0001750 00000017326 13653235327 023267 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
import io
import logging
import re
from zipfile import ZipFile
from babelfish import Language, language_converters
from guessit import guessit
from requests import Session
from . import ParserBeautifulSoup, Provider
from ..cache import EPISODE_EXPIRATION_TIME, SHOW_EXPIRATION_TIME, region
from ..exceptions import ProviderError
from ..matches import guess_matches
from ..subtitle import Subtitle, fix_line_ending
from ..video import Episode
logger = logging.getLogger(__name__)
language_converters.register('tvsubtitles = subliminal.converters.tvsubtitles:TVsubtitlesConverter')
link_re = re.compile(r'^(?P.+?)(?: \(?\d{4}\)?| \((?:US|UK)\))? \((?P\d{4})-\d{4}\)$')
episode_id_re = re.compile(r'^episode-\d+\.html$')
class TVsubtitlesSubtitle(Subtitle):
"""TVsubtitles Subtitle."""
provider_name = 'tvsubtitles'
def __init__(self, language, page_link, subtitle_id, series, season, episode, year, rip, release):
super(TVsubtitlesSubtitle, self).__init__(language, page_link=page_link)
self.subtitle_id = subtitle_id
self.series = series
self.season = season
self.episode = episode
self.year = year
self.rip = rip
self.release = release
@property
def id(self):
return str(self.subtitle_id)
@property
def info(self):
return self.release or self.rip
def get_matches(self, video):
matches = guess_matches(video, {
'title': self.series,
'season': self.season,
'episode': self.episode,
'year': self.year,
'release_group': self.release
})
# other properties
if self.release:
matches |= guess_matches(video, guessit(self.release, {'type': 'episode'}), partial=True)
if self.rip:
matches |= guess_matches(video, guessit(self.rip, {'type': 'episode'}), partial=True)
return matches
class TVsubtitlesProvider(Provider):
"""TVsubtitles Provider."""
languages = {Language('por', 'BR')} | {Language(l) for l in [
'ara', 'bul', 'ces', 'dan', 'deu', 'ell', 'eng', 'fin', 'fra', 'hun', 'ita', 'jpn', 'kor', 'nld', 'pol', 'por',
'ron', 'rus', 'spa', 'swe', 'tur', 'ukr', 'zho'
]}
video_types = (Episode,)
server_url = 'http://www.tvsubtitles.net/'
subtitle_class = TVsubtitlesSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = self.user_agent
def terminate(self):
self.session.close()
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
def search_show_id(self, series, year=None):
"""Search the show id from the `series` and `year`.
:param str series: series of the episode.
:param year: year of the series, if any.
:type year: int
:return: the show id, if any.
:rtype: int
"""
# make the search
logger.info('Searching show id for %r', series)
r = self.session.post(self.server_url + 'search.php', data={'q': series}, timeout=10)
r.raise_for_status()
# get the series out of the suggestions
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
show_id = None
for suggestion in soup.select('div.left li div a[href^="/tvshow-"]'):
match = link_re.match(suggestion.text)
if not match:
logger.error('Failed to match %s', suggestion.text)
continue
if match.group('series').lower() == series.lower():
if year is not None and int(match.group('first_year')) != year:
logger.debug('Year does not match')
continue
show_id = int(suggestion['href'][8:-5])
logger.debug('Found show id %d', show_id)
break
return show_id
@region.cache_on_arguments(expiration_time=EPISODE_EXPIRATION_TIME)
def get_episode_ids(self, show_id, season):
"""Get episode ids from the show id and the season.
:param int show_id: show id.
:param int season: season of the episode.
:return: episode ids per episode number.
:rtype: dict
"""
# get the page of the season of the show
logger.info('Getting the page of show id %d, season %d', show_id, season)
r = self.session.get(self.server_url + 'tvshow-%d-%d.html' % (show_id, season), timeout=10)
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
# loop over episode rows
episode_ids = {}
for row in soup.select('table#table5 tr'):
# skip rows that do not have a link to the episode page
if not row('a', href=episode_id_re):
continue
# extract data from the cells
cells = row('td')
episode = int(cells[0].text.split('x')[1])
episode_id = int(cells[1].a['href'][8:-5])
episode_ids[episode] = episode_id
if episode_ids:
logger.debug('Found episode ids %r', episode_ids)
else:
logger.warning('No episode ids found')
return episode_ids
def query(self, show_id, series, season, episode, year=None):
# get the episode ids
episode_ids = self.get_episode_ids(show_id, season)
if episode not in episode_ids:
logger.error('Episode %d not found', episode)
return []
# get the episode page
logger.info('Getting the page for episode %d', episode_ids[episode])
r = self.session.get(self.server_url + 'episode-%d.html' % episode_ids[episode], timeout=10)
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
# loop over subtitles rows
subtitles = []
for row in soup.select('.subtitlen'):
# read the item
language = Language.fromtvsubtitles(row.h5.img['src'][13:-4])
subtitle_id = int(row.parent['href'][10:-5])
page_link = self.server_url + 'subtitle-%d.html' % subtitle_id
rip = row.find('p', title='rip').text.strip() or None
release = row.find('h5').text.strip() or None
subtitle = self.subtitle_class(language, page_link, subtitle_id, series, season, episode, year, rip,
release)
logger.debug('Found subtitle %s', subtitle)
subtitles.append(subtitle)
return subtitles
def list_subtitles(self, video, languages):
# lookup show_id
titles = [video.series] + video.alternative_series
show_id = None
for title in titles:
show_id = self.search_show_id(title, video.year)
if show_id is not None:
break
# query for subtitles with the show_id
if show_id is not None:
subtitles = [s for s in self.query(show_id, title, video.season, video.episode, video.year)
if s.language in languages and s.episode == video.episode]
if subtitles:
return subtitles
else:
logger.error('No show id found for %r (%r)', video.series, {'year': video.year})
return []
def download_subtitle(self, subtitle):
# download as a zip
logger.info('Downloading subtitle %r', subtitle)
r = self.session.get(self.server_url + 'download-%d.html' % subtitle.subtitle_id, timeout=10)
r.raise_for_status()
# open the zip
with ZipFile(io.BytesIO(r.content)) as zf:
if len(zf.namelist()) > 1:
raise ProviderError('More than one file to unzip')
subtitle.content = fix_line_ending(zf.read(zf.namelist()[0]))
subliminal-2.1.0/subliminal/refiners/ 0000755 0001750 0001750 00000000000 13653235562 020115 5 ustar jones jones 0000000 0000000 subliminal-2.1.0/subliminal/refiners/__init__.py 0000664 0001750 0001750 00000000500 13420412567 022215 0 ustar jones jones 0000000 0000000 """
Refiners enrich a :class:`~subliminal.video.Video` object by adding information to it.
A refiner is a simple function:
.. py:function:: refine(video, **kwargs)
:param video: the video to refine.
:type video: :class:`~subliminal.video.Video`
:param \*\*kwargs: additional parameters for refiners.
"""
subliminal-2.1.0/subliminal/refiners/hash.py 0000644 0001750 0001750 00000002423 13653235327 021412 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
import logging
from ..extensions import provider_manager, default_providers
from ..utils import hash_napiprojekt, hash_opensubtitles, hash_shooter, hash_thesubdb
logger = logging.getLogger(__name__)
hash_functions = {
'napiprojekt': hash_napiprojekt,
'opensubtitles': hash_opensubtitles,
'opensubtitlesvip': hash_opensubtitles,
'shooter': hash_shooter,
'thesubdb': hash_thesubdb
}
def refine(video, providers=None, languages=None, **kwargs):
"""Refine a video computing required hashes for the given providers.
The following :class:`~subliminal.video.Video` attribute can be found:
* :attr:`~subliminal.video.Video.hashes`
"""
if video.size <= 10485760:
logger.warning('Size is lower than 10MB: hashes not computed')
return
logger.debug('Computing hashes for %r', video.name)
for name in providers or default_providers:
provider = provider_manager[name].plugin
if name not in hash_functions:
continue
if not provider.check_types(video):
continue
if languages and not provider.check_languages(languages):
continue
video.hashes[name] = hash_functions[name](video.name)
logger.debug('Computed hashes %r', video.hashes)
subliminal-2.1.0/subliminal/refiners/metadata.py 0000644 0001750 0001750 00000007662 13653235327 022261 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
import logging
import os
from babelfish import Error as BabelfishError, Language
from enzyme import MKV
logger = logging.getLogger(__name__)
def refine(video, embedded_subtitles=True, **kwargs):
"""Refine a video by searching its metadata.
Several :class:`~subliminal.video.Video` attributes can be found:
* :attr:`~subliminal.video.Video.resolution`
* :attr:`~subliminal.video.Video.video_codec`
* :attr:`~subliminal.video.Video.audio_codec`
* :attr:`~subliminal.video.Video.subtitle_languages`
:param bool embedded_subtitles: search for embedded subtitles.
"""
# skip non existing videos
if not video.exists:
return
# check extensions
extension = os.path.splitext(video.name)[1]
if extension == '.mkv':
with open(video.name, 'rb') as f:
mkv = MKV(f)
# main video track
if mkv.video_tracks:
video_track = mkv.video_tracks[0]
# resolution
if video_track.height in (480, 720, 1080):
if video_track.interlaced:
video.resolution = '%di' % video_track.height
else:
video.resolution = '%dp' % video_track.height
logger.debug('Found resolution %s', video.resolution)
# video codec
if video_track.codec_id == 'V_MPEG4/ISO/AVC':
video.video_codec = 'H.264'
logger.debug('Found video_codec %s', video.video_codec)
elif video_track.codec_id == 'V_MPEG4/ISO/SP':
video.video_codec = 'DivX'
logger.debug('Found video_codec %s', video.video_codec)
elif video_track.codec_id == 'V_MPEG4/ISO/ASP':
video.video_codec = 'Xvid'
logger.debug('Found video_codec %s', video.video_codec)
else:
logger.warning('MKV has no video track')
# main audio track
if mkv.audio_tracks:
audio_track = mkv.audio_tracks[0]
# audio codec
if audio_track.codec_id == 'A_AC3':
video.audio_codec = 'Dolby Digital'
logger.debug('Found audio_codec %s', video.audio_codec)
elif audio_track.codec_id == 'A_DTS':
video.audio_codec = 'DTS'
logger.debug('Found audio_codec %s', video.audio_codec)
elif audio_track.codec_id == 'A_AAC':
video.audio_codec = 'AAC'
logger.debug('Found audio_codec %s', video.audio_codec)
else:
logger.warning('MKV has no audio track')
# subtitle tracks
if mkv.subtitle_tracks:
if embedded_subtitles:
embedded_subtitle_languages = set()
for st in mkv.subtitle_tracks:
if st.language:
try:
embedded_subtitle_languages.add(Language.fromalpha3b(st.language))
except BabelfishError:
logger.error('Embedded subtitle track language %r is not a valid language', st.language)
embedded_subtitle_languages.add(Language('und'))
elif st.name:
try:
embedded_subtitle_languages.add(Language.fromname(st.name))
except BabelfishError:
logger.debug('Embedded subtitle track name %r is not a valid language', st.name)
embedded_subtitle_languages.add(Language('und'))
else:
embedded_subtitle_languages.add(Language('und'))
logger.debug('Found embedded subtitle %r', embedded_subtitle_languages)
video.subtitle_languages |= embedded_subtitle_languages
else:
logger.debug('MKV has no subtitle track')
else:
logger.debug('Unsupported video extension %s', extension)
subliminal-2.1.0/subliminal/refiners/omdb.py 0000644 0001750 0001750 00000013343 13653235327 021413 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
import logging
import operator
import requests
from .. import __short_version__
from ..cache import REFINER_EXPIRATION_TIME, region
from ..video import Episode, Movie
logger = logging.getLogger(__name__)
class OMDBClient(object):
base_url = 'http://www.omdbapi.com'
def __init__(self, version=1, session=None, headers=None, timeout=10):
#: Session for the requests
self.session = session or requests.Session()
self.session.timeout = timeout
self.session.headers.update(headers or {})
self.session.params['r'] = 'json'
self.session.params['v'] = version
def get(self, id=None, title=None, type=None, year=None, plot='short', tomatoes=False):
# build the params
params = {}
if id:
params['i'] = id
if title:
params['t'] = title
if not params:
raise ValueError('At least id or title is required')
params['type'] = type
params['y'] = year
params['plot'] = plot
params['tomatoes'] = tomatoes
# perform the request
r = self.session.get(self.base_url, params=params)
r.raise_for_status()
# get the response as json
j = r.json()
# check response status
if j['Response'] == 'False':
return None
return j
def search(self, title, type=None, year=None, page=1):
# build the params
params = {'s': title, 'type': type, 'y': year, 'page': page}
# perform the request
r = self.session.get(self.base_url, params=params)
r.raise_for_status()
# get the response as json
j = r.json()
# check response status
if j['Response'] == 'False':
return None
return j
user_agent = 'Subliminal/%s' % __short_version__
omdb_client = OMDBClient(headers={'User-Agent': user_agent})
@region.cache_on_arguments(expiration_time=REFINER_EXPIRATION_TIME)
def search(title, type, year):
results = omdb_client.search(title, type, year)
if not results:
return None
# fetch all paginated results
all_results = results['Search']
total_results = int(results['totalResults'])
page = 1
while total_results > page * 10:
page += 1
results = omdb_client.search(title, type, year, page=page)
all_results.extend(results['Search'])
return all_results
def refine(video, apikey=None, **kwargs):
"""Refine a video by searching `OMDb API `_.
Several :class:`~subliminal.video.Episode` attributes can be found:
* :attr:`~subliminal.video.Episode.series`
* :attr:`~subliminal.video.Episode.year`
* :attr:`~subliminal.video.Episode.series_imdb_id`
Similarly, for a :class:`~subliminal.video.Movie`:
* :attr:`~subliminal.video.Movie.title`
* :attr:`~subliminal.video.Movie.year`
* :attr:`~subliminal.video.Video.imdb_id`
"""
if not apikey:
logger.warning('No apikey. Skipping omdb refiner.')
return
omdb_client.session.params['apikey'] = apikey
if isinstance(video, Episode):
# exit if the information is complete
if video.series_imdb_id:
logger.debug('No need to search')
return
# search the series
results = search(video.series, 'series', video.year)
if not results:
logger.warning('No results for series')
return
logger.debug('Found %d results', len(results))
# filter the results
results = [r for r in results if video.matches(r['Title'])]
if not results:
logger.warning('No matching series found')
return
# process the results
found = False
for result in sorted(results, key=operator.itemgetter('Year')):
if video.original_series and video.year is None:
logger.debug('Found result for original series without year')
found = True
break
if video.year == int(result['Year'].split(u'\u2013')[0]):
logger.debug('Found result with matching year')
found = True
break
if not found:
logger.warning('No matching series found')
return
# add series information
logger.debug('Found series %r', result)
video.series = result['Title']
video.year = int(result['Year'].split(u'\u2013')[0])
video.series_imdb_id = result['imdbID']
elif isinstance(video, Movie):
# exit if the information is complete
if video.imdb_id:
return
# search the movie
results = search(video.title, 'movie', video.year)
if not results:
logger.warning('No results for movie')
return
logger.debug('Found %d results', len(results))
# filter the results
results = [r for r in results if video.matches(r['Title'])]
if not results:
logger.warning('No matching movie found')
return
# process the results
found = False
for result in results:
if video.year is None:
logger.debug('Found result for movie without year')
found = True
break
if video.year == int(result['Year']):
logger.debug('Found result with matching year')
found = True
break
if not found:
logger.warning('No matching movie found')
return
# add movie information
logger.debug('Found movie %r', result)
video.title = result['Title']
video.year = int(result['Year'].split(u'\u2013')[0])
video.imdb_id = result['imdbID']
subliminal-2.1.0/subliminal/refiners/tvdb.py 0000644 0001750 0001750 00000027500 13653235327 021431 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
from datetime import datetime, timedelta
from functools import wraps
import logging
import re
from babelfish import Country
import guessit
import requests
from .. import __short_version__
from ..cache import REFINER_EXPIRATION_TIME, region
from ..utils import sanitize
from ..video import Episode
logger = logging.getLogger(__name__)
series_re = re.compile(r'^(?P.*?)(?: \((?:(?P\d{4})|(?P[A-Z]{2}))\))?$')
def requires_auth(func):
"""Decorator for :class:`TVDBClient` methods that require authentication"""
@wraps(func)
def wrapper(self, *args, **kwargs):
if self.token is None or self.token_expired:
self.login()
elif self.token_needs_refresh:
self.refresh_token()
return func(self, *args, **kwargs)
return wrapper
class TVDBClient(object):
"""TVDB REST API Client
:param str apikey: API key to use.
:param str username: username to use.
:param str password: password to use.
:param str language: language of the responses.
:param session: session object to use.
:type session: :class:`requests.sessions.Session` or compatible.
:param dict headers: additional headers.
:param int timeout: timeout for the requests.
"""
#: Base URL of the API
base_url = 'https://api.thetvdb.com'
#: Token lifespan
token_lifespan = timedelta(hours=1)
#: Minimum token age before a :meth:`refresh_token` is triggered
refresh_token_every = timedelta(minutes=30)
def __init__(self, apikey=None, username=None, password=None, language='en', session=None, headers=None,
timeout=10):
#: API key
self.apikey = apikey
#: Username
self.username = username
#: Password
self.password = password
#: Last token acquisition date
self.token_date = datetime.utcnow() - self.token_lifespan
#: Session for the requests
self.session = session or requests.Session()
self.session.timeout = timeout
self.session.headers.update(headers or {})
self.session.headers['Content-Type'] = 'application/json'
self.session.headers['Accept-Language'] = language
@property
def language(self):
return self.session.headers['Accept-Language']
@language.setter
def language(self, value):
self.session.headers['Accept-Language'] = value
@property
def token(self):
if 'Authorization' not in self.session.headers:
return None
return self.session.headers['Authorization'][7:]
@property
def token_expired(self):
return datetime.utcnow() - self.token_date > self.token_lifespan
@property
def token_needs_refresh(self):
return datetime.utcnow() - self.token_date > self.refresh_token_every
def login(self):
"""Login"""
# perform the request
data = {'apikey': self.apikey, 'username': self.username, 'password': self.password}
r = self.session.post(self.base_url + '/login', json=data)
r.raise_for_status()
# set the Authorization header
self.session.headers['Authorization'] = 'Bearer ' + r.json()['token']
# update token_date
self.token_date = datetime.utcnow()
def refresh_token(self):
"""Refresh token"""
# perform the request
r = self.session.get(self.base_url + '/refresh_token')
r.raise_for_status()
# set the Authorization header
self.session.headers['Authorization'] = 'Bearer ' + r.json()['token']
# update token_date
self.token_date = datetime.utcnow()
@requires_auth
def search_series(self, name=None, imdb_id=None, zap2it_id=None):
"""Search series"""
# perform the request
params = {'name': name, 'imdbId': imdb_id, 'zap2itId': zap2it_id}
r = self.session.get(self.base_url + '/search/series', params=params)
if r.status_code == 404:
return None
r.raise_for_status()
return r.json()['data']
@requires_auth
def get_series(self, id):
"""Get series"""
# perform the request
r = self.session.get(self.base_url + '/series/{}'.format(id))
if r.status_code == 404:
return None
r.raise_for_status()
return r.json()['data']
@requires_auth
def get_series_actors(self, id):
"""Get series actors"""
# perform the request
r = self.session.get(self.base_url + '/series/{}/actors'.format(id))
if r.status_code == 404:
return None
r.raise_for_status()
return r.json()['data']
@requires_auth
def get_series_episodes(self, id, page=1):
"""Get series episodes"""
# perform the request
params = {'page': page}
r = self.session.get(self.base_url + '/series/{}/episodes'.format(id), params=params)
if r.status_code == 404:
return None
r.raise_for_status()
return r.json()
@requires_auth
def query_series_episodes(self, id, absolute_number=None, aired_season=None, aired_episode=None, dvd_season=None,
dvd_episode=None, imdb_id=None, page=1):
"""Query series episodes"""
# perform the request
params = {'absoluteNumber': absolute_number, 'airedSeason': aired_season, 'airedEpisode': aired_episode,
'dvdSeason': dvd_season, 'dvdEpisode': dvd_episode, 'imdbId': imdb_id, 'page': page}
r = self.session.get(self.base_url + '/series/{}/episodes/query'.format(id), params=params)
if r.status_code == 404:
return None
r.raise_for_status()
return r.json()
@requires_auth
def get_episode(self, id):
"""Get episode"""
# perform the request
r = self.session.get(self.base_url + '/episodes/{}'.format(id))
if r.status_code == 404:
return None
r.raise_for_status()
return r.json()['data']
#: User-Agent to use
user_agent = 'Subliminal/%s' % __short_version__
#: Configured instance of :class:`TVDBClient`
tvdb_client = TVDBClient('5EC930FB90DA1ADA', headers={'User-Agent': user_agent})
#: Configure guessit in order to use GuessitCountryConverter
guessit.api.configure()
@region.cache_on_arguments(expiration_time=REFINER_EXPIRATION_TIME)
def search_series(name):
"""Search series.
:param str name: name of the series.
:return: the search results.
:rtype: list
"""
return tvdb_client.search_series(name)
@region.cache_on_arguments(expiration_time=REFINER_EXPIRATION_TIME)
def get_series(id):
"""Get series.
:param int id: id of the series.
:return: the series data.
:rtype: dict
"""
return tvdb_client.get_series(id)
@region.cache_on_arguments(expiration_time=REFINER_EXPIRATION_TIME)
def get_series_episode(series_id, season, episode):
"""Get an episode of a series.
:param int series_id: id of the series.
:param int season: season number of the episode.
:param int episode: episode number of the episode.
:return: the episode data.
:rtype: dict
"""
result = tvdb_client.query_series_episodes(series_id, aired_season=season, aired_episode=episode)
if result:
return tvdb_client.get_episode(result['data'][0]['id'])
def refine(video, **kwargs):
"""Refine a video by searching `TheTVDB `_.
.. note::
This refiner only work for instances of :class:`~subliminal.video.Episode`.
Several attributes can be found:
* :attr:`~subliminal.video.Episode.series`
* :attr:`~subliminal.video.Episode.year`
* :attr:`~subliminal.video.Episode.series_imdb_id`
* :attr:`~subliminal.video.Episode.series_tvdb_id`
* :attr:`~subliminal.video.Episode.title`
* :attr:`~subliminal.video.Video.imdb_id`
* :attr:`~subliminal.video.Episode.tvdb_id`
"""
# only deal with Episode videos
if not isinstance(video, Episode):
logger.error('Cannot refine episodes')
return
# exit if the information is complete
if video.series_tvdb_id and video.tvdb_id:
logger.debug('No need to search')
return
# search the series
logger.info('Searching series %r', video.series)
results = search_series(video.series.lower())
if not results:
logger.warning('No results for series')
return
logger.debug('Found %d results', len(results))
# search for exact matches
matching_results = []
for result in results:
matching_result = {}
# use seriesName and aliases
series_names = [result['seriesName']]
series_names.extend(result['aliases'])
# parse the original series as series + year or country
original_match = series_re.match(result['seriesName']).groupdict()
# parse series year
series_year = None
if result['firstAired']:
series_year = datetime.strptime(result['firstAired'], '%Y-%m-%d').year
# discard mismatches on year
if video.year and series_year and video.year != series_year:
logger.debug('Discarding series %r mismatch on year %d', result['seriesName'], series_year)
continue
# iterate over series names
for series_name in series_names:
# parse as series, year and country
series, year, country = series_re.match(series_name).groups()
if year:
year = int(year)
if country:
country = Country.fromguessit(country)
# discard mismatches on year
if year and (video.original_series or video.year != year):
logger.debug('Discarding series name %r mismatch on year %d', series, year)
continue
# discard mismatches on country
if video.country and video.country != country:
logger.debug('Discarding series name %r mismatch on country %r', series, country)
continue
# match on sanitized series name
if sanitize(series) == sanitize(video.series):
logger.debug('Found exact match on series %r', series_name)
matching_result['match'] = {
'series': original_match['series'],
'year': series_year or year,
'country': country,
'original_series': original_match['year'] is None and country is None
}
break
# add the result on match
if matching_result:
matching_result['data'] = result
matching_results.append(matching_result)
# exit if we don't have exactly 1 matching result
if not matching_results:
logger.error('No matching series found')
return
if len(matching_results) > 1:
logger.error('Multiple matches found')
return
# get the series
matching_result = matching_results[0]
series = get_series(matching_result['data']['id'])
# add series information
logger.debug('Found series %r', series)
video.series = matching_result['match']['series']
video.alternative_series.extend(series['aliases'])
video.year = matching_result['match']['year']
video.country = matching_result['match']['country']
video.original_series = matching_result['match']['original_series']
video.series_tvdb_id = series['id']
video.series_imdb_id = series['imdbId'] or None
# get the episode
logger.info('Getting series episode %dx%d', video.season, video.episode)
episode = get_series_episode(video.series_tvdb_id, video.season, video.episode)
if not episode:
logger.warning('No results for episode')
return
# add episode information
logger.debug('Found episode %r', episode)
video.tvdb_id = episode['id']
video.title = episode['episodeName'] or None
video.imdb_id = episode['imdbId'] or None
subliminal-2.1.0/subliminal/__init__.py 0000644 0001750 0001750 00000001462 13653235424 020411 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
__title__ = 'subliminal'
__version__ = '2.1.0'
__short_version__ = '.'.join(__version__.split('.')[:2])
__author__ = 'Antoine Bertin'
__license__ = 'MIT'
__copyright__ = 'Copyright 2016, Antoine Bertin'
import logging
from .core import (AsyncProviderPool, ProviderPool, check_video, download_best_subtitles, download_subtitles,
list_subtitles, refine, save_subtitles, scan_video, scan_videos)
from .cache import region
from .exceptions import Error, ProviderError
from .extensions import provider_manager, refiner_manager
from .providers import Provider
from .score import compute_score, get_scores
from .subtitle import SUBTITLE_EXTENSIONS, Subtitle
from .video import VIDEO_EXTENSIONS, Episode, Movie, Video
logging.getLogger(__name__).addHandler(logging.NullHandler())
subliminal-2.1.0/subliminal/cache.py 0000644 0001750 0001750 00000002222 13653235327 017712 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
import datetime
import six
from dogpile.cache import make_region
from dogpile.cache.util import function_key_generator
#: Expiration time for show caching
SHOW_EXPIRATION_TIME = datetime.timedelta(weeks=3).total_seconds()
#: Expiration time for episode caching
EPISODE_EXPIRATION_TIME = datetime.timedelta(days=3).total_seconds()
#: Expiration time for scraper searches
REFINER_EXPIRATION_TIME = datetime.timedelta(weeks=1).total_seconds()
def _to_native_str(value):
if six.PY2:
# In Python 2, the native string type is bytes
if isinstance(value, six.text_type): # unicode for Python 2
return value.encode('utf-8')
else:
return six.binary_type(value)
else:
# In Python 3, the native string type is unicode
if isinstance(value, six.binary_type): # bytes for Python 3
return value.decode('utf-8')
else:
return six.text_type(value)
def to_native_str_key_generator(namespace, fn, to_str=_to_native_str):
return function_key_generator(namespace, fn, to_str)
region = make_region(function_key_generator=to_native_str_key_generator)
subliminal-2.1.0/subliminal/cli.py 0000644 0001750 0001750 00000051151 13653235327 017423 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
"""
Subliminal uses `click `_ to provide a powerful :abbr:`CLI (command-line interface)`.
"""
from __future__ import division
from collections import defaultdict
from datetime import timedelta
import glob
import json
import logging
import os
import re
from appdirs import AppDirs
from babelfish import Error as BabelfishError, Language
import click
from dogpile.cache.backends.file import AbstractFileLock
from dogpile.util.readwrite_lock import ReadWriteMutex
from six.moves import configparser
from subliminal import (AsyncProviderPool, Episode, Movie, Video, __version__, check_video, compute_score, get_scores,
provider_manager, refine, refiner_manager, region, save_subtitles, scan_video, scan_videos)
from subliminal.core import ARCHIVE_EXTENSIONS, search_external_subtitles
logger = logging.getLogger(__name__)
class MutexLock(AbstractFileLock):
""":class:`MutexLock` is a thread-based rw lock based on :class:`dogpile.core.ReadWriteMutex`."""
def __init__(self, filename):
self.mutex = ReadWriteMutex()
def acquire_read_lock(self, wait):
ret = self.mutex.acquire_read_lock(wait)
return wait or ret
def acquire_write_lock(self, wait):
ret = self.mutex.acquire_write_lock(wait)
return wait or ret
def release_read_lock(self):
return self.mutex.release_read_lock()
def release_write_lock(self):
return self.mutex.release_write_lock()
class Config(object):
"""A :class:`~configparser.ConfigParser` wrapper to store configuration.
Interaction with the configuration is done with the properties.
:param str path: path to the configuration file.
"""
def __init__(self, path):
#: Path to the configuration file
self.path = path
#: The underlying configuration object
self.config = configparser.SafeConfigParser()
self.config.add_section('general')
self.config.set('general', 'languages', json.dumps(['en']))
self.config.set('general', 'providers', json.dumps(sorted([p.name for p in provider_manager])))
self.config.set('general', 'refiners', json.dumps(sorted([r.name for r in refiner_manager])))
self.config.set('general', 'single', str(0))
self.config.set('general', 'embedded_subtitles', str(1))
self.config.set('general', 'age', str(int(timedelta(weeks=2).total_seconds())))
self.config.set('general', 'hearing_impaired', str(1))
self.config.set('general', 'min_score', str(0))
def read(self):
"""Read the configuration from :attr:`path`"""
self.config.read(self.path)
def write(self):
"""Write the configuration to :attr:`path`"""
with open(self.path, 'w') as f:
self.config.write(f)
@property
def languages(self):
return {Language.fromietf(l) for l in json.loads(self.config.get('general', 'languages'))}
@languages.setter
def languages(self, value):
self.config.set('general', 'languages', json.dumps(sorted([str(l) for l in value])))
@property
def providers(self):
return json.loads(self.config.get('general', 'providers'))
@providers.setter
def providers(self, value):
self.config.set('general', 'providers', json.dumps(sorted([p.lower() for p in value])))
@property
def refiners(self):
return json.loads(self.config.get('general', 'refiners'))
@refiners.setter
def refiners(self, value):
self.config.set('general', 'refiners', json.dumps([r.lower() for r in value]))
@property
def single(self):
return self.config.getboolean('general', 'single')
@single.setter
def single(self, value):
self.config.set('general', 'single', str(int(value)))
@property
def embedded_subtitles(self):
return self.config.getboolean('general', 'embedded_subtitles')
@embedded_subtitles.setter
def embedded_subtitles(self, value):
self.config.set('general', 'embedded_subtitles', str(int(value)))
@property
def age(self):
return timedelta(seconds=self.config.getint('general', 'age'))
@age.setter
def age(self, value):
self.config.set('general', 'age', str(int(value.total_seconds())))
@property
def hearing_impaired(self):
return self.config.getboolean('general', 'hearing_impaired')
@hearing_impaired.setter
def hearing_impaired(self, value):
self.config.set('general', 'hearing_impaired', str(int(value)))
@property
def min_score(self):
return self.config.getfloat('general', 'min_score')
@min_score.setter
def min_score(self, value):
self.config.set('general', 'min_score', str(value))
@property
def provider_configs(self):
rv = {}
for provider in provider_manager:
if self.config.has_section(provider.name):
rv[provider.name] = {k: v for k, v in self.config.items(provider.name)}
return rv
@provider_configs.setter
def provider_configs(self, value):
# loop over provider configurations
for provider, config in value.items():
# create the corresponding section if necessary
if not self.config.has_section(provider):
self.config.add_section(provider)
# add config options
for k, v in config.items():
self.config.set(provider, k, v)
@property
def refiner_configs(self):
rv = {}
for refiner in refiner_manager:
if self.config.has_section(refiner.name):
rv[refiner.name] = {k: v for k, v in self.config.items(refiner.name)}
return rv
@refiner_configs.setter
def refiner_configs(self, value):
# loop over refiner configurations
for refiner, config in value.items():
# create the corresponding section if necessary
if not self.config.has_section(refiner):
self.config.add_section(refiner)
# add config options
for k, v in config.items():
self.config.set(refiner, k, v)
class LanguageParamType(click.ParamType):
""":class:`~click.ParamType` for languages that returns a :class:`~babelfish.language.Language`"""
name = 'language'
def convert(self, value, param, ctx):
try:
return Language.fromietf(value)
except BabelfishError:
self.fail('%s is not a valid language' % value)
LANGUAGE = LanguageParamType()
class AgeParamType(click.ParamType):
""":class:`~click.ParamType` for age strings that returns a :class:`~datetime.timedelta`
An age string is in the form `number + identifier` with possible identifiers:
* ``w`` for weeks
* ``d`` for days
* ``h`` for hours
The form can be specified multiple times but only with that idenfier ordering. For example:
* ``1w2d4h`` for 1 week, 2 days and 4 hours
* ``2w`` for 2 weeks
* ``3w6h`` for 3 weeks and 6 hours
"""
name = 'age'
def convert(self, value, param, ctx):
match = re.match(r'^(?:(?P\d+?)w)?(?:(?P\d+?)d)?(?:(?P\d+?)h)?$', value)
if not match:
self.fail('%s is not a valid age' % value)
return timedelta(**{k: int(v) for k, v in match.groupdict(0).items()})
AGE = AgeParamType()
PROVIDER = click.Choice(sorted(provider_manager.names()))
REFINER = click.Choice(sorted(refiner_manager.names()))
dirs = AppDirs('subliminal')
cache_file = 'subliminal.dbm'
config_file = 'config.ini'
@click.group(context_settings={'max_content_width': 100}, epilog='Suggestions and bug reports are greatly appreciated: '
'https://github.com/Diaoul/subliminal/')
@click.option('--addic7ed', type=click.STRING, nargs=2, metavar='USERNAME PASSWORD', help='Addic7ed configuration.')
@click.option('--legendastv', type=click.STRING, nargs=2, metavar='USERNAME PASSWORD', help='LegendasTV configuration.')
@click.option('--opensubtitles', type=click.STRING, nargs=2, metavar='USERNAME PASSWORD',
help='OpenSubtitles configuration.')
@click.option('--omdb', type=click.STRING, nargs=1, metavar='APIKEY', help='OMDB API key.')
@click.option('--cache-dir', type=click.Path(writable=True, file_okay=False), default=dirs.user_cache_dir,
show_default=True, expose_value=True, help='Path to the cache directory.')
@click.option('--debug', is_flag=True, help='Print useful information for debugging subliminal and for reporting bugs.')
@click.version_option(__version__)
@click.pass_context
def subliminal(ctx, addic7ed, legendastv, opensubtitles, omdb, cache_dir, debug):
"""Subtitles, faster than your thoughts."""
# create cache directory
try:
os.makedirs(cache_dir)
except OSError:
if not os.path.isdir(cache_dir):
raise
# configure cache
region.configure('dogpile.cache.dbm', expiration_time=timedelta(days=30),
arguments={'filename': os.path.join(cache_dir, cache_file), 'lock_factory': MutexLock})
# configure logging
if debug:
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter(logging.BASIC_FORMAT))
logging.getLogger('subliminal').addHandler(handler)
logging.getLogger('subliminal').setLevel(logging.DEBUG)
ctx.obj = {
'provider_configs': {},
'refiner_configs': {}
}
# provider configs
if addic7ed:
ctx.obj['provider_configs']['addic7ed'] = {'username': addic7ed[0], 'password': addic7ed[1]}
if legendastv:
ctx.obj['provider_configs']['legendastv'] = {'username': legendastv[0], 'password': legendastv[1]}
if opensubtitles:
ctx.obj['provider_configs']['opensubtitles'] = {'username': opensubtitles[0], 'password': opensubtitles[1]}
ctx.obj['provider_configs']['opensubtitlesvip'] = {'username': opensubtitles[0], 'password': opensubtitles[1]}
# refiner configs
if omdb:
ctx.obj['refiner_configs']['omdb'] = {'apikey': omdb}
@subliminal.command()
@click.option('--clear-subliminal', is_flag=True, help='Clear subliminal\'s cache. Use this ONLY if your cache is '
'corrupted or if you experience issues.')
@click.pass_context
def cache(ctx, clear_subliminal):
"""Cache management."""
if clear_subliminal:
for file in glob.glob(os.path.join(ctx.parent.params['cache_dir'], cache_file) + '*'):
os.remove(file)
click.echo('Subliminal\'s cache cleared.')
else:
click.echo('Nothing done.')
@subliminal.command()
@click.option('-l', '--language', type=LANGUAGE, required=True, multiple=True, help='Language as IETF code, '
'e.g. en, pt-BR (can be used multiple times).')
@click.option('-p', '--provider', type=PROVIDER, multiple=True, help='Provider to use (can be used multiple times).')
@click.option('-r', '--refiner', type=REFINER, multiple=True, help='Refiner to use (can be used multiple times).')
@click.option('-a', '--age', type=AGE, help='Filter videos newer than AGE, e.g. 12h, 1w2d.')
@click.option('-d', '--directory', type=click.STRING, metavar='DIR', help='Directory where to save subtitles, '
'default is next to the video file.')
@click.option('-e', '--encoding', type=click.STRING, metavar='ENC', help='Subtitle file encoding, default is to '
'preserve original encoding.')
@click.option('-s', '--single', is_flag=True, default=False, help='Save subtitle without language code in the file '
'name, i.e. use .srt extension. Do not use this unless your media player requires it.')
@click.option('-f', '--force', is_flag=True, default=False, help='Force download even if a subtitle already exist.')
@click.option('-hi', '--hearing-impaired', is_flag=True, default=False, help='Prefer hearing impaired subtitles.')
@click.option('-m', '--min-score', type=click.IntRange(0, 100), default=0, help='Minimum score for a subtitle '
'to be downloaded (0 to 100).')
@click.option('-w', '--max-workers', type=click.IntRange(1, 50), default=None, help='Maximum number of threads to use.')
@click.option('-z/-Z', '--archives/--no-archives', default=True, show_default=True, help='Scan archives for videos '
'(supported extensions: %s).' % ', '.join(ARCHIVE_EXTENSIONS))
@click.option('-v', '--verbose', count=True, help='Increase verbosity.')
@click.argument('path', type=click.Path(), required=True, nargs=-1)
@click.pass_obj
def download(obj, provider, refiner, language, age, directory, encoding, single, force, hearing_impaired, min_score,
max_workers, archives, verbose, path):
"""Download best subtitles.
PATH can be an directory containing videos, a video file path or a video file name. It can be used multiple times.
If an existing subtitle is detected (external or embedded) in the correct language, the download is skipped for
the associated video.
"""
# process parameters
language = set(language)
# scan videos
videos = []
ignored_videos = []
errored_paths = []
with click.progressbar(path, label='Collecting videos', item_show_func=lambda p: p or '') as bar:
for p in bar:
logger.debug('Collecting path %s', p)
# non-existing
if not os.path.exists(p):
try:
video = Video.fromname(p)
except:
logger.exception('Unexpected error while collecting non-existing path %s', p)
errored_paths.append(p)
continue
if not force:
video.subtitle_languages |= set(search_external_subtitles(video.name, directory=directory).values())
if check_video(video, languages=language, age=age, undefined=single):
refine(video, episode_refiners=refiner, movie_refiners=refiner,
refiner_configs=obj['refiner_configs'],
embedded_subtitles=not force, providers=provider, languages=language)
videos.append(video)
continue
# directories
if os.path.isdir(p):
try:
scanned_videos = scan_videos(p, age=age, archives=archives)
except:
logger.exception('Unexpected error while collecting directory path %s', p)
errored_paths.append(p)
continue
for video in scanned_videos:
if not force:
video.subtitle_languages |= set(search_external_subtitles(video.name,
directory=directory).values())
if check_video(video, languages=language, age=age, undefined=single):
refine(video, episode_refiners=refiner, movie_refiners=refiner,
refiner_configs=obj['refiner_configs'], embedded_subtitles=not force,
providers=provider, languages=language)
videos.append(video)
else:
ignored_videos.append(video)
continue
# other inputs
try:
video = scan_video(p)
except:
logger.exception('Unexpected error while collecting path %s', p)
errored_paths.append(p)
continue
if not force:
video.subtitle_languages |= set(search_external_subtitles(video.name, directory=directory).values())
if check_video(video, languages=language, age=age, undefined=single):
refine(video, episode_refiners=refiner, movie_refiners=refiner,
refiner_configs=obj['refiner_configs'], embedded_subtitles=not force,
providers=provider, languages=language)
videos.append(video)
else:
ignored_videos.append(video)
# output errored paths
if verbose > 0:
for p in errored_paths:
click.secho('%s errored' % p, fg='red')
# output ignored videos
if verbose > 1:
for video in ignored_videos:
click.secho('%s ignored - subtitles: %s / age: %d day%s' % (
os.path.split(video.name)[1],
', '.join(str(s) for s in video.subtitle_languages) or 'none',
video.age.days,
's' if video.age.days > 1 else ''
), fg='yellow')
# report collected videos
click.echo('%s video%s collected / %s video%s ignored / %s error%s' % (
click.style(str(len(videos)), bold=True, fg='green' if videos else None),
's' if len(videos) > 1 else '',
click.style(str(len(ignored_videos)), bold=True, fg='yellow' if ignored_videos else None),
's' if len(ignored_videos) > 1 else '',
click.style(str(len(errored_paths)), bold=True, fg='red' if errored_paths else None),
's' if len(errored_paths) > 1 else '',
))
# exit if no video collected
if not videos:
return
# download best subtitles
downloaded_subtitles = defaultdict(list)
with AsyncProviderPool(max_workers=max_workers, providers=provider, provider_configs=obj['provider_configs']) as p:
with click.progressbar(videos, label='Downloading subtitles',
item_show_func=lambda v: os.path.split(v.name)[1] if v is not None else '') as bar:
for v in bar:
scores = get_scores(v)
subtitles = p.download_best_subtitles(p.list_subtitles(v, language - v.subtitle_languages),
v, language, min_score=scores['hash'] * min_score / 100,
hearing_impaired=hearing_impaired, only_one=single)
downloaded_subtitles[v] = subtitles
if p.discarded_providers:
click.secho('Some providers have been discarded due to unexpected errors: %s' %
', '.join(p.discarded_providers), fg='yellow')
# save subtitles
total_subtitles = 0
for v, subtitles in downloaded_subtitles.items():
saved_subtitles = save_subtitles(v, subtitles, single=single, directory=directory, encoding=encoding)
total_subtitles += len(saved_subtitles)
if verbose > 0:
click.echo('%s subtitle%s downloaded for %s' % (click.style(str(len(saved_subtitles)), bold=True),
's' if len(saved_subtitles) > 1 else '',
os.path.split(v.name)[1]))
if verbose > 1:
for s in saved_subtitles:
matches = s.get_matches(v)
score = compute_score(s, v)
# score color
score_color = None
scores = get_scores(v)
if isinstance(v, Movie):
if score < scores['title']:
score_color = 'red'
elif score < scores['title'] + scores['year'] + scores['release_group']:
score_color = 'yellow'
else:
score_color = 'green'
elif isinstance(v, Episode):
if score < scores['series'] + scores['season'] + scores['episode']:
score_color = 'red'
elif score < scores['series'] + scores['season'] + scores['episode'] + scores['release_group']:
score_color = 'yellow'
else:
score_color = 'green'
# scale score from 0 to 100 taking out preferences
scaled_score = score
if s.hearing_impaired == hearing_impaired:
scaled_score -= scores['hearing_impaired']
scaled_score *= 100 / scores['hash']
# echo some nice colored output
click.echo(' - [{score}] {language} subtitle from {provider_name} (match on {matches})'.format(
score=click.style('{:5.1f}'.format(scaled_score), fg=score_color, bold=score >= scores['hash']),
language=s.language.name if s.language.country is None else '%s (%s)' % (s.language.name,
s.language.country.name),
provider_name=s.provider_name,
matches=', '.join(sorted(matches, key=scores.get, reverse=True))
))
if verbose == 0:
click.echo('Downloaded %s subtitle%s' % (click.style(str(total_subtitles), bold=True),
's' if total_subtitles > 1 else ''))
subliminal-2.1.0/subliminal/core.py 0000644 0001750 0001750 00000065277 13653235327 017622 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
import io
import itertools
import logging
import operator
import os
from babelfish import Language, LanguageReverseError
from guessit import guessit
from rarfile import BadRarFile, NotRarFile, RarCannotExec, RarFile, Error, is_rarfile
from zipfile import BadZipfile
from .extensions import provider_manager, default_providers, refiner_manager
from .score import compute_score as default_compute_score
from .subtitle import SUBTITLE_EXTENSIONS
from .utils import handle_exception
from .video import VIDEO_EXTENSIONS, Episode, Movie, Video
#: Supported archive extensions
ARCHIVE_EXTENSIONS = ('.rar',)
logger = logging.getLogger(__name__)
class ProviderPool(object):
"""A pool of providers with the same API as a single :class:`~subliminal.providers.Provider`.
It has a few extra features:
* Lazy loads providers when needed and supports the `with` statement to :meth:`terminate`
the providers on exit.
* Automatically discard providers on failure.
:param list providers: name of providers to use, if not all.
:param dict provider_configs: provider configuration as keyword arguments per provider name to pass when
instantiating the :class:`~subliminal.providers.Provider`.
"""
def __init__(self, providers=None, provider_configs=None):
#: Name of providers to use
self.providers = providers or default_providers
#: Provider configuration
self.provider_configs = provider_configs or {}
#: Initialized providers
self.initialized_providers = {}
#: Discarded providers
self.discarded_providers = set()
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, traceback):
self.terminate()
def __getitem__(self, name):
if name not in self.providers:
raise KeyError
if name not in self.initialized_providers:
logger.info('Initializing provider %s', name)
provider = provider_manager[name].plugin(**self.provider_configs.get(name, {}))
provider.initialize()
self.initialized_providers[name] = provider
return self.initialized_providers[name]
def __delitem__(self, name):
if name not in self.initialized_providers:
raise KeyError(name)
try:
logger.info('Terminating provider %s', name)
self.initialized_providers[name].terminate()
except Exception as e:
handle_exception(e, 'Provider {} improperly terminated'.format(name))
del self.initialized_providers[name]
def __iter__(self):
return iter(self.initialized_providers)
def list_subtitles_provider(self, provider, video, languages):
"""List subtitles with a single provider.
The video and languages are checked against the provider.
:param str provider: name of the provider.
:param video: video to list subtitles for.
:type video: :class:`~subliminal.video.Video`
:param languages: languages to search for.
:type languages: set of :class:`~babelfish.language.Language`
:return: found subtitles.
:rtype: list of :class:`~subliminal.subtitle.Subtitle` or None
"""
# check video validity
if not provider_manager[provider].plugin.check(video):
logger.info('Skipping provider %r: not a valid video', provider)
return []
# check supported languages
provider_languages = provider_manager[provider].plugin.check_languages(languages)
if not provider_languages:
logger.info('Skipping provider %r: no language to search for', provider)
return []
# list subtitles
logger.info('Listing subtitles with provider %r and languages %r', provider, provider_languages)
try:
return self[provider].list_subtitles(video, provider_languages)
except Exception as e:
handle_exception(e, 'Provider {}'.format(provider))
def list_subtitles(self, video, languages):
"""List subtitles.
:param video: video to list subtitles for.
:type video: :class:`~subliminal.video.Video`
:param languages: languages to search for.
:type languages: set of :class:`~babelfish.language.Language`
:return: found subtitles.
:rtype: list of :class:`~subliminal.subtitle.Subtitle`
"""
subtitles = []
for name in self.providers:
# check discarded providers
if name in self.discarded_providers:
logger.debug('Skipping discarded provider %r', name)
continue
# list subtitles
provider_subtitles = self.list_subtitles_provider(name, video, languages)
if provider_subtitles is None:
logger.info('Discarding provider %s', name)
self.discarded_providers.add(name)
continue
# add the subtitles
subtitles.extend(provider_subtitles)
return subtitles
def download_subtitle(self, subtitle):
"""Download `subtitle`'s :attr:`~subliminal.subtitle.Subtitle.content`.
:param subtitle: subtitle to download.
:type subtitle: :class:`~subliminal.subtitle.Subtitle`
:return: `True` if the subtitle has been successfully downloaded, `False` otherwise.
:rtype: bool
"""
# check discarded providers
if subtitle.provider_name in self.discarded_providers:
logger.warning('Provider %r is discarded', subtitle.provider_name)
return False
logger.info('Downloading subtitle %r', subtitle)
try:
self[subtitle.provider_name].download_subtitle(subtitle)
except (BadZipfile, BadRarFile):
logger.error('Bad archive for subtitle %r', subtitle)
except Exception as e:
handle_exception(e, 'Discarding provider {}'.format(subtitle.provider_name))
self.discarded_providers.add(subtitle.provider_name)
# check subtitle validity
if not subtitle.is_valid():
logger.error('Invalid subtitle')
return False
return True
def download_best_subtitles(self, subtitles, video, languages, min_score=0, hearing_impaired=False, only_one=False,
compute_score=None):
"""Download the best matching subtitles.
:param subtitles: the subtitles to use.
:type subtitles: list of :class:`~subliminal.subtitle.Subtitle`
:param video: video to download subtitles for.
:type video: :class:`~subliminal.video.Video`
:param languages: languages to download.
:type languages: set of :class:`~babelfish.language.Language`
:param int min_score: minimum score for a subtitle to be downloaded.
:param bool hearing_impaired: hearing impaired preference.
:param bool only_one: download only one subtitle, not one per language.
:param compute_score: function that takes `subtitle` and `video` as positional arguments,
`hearing_impaired` as keyword argument and returns the score.
:return: downloaded subtitles.
:rtype: list of :class:`~subliminal.subtitle.Subtitle`
"""
compute_score = compute_score or default_compute_score
# sort subtitles by score
scored_subtitles = sorted([(s, compute_score(s, video, hearing_impaired=hearing_impaired))
for s in subtitles], key=operator.itemgetter(1), reverse=True)
# download best subtitles, falling back on the next on error
downloaded_subtitles = []
for subtitle, score in scored_subtitles:
# check score
if score < min_score:
logger.info('Score %d is below min_score (%d)', score, min_score)
break
# check downloaded languages
if subtitle.language in set(s.language for s in downloaded_subtitles):
logger.debug('Skipping subtitle: %r already downloaded', subtitle.language)
continue
# download
if self.download_subtitle(subtitle):
downloaded_subtitles.append(subtitle)
# stop when all languages are downloaded
if set(s.language for s in downloaded_subtitles) == languages:
logger.debug('All languages downloaded')
break
# stop if only one subtitle is requested
if only_one:
logger.debug('Only one subtitle downloaded')
break
return downloaded_subtitles
def terminate(self):
"""Terminate all the :attr:`initialized_providers`."""
logger.debug('Terminating initialized providers')
for name in list(self.initialized_providers):
del self[name]
class AsyncProviderPool(ProviderPool):
"""Subclass of :class:`ProviderPool` with asynchronous support for :meth:`~ProviderPool.list_subtitles`.
:param int max_workers: maximum number of threads to use. If `None`, :attr:`max_workers` will be set
to the number of :attr:`~ProviderPool.providers`.
"""
def __init__(self, max_workers=None, *args, **kwargs):
super(AsyncProviderPool, self).__init__(*args, **kwargs)
#: Maximum number of threads to use
self.max_workers = max_workers or len(self.providers)
def list_subtitles_provider(self, provider, video, languages):
return provider, super(AsyncProviderPool, self).list_subtitles_provider(provider, video, languages)
def list_subtitles(self, video, languages):
subtitles = []
with ThreadPoolExecutor(self.max_workers) as executor:
for provider, provider_subtitles in executor.map(self.list_subtitles_provider, self.providers,
itertools.repeat(video, len(self.providers)),
itertools.repeat(languages, len(self.providers))):
# discard provider that failed
if provider_subtitles is None:
logger.info('Discarding provider %s', provider)
self.discarded_providers.add(provider)
continue
# add subtitles
subtitles.extend(provider_subtitles)
return subtitles
def check_video(video, languages=None, age=None, undefined=False):
"""Perform some checks on the `video`.
All the checks are optional. Return `False` if any of this check fails:
* `languages` already exist in `video`'s :attr:`~subliminal.video.Video.subtitle_languages`.
* `video` is older than `age`.
* `video` has an `undefined` language in :attr:`~subliminal.video.Video.subtitle_languages`.
:param video: video to check.
:type video: :class:`~subliminal.video.Video`
:param languages: desired languages.
:type languages: set of :class:`~babelfish.language.Language`
:param datetime.timedelta age: maximum age of the video.
:param bool undefined: fail on existing undefined language.
:return: `True` if the video passes the checks, `False` otherwise.
:rtype: bool
"""
# language test
if languages and not (languages - video.subtitle_languages):
logger.debug('All languages %r exist', languages)
return False
# age test
if age and video.age > age:
logger.debug('Video is older than %r', age)
return False
# undefined test
if undefined and Language('und') in video.subtitle_languages:
logger.debug('Undefined language found')
return False
return True
def search_external_subtitles(path, directory=None):
"""Search for external subtitles from a video `path` and their associated language.
Unless `directory` is provided, search will be made in the same directory as the video file.
:param str path: path to the video.
:param str directory: directory to search for subtitles.
:return: found subtitles with their languages.
:rtype: dict
"""
# split path
dirpath, filename = os.path.split(path)
dirpath = dirpath or '.'
fileroot, fileext = os.path.splitext(filename)
# search for subtitles
subtitles = {}
for p in os.listdir(directory or dirpath):
# keep only valid subtitle filenames
if not p.startswith(fileroot) or not p.lower().endswith(SUBTITLE_EXTENSIONS):
continue
# extract the potential language code
language = Language('und')
language_code = p[len(fileroot):-len(os.path.splitext(p)[1])].replace(fileext, '').replace('_', '-')[1:]
if language_code:
try:
language = Language.fromietf(language_code)
except (ValueError, LanguageReverseError):
logger.error('Cannot parse language code %r', language_code)
subtitles[p] = language
logger.debug('Found subtitles %r', subtitles)
return subtitles
def scan_video(path):
"""Scan a video from a `path`.
:param str path: existing path to the video.
:return: the scanned video.
:rtype: :class:`~subliminal.video.Video`
"""
# check for non-existing path
if not os.path.exists(path):
raise ValueError('Path does not exist')
# check video extension
if not path.lower().endswith(VIDEO_EXTENSIONS):
raise ValueError('%r is not a valid video extension' % os.path.splitext(path)[1])
dirpath, filename = os.path.split(path)
logger.info('Scanning video %r in %r', filename, dirpath)
# guess
video = Video.fromguess(path, guessit(path))
# size
video.size = os.path.getsize(path)
logger.debug('Size is %d', video.size)
return video
def scan_archive(path):
"""Scan an archive from a `path`.
:param str path: existing path to the archive.
:return: the scanned video.
:rtype: :class:`~subliminal.video.Video`
"""
# check for non-existing path
if not os.path.exists(path):
raise ValueError('Path does not exist')
if not is_rarfile(path):
raise ValueError("'{0}' is not a valid archive".format(os.path.splitext(path)[1]))
dir_path, filename = os.path.split(path)
logger.info('Scanning archive %r in %r', filename, dir_path)
# Get filename and file size from RAR
rar = RarFile(path)
# check that the rar doesnt need a password
if rar.needs_password():
raise ValueError('Rar requires a password')
# raise an exception if the rar file is broken
# must be called to avoid a potential deadlock with some broken rars
rar.testrar()
file_info = [f for f in rar.infolist() if not f.isdir() and f.filename.endswith(VIDEO_EXTENSIONS)]
# sort by file size descending, the largest video in the archive is the one we want, there may be samples or intros
file_info.sort(key=operator.attrgetter('file_size'), reverse=True)
# no video found
if not file_info:
raise ValueError('No video in archive')
# Free the information about irrelevant files before guessing
file_info = file_info[0]
# guess
video_filename = file_info.filename
video_path = os.path.join(dir_path, video_filename)
video = Video.fromguess(video_path, guessit(video_path))
# size
video.size = file_info.file_size
return video
def scan_videos(path, age=None, archives=True):
"""Scan `path` for videos and their subtitles.
See :func:`refine` to find additional information for the video.
:param str path: existing directory path to scan.
:param datetime.timedelta age: maximum age of the video or archive.
:param bool archives: scan videos in archives.
:return: the scanned videos.
:rtype: list of :class:`~subliminal.video.Video`
"""
# check for non-existing path
if not os.path.exists(path):
raise ValueError('Path does not exist')
# check for non-directory path
if not os.path.isdir(path):
raise ValueError('Path is not a directory')
# walk the path
videos = []
for dirpath, dirnames, filenames in os.walk(path):
logger.debug('Walking directory %r', dirpath)
# remove badly encoded and hidden dirnames
for dirname in list(dirnames):
if dirname.startswith('.'):
logger.debug('Skipping hidden dirname %r in %r', dirname, dirpath)
dirnames.remove(dirname)
# Skip Sample folder
if dirname.lower() == 'sample':
logger.debug('Skipping sample dirname %r in %r', dirname, dirpath)
dirnames.remove(dirname)
# scan for videos
for filename in filenames:
# filter on videos and archives
if not (filename.lower().endswith(VIDEO_EXTENSIONS) or
archives and filename.lower().endswith(ARCHIVE_EXTENSIONS)):
continue
# skip hidden files
if filename.startswith('.'):
logger.debug('Skipping hidden filename %r in %r', filename, dirpath)
continue
# skip 'sample' media files
if os.path.splitext(filename)[0].lower() == 'sample':
logger.debug('Skipping sample filename %r in %r', filename, dirpath)
continue
# reconstruct the file path
filepath = os.path.join(dirpath, filename)
# skip links
if os.path.islink(filepath):
logger.debug('Skipping link %r in %r', filename, dirpath)
continue
# skip old files
try:
file_age = datetime.utcfromtimestamp(os.path.getmtime(filepath))
except ValueError:
logger.warning('Could not get age of file %r in %r', filename, dirpath)
continue
else:
if age and datetime.utcnow() - file_age > age:
logger.debug('Skipping old file %r in %r', filename, dirpath)
continue
# scan
if filename.lower().endswith(VIDEO_EXTENSIONS): # video
try:
video = scan_video(filepath)
except ValueError: # pragma: no cover
logger.exception('Error scanning video')
continue
elif archives and filename.lower().endswith(ARCHIVE_EXTENSIONS): # archive
try:
video = scan_archive(filepath)
except (Error, NotRarFile, RarCannotExec, ValueError): # pragma: no cover
logger.exception('Error scanning archive')
continue
else: # pragma: no cover
raise ValueError('Unsupported file %r' % filename)
videos.append(video)
return videos
def refine(video, episode_refiners=None, movie_refiners=None, refiner_configs=None, **kwargs):
"""Refine a video using :ref:`refiners`.
.. note::
Exceptions raised in refiners are silently passed and logged.
:param video: the video to refine.
:type video: :class:`~subliminal.video.Video`
:param tuple episode_refiners: refiners to use for episodes.
:param tuple movie_refiners: refiners to use for movies.
:param dict refiner_configs: refiner configuration as keyword arguments per refiner name to pass when
calling the refine method
:param \*\*kwargs: additional parameters for the :func:`~subliminal.refiners.refine` functions.
"""
refiners = ()
if isinstance(video, Episode):
refiners = episode_refiners or ('metadata', 'tvdb', 'omdb')
elif isinstance(video, Movie):
refiners = movie_refiners or ('metadata', 'omdb')
for refiner in ('hash', ) + refiners:
logger.info('Refining video with %s', refiner)
try:
refiner_manager[refiner].plugin(video, **dict((refiner_configs or {}).get(refiner, {}), **kwargs))
except Exception as e:
handle_exception(e, 'Failed to refine video {0!r}'.format(video.name))
def list_subtitles(videos, languages, pool_class=ProviderPool, **kwargs):
"""List subtitles.
The `videos` must pass the `languages` check of :func:`check_video`.
:param videos: videos to list subtitles for.
:type videos: set of :class:`~subliminal.video.Video`
:param languages: languages to search for.
:type languages: set of :class:`~babelfish.language.Language`
:param pool_class: class to use as provider pool.
:type pool_class: :class:`ProviderPool`, :class:`AsyncProviderPool` or similar
:param \*\*kwargs: additional parameters for the provided `pool_class` constructor.
:return: found subtitles per video.
:rtype: dict of :class:`~subliminal.video.Video` to list of :class:`~subliminal.subtitle.Subtitle`
"""
listed_subtitles = defaultdict(list)
# check videos
checked_videos = []
for video in videos:
if not check_video(video, languages=languages):
logger.info('Skipping video %r', video)
continue
checked_videos.append(video)
# return immediately if no video passed the checks
if not checked_videos:
return listed_subtitles
# list subtitles
with pool_class(**kwargs) as pool:
for video in checked_videos:
logger.info('Listing subtitles for %r', video)
subtitles = pool.list_subtitles(video, languages - video.subtitle_languages)
listed_subtitles[video].extend(subtitles)
logger.info('Found %d subtitle(s)', len(subtitles))
return listed_subtitles
def download_subtitles(subtitles, pool_class=ProviderPool, **kwargs):
"""Download :attr:`~subliminal.subtitle.Subtitle.content` of `subtitles`.
:param subtitles: subtitles to download.
:type subtitles: list of :class:`~subliminal.subtitle.Subtitle`
:param pool_class: class to use as provider pool.
:type pool_class: :class:`ProviderPool`, :class:`AsyncProviderPool` or similar
:param \*\*kwargs: additional parameters for the provided `pool_class` constructor.
"""
with pool_class(**kwargs) as pool:
for subtitle in subtitles:
logger.info('Downloading subtitle %r', subtitle)
pool.download_subtitle(subtitle)
def download_best_subtitles(videos, languages, min_score=0, hearing_impaired=False, only_one=False, compute_score=None,
pool_class=ProviderPool, **kwargs):
"""List and download the best matching subtitles.
The `videos` must pass the `languages` and `undefined` (`only_one`) checks of :func:`check_video`.
:param videos: videos to download subtitles for.
:type videos: set of :class:`~subliminal.video.Video`
:param languages: languages to download.
:type languages: set of :class:`~babelfish.language.Language`
:param int min_score: minimum score for a subtitle to be downloaded.
:param bool hearing_impaired: hearing impaired preference.
:param bool only_one: download only one subtitle, not one per language.
:param compute_score: function that takes `subtitle` and `video` as positional arguments,
`hearing_impaired` as keyword argument and returns the score.
:param pool_class: class to use as provider pool.
:type pool_class: :class:`ProviderPool`, :class:`AsyncProviderPool` or similar
:param \*\*kwargs: additional parameters for the provided `pool_class` constructor.
:return: downloaded subtitles per video.
:rtype: dict of :class:`~subliminal.video.Video` to list of :class:`~subliminal.subtitle.Subtitle`
"""
downloaded_subtitles = defaultdict(list)
# check videos
checked_videos = []
for video in videos:
if not check_video(video, languages=languages, undefined=only_one):
logger.info('Skipping video %r', video)
continue
checked_videos.append(video)
# return immediately if no video passed the checks
if not checked_videos:
return downloaded_subtitles
# download best subtitles
with pool_class(**kwargs) as pool:
for video in checked_videos:
logger.info('Downloading best subtitles for %r', video)
subtitles = pool.download_best_subtitles(pool.list_subtitles(video, languages - video.subtitle_languages),
video, languages, min_score=min_score,
hearing_impaired=hearing_impaired, only_one=only_one,
compute_score=compute_score)
logger.info('Downloaded %d subtitle(s)', len(subtitles))
downloaded_subtitles[video].extend(subtitles)
return downloaded_subtitles
def save_subtitles(video, subtitles, single=False, directory=None, encoding=None):
"""Save subtitles on filesystem.
Subtitles are saved in the order of the list. If a subtitle with a language has already been saved, other subtitles
with the same language are silently ignored.
The extension used is `.lang.srt` by default or `.srt` is `single` is `True`, with `lang` being the IETF code for
the :attr:`~subliminal.subtitle.Subtitle.language` of the subtitle.
:param video: video of the subtitles.
:type video: :class:`~subliminal.video.Video`
:param subtitles: subtitles to save.
:type subtitles: list of :class:`~subliminal.subtitle.Subtitle`
:param bool single: save a single subtitle, default is to save one subtitle per language.
:param str directory: path to directory where to save the subtitles, default is next to the video.
:param str encoding: encoding in which to save the subtitles, default is to keep original encoding.
:return: the saved subtitles
:rtype: list of :class:`~subliminal.subtitle.Subtitle`
"""
saved_subtitles = []
for subtitle in subtitles:
# check content
if subtitle.content is None:
logger.error('Skipping subtitle %r: no content', subtitle)
continue
# check language
if subtitle.language in set(s.language for s in saved_subtitles):
logger.debug('Skipping subtitle %r: language already saved', subtitle)
continue
# create subtitle path
subtitle_path = subtitle.get_path(video, single=single)
if directory is not None:
subtitle_path = os.path.join(directory, os.path.split(subtitle_path)[1])
# save content as is or in the specified encoding
logger.info('Saving %r to %r', subtitle, subtitle_path)
if encoding is None:
with io.open(subtitle_path, 'wb') as f:
f.write(subtitle.content)
else:
with io.open(subtitle_path, 'w', encoding=encoding) as f:
f.write(subtitle.text)
saved_subtitles.append(subtitle)
# check single
if single:
break
return saved_subtitles
subliminal-2.1.0/subliminal/exceptions.py 0000644 0001750 0001750 00000001241 13653235327 021030 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
class Error(Exception):
"""Base class for exceptions in subliminal."""
pass
class ProviderError(Error):
"""Exception raised by providers."""
pass
class ConfigurationError(ProviderError):
"""Exception raised by providers when badly configured."""
pass
class AuthenticationError(ProviderError):
"""Exception raised by providers when authentication failed."""
pass
class ServiceUnavailable(ProviderError):
"""Exception raised when status is '503 Service Unavailable'."""
pass
class DownloadLimitExceeded(ProviderError):
"""Exception raised by providers when download limit is exceeded."""
pass
subliminal-2.1.0/subliminal/extensions.py 0000644 0001750 0001750 00000010273 13653235327 021053 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
from pkg_resources import EntryPoint
from stevedore import ExtensionManager
class RegistrableExtensionManager(ExtensionManager):
""":class:~stevedore.extensions.ExtensionManager` with support for registration.
It allows loading of internal extensions without setup and registering/unregistering additional extensions.
Loading is done in this order:
* Entry point extensions
* Internal extensions
* Registered extensions
:param str namespace: namespace argument for :class:~stevedore.extensions.ExtensionManager`.
:param list internal_extensions: internal extensions to use with entry point syntax.
:param \*\*kwargs: additional parameters for the :class:~stevedore.extensions.ExtensionManager` constructor.
"""
def __init__(self, namespace, internal_extensions, **kwargs):
#: Registered extensions with entry point syntax
self.registered_extensions = []
#: Internal extensions with entry point syntax
self.internal_extensions = internal_extensions
super(RegistrableExtensionManager, self).__init__(namespace, **kwargs)
def list_entry_points(self):
# copy of default extensions
eps = list(super(RegistrableExtensionManager, self).list_entry_points())
# internal extensions
for iep in self.internal_extensions:
ep = EntryPoint.parse(iep)
if ep.name not in [e.name for e in eps]:
eps.append(ep)
# registered extensions
for rep in self.registered_extensions:
ep = EntryPoint.parse(rep)
if ep.name not in [e.name for e in eps]:
eps.append(ep)
return eps
def register(self, entry_point):
"""Register an extension
:param str entry_point: extension to register (entry point syntax).
:raise: ValueError if already registered.
"""
if entry_point in self.registered_extensions:
raise ValueError('Extension already registered')
ep = EntryPoint.parse(entry_point)
if ep.name in self.names():
raise ValueError('An extension with the same name already exist')
ext = self._load_one_plugin(ep, False, (), {}, False)
self.extensions.append(ext)
if self._extensions_by_name is not None:
self._extensions_by_name[ext.name] = ext
self.registered_extensions.insert(0, entry_point)
def unregister(self, entry_point):
"""Unregister a provider
:param str entry_point: provider to unregister (entry point syntax).
"""
if entry_point not in self.registered_extensions:
raise ValueError('Extension not registered')
ep = EntryPoint.parse(entry_point)
self.registered_extensions.remove(entry_point)
if self._extensions_by_name is not None:
del self._extensions_by_name[ep.name]
for i, ext in enumerate(self.extensions):
if ext.name == ep.name:
del self.extensions[i]
break
#: Provider manager
provider_manager = RegistrableExtensionManager('subliminal.providers', [
'addic7ed = subliminal.providers.addic7ed:Addic7edProvider',
'argenteam = subliminal.providers.argenteam:ArgenteamProvider',
'legendastv = subliminal.providers.legendastv:LegendasTVProvider',
'opensubtitles = subliminal.providers.opensubtitles:OpenSubtitlesProvider',
'opensubtitlesvip = subliminal.providers.opensubtitles:OpenSubtitlesVipProvider',
'podnapisi = subliminal.providers.podnapisi:PodnapisiProvider',
'shooter = subliminal.providers.shooter:ShooterProvider',
'thesubdb = subliminal.providers.thesubdb:TheSubDBProvider',
'tvsubtitles = subliminal.providers.tvsubtitles:TVsubtitlesProvider'
])
#: Disabled providers
disabled_providers = ['opensubtitlesvip']
#: Default enabled providers
default_providers = [p for p in provider_manager.names() if p not in disabled_providers]
#: Refiner manager
refiner_manager = RegistrableExtensionManager('subliminal.refiners', [
'hash = subliminal.refiners.hash:refine',
'metadata = subliminal.refiners.metadata:refine',
'omdb = subliminal.refiners.omdb:refine',
'tvdb = subliminal.refiners.tvdb:refine'
])
subliminal-2.1.0/subliminal/matches.py 0000644 0001750 0001750 00000015441 13653235327 020302 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
from rebulk.loose import ensure_list
from .score import get_equivalent_release_groups, score_keys
from .video import Episode, Movie
from .utils import sanitize, sanitize_release_group
def series_matches(video, title=None, **kwargs):
"""Whether the `video` matches the series title.
:param video: the video.
:type video: :class:`~subliminal.video.Video`
:param str title: the series name.
:return: whether there's a match
:rtype: bool
"""
if isinstance(video, Episode):
return video.series and sanitize(title) in (
sanitize(name) for name in [video.series] + video.alternative_series
)
def title_matches(video, title=None, episode_title=None, **kwargs):
"""Whether the movie matches the movie `title` or the series matches the `episode_title`.
:param video: the video.
:type video: :class:`~subliminal.video.Video`
:param str title: the movie title.
:param str episode_title: the series episode title.
:return: whether there's a match
:rtype: bool
"""
if isinstance(video, Episode):
return video.title and sanitize(episode_title) == sanitize(video.title)
if isinstance(video, Movie):
return video.title and sanitize(title) == sanitize(video.title)
def season_matches(video, season=None, **kwargs):
"""Whether the episode matches the `season`.
:param video: the video.
:type video: :class:`~subliminal.video.Video`
:param int season: the episode season.
:return: whether there's a match
:rtype: bool
"""
if isinstance(video, Episode):
return video.season and season == video.season
def episode_matches(video, episode=None, **kwargs):
"""Whether the episode matches the `episode`.
:param video: the video.
:type video: :class:`~subliminal.video.Video`
:param episode: the episode season.
:type: list of int or int
:return: whether there's a match
:rtype: bool
"""
if isinstance(video, Episode):
return video.episodes and ensure_list(episode) == video.episodes
def year_matches(video, year=None, partial=False, **kwargs):
"""Whether the video matches the `year`.
:param video: the video.
:type video: :class:`~subliminal.video.Video`
:param int year: the video year.
:param bool partial: whether or not the guess is partial.
:return: whether there's a match
:rtype: bool
"""
if video.year and year == video.year:
return True
if isinstance(video, Episode):
# count "no year" as an information
return not partial and video.original_series and not year
def country_matches(video, country=None, partial=False, **kwargs):
"""Whether the video matches the `country`.
:param video: the video.
:type video: :class:`~subliminal.video.Video`
:param country: the video country.
:type country: :class:`~babelfish.country.Country`
:param bool partial: whether or not the guess is partial.
:return: whether there's a match
:rtype: bool
"""
if video.country and country == video.country:
return True
if isinstance(video, Episode):
# count "no country" as an information
return not partial and video.original_series and not country
if isinstance(video, Movie):
# count "no country" as an information
return not video.country and not country
def release_group_matches(video, release_group=None, **kwargs):
"""Whether the video matches the `release_group`.
:param video: the video.
:type video: :class:`~subliminal.video.Video`
:param str release_group: the video release group.
:return: whether there's a match
:rtype: bool
"""
return (video.release_group and release_group and
any(r in sanitize_release_group(release_group)
for r in get_equivalent_release_groups(sanitize_release_group(video.release_group))))
def streaming_service_matches(video, streaming_service=None, **kwargs):
"""Whether the video matches the `streaming_service`.
:param video: the video.
:type video: :class:`~subliminal.video.Video`
:param str streaming_service: the video streaming service
:return: whether there's a match
:rtype: bool
"""
return video.streaming_service and streaming_service == video.streaming_service
def resolution_matches(video, screen_size=None, **kwargs):
"""Whether the video matches the `resolution`.
:param video: the video.
:type video: :class:`~subliminal.video.Video`
:param str screen_size: the video resolution
:return: whether there's a match
:rtype: bool
"""
return video.resolution and screen_size == video.resolution
def source_matches(video, source=None, **kwargs):
"""Whether the video matches the `source`.
:param video: the video.
:type video: :class:`~subliminal.video.Video`
:param str source: the video source
:return: whether there's a match
:rtype: bool
"""
return video.source and source == video.source
def video_codec_matches(video, video_codec=None, **kwargs):
"""Whether the video matches the `video_codec`.
:param video: the video.
:type video: :class:`~subliminal.video.Video`
:param str video_codec: the video codec
:return: whether there's a match
:rtype: bool
"""
return video.video_codec and video_codec == video.video_codec
def audio_codec_matches(video, audio_codec=None, **kwargs):
"""Whether the video matches the `audio_codec`.
:param video: the video.
:type video: :class:`~subliminal.video.Video`
:param str audio_codec: the video audio codec
:return: whether there's a match
:rtype: bool
"""
return video.audio_codec and audio_codec == video.audio_codec
#: Available matches functions
matches_manager = {
'series': series_matches,
'title': title_matches,
'season': season_matches,
'episode': episode_matches,
'year': year_matches,
'country': country_matches,
'release_group': release_group_matches,
'streaming_service': streaming_service_matches,
'resolution': resolution_matches,
'source': source_matches,
'video_codec': video_codec_matches,
'audio_codec': audio_codec_matches
}
def guess_matches(video, guess, partial=False):
"""Get matches between a `video` and a `guess`.
If a guess is `partial`, the absence information won't be counted as a match.
:param video: the video.
:type video: :class:`~subliminal.video.Video`
:param guess: the guess.
:type guess: dict
:param bool partial: whether or not the guess is partial.
:return: matches between the `video` and the `guess`.
:rtype: set
"""
matches = set()
for key in score_keys:
if key in matches_manager and matches_manager[key](video, partial=partial, **guess):
matches.add(key)
return matches
subliminal-2.1.0/subliminal/score.py 0000755 0001750 0001750 00000022770 13653235327 017777 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
"""
This module provides the default implementation of the `compute_score` parameter in
:meth:`~subliminal.core.ProviderPool.download_best_subtitles` and :func:`~subliminal.core.download_best_subtitles`.
.. note::
To avoid unnecessary dependency on `sympy `_ and boost subliminal's import time, the
resulting scores are hardcoded here and manually updated when the set of equations change.
Available matches:
* hash
* title
* year
* country
* series
* season
* episode
* release_group
* streaming_service
* source
* audio_codec
* resolution
* hearing_impaired
* video_codec
* series_imdb_id
* imdb_id
* tvdb_id
"""
from __future__ import division, print_function
import logging
from .video import Episode, Movie
logger = logging.getLogger(__name__)
#: Scores for episodes
episode_scores = {'hash': 809, 'series': 405, 'year': 135, 'country': 135, 'season': 45, 'episode': 45,
'release_group': 15, 'streaming_service': 15, 'source': 7, 'audio_codec': 3, 'resolution': 2,
'video_codec': 2, 'hearing_impaired': 1}
#: Scores for movies
movie_scores = {'hash': 269, 'title': 135, 'year': 45, 'country': 45, 'release_group': 15, 'streaming_service': 15,
'source': 7, 'audio_codec': 3, 'resolution': 2, 'video_codec': 2, 'hearing_impaired': 1}
#: All scores names
score_keys = set([s for s in episode_scores.keys()] + [s for s in movie_scores.keys()])
#: Equivalent release groups
equivalent_release_groups = ({'LOL', 'DIMENSION'}, {'ASAP', 'IMMERSE', 'FLEET'}, {'AVS', 'SVA'})
def get_equivalent_release_groups(release_group):
"""Get all the equivalents of the given release group.
:param str release_group: the release group to get the equivalents of.
:return: the equivalent release groups.
:rtype: set
"""
for equivalent_release_group in equivalent_release_groups:
if release_group in equivalent_release_group:
return equivalent_release_group
return {release_group}
def get_scores(video):
"""Get the scores dict for the given `video`.
This will return either :data:`episode_scores` or :data:`movie_scores` based on the type of the `video`.
:param video: the video to compute the score against.
:type video: :class:`~subliminal.video.Video`
:return: the scores dict.
:rtype: dict
"""
if isinstance(video, Episode):
return episode_scores
elif isinstance(video, Movie):
return movie_scores
raise ValueError('video must be an instance of Episode or Movie')
def compute_score(subtitle, video, hearing_impaired=None):
"""Compute the score of the `subtitle` against the `video` with `hearing_impaired` preference.
:func:`compute_score` uses the :meth:`Subtitle.get_matches ` method and
applies the scores (either from :data:`episode_scores` or :data:`movie_scores`) after some processing.
:param subtitle: the subtitle to compute the score of.
:type subtitle: :class:`~subliminal.subtitle.Subtitle`
:param video: the video to compute the score against.
:type video: :class:`~subliminal.video.Video`
:param bool hearing_impaired: hearing impaired preference.
:return: score of the subtitle.
:rtype: int
"""
logger.info('Computing score of %r for video %r with %r', subtitle, video, dict(hearing_impaired=hearing_impaired))
# get the scores dict
scores = get_scores(video)
logger.debug('Using scores %r', scores)
# get the matches
matches = subtitle.get_matches(video)
logger.debug('Found matches %r', matches)
# on hash match, discard everything else
if 'hash' in matches:
logger.debug('Keeping only hash match')
matches &= {'hash'}
# handle equivalent matches
if isinstance(video, Episode):
if 'title' in matches:
logger.debug('Adding title match equivalent')
matches.add('episode')
if 'series_imdb_id' in matches:
logger.debug('Adding series_imdb_id match equivalent')
matches |= {'series', 'year', 'country'}
if 'imdb_id' in matches:
logger.debug('Adding imdb_id match equivalents')
matches |= {'series', 'year', 'country', 'season', 'episode'}
if 'tvdb_id' in matches:
logger.debug('Adding tvdb_id match equivalents')
matches |= {'series', 'year', 'country', 'season', 'episode'}
if 'series_tvdb_id' in matches:
logger.debug('Adding series_tvdb_id match equivalents')
matches |= {'series', 'year', 'country'}
elif isinstance(video, Movie):
if 'imdb_id' in matches:
logger.debug('Adding imdb_id match equivalents')
matches |= {'title', 'year', 'country'}
# handle hearing impaired
if hearing_impaired is not None and subtitle.hearing_impaired == hearing_impaired:
logger.debug('Matched hearing_impaired')
matches.add('hearing_impaired')
# compute the score
score = sum((scores.get(match, 0) for match in matches))
logger.info('Computed score %r with final matches %r', score, matches)
# ensure score is within valid bounds
assert 0 <= score <= scores['hash'] + scores['hearing_impaired']
return score
def solve_episode_equations():
from sympy import Eq, solve, symbols
hash, series, year, country, season, episode = symbols('hash series year country season episode')
release_group, streaming_service, source = symbols('release_group streaming_service source')
audio_codec, resolution, video_codec = symbols('audio_codec resolution video_codec')
hearing_impaired = symbols('hearing_impaired')
equations = [
# hash is best
Eq(hash, series + year + country + season + episode +
release_group + streaming_service + source + audio_codec + resolution + video_codec),
# series counts for the most part in the total score
Eq(series, year + country + season + episode + release_group + streaming_service + source +
audio_codec + resolution + video_codec + 1),
# year is the second most important part
Eq(year, season + episode + release_group + streaming_service + source +
audio_codec + resolution + video_codec + 1),
# year counts as much as country
Eq(year, country),
# season is important too
Eq(season, release_group + streaming_service + source + audio_codec + resolution + video_codec + 1),
# episode is equally important to season
Eq(episode, season),
# release group is the next most wanted match
Eq(release_group, source + audio_codec + resolution + video_codec + 1),
# streaming service counts as much as release group
Eq(release_group, streaming_service),
# source counts as much as audio_codec, resolution and video_codec
Eq(source, audio_codec + resolution + video_codec),
# audio_codec is more valuable than video_codec
Eq(audio_codec, video_codec + 1),
# resolution counts as much as video_codec
Eq(resolution, video_codec),
# video_codec is the least valuable match but counts more than the sum of all scoring increasing matches
Eq(video_codec, hearing_impaired + 1),
# hearing impaired is only used for score increasing, so put it to 1
Eq(hearing_impaired, 1),
]
return solve(equations, [hash, series, year, country, season, episode, release_group, streaming_service, source,
audio_codec, resolution, hearing_impaired, video_codec])
def solve_movie_equations():
from sympy import Eq, solve, symbols
hash, title, year, country, release_group = symbols('hash title year country release_group')
streaming_service, source, audio_codec, resolution = symbols('streaming_service source audio_codec resolution')
video_codec, hearing_impaired = symbols('video_codec hearing_impaired')
equations = [
# hash is best
Eq(hash, title + year + country + release_group + streaming_service +
source + audio_codec + resolution + video_codec),
# title counts for the most part in the total score
Eq(title, year + country + release_group + streaming_service +
source + audio_codec + resolution + video_codec + 1),
# year is the second most important part
Eq(year, release_group + streaming_service + source + audio_codec + resolution + video_codec + 1),
# year counts as much as country
Eq(year, country),
# release group is the next most wanted match
Eq(release_group, source + audio_codec + resolution + video_codec + 1),
# streaming service counts as much as release group
Eq(release_group, streaming_service),
# source counts as much as audio_codec, resolution and video_codec
Eq(source, audio_codec + resolution + video_codec),
# audio_codec is more valuable than video_codec
Eq(audio_codec, video_codec + 1),
# resolution counts as much as video_codec
Eq(resolution, video_codec),
# video_codec is the least valuable match but counts more than the sum of all scoring increasing matches
Eq(video_codec, hearing_impaired + 1),
# hearing impaired is only used for score increasing, so put it to 1
Eq(hearing_impaired, 1),
]
return solve(equations, [hash, title, year, country, release_group, streaming_service, source, audio_codec,
resolution, hearing_impaired, video_codec])
subliminal-2.1.0/subliminal/subtitle.py 0000644 0001750 0001750 00000014463 13653235327 020514 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
import codecs
import logging
import os
import chardet
import pysrt
from six import text_type
logger = logging.getLogger(__name__)
#: Subtitle extensions
SUBTITLE_EXTENSIONS = ('.srt', '.sub', '.smi', '.txt', '.ssa', '.ass', '.mpl')
class Subtitle(object):
"""Base class for subtitle.
:param language: language of the subtitle.
:type language: :class:`~babelfish.language.Language`
:param bool hearing_impaired: whether or not the subtitle is hearing impaired.
:param page_link: URL of the web page from which the subtitle can be downloaded.
:type page_link: str
:param encoding: Text encoding of the subtitle.
:type encoding: str
"""
#: Name of the provider that returns that class of subtitle
provider_name = ''
def __init__(self, language, hearing_impaired=False, page_link=None, encoding=None):
#: Language of the subtitle
self.language = language
#: Whether or not the subtitle is hearing impaired
self.hearing_impaired = hearing_impaired
#: URL of the web page from which the subtitle can be downloaded
self.page_link = page_link
#: Content as bytes
self.content = None
#: Encoding to decode with when accessing :attr:`text`
self.encoding = None
# validate the encoding
if encoding:
try:
self.encoding = codecs.lookup(encoding).name
except (TypeError, LookupError):
logger.debug('Unsupported encoding %s', encoding)
@property
def id(self):
"""Unique identifier of the subtitle"""
raise NotImplementedError
@property
def info(self):
"""Info of the subtitle, human readable. Usually the subtitle name for GUI rendering"""
raise NotImplementedError
@property
def text(self):
"""Content as string
If :attr:`encoding` is None, the encoding is guessed with :meth:`guess_encoding`
"""
if not self.content:
return
if not isinstance(self.content, text_type):
if self.encoding:
return self.content.decode(self.encoding, errors='replace')
guessed_encoding = self.guess_encoding()
if guessed_encoding:
return self.content.decode(guessed_encoding, errors='replace')
return None
return self.content
def is_valid(self):
"""Check if a :attr:`text` is a valid SubRip format.
:return: whether or not the subtitle is valid.
:rtype: bool
"""
if not self.text:
return False
try:
pysrt.from_string(self.text, error_handling=pysrt.ERROR_RAISE)
except pysrt.Error as e:
if e.args[0] < 80:
return False
return True
def guess_encoding(self):
"""Guess encoding using the language, falling back on chardet.
:return: the guessed encoding.
:rtype: str
"""
logger.info('Guessing encoding for language %s', self.language)
# always try utf-8 first
encodings = ['utf-8']
# add language-specific encodings
if self.language.alpha3 == 'zho':
encodings.extend(['gb18030', 'big5'])
elif self.language.alpha3 == 'jpn':
encodings.append('shift-jis')
elif self.language.alpha3 == 'ara':
encodings.append('windows-1256')
elif self.language.alpha3 == 'heb':
encodings.append('windows-1255')
elif self.language.alpha3 == 'tur':
encodings.extend(['iso-8859-9', 'windows-1254'])
elif self.language.alpha3 == 'pol':
# Eastern European Group 1
encodings.extend(['windows-1250'])
elif self.language.alpha3 == 'bul':
# Eastern European Group 2
encodings.extend(['windows-1251'])
else:
# Western European (windows-1252)
encodings.append('latin-1')
# try to decode
logger.debug('Trying encodings %r', encodings)
for encoding in encodings:
try:
self.content.decode(encoding)
except UnicodeDecodeError:
pass
else:
logger.info('Guessed encoding %s', encoding)
return encoding
logger.warning('Could not guess encoding from language')
# fallback on chardet
encoding = chardet.detect(self.content)['encoding']
logger.info('Chardet found encoding %s', encoding)
return encoding
def get_path(self, video, single=False):
"""Get the subtitle path using the `video`, `language` and `extension`.
:param video: path to the video.
:type video: :class:`~subliminal.video.Video`
:param bool single: save a single subtitle, default is to save one subtitle per language.
:return: path of the subtitle.
:rtype: str
"""
return get_subtitle_path(video.name, None if single else self.language)
def get_matches(self, video):
"""Get the matches against the `video`.
:param video: the video to get the matches with.
:type video: :class:`~subliminal.video.Video`
:return: matches of the subtitle.
:rtype: set
"""
raise NotImplementedError
def __hash__(self):
return hash(self.provider_name + '-' + self.id)
def __repr__(self):
return '<%s %r [%s]>' % (self.__class__.__name__, self.id, self.language)
def get_subtitle_path(video_path, language=None, extension='.srt'):
"""Get the subtitle path using the `video_path` and `language`.
:param str video_path: path to the video.
:param language: language of the subtitle to put in the path.
:type language: :class:`~babelfish.language.Language`
:param str extension: extension of the subtitle.
:return: path of the subtitle.
:rtype: str
"""
subtitle_root = os.path.splitext(video_path)[0]
if language:
subtitle_root += '.' + str(language)
return subtitle_root + extension
def fix_line_ending(content):
"""Fix line ending of `content` by changing it to \n.
:param bytes content: content of the subtitle.
:return: the content with fixed line endings.
:rtype: bytes
"""
return content.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
subliminal-2.1.0/subliminal/utils.py 0000644 0001750 0001750 00000013557 13653235327 020024 0 ustar jones jones 0000000 0000000 # -*- coding: utf-8 -*-
import logging
from datetime import datetime
import hashlib
import os
import re
import socket
import struct
import requests
from requests.exceptions import SSLError
from six.moves.xmlrpc_client import ProtocolError
from .exceptions import ServiceUnavailable
logger = logging.getLogger(__name__)
def hash_opensubtitles(video_path):
"""Compute a hash using OpenSubtitles' algorithm.
:param str video_path: path of the video.
:return: the hash.
:rtype: str
"""
bytesize = struct.calcsize(b'