Compare commits

...

24 Commits

Author SHA1 Message Date
Philipp Hagemeister
7d6413341a release 2014.04.11.1 2014-04-11 01:29:54 +02:00
Philipp Hagemeister
140012d0f6 release 2014.04.11 2014-04-11 01:28:30 +02:00
Philipp Hagemeister
4be9f8c814 [ninegag] Add support for p/ URLs 2014-04-11 01:25:24 +02:00
Sergey M․
5c802bac37 [byutv] Fix test 2014-04-10 19:37:55 +07:00
Sergey M․
6c30ff756a [mpora] Fix test 2014-04-10 19:10:03 +07:00
Jaime Marquínez Ferrándiz
62749e4708 [morningstar] Also support 'Cover' (#2729) 2014-04-09 20:51:28 +02:00
Jaime Marquínez Ferrándiz
6b7dee4b38 [morningstar] Recognize urls that use 'videoCenter' (fixes #2729) 2014-04-09 20:45:49 +02:00
Sergey M․
ef2041eb4e [br] Add audio extraction and support more URLs (Closes #2728) 2014-04-09 20:19:27 +07:00
Philipp Hagemeister
29e3e682af [comedycentral] Match more URLs
Looks like they only offer clips instead of full episodes now. We'll need to add new parsing code as well.
2014-04-09 11:43:15 +02:00
Philipp Hagemeister
f983c44199 Merge pull request #2725 from foolscap/subtitles-error-fix
Fix subtitle download error reporting (Fixes #2724)
2014-04-09 10:16:06 +02:00
robbie
e4db19511a Fix subtitle download error reporting (Fixes #2724) 2014-04-08 15:59:27 +01:00
Sergey M․
c47d21da80 [ntv] Update test 2014-04-08 19:11:40 +07:00
Philipp Hagemeister
269aecd0c0 [ffmpeg] Do not pass in bytes to subprocess (Fixes #2717) 2014-04-07 23:33:05 +02:00
Philipp Hagemeister
aafddb2b0a Merge remote-tracking branch 'anisse/fix-content-encoding-charset' 2014-04-07 23:27:03 +02:00
Philipp Hagemeister
6262ac8ac5 release 2014.04.07.4 2014-04-07 23:23:54 +02:00
Philipp Hagemeister
89938c719e Fix Windows output for non-BMP unicode characters 2014-04-07 23:23:48 +02:00
Anisse Astier
ec0fafbb19 [extractor/common] fallback on utf-8 when charset is not found
fixes #2721
2014-04-07 23:10:16 +02:00
Philipp Hagemeister
a5863bdf33 release 2014.04.07.3 2014-04-07 22:48:45 +02:00
Philipp Hagemeister
b58ddb32ba [utils] Completely rewrite Windows output (Fixes #2672) 2014-04-07 22:48:13 +02:00
Philipp Hagemeister
b9e12a8140 release 2014.04.07.2 2014-04-07 21:41:20 +02:00
Philipp Hagemeister
104aa7388a Use our own encoding when writing strings 2014-04-07 21:40:34 +02:00
Philipp Hagemeister
c3855d28b0 Merge branch 'master' of github.com:rg3/youtube-dl 2014-04-07 19:57:51 +02:00
Philipp Hagemeister
734f90bb41 Use --encoding when outputting 2014-04-07 19:57:42 +02:00
Jaime Marquínez Ferrándiz
91a6addeeb Add support for rtve.es/alacarta 2014-04-07 17:30:32 +02:00
15 changed files with 334 additions and 110 deletions

View File

@@ -156,6 +156,15 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch( self.assertMatch(
'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3', 'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
['ComedyCentralShows']) ['ComedyCentralShows'])
self.assertMatch(
'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary',
['ComedyCentralShows'])
self.assertMatch(
'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall',
['ComedyCentralShows'])
self.assertMatch(
'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
['ComedyCentralShows'])
def test_yahoo_https(self): def test_yahoo_https(self):
# https://github.com/rg3/youtube-dl/issues/2701 # https://github.com/rg3/youtube-dl/issues/2701
@@ -163,5 +172,6 @@ class TestAllURLsMatching(unittest.TestCase):
'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html', 'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
['Yahoo']) ['Yahoo'])
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

34
youtube_dl/YoutubeDL.py Normal file → Executable file
View File

@@ -286,6 +286,9 @@ class YoutubeDL(object):
"""Print message to stdout if not in quiet mode.""" """Print message to stdout if not in quiet mode."""
return self.to_stdout(message, skip_eol, check_quiet=True) return self.to_stdout(message, skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
write_string(s, out=out, encoding=self.params.get('encoding'))
def to_stdout(self, message, skip_eol=False, check_quiet=False): def to_stdout(self, message, skip_eol=False, check_quiet=False):
"""Print message to stdout if not in quiet mode.""" """Print message to stdout if not in quiet mode."""
if self.params.get('logger'): if self.params.get('logger'):
@@ -295,7 +298,7 @@ class YoutubeDL(object):
terminator = ['\n', ''][skip_eol] terminator = ['\n', ''][skip_eol]
output = message + terminator output = message + terminator
write_string(output, self._screen_file) self._write_string(output, self._screen_file)
def to_stderr(self, message): def to_stderr(self, message):
"""Print message to stderr.""" """Print message to stderr."""
@@ -305,7 +308,7 @@ class YoutubeDL(object):
else: else:
message = self._bidi_workaround(message) message = self._bidi_workaround(message)
output = message + '\n' output = message + '\n'
write_string(output, self._err_file) self._write_string(output, self._err_file)
def to_console_title(self, message): def to_console_title(self, message):
if not self.params.get('consoletitle', False): if not self.params.get('consoletitle', False):
@@ -315,21 +318,21 @@ class YoutubeDL(object):
# already of type unicode() # already of type unicode()
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
elif 'TERM' in os.environ: elif 'TERM' in os.environ:
write_string('\033]0;%s\007' % message, self._screen_file) self._write_string('\033]0;%s\007' % message, self._screen_file)
def save_console_title(self): def save_console_title(self):
if not self.params.get('consoletitle', False): if not self.params.get('consoletitle', False):
return return
if 'TERM' in os.environ: if 'TERM' in os.environ:
# Save the title on stack # Save the title on stack
write_string('\033[22;0t', self._screen_file) self._write_string('\033[22;0t', self._screen_file)
def restore_console_title(self): def restore_console_title(self):
if not self.params.get('consoletitle', False): if not self.params.get('consoletitle', False):
return return
if 'TERM' in os.environ: if 'TERM' in os.environ:
# Restore the title from stack # Restore the title from stack
write_string('\033[23;0t', self._screen_file) self._write_string('\033[23;0t', self._screen_file)
def __enter__(self): def __enter__(self):
self.save_console_title() self.save_console_title()
@@ -933,7 +936,7 @@ class YoutubeDL(object):
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
subfile.write(sub) subfile.write(sub)
except (OSError, IOError): except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + descfn) self.report_error('Cannot write subtitles file ' + sub_filename)
return return
if self.params.get('writeinfojson', False): if self.params.get('writeinfojson', False):
@@ -1211,9 +1214,16 @@ class YoutubeDL(object):
if not self.params.get('verbose'): if not self.params.get('verbose'):
return return
write_string('[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % write_string(
(locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, self.get_encoding())) '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
write_string('[debug] youtube-dl version ' + __version__ + '\n') locale.getpreferredencoding(),
sys.getfilesystemencoding(),
sys.stdout.encoding,
self.get_encoding()),
encoding=None
)
self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
try: try:
sp = subprocess.Popen( sp = subprocess.Popen(
['git', 'rev-parse', '--short', 'HEAD'], ['git', 'rev-parse', '--short', 'HEAD'],
@@ -1222,20 +1232,20 @@ class YoutubeDL(object):
out, err = sp.communicate() out, err = sp.communicate()
out = out.decode().strip() out = out.decode().strip()
if re.match('[0-9a-f]+', out): if re.match('[0-9a-f]+', out):
write_string('[debug] Git HEAD: ' + out + '\n') self._write_string('[debug] Git HEAD: ' + out + '\n')
except: except:
try: try:
sys.exc_clear() sys.exc_clear()
except: except:
pass pass
write_string('[debug] Python version %s - %s' % self._write_string('[debug] Python version %s - %s' %
(platform.python_version(), platform_name()) + '\n') (platform.python_version(), platform_name()) + '\n')
proxy_map = {} proxy_map = {}
for handler in self._opener.handlers: for handler in self._opener.handlers:
if hasattr(handler, 'proxies'): if hasattr(handler, 'proxies'):
proxy_map.update(handler.proxies) proxy_map.update(handler.proxies)
write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n') self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
def _setup_opener(self): def _setup_opener(self):
timeout_val = self.params.get('socket_timeout') timeout_val = self.params.get('socket_timeout')

View File

@@ -208,6 +208,7 @@ from .rottentomatoes import RottenTomatoesIE
from .roxwel import RoxwelIE from .roxwel import RoxwelIE
from .rtlnow import RTLnowIE from .rtlnow import RTLnowIE
from .rts import RTSIE from .rts import RTSIE
from .rtve import RTVEALaCartaIE
from .rutube import ( from .rutube import (
RutubeIE, RutubeIE,
RutubeChannelIE, RutubeChannelIE,

View File

@@ -4,39 +4,72 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError from ..utils import (
ExtractorError,
int_or_none,
)
class BRIE(InfoExtractor): class BRIE(InfoExtractor):
IE_DESC = "Bayerischer Rundfunk Mediathek" IE_DESC = 'Bayerischer Rundfunk Mediathek'
_VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?:[a-z0-9\-/]+/)?(?P<id>[a-z0-9\-]+)\.html$" _VALID_URL = r'https?://(?:www\.)?br\.de/(?:[a-z0-9\-]+/)+(?P<id>[a-z0-9\-]+)\.html'
_BASE_URL = "http://www.br.de" _BASE_URL = 'http://www.br.de'
_TESTS = [ _TESTS = [
{ {
"url": "http://www.br.de/mediathek/video/anselm-gruen-114.html", 'url': 'http://www.br.de/mediathek/video/anselm-gruen-114.html',
"md5": "c4f83cf0f023ba5875aba0bf46860df2", 'md5': 'c4f83cf0f023ba5875aba0bf46860df2',
"info_dict": { 'info_dict': {
"id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532", 'id': '2c8d81c5-6fb7-4a74-88d4-e768e5856532',
"ext": "mp4", 'ext': 'mp4',
"title": "Feiern und Verzichten", 'title': 'Feiern und Verzichten',
"description": "Anselm Grün: Feiern und Verzichten", 'description': 'Anselm Grün: Feiern und Verzichten',
"uploader": "BR/Birgit Baier", 'uploader': 'BR/Birgit Baier',
"upload_date": "20140301" 'upload_date': '20140301',
} }
}, },
{ {
"url": "http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html", 'url': 'http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html',
"md5": "ab451b09d861dbed7d7cc9ab0be19ebe", 'md5': 'ab451b09d861dbed7d7cc9ab0be19ebe',
"info_dict": { 'info_dict': {
"id": "2c060e69-3a27-4e13-b0f0-668fac17d812", 'id': '2c060e69-3a27-4e13-b0f0-668fac17d812',
"ext": "mp4", 'ext': 'mp4',
"title": "Über den Pass", 'title': 'Über den Pass',
"description": "Die Eroberung der Alpen: Über den Pass", 'description': 'Die Eroberung der Alpen: Über den Pass',
"uploader": None,
"upload_date": None
} }
} },
{
'url': 'http://www.br.de/nachrichten/schaeuble-haushaltsentwurf-bundestag-100.html',
'md5': '3db0df1a9a9cd9fa0c70e6ea8aa8e820',
'info_dict': {
'id': 'c6aae3de-2cf9-43f2-957f-f17fef9afaab',
'ext': 'aac',
'title': '"Keine neuen Schulden im nächsten Jahr"',
'description': 'Haushaltsentwurf: "Keine neuen Schulden im nächsten Jahr"',
}
},
{
'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html',
'md5': 'dbab0aef2e047060ea7a21fc1ce1078a',
'info_dict': {
'id': '6ba73750-d405-45d3-861d-1ce8c524e059',
'ext': 'mp4',
'title': 'Umweltbewusster Häuslebauer',
'description': 'Uwe Erdelt: Umweltbewusster Häuslebauer',
}
},
{
'url': 'http://www.br.de/fernsehen/br-alpha/sendungen/kant-fuer-anfaenger/kritik-der-reinen-vernunft/kant-kritik-01-metaphysik100.html',
'md5': '23bca295f1650d698f94fc570977dae3',
'info_dict': {
'id': 'd982c9ce-8648-4753-b358-98abb8aec43d',
'ext': 'mp4',
'title': 'Folge 1 - Metaphysik',
'description': 'Kant für Anfänger: Folge 1 - Metaphysik',
'uploader': 'Eva Maria Steimle',
'upload_date': '20140117',
}
},
] ]
def _real_extract(self, url): def _real_extract(self, url):
@@ -44,56 +77,63 @@ class BRIE(InfoExtractor):
display_id = mobj.group('id') display_id = mobj.group('id')
page = self._download_webpage(url, display_id) page = self._download_webpage(url, display_id)
xml_url = self._search_regex( xml_url = self._search_regex(
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL") r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL')
xml = self._download_xml(self._BASE_URL + xml_url, None) xml = self._download_xml(self._BASE_URL + xml_url, None)
videos = [] medias = []
for xml_video in xml.findall("video"):
video = {
"id": xml_video.get("externalId"),
"title": xml_video.find("title").text,
"formats": self._extract_formats(xml_video.find("assets")),
"thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")),
"description": " ".join(xml_video.find("shareTitle").text.splitlines()),
"webpage_url": xml_video.find("permalink").text
}
if xml_video.find("author").text:
video["uploader"] = xml_video.find("author").text
if xml_video.find("broadcastDate").text:
video["upload_date"] = "".join(reversed(xml_video.find("broadcastDate").text.split(".")))
videos.append(video)
if len(videos) > 1: for xml_media in xml.findall('video') + xml.findall('audio'):
media = {
'id': xml_media.get('externalId'),
'title': xml_media.find('title').text,
'formats': self._extract_formats(xml_media.find('assets')),
'thumbnails': self._extract_thumbnails(xml_media.find('teaserImage/variants')),
'description': ' '.join(xml_media.find('shareTitle').text.splitlines()),
'webpage_url': xml_media.find('permalink').text
}
if xml_media.find('author').text:
media['uploader'] = xml_media.find('author').text
if xml_media.find('broadcastDate').text:
media['upload_date'] = ''.join(reversed(xml_media.find('broadcastDate').text.split('.')))
medias.append(media)
if len(medias) > 1:
self._downloader.report_warning( self._downloader.report_warning(
'found multiple videos; please ' 'found multiple medias; please '
'report this with the video URL to http://yt-dl.org/bug') 'report this with the video URL to http://yt-dl.org/bug')
if not videos: if not medias:
raise ExtractorError('No video entries found') raise ExtractorError('No media entries found')
return videos[0] return medias[0]
def _extract_formats(self, assets): def _extract_formats(self, assets):
def text_or_none(asset, tag):
elem = asset.find(tag)
return None if elem is None else elem.text
formats = [{ formats = [{
"url": asset.find("downloadUrl").text, 'url': text_or_none(asset, 'downloadUrl'),
"ext": asset.find("mediaType").text, 'ext': text_or_none(asset, 'mediaType'),
"format_id": asset.get("type"), 'format_id': asset.get('type'),
"width": int(asset.find("frameWidth").text), 'width': int_or_none(text_or_none(asset, 'frameWidth')),
"height": int(asset.find("frameHeight").text), 'height': int_or_none(text_or_none(asset, 'frameHeight')),
"tbr": int(asset.find("bitrateVideo").text), 'tbr': int_or_none(text_or_none(asset, 'bitrateVideo')),
"abr": int(asset.find("bitrateAudio").text), 'abr': int_or_none(text_or_none(asset, 'bitrateAudio')),
"vcodec": asset.find("codecVideo").text, 'vcodec': text_or_none(asset, 'codecVideo'),
"container": asset.find("mediaType").text, 'acodec': text_or_none(asset, 'codecAudio'),
"filesize": int(asset.find("size").text), 'container': text_or_none(asset, 'mediaType'),
} for asset in assets.findall("asset") 'filesize': int_or_none(text_or_none(asset, 'size')),
if asset.find("downloadUrl") is not None] } for asset in assets.findall('asset')
if asset.find('downloadUrl') is not None]
self._sort_formats(formats) self._sort_formats(formats)
return formats return formats
def _extract_thumbnails(self, variants): def _extract_thumbnails(self, variants):
thumbnails = [{ thumbnails = [{
"url": self._BASE_URL + variant.find("url").text, 'url': self._BASE_URL + variant.find('url').text,
"width": int(variant.find("width").text), 'width': int_or_none(variant.find('width').text),
"height": int(variant.find("height").text), 'height': int_or_none(variant.find('height').text),
} for variant in variants.findall("variant")] } for variant in variants.findall('variant')]
thumbnails.sort(key=lambda x: x["width"] * x["height"], reverse=True) thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True)
return thumbnails return thumbnails

View File

@@ -4,9 +4,7 @@ import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import ExtractorError
ExtractorError,
)
class BYUtvIE(InfoExtractor): class BYUtvIE(InfoExtractor):
@@ -16,7 +14,7 @@ class BYUtvIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'granite-flats-talking', 'id': 'granite-flats-talking',
'ext': 'mp4', 'ext': 'mp4',
'description': 'md5:1a7ae3e153359b7cc355ef3963441e5f', 'description': 'md5:4e9a7ce60f209a33eca0ac65b4918e1c',
'title': 'Talking', 'title': 'Talking',
'thumbnail': 're:^https?://.*promo.*' 'thumbnail': 're:^https?://.*promo.*'
}, },

View File

@@ -41,9 +41,9 @@ class ComedyCentralShowsIE(InfoExtractor):
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport) _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
|https?://(:www\.)? |https?://(:www\.)?
(?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/ (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
(full-episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)| ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
(?P<clip> (?P<clip>
(?:(?:guests/[^/]+|videos)/[^/]+/(?P<videotitle>[^/?#]+)) (?:(?:guests/[^/]+|videos|video-playlists)/[^/]+/(?P<videotitle>[^/?#]+))
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?)) |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)) |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
)| )|

View File

@@ -251,7 +251,10 @@ class InfoExtractor(object):
with open(filename, 'wb') as outf: with open(filename, 'wb') as outf:
outf.write(webpage_bytes) outf.write(webpage_bytes)
content = webpage_bytes.decode(encoding, 'replace') try:
content = webpage_bytes.decode(encoding, 'replace')
except LookupError:
content = webpage_bytes.decode('utf-8', 'replace')
if (u'<title>Access to this site is blocked</title>' in content and if (u'<title>Access to this site is blocked</title>' in content and
u'Websense' in content[:512]): u'Websense' in content[:512]):

View File

@@ -8,7 +8,7 @@ from .common import InfoExtractor
class MorningstarIE(InfoExtractor): class MorningstarIE(InfoExtractor):
IE_DESC = 'morningstar.com' IE_DESC = 'morningstar.com'
_VALID_URL = r'https?://(?:www\.)?morningstar\.com/cover/videocenter\.aspx\?id=(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
_TEST = { _TEST = {
'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869', 'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
'md5': '6c0acface7a787aadc8391e4bbf7b0f5', 'md5': '6c0acface7a787aadc8391e4bbf7b0f5',

View File

@@ -4,9 +4,7 @@ import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import int_or_none
int_or_none,
)
class MporaIE(InfoExtractor): class MporaIE(InfoExtractor):
@@ -20,7 +18,7 @@ class MporaIE(InfoExtractor):
'info_dict': { 'info_dict': {
'title': 'Katy Curd - Winter in the Forest', 'title': 'Katy Curd - Winter in the Forest',
'duration': 416, 'duration': 416,
'uploader': 'petenewman', 'uploader': 'Peter Newman Media',
}, },
} }

View File

@@ -7,9 +7,14 @@ from .common import InfoExtractor
class NineGagIE(InfoExtractor): class NineGagIE(InfoExtractor):
IE_NAME = '9gag' IE_NAME = '9gag'
_VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)' _VALID_URL = r'''(?x)^https?://(?:www\.)?9gag\.tv/
(?:
v/(?P<numid>[0-9]+)|
p/(?P<id>[a-zA-Z0-9]+)/(?P<display_id>[^?#/]+)
)
'''
_TEST = { _TESTS = [{
"url": "http://9gag.tv/v/1912", "url": "http://9gag.tv/v/1912",
"info_dict": { "info_dict": {
"id": "1912", "id": "1912",
@@ -20,17 +25,33 @@ class NineGagIE(InfoExtractor):
"thumbnail": "re:^https?://", "thumbnail": "re:^https?://",
}, },
'add_ie': ['Youtube'] 'add_ie': ['Youtube']
} },
{
'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar',
'info_dict': {
'id': 'KklwM',
'ext': 'mp4',
'display_id': 'alternate-banned-opening-scene-of-gravity',
"description": "While Gravity was a pretty awesome movie already, YouTuber Krishna Shenoi came up with a way to improve upon it, introducing a much better solution to Sandra Bullock's seemingly endless tumble in space. The ending is priceless.",
'title': "Banned Opening Scene Of \"Gravity\" That Changes The Whole Movie",
},
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('numid') or mobj.group('id')
display_id = mobj.group('display_id') or video_id
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, display_id)
youtube_id = self._html_search_regex( youtube_id = self._html_search_regex(
r'(?s)id="jsid-video-post-container".*?data-external-id="([^"]+)"', r'(?s)id="jsid-video-post-container".*?data-external-id="([^"]+)"',
webpage, 'video ID') webpage, 'video ID')
title = self._html_search_regex(
r'(?s)id="jsid-video-post-container".*?data-title="([^"]+)"',
webpage, 'title', default=None)
if not title:
title = self._og_search_title(webpage)
description = self._html_search_regex( description = self._html_search_regex(
r'(?s)<div class="video-caption">.*?<p>(.*?)</p>', webpage, r'(?s)<div class="video-caption">.*?<p>(.*?)</p>', webpage,
'description', fatal=False) 'description', fatal=False)
@@ -46,7 +67,8 @@ class NineGagIE(InfoExtractor):
'url': youtube_id, 'url': youtube_id,
'ie_key': 'Youtube', 'ie_key': 'Youtube',
'id': video_id, 'id': video_id,
'title': self._og_search_title(webpage), 'display_id': display_id,
'title': title,
'description': description, 'description': description,
'view_count': view_count, 'view_count': view_count,
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage),

View File

@@ -59,11 +59,11 @@ class NTVIE(InfoExtractor):
{ {
'url': 'http://www.ntv.ru/kino/Koma_film', 'url': 'http://www.ntv.ru/kino/Koma_film',
'info_dict': { 'info_dict': {
'id': '750783', 'id': '758100',
'ext': 'flv', 'ext': 'flv',
'title': 'Остросюжетный фильм «Кома» — 4 апреля вечером на НТВ', 'title': 'Остросюжетный фильм «Кома»',
'description': 'Остросюжетный фильм «Кома» — 4 апреля вечером на НТВ', 'description': 'Остросюжетный фильм «Кома»',
'duration': 28, 'duration': 5592,
}, },
'params': { 'params': {
# rtmp download # rtmp download

View File

@@ -0,0 +1,84 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
import base64
from .common import InfoExtractor
from ..utils import (
struct_unpack,
)
class RTVEALaCartaIE(InfoExtractor):
    """Information extractor for videos on RTVE's "a la carta" pages."""

    IE_NAME = 'rtve.es:alacarta'
    IE_DESC = 'RTVE a la carta'
    # Both http and https page URLs are accepted; the numeric id is the only
    # part of the URL that the extraction below actually uses.
    _VALID_URL = r'https?://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
        'md5': '18fcd45965bdd076efdb12cd7f6d7b9e',
        'info_dict': {
            'id': '2491869',
            'ext': 'mp4',
            'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
        },
    }

    def _decrypt_url(self, png):
        """Recover the media URL hidden in a base64-encoded PNG.

        The payload is read from the image's ``tEXt`` chunk.  It consists of
        an obfuscated alphabet, a ``#`` separator and a sequence of digits
        that index into that alphabet.

        Args:
            png: base64 text of the PNG file.

        Returns:
            The decoded URL as a string.

        Raises:
            ValueError: if the data has no ``tEXt`` chunk or the chunk has
                no ``#`` separator.
        """
        encrypted_data = base64.b64decode(png)
        text_index = encrypted_data.find(b'tEXt')
        # The chunk length is stored in the 4 bytes preceding the chunk
        # type, so a match before offset 4 (or no match at all, -1) cannot
        # be a usable chunk.  Fail loudly instead of slicing from the tail.
        if text_index < 4:
            raise ValueError('No tEXt chunk found in the RTVE thumbnail')
        text_chunk = encrypted_data[text_index - 4:]
        length = struct_unpack('!I', text_chunk[:4])[0]
        # Use bytearray to get integers when iterating in both python 2.x and 3.x
        data = bytearray(text_chunk[8:8 + length])
        data = [chr(b) for b in data if b != 0]
        hash_index = data.index('#')
        alphabet_data = data[:hash_index]
        url_data = data[hash_index + 1:]

        # Keep one character, then skip 1, 2, 3, 0, 1, ... characters.
        alphabet = []
        skip_step = 0
        skip = 0
        for char in alphabet_data:
            if skip == 0:
                alphabet.append(char)
                skip = skip_step = (skip_step + 1) % 4
            else:
                skip -= 1

        # Every URL character is a two-digit index into the alphabet: the
        # tens digit comes first, then a varying number of filler
        # characters, then the units digit.
        url_chars = []
        expecting_tens = True
        skip = 3
        round_no = 1
        index = 0
        for letter in url_data:
            if expecting_tens:
                index = int(letter) * 10
                expecting_tens = False
            elif skip == 0:
                index += int(letter)
                url_chars.append(alphabet[index])
                skip = (round_no + 3) % 4
                expecting_tens = True
                round_no += 1
            else:
                skip -= 1

        return ''.join(url_chars)

    def _real_extract(self, url):
        """Build the info dict for the video page at ``url``."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        info = self._download_json(
            'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
            video_id)['page']['items'][0]
        # The actual media URL is embedded in this PNG (see _decrypt_url).
        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
        png = self._download_webpage(png_url, video_id, 'Downloading url information')
        video_url = self._decrypt_url(png)

        return {
            'id': video_id,
            'title': info['title'],
            'url': video_url,
            'thumbnail': info['image'],
        }

View File

@@ -53,8 +53,7 @@ class FFmpegPostProcessor(PostProcessor):
if self._downloader.params.get('verbose', False): if self._downloader.params.get('verbose', False):
self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd)) self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
bcmd = [self._downloader.encode(c) for c in cmd] p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p = subprocess.Popen(bcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate() stdout, stderr = p.communicate()
if p.returncode != 0: if p.returncode != 0:
stderr = stderr.decode('utf-8', 'replace') stderr = stderr.decode('utf-8', 'replace')

View File

@@ -910,25 +910,84 @@ def platform_name():
return res return res
def write_string(s, out=None): def _windows_write_string(s, out):
""" Returns True if the string was written using special methods,
False if it has yet to be written out."""
# Adapted from http://stackoverflow.com/a/3259271/35070
import ctypes
import ctypes.wintypes
WIN_OUTPUT_IDS = {
1: -11,
2: -12,
}
def ucs2_len(s):
return sum((2 if ord(c) > 0xffff else 1) for c in s)
fileno = out.fileno()
if fileno not in WIN_OUTPUT_IDS:
return False
GetStdHandle = ctypes.WINFUNCTYPE(
ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
("GetStdHandle", ctypes.windll.kernel32))
h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
WriteConsoleW = ctypes.WINFUNCTYPE(
ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
written = ctypes.wintypes.DWORD(0)
GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
FILE_TYPE_CHAR = 0x0002
FILE_TYPE_REMOTE = 0x8000
GetConsoleMode = ctypes.WINFUNCTYPE(
ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
ctypes.POINTER(ctypes.wintypes.DWORD))(
("GetConsoleMode", ctypes.windll.kernel32))
INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
def not_a_console(handle):
if handle == INVALID_HANDLE_VALUE or handle is None:
return True
return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
if not_a_console(h):
return False
remaining = ucs2_len(s)
while remaining > 0:
ret = WriteConsoleW(
h, s, min(remaining, 1024), ctypes.byref(written), None)
if ret == 0:
raise OSError('Failed to write string')
remaining -= written.value
return True
def write_string(s, out=None, encoding=None):
if out is None: if out is None:
out = sys.stderr out = sys.stderr
assert type(s) == compat_str assert type(s) == compat_str
if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
if _windows_write_string(s, out):
return
if ('b' in getattr(out, 'mode', '') or if ('b' in getattr(out, 'mode', '') or
sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
s = s.encode(preferredencoding(), 'ignore') byt = s.encode(encoding or preferredencoding(), 'ignore')
try: out.write(byt)
elif hasattr(out, 'buffer'):
enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
byt = s.encode(enc, 'ignore')
out.buffer.write(byt)
else:
out.write(s) out.write(s)
except UnicodeEncodeError:
# In Windows shells, this can fail even when the codec is just charmap!?
# See https://wiki.python.org/moin/PrintFails#Issue
if sys.platform == 'win32' and hasattr(out, 'encoding'):
s = s.encode(out.encoding, 'ignore').decode(out.encoding)
out.write(s)
else:
raise
out.flush() out.flush()

View File

@@ -1,2 +1,2 @@
__version__ = '2014.04.07.1' __version__ = '2014.04.11.1'