mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-12-24 16:31:18 +01:00
Compare commits
166 Commits
2015.09.09
...
2015.09.28
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c254f75bbb | ||
|
|
86692c019c | ||
|
|
1ab1c4ef57 | ||
|
|
926fb62eec | ||
|
|
817690ff73 | ||
|
|
98e1c935a1 | ||
|
|
f30e9976d6 | ||
|
|
80e98aed69 | ||
|
|
6a24cb3d22 | ||
|
|
e13b9e7885 | ||
|
|
dd467d33d0 | ||
|
|
c6b8f4d0c9 | ||
|
|
95240b8093 | ||
|
|
2f962d0a91 | ||
|
|
3c63e1bb57 | ||
|
|
c471b34575 | ||
|
|
d045f0bdb7 | ||
|
|
22becac4bd | ||
|
|
9d632b1b27 | ||
|
|
95c5e10103 | ||
|
|
a5d09d684e | ||
|
|
8aab976bbd | ||
|
|
26c6d1922e | ||
|
|
cd1bb54990 | ||
|
|
d4cd06138c | ||
|
|
961c5cbf17 | ||
|
|
b65e5bb72f | ||
|
|
54914380c0 | ||
|
|
26ccc68bed | ||
|
|
ee3d5a6d47 | ||
|
|
46fde8a1a2 | ||
|
|
fe1d858e35 | ||
|
|
fc42bc6ec9 | ||
|
|
fe6ad195ae | ||
|
|
7193650641 | ||
|
|
5db34f680f | ||
|
|
a82ba8d0ce | ||
|
|
3706fb5dc8 | ||
|
|
08bea4adde | ||
|
|
4c917d0314 | ||
|
|
4866b72eb2 | ||
|
|
2d00be0477 | ||
|
|
3d09aa4c82 | ||
|
|
c44c7895b8 | ||
|
|
8de28761c4 | ||
|
|
711762f0b7 | ||
|
|
5773803961 | ||
|
|
140359fc2c | ||
|
|
8ddf48d59f | ||
|
|
2e40a12225 | ||
|
|
dade7245af | ||
|
|
0940c5b4c6 | ||
|
|
42ca72dff3 | ||
|
|
2949a6cda9 | ||
|
|
882fc9052e | ||
|
|
9b166fc1f8 | ||
|
|
d4364f30bd | ||
|
|
857421024d | ||
|
|
80faa7a152 | ||
|
|
545a23f11b | ||
|
|
caedb0721e | ||
|
|
47024eb564 | ||
|
|
9c58885c70 | ||
|
|
9fbd4b35a2 | ||
|
|
05b476a270 | ||
|
|
4395ca2e04 | ||
|
|
19f93d906e | ||
|
|
57565375c8 | ||
|
|
eb11cbe867 | ||
|
|
f102819463 | ||
|
|
b942db3dc3 | ||
|
|
78f9fb902b | ||
|
|
d8fef8faac | ||
|
|
8ea6bd2802 | ||
|
|
c659022b5c | ||
|
|
8ca2e93e1a | ||
|
|
5600e214c3 | ||
|
|
6400f8ec0f | ||
|
|
c3a4e2ec40 | ||
|
|
e28c794699 | ||
|
|
da9f180835 | ||
|
|
6b8ce312e3 | ||
|
|
de3fc356e1 | ||
|
|
d0fed4ac02 | ||
|
|
7ce50a355c | ||
|
|
9612f23399 | ||
|
|
cccedc1aa4 | ||
|
|
c430802e32 | ||
|
|
cb4e421901 | ||
|
|
8e97596b7b | ||
|
|
92085e7099 | ||
|
|
c6aa838b51 | ||
|
|
9f5e8d16b3 | ||
|
|
82c06a40ac | ||
|
|
4423eba49b | ||
|
|
5b4c54631a | ||
|
|
5a1a2e9454 | ||
|
|
f005f96ea5 | ||
|
|
5e39123b3b | ||
|
|
393ca8c94d | ||
|
|
f817adc468 | ||
|
|
6c91a5a7f5 | ||
|
|
749b09616d | ||
|
|
5de5ab89b4 | ||
|
|
1d67c96640 | ||
|
|
d1c694ea4a | ||
|
|
06368a232a | ||
|
|
8a7bbd1606 | ||
|
|
131d05033b | ||
|
|
1806a75415 | ||
|
|
659ffe204c | ||
|
|
4647fd8910 | ||
|
|
d492dad8f4 | ||
|
|
3368d70dce | ||
|
|
0e1b2566ff | ||
|
|
369e60162e | ||
|
|
d5e7657fe2 | ||
|
|
f84ce1ebaf | ||
|
|
12bc242944 | ||
|
|
88060cce10 | ||
|
|
272e4db5c7 | ||
|
|
6e21cc3b67 | ||
|
|
0391bc8176 | ||
|
|
3b9264a049 | ||
|
|
2b3c254678 | ||
|
|
287be8c615 | ||
|
|
953fed280f | ||
|
|
e2ff3df314 | ||
|
|
31208a07c2 | ||
|
|
ac7a1b0dfb | ||
|
|
c246773599 | ||
|
|
25cd56a715 | ||
|
|
82c18e2a53 | ||
|
|
d5d38d16ae | ||
|
|
e1cbf33573 | ||
|
|
2ffe3bc14b | ||
|
|
d5867276a9 | ||
|
|
f665ef8fc5 | ||
|
|
b264c21302 | ||
|
|
349b3a2ea0 | ||
|
|
87813a8570 | ||
|
|
aab135516b | ||
|
|
141ba36996 | ||
|
|
d434ca5448 | ||
|
|
94e507aea7 | ||
|
|
3ebc121293 | ||
|
|
41ebd6530b | ||
|
|
2ec7b7b79b | ||
|
|
60ed60353b | ||
|
|
586f1cc532 | ||
|
|
73eb13dfc7 | ||
|
|
1721fef28b | ||
|
|
364ca0582e | ||
|
|
133a2b4ac2 | ||
|
|
7b4137c351 | ||
|
|
376e1ad081 | ||
|
|
b58a22b963 | ||
|
|
64997815c4 | ||
|
|
3b18f539a7 | ||
|
|
f43c163158 | ||
|
|
673bf566fc | ||
|
|
f95c5e1218 | ||
|
|
f33f32f159 | ||
|
|
75b399f455 | ||
|
|
c23c3d7d7d | ||
|
|
bfed4813b2 |
@@ -5,6 +5,7 @@ python:
|
||||
- "3.2"
|
||||
- "3.3"
|
||||
- "3.4"
|
||||
- "3.5"
|
||||
sudo: false
|
||||
script: nosetests test --verbose
|
||||
notifications:
|
||||
|
||||
1
AUTHORS
1
AUTHORS
@@ -143,3 +143,4 @@ Shaun Walbridge
|
||||
Lee Jenkins
|
||||
Anssi Hannula
|
||||
Lukáš Lalinský
|
||||
Qijiang Fan
|
||||
|
||||
@@ -9,6 +9,7 @@ youtube-dl - download videos from youtube.com or other video platforms
|
||||
- [VIDEO SELECTION](#video-selection)
|
||||
- [FAQ](#faq)
|
||||
- [DEVELOPER INSTRUCTIONS](#developer-instructions)
|
||||
- [EMBEDDING YOUTUBE-DL](#embedding-youtube-dl)
|
||||
- [BUGS](#bugs)
|
||||
- [COPYRIGHT](#copyright)
|
||||
|
||||
@@ -261,7 +262,7 @@ For example:
|
||||
machine youtube login myaccount@gmail.com password my_youtube_password
|
||||
machine twitch login my_twitch_account_name password my_twitch_password
|
||||
```
|
||||
To activate authentication with `.netrc` file you should pass `--netrc` to youtube-dl or to place it in [configuration file](#configuration).
|
||||
To activate authentication with `.netrc` file you should pass `--netrc` to youtube-dl or place it in [configuration file](#configuration).
|
||||
|
||||
On Windows you may also need to setup `%HOME%` environment variable manually.
|
||||
|
||||
@@ -277,9 +278,10 @@ The `-o` option allows users to indicate a template for the output file names. T
|
||||
- `ext`: The sequence will be replaced by the appropriate extension (like flv or mp4).
|
||||
- `epoch`: The sequence will be replaced by the Unix epoch when creating the file.
|
||||
- `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero.
|
||||
- `playlist`: The name or the id of the playlist that contains the video.
|
||||
- `playlist_index`: The index of the video in the playlist, a five-digit number.
|
||||
- `playlist`: The sequence will be replaced by the name or the id of the playlist that contains the video.
|
||||
- `playlist_index`: The sequence will be replaced by the index of the video in the playlist padded with leading zeros according to the total length of the playlist.
|
||||
- `format_id`: The sequence will be replaced by the format code specified by `--format`.
|
||||
- `duration`: The sequence will be replaced by the length of the video in seconds.
|
||||
|
||||
The current default template is `%(title)s-%(id)s.%(ext)s`.
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ import os
|
||||
from os.path import dirname as dirn
|
||||
import sys
|
||||
|
||||
sys.path.append(dirn(dirn((os.path.abspath(__file__)))))
|
||||
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
|
||||
import youtube_dl
|
||||
|
||||
BASH_COMPLETION_FILE = "youtube-dl.bash-completion"
|
||||
|
||||
@@ -6,7 +6,7 @@ import os
|
||||
from os.path import dirname as dirn
|
||||
import sys
|
||||
|
||||
sys.path.append(dirn(dirn((os.path.abspath(__file__)))))
|
||||
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
|
||||
import youtube_dl
|
||||
from youtube_dl.utils import shell_quote
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ import os
|
||||
import textwrap
|
||||
|
||||
# We must be able to import youtube_dl
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
import youtube_dl
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ import sys
|
||||
|
||||
# Import youtube_dl
|
||||
ROOT_DIR = os.path.join(os.path.dirname(__file__), '..')
|
||||
sys.path.append(ROOT_DIR)
|
||||
sys.path.insert(0, ROOT_DIR)
|
||||
import youtube_dl
|
||||
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ import os
|
||||
from os.path import dirname as dirn
|
||||
import sys
|
||||
|
||||
sys.path.append(dirn(dirn((os.path.abspath(__file__)))))
|
||||
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
|
||||
import youtube_dl
|
||||
|
||||
ZSH_COMPLETION_FILE = "youtube-dl.zsh"
|
||||
|
||||
@@ -101,7 +101,7 @@
|
||||
- **ComCarCoff**
|
||||
- **ComedyCentral**
|
||||
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
||||
- **CondeNast**: Condé Nast media group: Condé Nast, GQ, Glamour, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **Cracked**
|
||||
- **Criterion**
|
||||
- **CrooksAndLiars**
|
||||
@@ -122,7 +122,6 @@
|
||||
- **defense.gouv.fr**
|
||||
- **DHM**: Filmarchiv - Deutsches Historisches Museum
|
||||
- **Discovery**
|
||||
- **divxstage**: DivxStage
|
||||
- **Dotsub**
|
||||
- **DouyuTV**: 斗鱼
|
||||
- **dramafever**
|
||||
@@ -159,7 +158,6 @@
|
||||
- **faz.net**
|
||||
- **fc2**
|
||||
- **fernsehkritik.tv**
|
||||
- **fernsehkritik.tv:postecke**
|
||||
- **Firstpost**
|
||||
- **FiveTV**
|
||||
- **Flickr**
|
||||
@@ -209,7 +207,6 @@
|
||||
- **hitbox**
|
||||
- **hitbox:live**
|
||||
- **HornBunny**
|
||||
- **HostingBulk**
|
||||
- **HotNewHipHop**
|
||||
- **Howcast**
|
||||
- **HowStuffWorks**
|
||||
@@ -286,7 +283,7 @@
|
||||
- **Minhateca**
|
||||
- **MinistryGrid**
|
||||
- **miomio.tv**
|
||||
- **mitele.es**
|
||||
- **MiTele**: mitele.es
|
||||
- **mixcloud**
|
||||
- **MLB**
|
||||
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
||||
@@ -317,7 +314,6 @@
|
||||
- **Myvi**
|
||||
- **myvideo**
|
||||
- **MyVidster**
|
||||
- **N-JOY**
|
||||
- **n-tv.de**
|
||||
- **NationalGeographic**
|
||||
- **Naver**
|
||||
@@ -326,7 +322,9 @@
|
||||
- **NBCNews**
|
||||
- **NBCSports**
|
||||
- **NBCSportsVPlayer**
|
||||
- **ndr**: NDR.de - Mediathek
|
||||
- **ndr**: NDR.de - Norddeutscher Rundfunk
|
||||
- **ndr:embed**
|
||||
- **ndr:embed:base**
|
||||
- **NDTV**
|
||||
- **NerdCubedFeed**
|
||||
- **Nerdist**
|
||||
@@ -349,12 +347,16 @@
|
||||
- **nhl.com:videocenter**: NHL videocenter category
|
||||
- **niconico**: ニコニコ動画
|
||||
- **NiconicoPlaylist**
|
||||
- **njoy**: N-JOY
|
||||
- **njoy:embed**
|
||||
- **Noco**
|
||||
- **Normalboots**
|
||||
- **NosVideo**
|
||||
- **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
|
||||
- **novamov**: NovaMov
|
||||
- **Nowness**
|
||||
- **nowness**
|
||||
- **nowness:playlist**
|
||||
- **nowness:series**
|
||||
- **NowTV**
|
||||
- **nowvideo**: NowVideo
|
||||
- **npo**: npo.nl and ntr.nl
|
||||
@@ -375,7 +377,6 @@
|
||||
- **OnionStudios**
|
||||
- **Ooyala**
|
||||
- **OoyalaExternal**
|
||||
- **OpenFilm**
|
||||
- **orf:fm4**: radio FM4
|
||||
- **orf:iptv**: iptv.ORF.at
|
||||
- **orf:oe1**: Radio Österreich 1
|
||||
@@ -530,7 +531,7 @@
|
||||
- **techtv.mit.edu**
|
||||
- **ted**
|
||||
- **TeleBruxelles**
|
||||
- **telecinco.es**
|
||||
- **Telecinco**: telecinco.es, cuatro.com and mediaset.es
|
||||
- **Telegraaf**
|
||||
- **TeleMB**
|
||||
- **TeleTask**
|
||||
|
||||
2
tox.ini
2
tox.ini
@@ -1,5 +1,5 @@
|
||||
[tox]
|
||||
envlist = py26,py27,py33,py34
|
||||
envlist = py26,py27,py33,py34,py35
|
||||
[testenv]
|
||||
deps =
|
||||
nose
|
||||
|
||||
@@ -11,7 +11,7 @@ if __package__ is None and not hasattr(sys, "frozen"):
|
||||
# direct call of __main__.py
|
||||
import os.path
|
||||
path = os.path.realpath(os.path.abspath(__file__))
|
||||
sys.path.append(os.path.dirname(os.path.dirname(path)))
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(path)))
|
||||
|
||||
import youtube_dl
|
||||
|
||||
|
||||
@@ -80,6 +80,11 @@ try:
|
||||
except ImportError:
|
||||
import BaseHTTPServer as compat_http_server
|
||||
|
||||
try:
|
||||
compat_str = unicode # Python 2
|
||||
except NameError:
|
||||
compat_str = str
|
||||
|
||||
try:
|
||||
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
|
||||
from urllib.parse import unquote as compat_urllib_parse_unquote
|
||||
@@ -100,7 +105,7 @@ except ImportError: # Python 2
|
||||
# Is it a string-like object?
|
||||
string.split
|
||||
return b''
|
||||
if isinstance(string, unicode):
|
||||
if isinstance(string, compat_str):
|
||||
string = string.encode('utf-8')
|
||||
bits = string.split(b'%')
|
||||
if len(bits) == 1:
|
||||
@@ -150,11 +155,6 @@ except ImportError: # Python 2
|
||||
string = string.replace('+', ' ')
|
||||
return compat_urllib_parse_unquote(string, encoding, errors)
|
||||
|
||||
try:
|
||||
compat_str = unicode # Python 2
|
||||
except NameError:
|
||||
compat_str = str
|
||||
|
||||
try:
|
||||
compat_basestring = basestring # Python 2
|
||||
except NameError:
|
||||
@@ -234,7 +234,7 @@ else:
|
||||
# Working around shlex issue with unicode strings on some python 2
|
||||
# versions (see http://bugs.python.org/issue1548891)
|
||||
def compat_shlex_split(s, comments=False, posix=True):
|
||||
if isinstance(s, unicode):
|
||||
if isinstance(s, compat_str):
|
||||
s = s.encode('utf-8')
|
||||
return shlex.split(s, comments, posix)
|
||||
|
||||
|
||||
@@ -28,10 +28,19 @@ class HlsFD(FileDownloader):
|
||||
return False
|
||||
ffpp.check_version()
|
||||
|
||||
args = [
|
||||
encodeArgument(opt)
|
||||
for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
|
||||
args.append(encodeFilename(tmpfilename, True))
|
||||
args = [ffpp.executable, '-y']
|
||||
|
||||
if info_dict['http_headers']:
|
||||
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
|
||||
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
|
||||
args += [
|
||||
'-headers',
|
||||
''.join('%s: %s\r\n' % (key, val) for key, val in info_dict['http_headers'].items())]
|
||||
|
||||
args += ['-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc']
|
||||
|
||||
args = [encodeArgument(opt) for opt in args]
|
||||
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
|
||||
|
||||
self._debug_cmd(args)
|
||||
|
||||
@@ -92,6 +101,7 @@ class NativeHlsFD(FragmentFD):
|
||||
return False
|
||||
down, frag_sanitized = sanitize_open(frag_filename, 'rb')
|
||||
ctx['dest_stream'].write(down.read())
|
||||
down.close()
|
||||
frags_filenames.append(frag_sanitized)
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
@@ -138,7 +138,6 @@ from .dump import DumpIE
|
||||
from .dumpert import DumpertIE
|
||||
from .defense import DefenseGouvFrIE
|
||||
from .discovery import DiscoveryIE
|
||||
from .divxstage import DivxStageIE
|
||||
from .dropbox import DropboxIE
|
||||
from .eagleplatform import EaglePlatformIE
|
||||
from .ebaumsworld import EbaumsWorldIE
|
||||
@@ -170,10 +169,7 @@ from .firstpost import FirstpostIE
|
||||
from .firsttv import FirstTVIE
|
||||
from .fivemin import FiveMinIE
|
||||
from .fivetv import FiveTVIE
|
||||
from .fktv import (
|
||||
FKTVIE,
|
||||
FKTVPosteckeIE,
|
||||
)
|
||||
from .fktv import FKTVIE
|
||||
from .flickr import FlickrIE
|
||||
from .folketinget import FolketingetIE
|
||||
from .footyroom import FootyRoomIE
|
||||
@@ -229,7 +225,6 @@ from .historicfilms import HistoricFilmsIE
|
||||
from .history import HistoryIE
|
||||
from .hitbox import HitboxIE, HitboxLiveIE
|
||||
from .hornbunny import HornBunnyIE
|
||||
from .hostingbulk import HostingBulkIE
|
||||
from .hotnewhiphop import HotNewHipHopIE
|
||||
from .howcast import HowcastIE
|
||||
from .howstuffworks import HowStuffWorksIE
|
||||
@@ -367,6 +362,9 @@ from .nbc import (
|
||||
from .ndr import (
|
||||
NDRIE,
|
||||
NJoyIE,
|
||||
NDREmbedBaseIE,
|
||||
NDREmbedIE,
|
||||
NJoyEmbedIE,
|
||||
)
|
||||
from .ndtv import NDTVIE
|
||||
from .netzkino import NetzkinoIE
|
||||
@@ -402,7 +400,11 @@ from .normalboots import NormalbootsIE
|
||||
from .nosvideo import NosVideoIE
|
||||
from .nova import NovaIE
|
||||
from .novamov import NovaMovIE
|
||||
from .nowness import NownessIE
|
||||
from .nowness import (
|
||||
NownessIE,
|
||||
NownessPlaylistIE,
|
||||
NownessSeriesIE,
|
||||
)
|
||||
from .nowtv import NowTVIE
|
||||
from .nowvideo import NowVideoIE
|
||||
from .npo import (
|
||||
@@ -432,7 +434,6 @@ from .ooyala import (
|
||||
OoyalaIE,
|
||||
OoyalaExternalIE,
|
||||
)
|
||||
from .openfilm import OpenFilmIE
|
||||
from .orf import (
|
||||
ORFTVthekIE,
|
||||
ORFOE1IE,
|
||||
|
||||
@@ -4,6 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
find_xpath_attr,
|
||||
unified_strdate,
|
||||
@@ -77,7 +81,13 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
def _extract_from_webpage(self, webpage, video_id, lang):
|
||||
json_url = self._html_search_regex(
|
||||
[r'arte_vp_url=["\'](.*?)["\']', r'data-url=["\']([^"]+)["\']'],
|
||||
webpage, 'json vp url')
|
||||
webpage, 'json vp url', default=None)
|
||||
if not json_url:
|
||||
iframe_url = self._html_search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
|
||||
webpage, 'iframe url', group='url')
|
||||
json_url = compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
|
||||
def _extract_from_json_url(self, json_url, video_id, lang):
|
||||
|
||||
@@ -21,6 +21,10 @@ class BBCCoUkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
|
||||
|
||||
_MEDIASELECTOR_URLS = [
|
||||
# Provides HQ HLS streams with even better quality that pc mediaset but fails
|
||||
# with geolocation in some cases when it's even not geo restricted at all (e.g.
|
||||
# http://www.bbc.co.uk/programmes/b06bp7lf)
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
|
||||
]
|
||||
|
||||
@@ -152,6 +156,21 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'geolocation',
|
||||
}, {
|
||||
# iptv-all mediaset fails with geolocation however there is no geo restriction
|
||||
# for this programme at all
|
||||
'url': 'http://www.bbc.co.uk/programmes/b06bp7lf',
|
||||
'info_dict': {
|
||||
'id': 'b06bp7kf',
|
||||
'ext': 'flv',
|
||||
'title': "Annie Mac's Friday Night, B.Traits sits in for Annie",
|
||||
'description': 'B.Traits sits in for Annie Mac with a Mini-Mix from Disclosure.',
|
||||
'duration': 10800,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
||||
'only_matching': True,
|
||||
@@ -189,6 +208,12 @@ class BBCCoUkIE(InfoExtractor):
|
||||
# Skip DASH until supported
|
||||
elif transfer_format == 'dash':
|
||||
pass
|
||||
elif transfer_format == 'hls':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=supplier, fatal=False)
|
||||
if m3u8_formats:
|
||||
formats.extend(m3u8_formats)
|
||||
# Direct link
|
||||
else:
|
||||
formats.append({
|
||||
@@ -287,7 +312,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
return self._download_media_selector_url(
|
||||
mediaselector_url % programme_id, programme_id)
|
||||
except BBCCoUkIE.MediaSelectionError as e:
|
||||
if e.id == 'notukerror':
|
||||
if e.id in ('notukerror', 'geolocation'):
|
||||
last_exception = e
|
||||
continue
|
||||
self._raise_extractor_error(e)
|
||||
|
||||
@@ -12,9 +12,9 @@ from ..utils import (
|
||||
|
||||
|
||||
class ClubicIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?clubic\.com/video/[^/]+/video.*-(?P<id>[0-9]+)\.html'
|
||||
_VALID_URL = r'http://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html',
|
||||
'md5': '1592b694ba586036efac1776b0b43cd3',
|
||||
'info_dict': {
|
||||
@@ -24,7 +24,10 @@ class ClubicIE(InfoExtractor):
|
||||
'description': 're:Gueule de bois chez Nokia. Le constructeur a indiqué cette.*',
|
||||
'thumbnail': 're:^http://img\.clubic\.com/.*\.jpg$',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.clubic.com/video/video-clubic-week-2-0-apple-iphone-6s-et-plus-mais-surtout-le-pencil-469792.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
@@ -152,6 +152,7 @@ class InfoExtractor(object):
|
||||
description: Full video description.
|
||||
uploader: Full name of the video uploader.
|
||||
creator: The main artist who created the video.
|
||||
release_date: The date (YYYYMMDD) when the video was released.
|
||||
timestamp: UNIX timestamp of the moment the video became available.
|
||||
upload_date: Video upload date (YYYYMMDD).
|
||||
If not explicitly set, calculated from timestamp.
|
||||
@@ -516,6 +517,12 @@ class InfoExtractor(object):
|
||||
'%s. Use --username and --password or --netrc to provide account credentials.' % msg,
|
||||
expected=True)
|
||||
|
||||
@staticmethod
|
||||
def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'):
|
||||
raise ExtractorError(
|
||||
'%s. You might want to use --proxy to workaround.' % msg,
|
||||
expected=True)
|
||||
|
||||
# Methods for following #608
|
||||
@staticmethod
|
||||
def url_result(url, ie=None, video_id=None, video_title=None):
|
||||
@@ -731,8 +738,9 @@ class InfoExtractor(object):
|
||||
|
||||
@staticmethod
|
||||
def _hidden_inputs(html):
|
||||
html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
|
||||
hidden_inputs = {}
|
||||
for input in re.findall(r'<input([^>]+)>', html):
|
||||
for input in re.findall(r'(?i)<input([^>]+)>', html):
|
||||
if not re.search(r'type=(["\'])(?:hidden|submit)\1', input):
|
||||
continue
|
||||
name = re.search(r'name=(["\'])(?P<value>.+?)\1', input)
|
||||
@@ -746,7 +754,7 @@ class InfoExtractor(object):
|
||||
|
||||
def _form_hidden_inputs(self, form_id, html):
|
||||
form = self._search_regex(
|
||||
r'(?s)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
|
||||
r'(?is)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
|
||||
html, '%s form' % form_id, group='form')
|
||||
return self._hidden_inputs(form)
|
||||
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
@@ -12,6 +11,7 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
orderedSet,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
@@ -24,21 +24,33 @@ class CondeNastIE(InfoExtractor):
|
||||
# The keys are the supported sites and the values are the name to be shown
|
||||
# to the user and in the extractor description.
|
||||
_SITES = {
|
||||
'wired': 'WIRED',
|
||||
'gq': 'GQ',
|
||||
'vogue': 'Vogue',
|
||||
'glamour': 'Glamour',
|
||||
'wmagazine': 'W Magazine',
|
||||
'vanityfair': 'Vanity Fair',
|
||||
'allure': 'Allure',
|
||||
'architecturaldigest': 'Architectural Digest',
|
||||
'arstechnica': 'Ars Technica',
|
||||
'bonappetit': 'Bon Appétit',
|
||||
'brides': 'Brides',
|
||||
'cnevids': 'Condé Nast',
|
||||
'cntraveler': 'Condé Nast Traveler',
|
||||
'details': 'Details',
|
||||
'epicurious': 'Epicurious',
|
||||
'glamour': 'Glamour',
|
||||
'golfdigest': 'Golf Digest',
|
||||
'gq': 'GQ',
|
||||
'newyorker': 'The New Yorker',
|
||||
'self': 'SELF',
|
||||
'teenvogue': 'Teen Vogue',
|
||||
'vanityfair': 'Vanity Fair',
|
||||
'vogue': 'Vogue',
|
||||
'wired': 'WIRED',
|
||||
'wmagazine': 'W Magazine',
|
||||
}
|
||||
|
||||
_VALID_URL = r'http://(video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
|
||||
_VALID_URL = r'http://(?:video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed(?:js)?)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
|
||||
IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
|
||||
|
||||
EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed)/.+?' % '|'.join(_SITES.keys())
|
||||
EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed(?:js)?)/.+?' % '|'.join(_SITES.keys())
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
|
||||
'md5': '1921f713ed48aabd715691f774c451f7',
|
||||
'info_dict': {
|
||||
@@ -47,7 +59,16 @@ class CondeNastIE(InfoExtractor):
|
||||
'title': '3D Printed Speakers Lit With LED',
|
||||
'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# JS embed
|
||||
'url': 'http://player.cnevids.com/embedjs/55f9cf8b61646d1acf00000c/5511d76261646d5566020000.js',
|
||||
'md5': 'f1a6f9cafb7083bab74a710f65d08999',
|
||||
'info_dict': {
|
||||
'id': '55f9cf8b61646d1acf00000c',
|
||||
'ext': 'mp4',
|
||||
'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
|
||||
}
|
||||
}]
|
||||
|
||||
def _extract_series(self, url, webpage):
|
||||
title = self._html_search_regex(r'<div class="cne-series-info">.*?<h1>(.+?)</h1>',
|
||||
@@ -86,8 +107,8 @@ class CondeNastIE(InfoExtractor):
|
||||
info_url = base_info_url + data
|
||||
info_page = self._download_webpage(info_url, video_id,
|
||||
'Downloading video info')
|
||||
video_info = self._search_regex(r'var video = ({.+?});', info_page, 'video info')
|
||||
video_info = json.loads(video_info)
|
||||
video_info = self._search_regex(r'var\s+video\s*=\s*({.+?});', info_page, 'video info')
|
||||
video_info = self._parse_json(video_info, video_id)
|
||||
|
||||
formats = [{
|
||||
'format_id': '%s-%s' % (fdata['type'].split('/')[-1], fdata['quality']),
|
||||
@@ -111,6 +132,13 @@ class CondeNastIE(InfoExtractor):
|
||||
url_type = mobj.group('type')
|
||||
item_id = mobj.group('id')
|
||||
|
||||
# Convert JS embed to regular embed
|
||||
if url_type == 'embedjs':
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
url = compat_urlparse.urlunparse(parsed_url._replace(
|
||||
path=remove_end(parsed_url.path, '.js').replace('/embedjs/', '/embed/')))
|
||||
url_type = 'embed'
|
||||
|
||||
self.to_screen('Extracting from %s with the Condé Nast extractor' % self._SITES[site])
|
||||
webpage = self._download_webpage(url, item_id)
|
||||
|
||||
|
||||
@@ -1,27 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .novamov import NovaMovIE
|
||||
|
||||
|
||||
class DivxStageIE(NovaMovIE):
|
||||
IE_NAME = 'divxstage'
|
||||
IE_DESC = 'DivxStage'
|
||||
|
||||
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag|to)'}
|
||||
|
||||
_HOST = 'www.divxstage.eu'
|
||||
|
||||
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
|
||||
_TITLE_REGEX = r'<div class="video_det">\s*<strong>([^<]+)</strong>'
|
||||
_DESCRIPTION_REGEX = r'<div class="video_det">\s*<strong>[^<]+</strong>\s*<p>([^<]+)</p>'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.divxstage.eu/video/57f238e2e5e01',
|
||||
'md5': '63969f6eb26533a1968c4d325be63e72',
|
||||
'info_dict': {
|
||||
'id': '57f238e2e5e01',
|
||||
'ext': 'flv',
|
||||
'title': 'youtubedl test video',
|
||||
'description': 'This is a test video for youtubedl.',
|
||||
}
|
||||
}
|
||||
@@ -21,7 +21,7 @@ class EaglePlatformIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
# http://lenta.ru/news/2015/03/06/navalny/
|
||||
'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
|
||||
'md5': '0b7994faa2bd5c0f69a3db6db28d078d',
|
||||
'md5': '70f5187fb620f2c1d503b3b22fd4efe3',
|
||||
'info_dict': {
|
||||
'id': '227304',
|
||||
'ext': 'mp4',
|
||||
@@ -36,7 +36,7 @@ class EaglePlatformIE(InfoExtractor):
|
||||
# http://muz-tv.ru/play/7129/
|
||||
# http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
|
||||
'url': 'eagleplatform:media.clipyou.ru:12820',
|
||||
'md5': '6c2ebeab03b739597ce8d86339d5a905',
|
||||
'md5': '90b26344ba442c8e44aa4cf8f301164a',
|
||||
'info_dict': {
|
||||
'id': '12820',
|
||||
'ext': 'mp4',
|
||||
@@ -48,7 +48,8 @@ class EaglePlatformIE(InfoExtractor):
|
||||
'skip': 'Georestricted',
|
||||
}]
|
||||
|
||||
def _handle_error(self, response):
|
||||
@staticmethod
|
||||
def _handle_error(response):
|
||||
status = int_or_none(response.get('status', 200))
|
||||
if status != 200:
|
||||
raise ExtractorError(' '.join(response['errors']), expected=True)
|
||||
@@ -58,6 +59,9 @@ class EaglePlatformIE(InfoExtractor):
|
||||
self._handle_error(response)
|
||||
return response
|
||||
|
||||
def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'):
|
||||
return self._download_json(url_or_request, video_id, note)['data'][0]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')
|
||||
@@ -69,7 +73,7 @@ class EaglePlatformIE(InfoExtractor):
|
||||
|
||||
title = media['title']
|
||||
description = media.get('description')
|
||||
thumbnail = media.get('snapshot')
|
||||
thumbnail = self._proto_relative_url(media.get('snapshot'), 'http:')
|
||||
duration = int_or_none(media.get('duration'))
|
||||
view_count = int_or_none(media.get('views'))
|
||||
|
||||
@@ -78,13 +82,20 @@ class EaglePlatformIE(InfoExtractor):
|
||||
if age_restriction:
|
||||
age_limit = 0 if age_restriction == 'allow_all' else 18
|
||||
|
||||
m3u8_data = self._download_json(
|
||||
self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:'),
|
||||
video_id, 'Downloading m3u8 JSON')
|
||||
secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:')
|
||||
|
||||
m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_data['data'][0], video_id,
|
||||
m3u8_url, video_id,
|
||||
'mp4', entry_protocol='m3u8_native')
|
||||
|
||||
mp4_url = self._get_video_url(
|
||||
# Secure mp4 URL is constructed according to Player.prototype.mp4 from
|
||||
# http://lentaru.media.eagleplatform.com/player/player.js
|
||||
re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4', secure_m3u8),
|
||||
video_id, 'Downloading mp4 JSON')
|
||||
formats.append({'url': mp4_url, 'format_id': 'mp4'})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import random
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
get_element_by_id,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@@ -17,66 +16,40 @@ class FKTVIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://fernsehkritik.tv/folge-1',
|
||||
'md5': '21f0b0c99bce7d5b524eb1b17b1c6d79',
|
||||
'info_dict': {
|
||||
'id': '00011',
|
||||
'ext': 'flv',
|
||||
'id': '1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Folge 1 vom 10. April 2007',
|
||||
'description': 'md5:fb4818139c7cfe6907d4b83412a6864f',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
episode = int(self._match_id(url))
|
||||
episode = self._match_id(url)
|
||||
|
||||
video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%s.jpg' % episode
|
||||
start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%s/Start' % episode,
|
||||
episode)
|
||||
playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage,
|
||||
'playlist', flags=re.DOTALL)
|
||||
files = json.loads(re.sub('{[^{}]*?}', '{}', playlist))
|
||||
webpage = self._download_webpage(
|
||||
'http://fernsehkritik.tv/folge-%s/play' % episode, episode)
|
||||
title = clean_html(self._html_search_regex(
|
||||
'<h3>([^<]+)</h3>', webpage, 'title'))
|
||||
matches = re.search(
|
||||
r'(?s)<video(?:(?!poster)[^>])+(?:poster="([^"]+)")?[^>]*>(.*)</video>',
|
||||
webpage)
|
||||
if matches is None:
|
||||
raise ExtractorError('Unable to extract the video')
|
||||
|
||||
videos = []
|
||||
for i, _ in enumerate(files, 1):
|
||||
video_id = '%04d%d' % (episode, i)
|
||||
video_url = 'http://fernsehkritik.tv/js/directme.php?file=%s%s.flv' % (episode, '' if i == 1 else '-%d' % i)
|
||||
videos.append({
|
||||
'ext': 'flv',
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': clean_html(get_element_by_id('eptitle', start_webpage)),
|
||||
'description': clean_html(get_element_by_id('contentlist', start_webpage)),
|
||||
'thumbnail': video_thumbnail
|
||||
})
|
||||
poster, sources = matches.groups()
|
||||
if poster is None:
|
||||
self.report_warning('unable to extract thumbnail')
|
||||
|
||||
urls = re.findall(r'<source[^>]+src="([^"]+)"', sources)
|
||||
formats = [{
|
||||
'url': furl,
|
||||
'format_id': determine_ext(furl),
|
||||
} for furl in urls]
|
||||
return {
|
||||
'_type': 'multi_video',
|
||||
'entries': videos,
|
||||
'id': 'folge-%s' % episode,
|
||||
}
|
||||
|
||||
|
||||
class FKTVPosteckeIE(InfoExtractor):
|
||||
IE_NAME = 'fernsehkritik.tv:postecke'
|
||||
_VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/inline-video/postecke\.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)'
|
||||
_TEST = {
|
||||
'url': 'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120',
|
||||
'md5': '262f0adbac80317412f7e57b4808e5c4',
|
||||
'info_dict': {
|
||||
'id': '0120',
|
||||
'ext': 'flv',
|
||||
'title': 'Postecke 120',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
episode = int(mobj.group('ep'))
|
||||
|
||||
server = random.randint(2, 4)
|
||||
video_id = '%04d' % episode
|
||||
video_url = 'http://dl%d.fernsehkritik.tv/postecke/postecke%d.flv' % (server, episode)
|
||||
video_title = 'Postecke %d' % episode
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': video_title,
|
||||
'id': episode,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': poster,
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
@@ -49,6 +50,7 @@ from .dailymotion import DailymotionCloudIE
|
||||
from .onionstudios import OnionStudiosIE
|
||||
from .snagfilms import SnagFilmsEmbedIE
|
||||
from .screenwavemedia import ScreenwaveMediaIE
|
||||
from .mtv import MTVServicesEmbeddedIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -230,6 +232,22 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': False,
|
||||
}
|
||||
},
|
||||
{
|
||||
# redirect in Refresh HTTP header
|
||||
'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
|
||||
'info_dict': {
|
||||
'id': 'pO8h3EaFRdo',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
|
||||
'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
|
||||
'upload_date': '20150917',
|
||||
'uploader_id': 'brtvofficial',
|
||||
'uploader': 'Boiler Room',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': False,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
|
||||
'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
|
||||
@@ -1594,12 +1612,9 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(url, ie='Vulture')
|
||||
|
||||
# Look for embedded mtvservices player
|
||||
mobj = re.search(
|
||||
r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
url = unescapeHTML(mobj.group('url'))
|
||||
return self.url_result(url, ie='MTVServicesEmbedded')
|
||||
mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
|
||||
if mtvservices_url:
|
||||
return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
|
||||
|
||||
# Look for embedded yahoo player
|
||||
mobj = re.search(
|
||||
@@ -1638,7 +1653,7 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(mobj.group('url'), 'MLB')
|
||||
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
|
||||
r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
|
||||
@@ -1808,6 +1823,9 @@ class GenericIE(InfoExtractor):
|
||||
# Look also in Refresh HTTP header
|
||||
refresh_header = head_response.headers.get('Refresh')
|
||||
if refresh_header:
|
||||
# In python 2 response HTTP headers are bytestrings
|
||||
if sys.version_info < (3, 0) and isinstance(refresh_header, str):
|
||||
refresh_header = refresh_header.decode('iso-8859-1')
|
||||
found = re.search(REDIRECT_REGEX, refresh_header)
|
||||
if found:
|
||||
new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
|
||||
|
||||
@@ -1,80 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class HostingBulkIE(InfoExtractor):
    """Information extractor for videos hosted on hostingbulk.com.

    Accepts both the regular page URL and the ``embed-`` variant (with an
    optional ``-<width>x<height>`` suffix); extraction always goes through
    the regular ``/<id>.html`` page.
    """

    _VALID_URL = r'''(?x)
        https?://(?:www\.)?hostingbulk\.com/
        (?:embed-)?(?P<id>[A-Za-z0-9]{12})(?:-\d+x\d+)?\.html'''
    # Marker shown on the English version of the page for a missing file.
    _FILE_DELETED_REGEX = r'<b>File Not Found</b>'
    _TEST = {
        'url': 'http://hostingbulk.com/n0ulw1hv20fm.html',
        'md5': '6c8653c8ecf7ebfa83b76e24b7b2fe3f',
        'info_dict': {
            'id': 'n0ulw1hv20fm',
            'ext': 'mp4',
            'title': 'md5:5afeba33f48ec87219c269e054afd622',
            'filesize': 6816081,
            # Raw string: ``\.`` is a regex escape, not a string escape.
            'thumbnail': r're:^http://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Return the info dict (id, title, thumbnail, formats) for ``url``.

        Raises:
            ExtractorError: (expected) when the page matches
                ``_FILE_DELETED_REGEX``.
        """
        video_id = self._match_id(url)
        url = 'http://hostingbulk.com/{0:}.html'.format(video_id)

        # Custom request with cookie to set language to English, so our file
        # deleted regex would work.
        request = compat_urllib_request.Request(
            url, headers={'Cookie': 'lang=english'})
        webpage = self._download_webpage(request, video_id)

        if re.search(self._FILE_DELETED_REGEX, webpage):
            raise ExtractorError(
                'Video %s does not exist' % video_id, expected=True)

        title = self._html_search_regex(r'<h3>(.*?)</h3>', webpage, 'title')
        filesize = int_or_none(self._search_regex(
            r'<small>\((\d+)\sbytes?\)</small>',
            webpage, 'filesize', fatal=False))
        thumbnail = self._search_regex(
            r'<img src="([^"]+)".+?class="pic"',
            webpage, 'thumbnail', fatal=False)

        # Re-submit the page's hidden form fields as a POST to the same URL;
        # the URL the response ends up at is used as the media URL.
        fields = self._hidden_inputs(webpage)
        request = compat_urllib_request.Request(url, urlencode_postdata(fields))
        request.add_header('Content-type', 'application/x-www-form-urlencoded')
        response = self._request_webpage(
            request, video_id, 'Submiting download request')
        video_url = response.geturl()

        formats = [{
            'format_id': 'sd',
            'filesize': filesize,
            'url': video_url,
        }]

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }
|
||||
@@ -1,7 +1,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
get_element_by_id,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class IconosquareIE(InfoExtractor):
|
||||
@@ -12,7 +16,7 @@ class IconosquareIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '522207370455279102_24101272',
|
||||
'ext': 'mp4',
|
||||
'title': 'Instagram media by @aguynamedpatrick (Patrick Janelle)',
|
||||
'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
|
||||
'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d',
|
||||
'timestamp': 1376471991,
|
||||
'upload_date': '20130814',
|
||||
@@ -29,8 +33,7 @@ class IconosquareIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
media = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.media\s*=\s*({.+?});\n', webpage, 'media'),
|
||||
get_element_by_id('mediaJson', webpage),
|
||||
video_id)
|
||||
|
||||
formats = [{
|
||||
@@ -41,9 +44,7 @@ class IconosquareIE(InfoExtractor):
|
||||
} for format_id, f in media['videos'].items()]
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.+?)(?: *\(Videos?\))? \| (?:Iconosquare|Statigram)</title>',
|
||||
webpage, 'title')
|
||||
title = remove_end(self._og_search_title(webpage), ' - via Iconosquare')
|
||||
|
||||
timestamp = int_or_none(media.get('created_time') or media.get('caption', {}).get('created_time'))
|
||||
description = media.get('caption', {}).get('text')
|
||||
@@ -61,6 +62,14 @@ class IconosquareIE(InfoExtractor):
|
||||
'height': int_or_none(t.get('height'))
|
||||
} for thumbnail_id, t in media.get('images', {}).items()]
|
||||
|
||||
comments = [{
|
||||
'id': comment.get('id'),
|
||||
'text': comment['text'],
|
||||
'timestamp': int_or_none(comment.get('created_time')),
|
||||
'author': comment.get('from', {}).get('full_name'),
|
||||
'author_id': comment.get('from', {}).get('username'),
|
||||
} for comment in media.get('comments', {}).get('data', []) if 'text' in comment]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
@@ -72,4 +81,5 @@ class IconosquareIE(InfoExtractor):
|
||||
'comment_count': comment_count,
|
||||
'like_count': like_count,
|
||||
'formats': formats,
|
||||
'comments': comments,
|
||||
}
|
||||
|
||||
@@ -95,6 +95,10 @@ class IqiyiIE(InfoExtractor):
|
||||
('10', 'h1'),
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def md5_text(text):
|
||||
return hashlib.md5(text.encode('utf-8')).hexdigest()
|
||||
|
||||
def construct_video_urls(self, data, video_id, _uuid):
|
||||
def do_xor(x, y):
|
||||
a = y % 3
|
||||
@@ -121,7 +125,7 @@ class IqiyiIE(InfoExtractor):
|
||||
note='Download path key of segment %d for format %s' % (segment_index + 1, format_id)
|
||||
)['t']
|
||||
t = str(int(math.floor(int(tm) / (600.0))))
|
||||
return hashlib.md5((t + mg + x).encode('utf8')).hexdigest()
|
||||
return self.md5_text(t + mg + x)
|
||||
|
||||
video_urls_dict = {}
|
||||
for format_item in data['vp']['tkl'][0]['vs']:
|
||||
@@ -179,20 +183,19 @@ class IqiyiIE(InfoExtractor):
|
||||
|
||||
def get_raw_data(self, tvid, video_id, enc_key, _uuid):
|
||||
tm = str(int(time.time()))
|
||||
tail = tm + tvid
|
||||
param = {
|
||||
'key': 'fvip',
|
||||
'src': hashlib.md5(b'youtube-dl').hexdigest(),
|
||||
'src': self.md5_text('youtube-dl'),
|
||||
'tvId': tvid,
|
||||
'vid': video_id,
|
||||
'vinfo': 1,
|
||||
'tm': tm,
|
||||
'enc': hashlib.md5(
|
||||
(enc_key + tm + tvid).encode('utf8')).hexdigest(),
|
||||
'enc': self.md5_text((enc_key + tail)[1:64:2] + tail),
|
||||
'qyid': _uuid,
|
||||
'tn': random.random(),
|
||||
'um': 0,
|
||||
'authkey': hashlib.md5(
|
||||
(tm + tvid).encode('utf8')).hexdigest()
|
||||
'authkey': self.md5_text(self.md5_text('') + tail),
|
||||
}
|
||||
|
||||
api_url = 'http://cache.video.qiyi.com/vms' + '?' + \
|
||||
@@ -201,7 +204,8 @@ class IqiyiIE(InfoExtractor):
|
||||
return raw_data
|
||||
|
||||
def get_enc_key(self, swf_url, video_id):
|
||||
enc_key = '3601ba290e4f4662848c710e2122007e' # last update at 2015-08-10 for Zombie
|
||||
# TODO: automatic key extraction
|
||||
enc_key = 'eac64f22daf001da6ba9aa8da4d501508bbe90a4d4091fea3b0582a85b38c2cc' # last update at 2015-09-23-23 for Zombie::bite
|
||||
return enc_key
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -1,46 +1,39 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class KeekIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<id>\w+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?keek\.com/keek/(?P<id>\w+)'
|
||||
IE_NAME = 'keek'
|
||||
_TEST = {
|
||||
'url': 'https://www.keek.com/ytdl/keeks/NODfbab',
|
||||
'md5': '09c5c109067536c1cec8bac8c21fea05',
|
||||
'url': 'https://www.keek.com/keek/NODfbab',
|
||||
'md5': '9b0636f8c0f7614afa4ea5e4c6e57e83',
|
||||
'info_dict': {
|
||||
'id': 'NODfbab',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'youtube-dl project',
|
||||
'uploader_id': 'ytdl',
|
||||
'title': 'test chars: "\'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de .',
|
||||
'title': 'md5:35d42050a3ece241d5ddd7fdcc6fd896',
|
||||
'uploader': 'ytdl',
|
||||
'uploader_id': 'eGT5bab',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video_url = 'http://cdn.keek.com/keek/video/%s' % video_id
|
||||
thumbnail = 'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
raw_desc = self._html_search_meta('description', webpage)
|
||||
if raw_desc:
|
||||
uploader = self._html_search_regex(
|
||||
r'Watch (.*?)\s+\(', raw_desc, 'uploader', fatal=False)
|
||||
uploader_id = self._html_search_regex(
|
||||
r'Watch .*?\(@(.+?)\)', raw_desc, 'uploader_id', fatal=False)
|
||||
else:
|
||||
uploader = None
|
||||
uploader_id = None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'url': self._og_search_video_url(webpage),
|
||||
'ext': 'mp4',
|
||||
'title': self._og_search_title(webpage),
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'title': self._og_search_description(webpage).strip(),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'uploader': self._search_regex(
|
||||
r'data-username=(["\'])(?P<uploader>.+?)\1', webpage,
|
||||
'uploader', fatal=False, group='uploader'),
|
||||
'uploader_id': self._search_regex(
|
||||
r'data-user-id=(["\'])(?P<uploader_id>.+?)\1', webpage,
|
||||
'uploader id', fatal=False, group='uploader_id'),
|
||||
}
|
||||
|
||||
@@ -57,6 +57,7 @@ class KuwoIE(KuwoBaseIE):
|
||||
'upload_date': '20080122',
|
||||
'description': 'md5:ed13f58e3c3bf3f7fd9fbc4e5a7aa75c'
|
||||
},
|
||||
'skip': 'this song has been offline because of copyright issues',
|
||||
}, {
|
||||
'url': 'http://www.kuwo.cn/yinyue/6446136/',
|
||||
'info_dict': {
|
||||
@@ -76,9 +77,11 @@ class KuwoIE(KuwoBaseIE):
|
||||
webpage = self._download_webpage(
|
||||
url, song_id, note='Download song detail info',
|
||||
errnote='Unable to get song detail info')
|
||||
if '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage:
|
||||
raise ExtractorError('this song has been offline because of copyright issues', expected=True)
|
||||
|
||||
song_name = self._html_search_regex(
|
||||
r'<h1[^>]+title="([^"]+)">', webpage, 'song name')
|
||||
r'(?s)class="(?:[^"\s]+\s+)*title(?:\s+[^"\s]+)*".*?<h1[^>]+title="([^"]+)"', webpage, 'song name')
|
||||
singer_name = self._html_search_regex(
|
||||
r'<div[^>]+class="s_img">\s*<a[^>]+title="([^>]+)"',
|
||||
webpage, 'singer name', fatal=False)
|
||||
|
||||
@@ -1,74 +1,85 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
encode_dict,
|
||||
get_element_by_attribute,
|
||||
parse_duration,
|
||||
strip_jsonp,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class MiTeleIE(InfoExtractor):
|
||||
IE_NAME = 'mitele.es'
|
||||
IE_DESC = 'mitele.es'
|
||||
_VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
|
||||
'md5': 'ace7635b2a0b286aaa37d3ff192d2a8a',
|
||||
'info_dict': {
|
||||
'id': '0fce117d',
|
||||
'ext': 'mp4',
|
||||
'title': 'Programa 144 - Tor, la web invisible',
|
||||
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
|
||||
'id': '0NF1jJnxS1Wu3pHrmvFyw2',
|
||||
'display_id': 'programa-144',
|
||||
'ext': 'flv',
|
||||
'title': 'Tor, la web invisible',
|
||||
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
|
||||
'thumbnail': 're:(?i)^https?://.*\.jpg$',
|
||||
'duration': 2913,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
episode = self._match_id(url)
|
||||
webpage = self._download_webpage(url, episode)
|
||||
embed_data_json = self._search_regex(
|
||||
r'(?s)MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
|
||||
).replace('\'', '"')
|
||||
embed_data = json.loads(embed_data_json)
|
||||
display_id = self._match_id(url)
|
||||
|
||||
domain = embed_data['mediaUrl']
|
||||
if not domain.startswith('http'):
|
||||
# only happens in telecinco.es videos
|
||||
domain = 'http://' + domain
|
||||
info_url = compat_urlparse.urljoin(
|
||||
domain,
|
||||
compat_urllib_parse_unquote(embed_data['flashvars']['host'])
|
||||
)
|
||||
info_el = self._download_xml(info_url, episode).find('./video/info')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_link = info_el.find('videoUrl/link').text
|
||||
token_query = compat_urllib_parse.urlencode({'id': video_link})
|
||||
token_info = self._download_json(
|
||||
embed_data['flashvars']['ov_tk'] + '?' + token_query,
|
||||
episode,
|
||||
transform_source=strip_jsonp
|
||||
)
|
||||
formats = self._extract_m3u8_formats(
|
||||
token_info['tokenizedUrl'], episode, ext='mp4')
|
||||
config_url = self._search_regex(
|
||||
r'data-config\s*=\s*"([^"]+)"', webpage, 'data config url')
|
||||
|
||||
config = self._download_json(
|
||||
config_url, display_id, 'Downloading config JSON')
|
||||
|
||||
mmc = self._download_json(
|
||||
config['services']['mmc'], display_id, 'Downloading mmc JSON')
|
||||
|
||||
formats = []
|
||||
for location in mmc['locations']:
|
||||
gat = self._proto_relative_url(location.get('gat'), 'http:')
|
||||
bas = location.get('bas')
|
||||
loc = location.get('loc')
|
||||
ogn = location.get('ogn')
|
||||
if None in (gat, bas, loc, ogn):
|
||||
continue
|
||||
token_data = {
|
||||
'bas': bas,
|
||||
'icd': loc,
|
||||
'ogn': ogn,
|
||||
'sta': '0',
|
||||
}
|
||||
media = self._download_json(
|
||||
'%s/?%s' % (gat, compat_urllib_parse.urlencode(encode_dict(token_data)).encode('utf-8')),
|
||||
display_id, 'Downloading %s JSON' % location['loc'])
|
||||
file_ = media.get('file')
|
||||
if not file_:
|
||||
continue
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
|
||||
display_id, f4m_id=loc))
|
||||
|
||||
title = self._search_regex(
|
||||
r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', webpage, 'title')
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-media-id\s*=\s*"([^"]+)"', webpage,
|
||||
'data media id', default=None) or display_id
|
||||
thumbnail = config.get('poster', {}).get('imageUrl')
|
||||
duration = int_or_none(mmc.get('duration'))
|
||||
|
||||
return {
|
||||
'id': embed_data['videoId'],
|
||||
'display_id': episode,
|
||||
'title': info_el.find('title').text,
|
||||
'formats': formats,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': get_element_by_attribute('class', 'text', webpage),
|
||||
'thumbnail': info_el.find('thumb').text,
|
||||
'duration': parse_duration(info_el.find('duration').text),
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -200,7 +200,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
if mgid is None or ':' not in mgid:
|
||||
mgid = self._search_regex(
|
||||
[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
|
||||
webpage, 'mgid')
|
||||
webpage, 'mgid', default=None)
|
||||
|
||||
if not mgid:
|
||||
sm4_embed = self._html_search_meta(
|
||||
'sm4:video:embed', webpage, 'sm4 embed', default='')
|
||||
mgid = self._search_regex(
|
||||
r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid')
|
||||
|
||||
videos_info = self._get_videos_info(mgid)
|
||||
return videos_info
|
||||
@@ -222,6 +228,13 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
|
||||
},
|
||||
}
|
||||
|
||||
@staticmethod
def _extract_url(webpage):
    """Find an embedded MTV Services player iframe in ``webpage``.

    Looks for an ``<iframe>`` whose quoted ``src`` attribute points at
    ``media.mtvnservices.com/embed/`` (``http:``, ``https:`` or
    protocol-relative).

    Returns the ``src`` value exactly as it appears in the markup, or
    ``None`` when no such iframe is found.
    """
    mobj = re.search(
        # The dots of the host name are escaped so that only the literal
        # host matches, not any character in their place.
        r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media\.mtvnservices\.com/embed/.+?)\1',
        webpage)
    return mobj.group('url') if mobj else None
|
||||
|
||||
def _get_feed_url(self, uri):
|
||||
video_id = self._id_from_uri(uri)
|
||||
site_id = uri.replace(video_id, '')
|
||||
|
||||
@@ -1,130 +1,380 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class NDRBaseIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
page = self._download_webpage(url, video_id, 'Downloading page')
|
||||
|
||||
title = self._og_search_title(page).strip()
|
||||
description = self._og_search_description(page)
|
||||
if description:
|
||||
description = description.strip()
|
||||
|
||||
duration = int_or_none(self._html_search_regex(r'duration: (\d+),\n', page, 'duration', default=None))
|
||||
if not duration:
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r'(<span class="min">\d+</span>:<span class="sec">\d+</span>)',
|
||||
page, 'duration', default=None))
|
||||
|
||||
formats = []
|
||||
|
||||
mp3_url = re.search(r'''\{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page)
|
||||
if mp3_url:
|
||||
formats.append({
|
||||
'url': mp3_url.group('audio'),
|
||||
'format_id': 'mp3',
|
||||
})
|
||||
|
||||
thumbnail = None
|
||||
|
||||
video_url = re.search(r'''3: \{src:'(?P<video>.+?)\.(lo|hi|hq)\.mp4', type:"video/mp4"},''', page)
|
||||
if video_url:
|
||||
thumbnails = re.findall(r'''\d+: \{src: "([^"]+)"(?: \|\| '[^']+')?, quality: '([^']+)'}''', page)
|
||||
if thumbnails:
|
||||
quality_key = qualities(['xs', 's', 'm', 'l', 'xl'])
|
||||
largest = max(thumbnails, key=lambda thumb: quality_key(thumb[1]))
|
||||
thumbnail = 'http://www.ndr.de' + largest[0]
|
||||
|
||||
for format_id in 'lo', 'hi', 'hq':
|
||||
formats.append({
|
||||
'url': '%s.%s.mp4' % (video_url.group('video'), format_id),
|
||||
'format_id': format_id,
|
||||
})
|
||||
|
||||
if not formats:
|
||||
raise ExtractorError('No media links available for %s' % video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
return self._extract_embed(webpage, display_id)
|
||||
|
||||
|
||||
class NDRIE(NDRBaseIE):
|
||||
IE_NAME = 'ndr'
|
||||
IE_DESC = 'NDR.de - Mediathek'
|
||||
_VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'
|
||||
IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
|
||||
_VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)+(?P<id>[^/?#]+),[\da-z]+\.html'
|
||||
_TESTS = [{
|
||||
# httpVideo, same content id
|
||||
'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
|
||||
'md5': '6515bc255dc5c5f8c85bbc38e035a659',
|
||||
'info_dict': {
|
||||
'id': 'hafengeburtstag988',
|
||||
'display_id': 'Party-Poette-und-Parade',
|
||||
'ext': 'mp4',
|
||||
'title': 'Party, Pötte und Parade',
|
||||
'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c',
|
||||
'uploader': 'ndrtv',
|
||||
'timestamp': 1431108900,
|
||||
'upload_date': '20150510',
|
||||
'duration': 3498,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# httpVideo, different content id
|
||||
'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html',
|
||||
'md5': '1043ff203eab307f0c51702ec49e9a71',
|
||||
'info_dict': {
|
||||
'id': 'osna272',
|
||||
'display_id': '40-Osnabrueck-spielt-sich-in-einen-Rausch',
|
||||
'ext': 'mp4',
|
||||
'title': 'Osnabrück - Wehen Wiesbaden: Die Highlights',
|
||||
'description': 'md5:32e9b800b3d2d4008103752682d5dc01',
|
||||
'uploader': 'ndrtv',
|
||||
'timestamp': 1442059200,
|
||||
'upload_date': '20150912',
|
||||
'duration': 510,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# httpAudio, same content id
|
||||
'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',
|
||||
'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
|
||||
'info_dict': {
|
||||
'id': 'audio51535',
|
||||
'display_id': 'La-Valette-entgeht-der-Hinrichtung',
|
||||
'ext': 'mp3',
|
||||
'title': 'La Valette entgeht der Hinrichtung',
|
||||
'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
|
||||
'uploader': 'ndrinfo',
|
||||
'timestamp': 1290626100,
|
||||
'upload_date': '20140729',
|
||||
'duration': 884,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.ndr.de/fernsehen/sendungen/nordmagazin/Kartoffeltage-in-der-Lewitz,nordmagazin25866.html',
|
||||
'md5': '5bc5f5b92c82c0f8b26cddca34f8bb2c',
|
||||
'note': 'Video file',
|
||||
'info_dict': {
|
||||
'id': '25866',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kartoffeltage in der Lewitz',
|
||||
'description': 'md5:48c4c04dde604c8a9971b3d4e3b9eaa8',
|
||||
'duration': 166,
|
||||
},
|
||||
'skip': '404 Not found',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
|
||||
'md5': 'dadc003c55ae12a5d2f6bd436cd73f59',
|
||||
'info_dict': {
|
||||
'id': '988',
|
||||
'ext': 'mp4',
|
||||
'title': 'Party, Pötte und Parade',
|
||||
'description': 'Hunderttausende feiern zwischen Speicherstadt und St. Pauli den 826. Hafengeburtstag. Die NDR Sondersendung zeigt die schönsten und spektakulärsten Bilder vom Auftakt.',
|
||||
'duration': 3498,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ndr.de/info/audio51535.html',
|
||||
'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
|
||||
'note': 'Audio file',
|
||||
'info_dict': {
|
||||
'id': '51535',
|
||||
'ext': 'mp3',
|
||||
'title': 'La Valette entgeht der Hinrichtung',
|
||||
'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
|
||||
'duration': 884,
|
||||
}
|
||||
def _extract_embed(self, webpage, display_id):
    """Build a ``url_transparent`` result that points at the page's embed URL.

    The ``embedURL`` meta value is looked up with ``fatal=True``; the
    ``itemprop="description"`` paragraph and the ``datePublished`` value
    are looked up non-fatally, the latter being passed to
    ``parse_iso8601``.
    """
    result = {'_type': 'url_transparent'}
    result['url'] = self._html_search_meta(
        'embedURL', webpage, 'embed URL', fatal=True)
    result['display_id'] = display_id
    result['description'] = self._search_regex(
        r'<p[^>]+itemprop="description">([^<]+)</p>',
        webpage, 'description', fatal=False)
    published = self._search_regex(
        r'<span itemprop="datePublished" content="([^"]+)">',
        webpage, 'upload date', fatal=False)
    result['timestamp'] = parse_iso8601(published)
    return result
|
||||
]
|
||||
|
||||
|
||||
class NJoyIE(NDRBaseIE):
|
||||
IE_NAME = 'N-JOY'
|
||||
_VALID_URL = r'https?://www\.n-joy\.de/.+?(?P<id>\d+)\.html'
|
||||
|
||||
_TEST = {
|
||||
IE_NAME = 'njoy'
|
||||
IE_DESC = 'N-JOY'
|
||||
_VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)+(?P<id>[^/?#]+),[\da-z]+\.html'
|
||||
_TESTS = [{
|
||||
# httpVideo, same content id
|
||||
'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html',
|
||||
'md5': 'cb63be60cd6f9dd75218803146d8dc67',
|
||||
'info_dict': {
|
||||
'id': '2480',
|
||||
'id': 'comedycontest2480',
|
||||
'display_id': 'Benaissa-beim-NDR-Comedy-Contest',
|
||||
'ext': 'mp4',
|
||||
'title': 'Benaissa beim NDR Comedy Contest',
|
||||
'description': 'Von seinem sehr "behaarten" Leben lässt sich Benaissa trotz aller Schwierigkeiten nicht unterkriegen.',
|
||||
'description': 'md5:f057a6c4e1c728b10d33b5ffd36ddc39',
|
||||
'uploader': 'ndrtv',
|
||||
'upload_date': '20141129',
|
||||
'duration': 654,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# httpVideo, different content id
|
||||
'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html',
|
||||
'md5': '417660fffa90e6df2fda19f1b40a64d8',
|
||||
'info_dict': {
|
||||
'id': 'dockville882',
|
||||
'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-',
|
||||
'ext': 'mp4',
|
||||
'title': '"Ich hab noch nie" mit Felix Jaehn',
|
||||
'description': 'md5:85dd312d53be1b99e1f998a16452a2f3',
|
||||
'uploader': 'njoy',
|
||||
'upload_date': '20150822',
|
||||
'duration': 211,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_embed(self, webpage, display_id):
    """Build a ``url_transparent`` result for the player embedded in the page.

    The embed id is taken from the ``<iframe id="pp_...">`` element and
    turned into an ``ndr:<id>`` URL handled by the ``NDREmbedBase``
    extractor; the description is looked up non-fatally from the paragraph
    following the ``subline`` div.
    """
    embed_id = self._search_regex(
        r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id')
    summary = self._search_regex(
        r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
        webpage, 'description', fatal=False)
    result = {
        '_type': 'url_transparent',
        'ie_key': 'NDREmbedBase',
    }
    result['url'] = 'ndr:%s' % embed_id
    result['display_id'] = display_id
    result['description'] = summary
    return result
|
||||
}
|
||||
|
||||
|
||||
class NDREmbedBaseIE(InfoExtractor):
    """Extract an NDR media item from its ``<id>-ppjson.json`` player descriptor.

    Matches either the internal ``ndr:<id>`` form or the ppjson URL itself.
    """

    IE_NAME = 'ndr:embed:base'
    _VALID_URL = r'(?:ndr:(?P<id_s>[\da-z]+)|https?://www\.ndr\.de/(?P<id>[\da-z]+)-ppjson\.json)'
    _TESTS = [{
        'url': 'ndr:soundcheck3366',
        'only_matching': True,
    }, {
        'url': 'http://www.ndr.de/soundcheck3366-ppjson.json',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the ppjson descriptor for *url* and build the info dict."""
        match = re.match(self._VALID_URL, url)
        # Only one of the two named groups can match, depending on the URL form.
        video_id = match.group('id') or match.group('id_s')

        ppjson = self._download_json(
            'http://www.ndr.de/%s-ppjson.json' % video_id, video_id)
        playlist = ppjson['playlist']

        quality_key = qualities(('xs', 's', 'm', 'l', 'xl'))
        formats = []
        for format_id, entry in playlist.items():
            src = entry.get('src')
            if not src:
                # Playlist entries without a 'src' contribute no format.
                continue
            ext = determine_ext(src, None)
            if ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    src + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, f4m_id='hds'))
                continue
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    src, video_id, m3u8_id='hls', entry_protocol='m3u8_native'))
                continue
            # Anything else is treated as a direct media URL.
            quality = entry.get('quality')
            fmt = {
                'url': src,
                'format_id': quality or format_id,
                'quality': quality_key(quality),
            }
            mime_type = entry.get('type')
            if mime_type and mime_type.split('/')[0] == 'audio':
                fmt['vcodec'] = 'none'
                # Audio sources without a recognisable extension default to mp3.
                fmt['ext'] = ext or 'mp3'
            formats.append(fmt)
        self._sort_formats(formats)

        config = playlist['config']
        is_live = config.get('streamType') in ['httpVideoLive', 'httpAudioLive']
        title = config['title']
        if is_live:
            title = self._live_title(title)

        # Branding and publication date come from the top-level config,
        # not from the playlist config used above.
        top_config = ppjson.get('config', {})
        uploader = top_config.get('branding')
        upload_date = top_config.get('publicationDate')
        duration = int_or_none(config.get('duration'))

        thumbnails = []
        for thumbnail_id, thumbnail in config.get('poster', {}).items():
            if not thumbnail.get('src'):
                continue
            thumbnails.append({
                'id': thumbnail.get('quality') or thumbnail_id,
                'url': thumbnail['src'],
                'preference': quality_key(thumbnail.get('quality')),
            })

        # A branding of '-' is reported as no uploader.
        if uploader == '-':
            uploader = None
        # Only the first eight characters of the publication date are kept.
        if upload_date:
            upload_date = upload_date[0:8]
        else:
            upload_date = None

        return {
            'id': video_id,
            'title': title,
            'is_live': is_live,
            'uploader': uploader,
            'upload_date': upload_date,
            'duration': duration,
            'thumbnails': thumbnails,
            'formats': formats,
        }
|
||||
|
||||
|
||||
class NDREmbedIE(NDREmbedBaseIE):
    """URL matcher for ndr.de ``-player.html`` / ``-externalPlayer.html`` pages.

    Defines only the URL pattern and test cases; no extraction logic is
    overridden here.
    """

    IE_NAME = 'ndr:embed'
    _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)+(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
    _TESTS = [
        # Full download checks.
        {
            'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
            'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
            'info_dict': {
                'id': 'ndraktuell28488',
                'ext': 'mp4',
                'title': 'Norddeutschland begrüßt Flüchtlinge',
                'is_live': False,
                'uploader': 'ndrtv',
                'upload_date': '20150907',
                'duration': 132,
            },
        },
        {
            'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html',
            'md5': '002085c44bae38802d94ae5802a36e78',
            'info_dict': {
                'id': 'soundcheck3366',
                'ext': 'mp4',
                'title': 'Ella Henderson braucht Vergleiche nicht zu scheuen',
                'is_live': False,
                'uploader': 'ndr2',
                'upload_date': '20150912',
                'duration': 3554,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ndr.de/info/audio51535-player.html',
            'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
            'info_dict': {
                'id': 'audio51535',
                'ext': 'mp3',
                'title': 'La Valette entgeht der Hinrichtung',
                'is_live': False,
                'uploader': 'ndrinfo',
                'upload_date': '20140729',
                'duration': 884,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ndr.de/fernsehen/sendungen/visite/visite11010-externalPlayer.html',
            'md5': 'ae57f80511c1e1f2fd0d0d3d31aeae7c',
            'info_dict': {
                'id': 'visite11010',
                'ext': 'mp4',
                'title': 'Visite - die ganze Sendung',
                'is_live': False,
                'uploader': 'ndrtv',
                'upload_date': '20150902',
                'duration': 3525,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # httpVideoLive
            'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html',
            'info_dict': {
                'id': 'livestream217',
                'ext': 'flv',
                'title': 're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
                'is_live': True,
                'upload_date': '20150910',
            },
            'params': {
                'skip_download': True,
            },
        },
        # URL-matching checks only.
        {
            'url': 'http://www.ndr.de/ndrkultur/audio255020-player.html',
            'only_matching': True,
        },
        {
            'url': 'http://www.ndr.de/fernsehen/sendungen/nordtour/nordtour7124-player.html',
            'only_matching': True,
        },
        {
            'url': 'http://www.ndr.de/kultur/film/videos/videoimport10424-player.html',
            'only_matching': True,
        },
        {
            'url': 'http://www.ndr.de/fernsehen/sendungen/hamburg_journal/hamj43006-player.html',
            'only_matching': True,
        },
        {
            'url': 'http://www.ndr.de/fernsehen/sendungen/weltbilder/weltbilder4518-player.html',
            'only_matching': True,
        },
        {
            'url': 'http://www.ndr.de/fernsehen/doku952-player.html',
            'only_matching': True,
        },
    ]
|
||||
|
||||
|
||||
class NJoyEmbedIE(NDREmbedBaseIE):
    """URL matcher for n-joy.de ``-player_<suffix>.html`` embed pages.

    Defines only the URL pattern and test cases; no extraction logic is
    overridden here.
    """

    IE_NAME = 'njoy:embed'
    _VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)+(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html'
    _TESTS = [
        {
            # httpVideo
            'url': 'http://www.n-joy.de/events/reeperbahnfestival/doku948-player_image-bc168e87-5263-4d6d-bd27-bb643005a6de_theme-n-joy.html',
            'md5': '8483cbfe2320bd4d28a349d62d88bd74',
            'info_dict': {
                'id': 'doku948',
                'ext': 'mp4',
                'title': 'Zehn Jahre Reeperbahn Festival - die Doku',
                'is_live': False,
                'upload_date': '20150807',
                'duration': 1011,
            },
        },
        {
            # httpAudio
            'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html',
            'md5': 'd989f80f28ac954430f7b8a48197188a',
            'info_dict': {
                'id': 'stefanrichter100',
                'ext': 'mp3',
                'title': 'Interview mit einem Augenzeugen',
                'is_live': False,
                'uploader': 'njoy',
                'upload_date': '20150909',
                'duration': 140,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # httpAudioLive, no explicit ext
            'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html',
            'info_dict': {
                'id': 'webradioweltweit100',
                'ext': 'mp3',
                'title': 're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
                'is_live': True,
                'uploader': 'njoy',
                'upload_date': '20150810',
            },
            'params': {
                'skip_download': True,
            },
        },
        # URL-matching checks only.
        {
            'url': 'http://www.n-joy.de/musik/dockville882-player_image-3905259e-0803-4764-ac72-8b7de077d80a_theme-n-joy.html',
            'only_matching': True,
        },
        {
            'url': 'http://www.n-joy.de/radio/sendungen/morningshow/urlaubsfotos190-player_image-066a5df1-5c95-49ec-a323-941d848718db_theme-n-joy.html',
            'only_matching': True,
        },
        {
            'url': 'http://www.n-joy.de/entertainment/comedy/krudetv290-player_image-ab261bfe-51bf-4bf3-87ba-c5122ee35b3d_theme-n-joy.html',
            'only_matching': True,
        },
    ]
|
||||
|
||||
@@ -16,53 +16,104 @@ from ..utils import (
|
||||
|
||||
class NFLIE(InfoExtractor):
|
||||
IE_NAME = 'nfl.com'
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?P<host>(?:www\.)?(?:nfl\.com|.*?\.clubs\.nfl\.com))/
|
||||
(?:.+?/)*
|
||||
(?P<id>(?:[a-z0-9]{16}|\w{8}\-(?:\w{4}\-){3}\w{12}))'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
|
||||
'md5': '394ef771ddcd1354f665b471d78ec4c6',
|
||||
'info_dict': {
|
||||
'id': '0ap3000000398478',
|
||||
'ext': 'mp4',
|
||||
'title': 'Week 3: Redskins vs. Eagles highlights',
|
||||
'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
|
||||
'upload_date': '20140921',
|
||||
'timestamp': 1411337580,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266',
|
||||
'md5': 'cf85bdb4bc49f6e9d3816d130c78279c',
|
||||
'info_dict': {
|
||||
'id': '9d72f26a-9e2b-4718-84d3-09fb4046c266',
|
||||
'ext': 'mp4',
|
||||
'title': 'LIVE: Post Game vs. Browns',
|
||||
'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8',
|
||||
'upload_date': '20131229',
|
||||
'timestamp': 1388354455,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish',
|
||||
'info_dict': {
|
||||
'id': '0ap3000000467607',
|
||||
'ext': 'mp4',
|
||||
'title': 'Frustrations flare on the field',
|
||||
'description': 'Emotions ran high at the end of the Super Bowl on both sides of the ball after a dramatic finish.',
|
||||
'timestamp': 1422850320,
|
||||
'upload_date': '20150202',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood',
|
||||
'only_matching': True,
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?P<host>
|
||||
(?:www\.)?
|
||||
(?:
|
||||
(?:
|
||||
nfl|
|
||||
buffalobills|
|
||||
miamidolphins|
|
||||
patriots|
|
||||
newyorkjets|
|
||||
baltimoreravens|
|
||||
bengals|
|
||||
clevelandbrowns|
|
||||
steelers|
|
||||
houstontexans|
|
||||
colts|
|
||||
jaguars|
|
||||
titansonline|
|
||||
denverbroncos|
|
||||
kcchiefs|
|
||||
raiders|
|
||||
chargers|
|
||||
dallascowboys|
|
||||
giants|
|
||||
philadelphiaeagles|
|
||||
redskins|
|
||||
chicagobears|
|
||||
detroitlions|
|
||||
packers|
|
||||
vikings|
|
||||
atlantafalcons|
|
||||
panthers|
|
||||
neworleanssaints|
|
||||
buccaneers|
|
||||
azcardinals|
|
||||
stlouisrams|
|
||||
49ers|
|
||||
seahawks
|
||||
)\.com|
|
||||
.+?\.clubs\.nfl\.com
|
||||
)
|
||||
)/
|
||||
(?:.+?/)*
|
||||
(?P<id>[^/#?&]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
|
||||
'md5': '394ef771ddcd1354f665b471d78ec4c6',
|
||||
'info_dict': {
|
||||
'id': '0ap3000000398478',
|
||||
'ext': 'mp4',
|
||||
'title': 'Week 3: Redskins vs. Eagles highlights',
|
||||
'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
|
||||
'upload_date': '20140921',
|
||||
'timestamp': 1411337580,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
]
|
||||
}, {
|
||||
'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266',
|
||||
'md5': 'cf85bdb4bc49f6e9d3816d130c78279c',
|
||||
'info_dict': {
|
||||
'id': '9d72f26a-9e2b-4718-84d3-09fb4046c266',
|
||||
'ext': 'mp4',
|
||||
'title': 'LIVE: Post Game vs. Browns',
|
||||
'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8',
|
||||
'upload_date': '20131229',
|
||||
'timestamp': 1388354455,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish',
|
||||
'info_dict': {
|
||||
'id': '0ap3000000467607',
|
||||
'ext': 'mp4',
|
||||
'title': 'Frustrations flare on the field',
|
||||
'description': 'Emotions ran high at the end of the Super Bowl on both sides of the ball after a dramatic finish.',
|
||||
'timestamp': 1422850320,
|
||||
'upload_date': '20150202',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.patriots.com/video/2015/09/18/10-days-gillette',
|
||||
'md5': '4c319e2f625ffd0b481b4382c6fc124c',
|
||||
'info_dict': {
|
||||
'id': 'n-238346',
|
||||
'ext': 'mp4',
|
||||
'title': '10 Days at Gillette',
|
||||
'description': 'md5:8cd9cd48fac16de596eadc0b24add951',
|
||||
'timestamp': 1442618809,
|
||||
'upload_date': '20150918',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.buffalobills.com/video/videos/Rex_Ryan_Show_World_Wide_Rex/b1dcfab2-3190-4bb1-bfc0-d6e603d6601a',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def prepend_host(host, url):
|
||||
@@ -95,13 +146,14 @@ class NFLIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
config_url = NFLIE.prepend_host(host, self._search_regex(
|
||||
r'(?:config|configURL)\s*:\s*"([^"]+)"', webpage, 'config URL',
|
||||
default='static/content/static/config/video/config.json'))
|
||||
r'(?:(?:config|configURL)\s*:\s*|<nflcs:avplayer[^>]+data-config\s*=\s*)(["\'])(?P<config>.+?)\1',
|
||||
webpage, 'config URL', default='static/content/static/config/video/config.json',
|
||||
group='config'))
|
||||
# For articles, the id in the url is not the video id
|
||||
video_id = self._search_regex(
|
||||
r'contentId\s*:\s*"([^"]+)"', webpage, 'video id', default=video_id)
|
||||
config = self._download_json(config_url, video_id,
|
||||
note='Downloading player config')
|
||||
r'(?:<nflcs:avplayer[^>]+data-contentId\s*=\s*|contentId\s*:\s*)(["\'])(?P<id>.+?)\1',
|
||||
webpage, 'video id', default=video_id, group='id')
|
||||
config = self._download_json(config_url, video_id, 'Downloading player config')
|
||||
url_template = NFLIE.prepend_host(
|
||||
host, '{contentURLTemplate:}'.format(**config))
|
||||
video_data = self._download_json(
|
||||
|
||||
@@ -72,7 +72,7 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
class NHLIE(NHLBaseInfoExtractor):
|
||||
IE_NAME = 'nhl.com'
|
||||
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console)?(?:\?(?:.*?[?&])?)(?:id|hlg)=(?P<id>[-0-9a-zA-Z,]+)'
|
||||
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console|embed)?(?:\?(?:.*?[?&])?)(?:id|hlg|playlist)=(?P<id>[-0-9a-zA-Z,]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
|
||||
@@ -136,6 +136,9 @@ class NHLIE(NHLBaseInfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True, # Requires rtmpdump
|
||||
}
|
||||
}, {
|
||||
'url': 'http://video.nhl.com/videocenter/embed?playlist=836127',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -146,9 +149,9 @@ class NHLIE(NHLBaseInfoExtractor):
|
||||
class NHLNewsIE(NHLBaseInfoExtractor):
|
||||
IE_NAME = 'nhl.com:news'
|
||||
IE_DESC = 'NHL news'
|
||||
_VALID_URL = r'https?://(?:www\.)?nhl\.com/ice/news\.html?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
|
||||
_VALID_URL = r'https?://(?:.+?\.)?nhl\.com/(?:ice|club)/news\.html?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nhl.com/ice/news.htm?id=750727',
|
||||
'md5': '4b3d1262e177687a3009937bd9ec0be8',
|
||||
'info_dict': {
|
||||
@@ -159,13 +162,26 @@ class NHLNewsIE(NHLBaseInfoExtractor):
|
||||
'duration': 37,
|
||||
'upload_date': '20150128',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# iframe embed
|
||||
'url': 'http://sabres.nhl.com/club/news.htm?id=780189',
|
||||
'md5': '9f663d1c006c90ac9fb82777d4294e12',
|
||||
'info_dict': {
|
||||
'id': '836127',
|
||||
'ext': 'mp4',
|
||||
'title': 'Morning Skate: OTT vs. BUF (9/23/15)',
|
||||
'description': "Brian Duff chats with Tyler Ennis prior to Buffalo's first preseason home game.",
|
||||
'duration': 93,
|
||||
'upload_date': '20150923',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
news_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, news_id)
|
||||
video_id = self._search_regex(
|
||||
[r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'"],
|
||||
[r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'",
|
||||
r'<iframe[^>]+src=["\']https?://video.*?\.nhl\.com/videocenter/embed\?.*\bplaylist=(\d+)'],
|
||||
webpage, 'video id')
|
||||
return self._real_extract_video(video_id)
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import str_to_int
|
||||
@@ -9,61 +8,93 @@ from ..utils import str_to_int
|
||||
|
||||
class NineGagIE(InfoExtractor):
|
||||
IE_NAME = '9gag'
|
||||
_VALID_URL = r'''(?x)^https?://(?:www\.)?9gag\.tv/
|
||||
(?:
|
||||
v/(?P<numid>[0-9]+)|
|
||||
p/(?P<id>[a-zA-Z0-9]+)/(?P<display_id>[^?#/]+)
|
||||
)
|
||||
'''
|
||||
_VALID_URL = r'https?://(?:www\.)?9gag(?:\.com/tv|\.tv)/(?:p|embed)/(?P<id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^?#/]+))?'
|
||||
|
||||
_TESTS = [{
|
||||
"url": "http://9gag.tv/v/1912",
|
||||
"info_dict": {
|
||||
"id": "1912",
|
||||
"ext": "mp4",
|
||||
"description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
|
||||
"title": "\"People Are Awesome 2013\" Is Absolutely Awesome",
|
||||
'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome',
|
||||
'info_dict': {
|
||||
'id': 'Kk2X5',
|
||||
'ext': 'mp4',
|
||||
'description': 'This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)',
|
||||
'title': '\"People Are Awesome 2013\" Is Absolutely Awesome',
|
||||
'uploader_id': 'UCdEH6EjDKwtTe-sO2f0_1XA',
|
||||
'uploader': 'CompilationChannel',
|
||||
'upload_date': '20131110',
|
||||
"view_count": int,
|
||||
"thumbnail": "re:^https?://",
|
||||
'view_count': int,
|
||||
},
|
||||
'add_ie': ['Youtube']
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar',
|
||||
'url': 'http://9gag.com/tv/p/aKolP3',
|
||||
'info_dict': {
|
||||
'id': 'KklwM',
|
||||
'id': 'aKolP3',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'alternate-banned-opening-scene-of-gravity',
|
||||
"description": "While Gravity was a pretty awesome movie already, YouTuber Krishna Shenoi came up with a way to improve upon it, introducing a much better solution to Sandra Bullock's seemingly endless tumble in space. The ending is priceless.",
|
||||
'title': "Banned Opening Scene Of \"Gravity\" That Changes The Whole Movie",
|
||||
'uploader': 'Krishna Shenoi',
|
||||
'upload_date': '20140401',
|
||||
'uploader_id': 'krishnashenoi93',
|
||||
'title': 'This Guy Travelled 11 countries In 44 days Just To Make This Amazing Video',
|
||||
'description': "I just saw more in 1 minute than I've seen in 1 year. This guy's video is epic!!",
|
||||
'uploader_id': 'rickmereki',
|
||||
'uploader': 'Rick Mereki',
|
||||
'upload_date': '20110803',
|
||||
'view_count': int,
|
||||
},
|
||||
'add_ie': ['Vimeo'],
|
||||
}, {
|
||||
'url': 'http://9gag.com/tv/p/KklwM',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://9gag.tv/p/Kk2X5',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://9gag.com/tv/embed/a5Dmvl',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_EXTERNAL_VIDEO_PROVIDER = {
|
||||
'1': {
|
||||
'url': '%s',
|
||||
'ie_key': 'Youtube',
|
||||
},
|
||||
'2': {
|
||||
'url': 'http://player.vimeo.com/video/%s',
|
||||
'ie_key': 'Vimeo',
|
||||
},
|
||||
'3': {
|
||||
'url': 'http://instagram.com/p/%s',
|
||||
'ie_key': 'Instagram',
|
||||
},
|
||||
'4': {
|
||||
'url': 'http://vine.co/v/%s',
|
||||
'ie_key': 'Vine',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('numid') or mobj.group('id')
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
post_view = json.loads(self._html_search_regex(
|
||||
r'var postView = new app\.PostView\({\s*post:\s*({.+?}),\s*posts:\s*prefetchedCurrentPost', webpage, 'post view'))
|
||||
post_view = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+postView\s*=\s*new\s+app\.PostView\({\s*post:\s*({.+?})\s*,\s*posts:\s*prefetchedCurrentPost',
|
||||
webpage, 'post view'),
|
||||
display_id)
|
||||
|
||||
youtube_id = post_view['videoExternalId']
|
||||
ie_key = None
|
||||
source_url = post_view.get('sourceUrl')
|
||||
if not source_url:
|
||||
external_video_id = post_view['videoExternalId']
|
||||
external_video_provider = post_view['videoExternalProvider']
|
||||
source_url = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['url'] % external_video_id
|
||||
ie_key = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['ie_key']
|
||||
title = post_view['title']
|
||||
description = post_view['description']
|
||||
view_count = str_to_int(post_view['externalView'])
|
||||
description = post_view.get('description')
|
||||
view_count = str_to_int(post_view.get('externalView'))
|
||||
thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': youtube_id,
|
||||
'ie_key': 'Youtube',
|
||||
'url': source_url,
|
||||
'ie_key': ie_key,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
|
||||
@@ -1,64 +1,134 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .brightcove import BrightcoveIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_request,
|
||||
)
|
||||
|
||||
|
||||
class NownessIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/[^?#]*?/(?P<id>[0-9]+)/(?P<slug>[^/]+?)(?:$|[?#])'
|
||||
class NownessBaseIE(InfoExtractor):
|
||||
def _extract_url_result(self, post):
    """Map a Nowness API post to a url result for the hosting extractor.

    Returns the result for the first video media item whose source is
    brightcove, vimeo or youtube. Returns None when the post is not a
    video or when no media item has one of those sources.

    Raises ExtractorError when a brightcove item's player page contains no
    Brightcove URL.
    """
    if post['type'] != 'video':
        return None

    for media in post['media']:
        if media['type'] != 'video':
            continue

        content_id = media['content']
        provider = media['source']

        if provider == 'vimeo':
            return self.url_result('http://vimeo.com/%s' % content_id, 'Vimeo')

        if provider == 'youtube':
            return self.url_result(content_id, 'Youtube')

        if provider == 'brightcove':
            # The Brightcove URL is only available inside the iframe player page.
            player_code = self._download_webpage(
                'http://www.nowness.com/iframe?id=%s' % content_id, content_id,
                note='Downloading player JavaScript',
                errnote='Unable to download player JavaScript')
            bc_url = BrightcoveIE._extract_brightcove_url(player_code)
            if bc_url is None:
                raise ExtractorError('Could not find player definition')
            return self.url_result(bc_url, 'Brightcove')

        # 'cinematique' (and any other source) is not supported; the original
        # note reads: youtube-dl currently doesn't support cinematique, so the
        # loop moves on to the next media item.

    return None
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation',
|
||||
'md5': '068bc0202558c2e391924cb8cc470676',
|
||||
'info_dict': {
|
||||
'id': '2520295746001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Candor: The Art of Gesticulation',
|
||||
'description': 'Candor: The Art of Gesticulation',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'uploader': 'Nowness',
|
||||
}
|
||||
def _api_request(self, url, request_path):
    """Call the Nowness API for the item identified by *url*.

    *request_path* is a ``%``-template that receives the id matched from
    *url*. Returns a ``(display_id, parsed_json)`` tuple.
    """
    display_id = self._match_id(url)

    # The language header is chosen from the host name in the page URL.
    if 'cn.nowness.com' in url:
        language = 'zh-cn'
    else:
        language = 'en-us'

    api_url = 'http://api.nowness.com/api/' + (request_path % display_id)
    request = compat_urllib_request.Request(
        api_url, headers={'X-Nowness-Language': language})

    payload = self._download_json(request, display_id)
    return display_id, payload
|
||||
|
||||
|
||||
class NownessIE(NownessBaseIE):
|
||||
IE_NAME = 'nowness'
|
||||
_VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/(?:story|(?:series|category)/[^/]+)/(?P<id>[^/]+?)(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nowness.com/story/candor-the-art-of-gesticulation',
|
||||
'md5': '068bc0202558c2e391924cb8cc470676',
|
||||
'info_dict': {
|
||||
'id': '2520295746001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Candor: The Art of Gesticulation',
|
||||
'description': 'Candor: The Art of Gesticulation',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'uploader': 'Nowness',
|
||||
},
|
||||
{
|
||||
'url': 'http://cn.nowness.com/day/2014/8/7/4069/kasper-bj-rke-ft-jaakko-eino-kalevi--tnr',
|
||||
'md5': 'e79cf125e387216f86b2e0a5b5c63aa3',
|
||||
'info_dict': {
|
||||
'id': '3716354522001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
|
||||
'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'uploader': 'Nowness',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://cn.nowness.com/story/kasper-bjorke-ft-jaakko-eino-kalevi-tnr',
|
||||
'md5': 'e79cf125e387216f86b2e0a5b5c63aa3',
|
||||
'info_dict': {
|
||||
'id': '3716354522001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
|
||||
'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'uploader': 'Nowness',
|
||||
},
|
||||
]
|
||||
}, {
|
||||
# vimeo
|
||||
'url': 'https://www.nowness.com/series/nowness-picks/jean-luc-godard-supercut',
|
||||
'md5': '9a5a6a8edf806407e411296ab6bc2a49',
|
||||
'info_dict': {
|
||||
'id': '130020913',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bleu, Blanc, Rouge - A Godard Supercut',
|
||||
'description': 'md5:f0ea5f1857dffca02dbd37875d742cec',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'upload_date': '20150607',
|
||||
'uploader': 'Cinema Sem Lei',
|
||||
'uploader_id': 'cinemasemlei',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('slug')
|
||||
_, post = self._api_request(url, 'post/getBySlug/%s')
|
||||
return self._extract_url_result(post)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_url = self._search_regex(
|
||||
r'"([^"]+/content/issue-[0-9.]+.js)"', webpage, 'player URL')
|
||||
real_id = self._search_regex(
|
||||
r'\sdata-videoId="([0-9]+)"', webpage, 'internal video ID')
|
||||
|
||||
player_code = self._download_webpage(
|
||||
player_url, video_id,
|
||||
note='Downloading player JavaScript',
|
||||
errnote='Player download failed')
|
||||
player_code = player_code.replace("'+d+'", real_id)
|
||||
class NownessPlaylistIE(NownessBaseIE):
|
||||
IE_NAME = 'nowness:playlist'
|
||||
_VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/playlist/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.nowness.com/playlist/3286/i-guess-thats-why-they-call-it-the-blues',
|
||||
'info_dict': {
|
||||
'id': '3286',
|
||||
},
|
||||
'playlist_mincount': 8,
|
||||
}
|
||||
|
||||
bc_url = BrightcoveIE._extract_brightcove_url(player_code)
|
||||
if bc_url is None:
|
||||
raise ExtractorError('Could not find player definition')
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': bc_url,
|
||||
'ie_key': 'Brightcove',
|
||||
}
|
||||
def _real_extract(self, url):
    """Return a playlist result for the Nowness playlist at *url*.

    Each item of the API response is converted with
    ``_extract_url_result``. That helper returns None for posts that are
    not videos or whose media source is unsupported; such items are left
    out so the playlist never contains None entries.
    """
    playlist_id, playlist = self._api_request(url, 'post?PlaylistId=%s')
    entries = []
    for item in playlist['items']:
        entry = self._extract_url_result(item)
        if entry is not None:
            entries.append(entry)
    return self.playlist_result(entries, playlist_id)
|
||||
|
||||
|
||||
class NownessSeriesIE(NownessBaseIE):
    """Extract all posts of a Nowness series as a playlist."""

    IE_NAME = 'nowness:series'
    _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/series/(?P<id>[^/]+?)(?:$|[?#])'
    _TEST = {
        'url': 'https://www.nowness.com/series/60-seconds',
        'info_dict': {
            'id': '60',
            'title': '60 Seconds',
            'description': 'One-minute wisdom in a new NOWNESS series',
        },
        'playlist_mincount': 4,
    }

    def _real_extract(self, url):
        """Return a playlist result built from the series' posts.

        Title and description are taken from the first entry of the
        ``translations`` list when present; both are optional.
        """
        _, series = self._api_request(url, 'series/getBySlug/%s')

        # _extract_url_result returns None for posts it cannot map to a
        # supported source; keep those out of the playlist entries.
        entries = []
        for post in series['posts']:
            entry = self._extract_url_result(post)
            if entry is not None:
                entries.append(entry)

        series_title = None
        series_description = None
        translations = series.get('translations', [])
        if translations:
            first = translations[0]
            # Title is optional metadata, so a missing 'seoTitle' fallback
            # yields None instead of raising KeyError.
            series_title = first.get('title') or first.get('seoTitle')
            series_description = first.get('seoDescription')

        return self.playlist_result(
            entries, compat_str(series['id']), series_title, series_description)
|
||||
|
||||
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
int_or_none,
|
||||
qualities,
|
||||
@@ -28,6 +29,7 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'age_limit': 0,
|
||||
},
|
||||
'skip': 'Video has been blocked',
|
||||
}, {
|
||||
# metadataUrl
|
||||
'url': 'http://ok.ru/video/63567059965189-0',
|
||||
@@ -72,6 +74,12 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
'http://ok.ru/video/%s' % video_id, video_id)
|
||||
|
||||
error = self._search_regex(
|
||||
r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
|
||||
webpage, 'error', default=None)
|
||||
if error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
|
||||
player = self._parse_json(
|
||||
unescapeHTML(self._search_regex(
|
||||
r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id,
|
||||
|
||||
@@ -1,70 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote_plus
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
parse_age_limit,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class OpenFilmIE(InfoExtractor):
    """Extractor for openfilm.com video pages."""

    # 'www.' is optional and both http and https are accepted; the previous
    # pattern had '(?:www\.)' without a quantifier, which made it mandatory.
    _VALID_URL = r'https?://(?:www\.)?openfilm\.com/videos/(?P<id>.+)'
    _TEST = {
        'url': 'http://www.openfilm.com/videos/human-resources-remastered',
        'md5': '42bcd88c2f3ec13b65edf0f8ad1cac37',
        'info_dict': {
            'id': '32736',
            'display_id': 'human-resources-remastered',
            'ext': 'mp4',
            'title': 'Human Resources (Remastered)',
            'description': 'Social Engineering in the 20th Century.',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 7164,
            'timestamp': 1334756988,
            'upload_date': '20120418',
            'uploader_id': '41117',
            'view_count': int,
            'age_limit': 0,
        },
    }

    def _real_extract(self, url):
        """Extract the video described by the JSON embedded in the player URL."""
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # The player URL carries the video metadata URL-encoded in its 'p'
        # parameter, so it is unquoted before the JSON object is cut out.
        player = compat_urllib_parse_unquote_plus(
            self._og_search_video_url(webpage))

        video = json.loads(self._search_regex(
            r'\bp=({.+?})(?:&|$)', player, 'video JSON'))

        # 'location' is the only mandatory field; the media URL is built from it.
        video_url = '%s1.mp4' % video['location']
        video_id = video.get('video_id')
        # Prefer the alias from the metadata over the id matched from the URL.
        display_id = video.get('alias') or display_id
        title = video.get('title')
        description = video.get('description')
        thumbnail = video.get('main_thumb')
        duration = int_or_none(video.get('duration'))
        # NOTE(review): the ' ' argument is presumably the date/time delimiter - confirm.
        timestamp = parse_iso8601(video.get('dt_published'), ' ')
        uploader_id = video.get('user_id')
        view_count = int_or_none(video.get('views_count'))
        age_limit = parse_age_limit(video.get('age_limit'))

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'uploader_id': uploader_id,
            'view_count': view_count,
            'age_limit': age_limit,
        }
|
||||
@@ -19,7 +19,7 @@ class PlaywireIE(InfoExtractor):
|
||||
'id': '3353705',
|
||||
'ext': 'mp4',
|
||||
'title': 'S04_RM_UCL_Rus',
|
||||
'thumbnail': 're:^http://.*\.png$',
|
||||
'thumbnail': 're:^https?://.*\.png$',
|
||||
'duration': 145.94,
|
||||
},
|
||||
}, {
|
||||
|
||||
@@ -20,7 +20,7 @@ from ..aes import (
|
||||
|
||||
|
||||
class PornHubIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)(?P<id>[0-9a-z]+)'
|
||||
_VALID_URL = r'https?://(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)(?P<id>[0-9a-z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
|
||||
'md5': '882f488fa1f0026f023f33576004a2ed',
|
||||
@@ -34,6 +34,9 @@ class PornHubIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -25,7 +25,7 @@ class QQMusicIE(InfoExtractor):
|
||||
'id': '004295Et37taLD',
|
||||
'ext': 'mp3',
|
||||
'title': '可惜没如果',
|
||||
'upload_date': '20141227',
|
||||
'release_date': '20141227',
|
||||
'creator': '林俊杰',
|
||||
'description': 'md5:d327722d0361576fde558f1ac68a7065',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
@@ -38,11 +38,26 @@ class QQMusicIE(InfoExtractor):
|
||||
'id': '004MsGEo3DdNxV',
|
||||
'ext': 'mp3',
|
||||
'title': '如果',
|
||||
'upload_date': '20050626',
|
||||
'release_date': '20050626',
|
||||
'creator': '李季美',
|
||||
'description': 'md5:46857d5ed62bc4ba84607a805dccf437',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}, {
|
||||
'note': 'lyrics not in .lrc format',
|
||||
'url': 'http://y.qq.com/#type=song&mid=001JyApY11tIp6',
|
||||
'info_dict': {
|
||||
'id': '001JyApY11tIp6',
|
||||
'ext': 'mp3',
|
||||
'title': 'Shadows Over Transylvania',
|
||||
'release_date': '19970225',
|
||||
'creator': 'Dark Funeral',
|
||||
'description': 'md5:ed14d5bd7ecec19609108052c25b2c11',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
_FORMATS = {
|
||||
@@ -112,15 +127,27 @@ class QQMusicIE(InfoExtractor):
|
||||
self._check_formats(formats, mid)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
actual_lrc_lyrics = ''.join(
|
||||
line + '\n' for line in re.findall(
|
||||
r'(?m)^(\[[0-9]{2}:[0-9]{2}(?:\.[0-9]{2,})?\][^\n]*|\[[^\]]*\])', lrc_content))
|
||||
|
||||
info_dict = {
|
||||
'id': mid,
|
||||
'formats': formats,
|
||||
'title': song_name,
|
||||
'upload_date': publish_time,
|
||||
'release_date': publish_time,
|
||||
'creator': singer,
|
||||
'description': lrc_content,
|
||||
'thumbnail': thumbnail_url,
|
||||
'thumbnail': thumbnail_url
|
||||
}
|
||||
if actual_lrc_lyrics:
|
||||
info_dict['subtitles'] = {
|
||||
'origin': [{
|
||||
'ext': 'lrc',
|
||||
'data': actual_lrc_lyrics,
|
||||
}]
|
||||
}
|
||||
return info_dict
|
||||
|
||||
|
||||
class QQPlaylistBaseIE(InfoExtractor):
|
||||
|
||||
@@ -5,6 +5,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
@@ -72,6 +73,18 @@ class RaiIE(InfoExtractor):
|
||||
'description': 'Primo appuntamento con "Il candidato" con Filippo Timi, alias Piero Zucca presidente!',
|
||||
'uploader': 'RaiTre',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
|
||||
'md5': '037104d2c14132887e5e4cf114569214',
|
||||
'info_dict': {
|
||||
'id': '0c7a664b-d0f4-4b2c-8835-3f82e46f433e',
|
||||
'ext': 'flv',
|
||||
'title': 'Il pacco',
|
||||
'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
|
||||
'uploader': 'RaiTre',
|
||||
'upload_date': '20141221',
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
@@ -90,11 +103,14 @@ class RaiIE(InfoExtractor):
|
||||
relinker_url = self._extract_relinker_url(webpage)
|
||||
|
||||
if not relinker_url:
|
||||
iframe_path = self._search_regex(
|
||||
r'<iframe[^>]+src="/?(dl/[^"]+\?iframe\b[^"]*)"',
|
||||
iframe_url = self._search_regex(
|
||||
[r'<iframe[^>]+src="([^"]*/dl/[^"]+\?iframe\b[^"]*)"',
|
||||
r'drawMediaRaiTV\(["\'](.+?)["\']'],
|
||||
webpage, 'iframe')
|
||||
if not iframe_url.startswith('http'):
|
||||
iframe_url = compat_urlparse.urljoin(url, iframe_url)
|
||||
webpage = self._download_webpage(
|
||||
'%s/%s' % (host, iframe_path), video_id)
|
||||
iframe_url, video_id)
|
||||
relinker_url = self._extract_relinker_url(webpage)
|
||||
|
||||
relinker = self._download_json(
|
||||
|
||||
@@ -16,7 +16,7 @@ class ShahidIE(InfoExtractor):
|
||||
'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html',
|
||||
'info_dict': {
|
||||
'id': '90574',
|
||||
'ext': 'm3u8',
|
||||
'ext': 'mp4',
|
||||
'title': 'الملك عبدالله الإنسان الموسم 1 كليب 3',
|
||||
'description': 'الفيلم الوثائقي - الملك عبد الله الإنسان',
|
||||
'duration': 2972,
|
||||
@@ -81,7 +81,7 @@ class ShahidIE(InfoExtractor):
|
||||
compat_urllib_parse.urlencode({
|
||||
'apiKey': 'sh@hid0nlin3',
|
||||
'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
|
||||
}).encode('utf-8')),
|
||||
})),
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
video = video[api_vars['playerType']]
|
||||
|
||||
@@ -113,7 +113,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
},
|
||||
]
|
||||
|
||||
_CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
|
||||
_CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea'
|
||||
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
|
||||
|
||||
def report_resolve(self, video_id):
|
||||
|
||||
@@ -1,24 +1,51 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .mitele import MiTeleIE
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
get_element_by_attribute,
|
||||
parse_duration,
|
||||
strip_jsonp,
|
||||
)
|
||||
|
||||
|
||||
class TelecincoIE(MiTeleIE):
|
||||
IE_NAME = 'telecinco.es'
|
||||
_VALID_URL = r'https?://www\.telecinco\.es/(?:[^/]+/)+(?P<id>.+?)\.html'
|
||||
class TelecincoIE(InfoExtractor):
|
||||
IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
|
||||
_VALID_URL = r'https?://www\.(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
|
||||
'md5': '5cbef3ad5ef17bf0d21570332d140729',
|
||||
'info_dict': {
|
||||
'id': 'MDSVID20141015_0058',
|
||||
'ext': 'mp4',
|
||||
'title': 'Con Martín Berasategui, hacer un bacalao al ...',
|
||||
'duration': 662,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}, {
|
||||
'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
|
||||
'md5': '0a5b9f3cc8b074f50a0578f823a12694',
|
||||
'info_dict': {
|
||||
'id': 'MDSVID20150916_0128',
|
||||
'ext': 'mp4',
|
||||
'title': '¿Quién es este ex futbolista con el que hablan ...',
|
||||
'duration': 79,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
|
||||
'md5': 'ad1bfaaba922dd4a295724b05b68f86a',
|
||||
'info_dict': {
|
||||
'id': 'MDSVID20150513_0220',
|
||||
'ext': 'mp4',
|
||||
'title': '#DOYLACARA. Con la trata no hay trato',
|
||||
'duration': 50,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
|
||||
@@ -27,3 +54,41 @@ class TelecincoIE(MiTeleIE):
|
||||
'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
episode = self._match_id(url)
|
||||
webpage = self._download_webpage(url, episode)
|
||||
embed_data_json = self._search_regex(
|
||||
r'(?s)MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
|
||||
).replace('\'', '"')
|
||||
embed_data = json.loads(embed_data_json)
|
||||
|
||||
domain = embed_data['mediaUrl']
|
||||
if not domain.startswith('http'):
|
||||
# only happens in telecinco.es videos
|
||||
domain = 'http://' + domain
|
||||
info_url = compat_urlparse.urljoin(
|
||||
domain,
|
||||
compat_urllib_parse_unquote(embed_data['flashvars']['host'])
|
||||
)
|
||||
info_el = self._download_xml(info_url, episode).find('./video/info')
|
||||
|
||||
video_link = info_el.find('videoUrl/link').text
|
||||
token_query = compat_urllib_parse.urlencode({'id': video_link})
|
||||
token_info = self._download_json(
|
||||
embed_data['flashvars']['ov_tk'] + '?' + token_query,
|
||||
episode,
|
||||
transform_source=strip_jsonp
|
||||
)
|
||||
formats = self._extract_m3u8_formats(
|
||||
token_info['tokenizedUrl'], episode, ext='mp4', entry_protocol='m3u8_native')
|
||||
|
||||
return {
|
||||
'id': embed_data['videoId'],
|
||||
'display_id': episode,
|
||||
'title': info_el.find('title').text,
|
||||
'formats': formats,
|
||||
'description': get_element_by_attribute('class', 'text', webpage),
|
||||
'thumbnail': info_el.find('thumb').text,
|
||||
'duration': parse_duration(info_el.find('duration').text),
|
||||
}
|
||||
|
||||
@@ -2,14 +2,12 @@
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
|
||||
|
||||
class TudouIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:listplay|programs(?:/view)?|albumplay)/.*?/(?P<id>[^/?#]+?)(?:\.html)?/?(?:$|[?#])'
|
||||
_VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:listplay|programs(?:/view)?|albumplay)/([^/]+/)*(?P<id>[^/?#]+?)(?:\.html)?/?(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
|
||||
'md5': '140a49ed444bd22f93330985d8475fcb',
|
||||
@@ -27,41 +25,41 @@ class TudouIE(InfoExtractor):
|
||||
'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.tudou.com/albumplay/cJAHGih4yYg.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf'
|
||||
|
||||
def _url_for_id(self, id, quality=None):
|
||||
info_url = "http://v2.tudou.com/f?id=" + str(id)
|
||||
def _url_for_id(self, video_id, quality=None):
|
||||
info_url = 'http://v2.tudou.com/f?id=' + compat_str(video_id)
|
||||
if quality:
|
||||
info_url += '&hd' + quality
|
||||
webpage = self._download_webpage(info_url, id, "Opening the info webpage")
|
||||
final_url = self._html_search_regex('>(.+?)</f>', webpage, 'video url')
|
||||
xml_data = self._download_xml(info_url, video_id, "Opening the info XML page")
|
||||
final_url = xml_data.text
|
||||
return final_url
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
m = re.search(r'vcode:\s*[\'"](.+?)[\'"]', webpage)
|
||||
if m and m.group(1):
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': 'youku:' + m.group(1),
|
||||
'ie_key': 'Youku'
|
||||
}
|
||||
youku_vcode = self._search_regex(
|
||||
r'vcode\s*:\s*[\'"]([^\'"]*)[\'"]', webpage, 'youku vcode', default=None)
|
||||
if youku_vcode:
|
||||
return self.url_result('youku:' + youku_vcode, ie='Youku')
|
||||
|
||||
title = self._search_regex(
|
||||
r",kw:\s*['\"](.+?)[\"']", webpage, 'title')
|
||||
r',kw\s*:\s*[\'"]([^\'"]+)[\'"]', webpage, 'title')
|
||||
thumbnail_url = self._search_regex(
|
||||
r",pic:\s*[\"'](.+?)[\"']", webpage, 'thumbnail URL', fatal=False)
|
||||
r',pic\s*:\s*[\'"]([^\'"]+)[\'"]', webpage, 'thumbnail URL', fatal=False)
|
||||
|
||||
player_url = self._search_regex(
|
||||
r"playerUrl\s*:\s*['\"](.+?\.swf)[\"']",
|
||||
r'playerUrl\s*:\s*[\'"]([^\'"]+\.swf)[\'"]',
|
||||
webpage, 'player URL', default=self._PLAYER_URL)
|
||||
|
||||
segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 'segments')
|
||||
segments = json.loads(segs_json)
|
||||
segments = self._parse_json(self._search_regex(
|
||||
r'segs: \'([^\']+)\'', webpage, 'segments'), video_id)
|
||||
# It looks like the keys are the arguments that have to be passed as
|
||||
# the hd field in the request url, we pick the higher
|
||||
# Also, filter non-number qualities (see issue #3643).
|
||||
|
||||
@@ -119,6 +119,7 @@ class VidmeIE(InfoExtractor):
|
||||
'url': f['uri'],
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
'preference': 0 if f.get('type', '').endswith('clip') else 1,
|
||||
} for f in video.get('formats', []) if f.get('uri')]
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -3,12 +3,14 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
HEADRequest,
|
||||
@@ -16,14 +18,14 @@ from ..utils import (
|
||||
|
||||
|
||||
class ViewsterIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?viewster\.com/(?:serie|movie)/(?P<id>\d+-\d+-\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?viewster\.com/(?:serie|movie)/(?P<id>\d+-\d+-\d+)'
|
||||
_TESTS = [{
|
||||
# movie, Type=Movie
|
||||
'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/',
|
||||
'md5': '14d3cfffe66d57b41ae2d9c873416f01',
|
||||
'md5': 'e642d1b27fcf3a4ffa79f194f5adde36',
|
||||
'info_dict': {
|
||||
'id': '1140-11855-000',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'The listening Project',
|
||||
'description': 'md5:bac720244afd1a8ea279864e67baa071',
|
||||
'timestamp': 1214870400,
|
||||
@@ -33,10 +35,10 @@ class ViewsterIE(InfoExtractor):
|
||||
}, {
|
||||
# series episode, Type=Episode
|
||||
'url': 'http://www.viewster.com/serie/1284-19427-001/the-world-and-a-wall/',
|
||||
'md5': 'd5434c80fcfdb61651cc2199a88d6ba3',
|
||||
'md5': '9243079a8531809efe1b089db102c069',
|
||||
'info_dict': {
|
||||
'id': '1284-19427-001',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'The World and a Wall',
|
||||
'description': 'md5:24814cf74d3453fdf5bfef9716d073e3',
|
||||
'timestamp': 1428192000,
|
||||
@@ -61,6 +63,14 @@ class ViewsterIE(InfoExtractor):
|
||||
'description': 'md5:e7097a8fc97151e25f085c9eb7a1cdb1',
|
||||
},
|
||||
'playlist_mincount': 16,
|
||||
}, {
|
||||
# geo restricted series
|
||||
'url': 'https://www.viewster.com/serie/1280-18794-002/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# geo restricted video
|
||||
'url': 'https://www.viewster.com/serie/1280-18794-002/what-is-extraterritoriality-lawo/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
|
||||
@@ -74,8 +84,8 @@ class ViewsterIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
# Get 'api_token' cookie
|
||||
self._request_webpage(HEADRequest(url), video_id)
|
||||
cookies = self._get_cookies(url)
|
||||
self._request_webpage(HEADRequest('http://www.viewster.com/'), video_id)
|
||||
cookies = self._get_cookies('http://www.viewster.com/')
|
||||
self._AUTH_TOKEN = compat_urllib_parse_unquote(cookies['api_token'].value)
|
||||
|
||||
info = self._download_json(
|
||||
@@ -85,10 +95,16 @@ class ViewsterIE(InfoExtractor):
|
||||
entry_id = info.get('Id') or info['id']
|
||||
|
||||
# unfinished serie has no Type
|
||||
if info.get('Type') in ['Serie', None]:
|
||||
episodes = self._download_json(
|
||||
'https://public-api.viewster.com/series/%s/episodes' % entry_id,
|
||||
video_id, 'Downloading series JSON')
|
||||
if info.get('Type') in ('Serie', None):
|
||||
try:
|
||||
episodes = self._download_json(
|
||||
'https://public-api.viewster.com/series/%s/episodes' % entry_id,
|
||||
video_id, 'Downloading series JSON')
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
||||
self.raise_geo_restricted()
|
||||
else:
|
||||
raise
|
||||
entries = [
|
||||
self.url_result(
|
||||
'http://www.viewster.com/movie/%s' % episode['OriginId'], 'Viewster')
|
||||
@@ -98,7 +114,7 @@ class ViewsterIE(InfoExtractor):
|
||||
return self.playlist_result(entries, video_id, title, description)
|
||||
|
||||
formats = []
|
||||
for media_type in ('application/f4m+xml', 'application/x-mpegURL'):
|
||||
for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'):
|
||||
media = self._download_json(
|
||||
'https://public-api.viewster.com/movies/%s/video?mediaType=%s'
|
||||
% (entry_id, compat_urllib_parse.quote(media_type)),
|
||||
@@ -120,9 +136,22 @@ class ViewsterIE(InfoExtractor):
|
||||
fatal=False # m3u8 sometimes fail
|
||||
))
|
||||
else:
|
||||
formats.append({
|
||||
format_id = media.get('Bitrate')
|
||||
f = {
|
||||
'url': video_url,
|
||||
})
|
||||
'format_id': 'mp4-%s' % format_id,
|
||||
'height': int_or_none(media.get('Height')),
|
||||
'width': int_or_none(media.get('Width')),
|
||||
'preference': 1,
|
||||
}
|
||||
if format_id and not f['height']:
|
||||
f['height'] = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None))
|
||||
formats.append(f)
|
||||
|
||||
if not formats and not info.get('LanguageSets') and not info.get('VODSettings'):
|
||||
self.raise_geo_restricted()
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
synopsis = info.get('Synopsis', {})
|
||||
|
||||
@@ -63,7 +63,9 @@ class XHamsterIE(InfoExtractor):
|
||||
mrss_url = '%s://xhamster.com/movies/%s/%s.html' % (proto, video_id, seo)
|
||||
webpage = self._download_webpage(mrss_url, video_id)
|
||||
|
||||
title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>', webpage, 'title')
|
||||
title = self._html_search_regex(
|
||||
[r'<title>(?P<title>.+?)(?:, (?:[^,]+? )?Porn: xHamster| - xHamster\.com)</title>',
|
||||
r'<h1>([^<]+)</h1>'], webpage, 'title')
|
||||
|
||||
# Only a few videos have an description
|
||||
mobj = re.search(r'<span>Description: </span>([^<]+)', webpage)
|
||||
|
||||
@@ -144,6 +144,17 @@ class YahooIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://tw.news.yahoo.com/-100120367.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Query result is embedded in webpage, but explicit request to video API fails with geo restriction
|
||||
'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html',
|
||||
'md5': '4fbafb9c9b6f07aa8f870629f6671b35',
|
||||
'info_dict': {
|
||||
'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504',
|
||||
'ext': 'mp4',
|
||||
'title': 'Communitary - Community Episode 1: Ladders',
|
||||
'description': 'md5:8fc39608213295748e1e289807838c97',
|
||||
'duration': 1646,
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
@@ -171,6 +182,19 @@ class YahooIE(InfoExtractor):
|
||||
if nbc_sports_url:
|
||||
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
|
||||
|
||||
# Query result is often embedded in webpage as JSON. Sometimes explicit requests
|
||||
# to video API results in a failure with geo restriction reason therefore using
|
||||
# embedded query result when present sounds reasonable.
|
||||
config_json = self._search_regex(
|
||||
r'window\.Af\.bootstrap\[[^\]]+\]\s*=\s*({.*?"applet_type"\s*:\s*"td-applet-videoplayer".*?});(?:</script>|$)',
|
||||
webpage, 'videoplayer applet', default=None)
|
||||
if config_json:
|
||||
config = self._parse_json(config_json, display_id, fatal=False)
|
||||
if config:
|
||||
sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi')
|
||||
if sapi:
|
||||
return self._extract_info(display_id, sapi, webpage)
|
||||
|
||||
items_json = self._search_regex(
|
||||
r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
|
||||
default=None)
|
||||
@@ -190,22 +214,10 @@ class YahooIE(InfoExtractor):
|
||||
video_id = info['id']
|
||||
return self._get_info(video_id, display_id, webpage)
|
||||
|
||||
def _get_info(self, video_id, display_id, webpage):
|
||||
region = self._search_regex(
|
||||
r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"',
|
||||
webpage, 'region', fatal=False, default='US')
|
||||
data = compat_urllib_parse.urlencode({
|
||||
'protocol': 'http',
|
||||
'region': region,
|
||||
})
|
||||
query_url = (
|
||||
'https://video.media.yql.yahoo.com/v1/video/sapi/streams/'
|
||||
'{id}?{data}'.format(id=video_id, data=data))
|
||||
query_result = self._download_json(
|
||||
query_url, display_id, 'Downloading video info')
|
||||
|
||||
info = query_result['query']['results']['mediaObj'][0]
|
||||
def _extract_info(self, display_id, query, webpage):
|
||||
info = query['query']['results']['mediaObj'][0]
|
||||
meta = info.get('meta')
|
||||
video_id = info.get('id')
|
||||
|
||||
if not meta:
|
||||
msg = info['status'].get('msg')
|
||||
@@ -231,6 +243,9 @@ class YahooIE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
})
|
||||
else:
|
||||
if s.get('format') == 'm3u8_playlist':
|
||||
format_info['protocol'] = 'm3u8_native'
|
||||
format_info['ext'] = 'mp4'
|
||||
format_url = compat_urlparse.urljoin(host, path)
|
||||
format_info['url'] = format_url
|
||||
formats.append(format_info)
|
||||
@@ -264,6 +279,21 @@ class YahooIE(InfoExtractor):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _get_info(self, video_id, display_id, webpage):
|
||||
region = self._search_regex(
|
||||
r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"',
|
||||
webpage, 'region', fatal=False, default='US')
|
||||
data = compat_urllib_parse.urlencode({
|
||||
'protocol': 'http',
|
||||
'region': region,
|
||||
})
|
||||
query_url = (
|
||||
'https://video.media.yql.yahoo.com/v1/video/sapi/streams/'
|
||||
'{id}?{data}'.format(id=video_id, data=data))
|
||||
query_result = self._download_json(
|
||||
query_url, display_id, 'Downloading video info')
|
||||
return self._extract_info(display_id, query_result, webpage)
|
||||
|
||||
|
||||
class YahooSearchIE(SearchInfoExtractor):
|
||||
IE_DESC = 'Yahoo screen search'
|
||||
|
||||
@@ -1654,12 +1654,15 @@ class YoutubeChannelIE(InfoExtractor):
|
||||
channel_page = self._download_webpage(
|
||||
url + '?view=57', channel_id,
|
||||
'Downloading channel page', fatal=False)
|
||||
channel_playlist_id = self._html_search_meta(
|
||||
'channelId', channel_page, 'channel id', default=None)
|
||||
if not channel_playlist_id:
|
||||
channel_playlist_id = self._search_regex(
|
||||
r'data-channel-external-id="([^"]+)"',
|
||||
channel_page, 'channel id', default=None)
|
||||
if channel_page is False:
|
||||
channel_playlist_id = False
|
||||
else:
|
||||
channel_playlist_id = self._html_search_meta(
|
||||
'channelId', channel_page, 'channel id', default=None)
|
||||
if not channel_playlist_id:
|
||||
channel_playlist_id = self._search_regex(
|
||||
r'data-channel-external-id="([^"]+)"',
|
||||
channel_page, 'channel id', default=None)
|
||||
if channel_playlist_id and channel_playlist_id.startswith('UC'):
|
||||
playlist_id = 'UU' + channel_playlist_id[2:]
|
||||
return self.url_result(
|
||||
@@ -1970,6 +1973,7 @@ class YoutubeTruncatedURLIE(InfoExtractor):
|
||||
annotation_id=annotation_[^&]+|
|
||||
x-yt-cl=[0-9]+|
|
||||
hl=[^&]*|
|
||||
t=[0-9]+
|
||||
)?
|
||||
|
|
||||
attribution_link\?a=[^&]+
|
||||
@@ -1992,6 +1996,9 @@ class YoutubeTruncatedURLIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/watch?hl=en-GB',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/watch?t=2372',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -135,7 +135,10 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
|
||||
files_cmd = []
|
||||
for path in input_paths:
|
||||
files_cmd.extend([encodeArgument('-i'), encodeFilename(path, True)])
|
||||
files_cmd.extend([
|
||||
encodeArgument('-i'),
|
||||
encodeFilename(self._ffmpeg_filename_argument(path), True)
|
||||
])
|
||||
cmd = ([encodeFilename(self.executable, True), encodeArgument('-y')] +
|
||||
files_cmd +
|
||||
[encodeArgument(o) for o in opts] +
|
||||
@@ -155,10 +158,10 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
self.run_ffmpeg_multiple_files([path], out_path, opts)
|
||||
|
||||
def _ffmpeg_filename_argument(self, fn):
|
||||
# ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
|
||||
if fn.startswith('-'):
|
||||
return './' + fn
|
||||
return fn
|
||||
# Always use 'file:' because the filename may contain ':' (ffmpeg
|
||||
# interprets that as a protocol) or can start with '-' (-- is broken in
|
||||
# ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details)
|
||||
return 'file:' + fn
|
||||
|
||||
|
||||
class FFmpegExtractAudioPP(FFmpegPostProcessor):
|
||||
|
||||
@@ -619,7 +619,7 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
|
||||
# expected HTTP responses to meet HTTP/1.0 or later (see also
|
||||
# https://github.com/rg3/youtube-dl/issues/6727)
|
||||
if sys.version_info < (3, 0):
|
||||
kwargs['strict'] = True
|
||||
kwargs[b'strict'] = True
|
||||
hc = http_class(*args, **kwargs)
|
||||
source_address = ydl_handler._params.get('source_address')
|
||||
if source_address is not None:
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2015.09.09'
|
||||
__version__ = '2015.09.28'
|
||||
|
||||
Reference in New Issue
Block a user