# ===== youtube-dl/bin/youtube-dl =====

#!/usr/bin/env python

import youtube_dl

if __name__ == '__main__':
    youtube_dl.main()

# ===== youtube-dl/devscripts/transition_helper.py =====

#!/usr/bin/env python

import sys, os

try:
    import urllib.request as compat_urllib_request
except ImportError:  # Python 2
    import urllib2 as compat_urllib_request

sys.stderr.write(u'Hi! We changed distribution method and now youtube-dl needs to update itself one more time.\n')
sys.stderr.write(u'This will only happen once. Simply press enter to go on. Sorry for the trouble!\n')
sys.stderr.write(u'The new location of the binaries is https://github.com/rg3/youtube-dl/downloads, not the git repository.\n\n')

try:
    raw_input()
except NameError:  # Python 3
    input()

filename = sys.argv[0]

API_URL = "https://api.github.com/repos/rg3/youtube-dl/downloads"
BIN_URL = "https://github.com/downloads/rg3/youtube-dl/youtube-dl"

if not os.access(filename, os.W_OK):
    sys.exit('ERROR: no write permissions on %s' % filename)

try:
    urlh = compat_urllib_request.urlopen(BIN_URL)
    newcontent = urlh.read()
    urlh.close()
except (IOError, OSError):
    sys.exit('ERROR: unable to download latest version')

try:
    with open(filename, 'wb') as outf:
        outf.write(newcontent)
except (IOError, OSError):
    sys.exit('ERROR: unable to overwrite current version')

sys.stderr.write(u'Done! Now you can run youtube-dl.\n')
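The helper above rewrites sys.argv[0] in place, so a connection dropped mid-download leaves a truncated, broken script behind. A minimal sketch of a safer variant under the same assumptions (Python 2 and the same BIN_URL), downloading to a temporary file first and swapping it in with an atomic rename; safe_self_update is a hypothetical name, not part of the shipped helper:

import os
import tempfile
import urllib2

def safe_self_update(target, url):
    # Keep the temporary file in the target's directory so that
    # os.rename() is an atomic same-filesystem replacement on POSIX.
    data = urllib2.urlopen(url).read()
    fd, tmpname = tempfile.mkstemp(dir=os.path.dirname(os.path.abspath(target)))
    try:
        os.write(fd, data)
        os.close(fd)
        os.chmod(tmpname, 0o755)    # the script must stay executable
        os.rename(tmpname, target)  # only replace after a complete download
    except OSError:
        os.remove(tmpname)
        raise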
# ===== youtube-dl/devscripts/buildserver.py =====

#!/usr/bin/python3

from http.server import HTTPServer, BaseHTTPRequestHandler
from socketserver import ThreadingMixIn
import argparse
import ctypes
import functools
import shutil               # used by DownloadBuilder
import subprocess           # used by GITBuilder and YoutubeDLBuilder
import sys
import tempfile             # used by GITInfoBuilder
import threading
import traceback
import os.path
import urllib.parse as urlparse  # used by BuildHTTPRequestHandler
import winreg as _winreg         # used by PythonBuilder


class BuildHTTPServer(ThreadingMixIn, HTTPServer):
    allow_reuse_address = True


advapi32 = ctypes.windll.advapi32

SC_MANAGER_ALL_ACCESS = 0xf003f
SC_MANAGER_CREATE_SERVICE = 0x02
SERVICE_WIN32_OWN_PROCESS = 0x10
SERVICE_AUTO_START = 0x2
SERVICE_ERROR_NORMAL = 0x1
DELETE = 0x00010000
SERVICE_STATUS_START_PENDING = 0x00000002
SERVICE_STATUS_RUNNING = 0x00000004
SERVICE_ACCEPT_STOP = 0x1

SVCNAME = 'youtubedl_builder'

LPTSTR = ctypes.c_wchar_p
START_CALLBACK = ctypes.WINFUNCTYPE(None, ctypes.c_int, ctypes.POINTER(LPTSTR))


class SERVICE_TABLE_ENTRY(ctypes.Structure):
    _fields_ = [
        ('lpServiceName', LPTSTR),
        ('lpServiceProc', START_CALLBACK)
    ]


class SERVICE_STATUS(ctypes.Structure):
    # Win32 SERVICE_STATUS layout; needed by win_service_set_status below.
    _fields_ = [
        ('dwServiceType', ctypes.c_uint32),
        ('dwCurrentState', ctypes.c_uint32),
        ('dwControlsAccepted', ctypes.c_uint32),
        ('dwWin32ExitCode', ctypes.c_uint32),
        ('dwServiceSpecificExitCode', ctypes.c_uint32),
        ('dwCheckPoint', ctypes.c_uint32),
        ('dwWaitHint', ctypes.c_uint32),
    ]


HandlerEx = ctypes.WINFUNCTYPE(
    ctypes.c_int,     # return
    ctypes.c_int,     # dwControl
    ctypes.c_int,     # dwEventType
    ctypes.c_void_p,  # lpEventData
    ctypes.c_void_p,  # lpContext
)


def _ctypes_array(c_type, py_array):
    ar = (c_type * len(py_array))()
    ar[:] = py_array
    return ar


def win_OpenSCManager():
    res = advapi32.OpenSCManagerW(None, None, SC_MANAGER_ALL_ACCESS)
    if not res:
        raise Exception('Opening service manager failed - '
                        'are you running this as administrator?')
    return res


def win_install_service(service_name, cmdline):
    manager = win_OpenSCManager()
    try:
        h = advapi32.CreateServiceW(
            manager, service_name, None,
            SC_MANAGER_CREATE_SERVICE, SERVICE_WIN32_OWN_PROCESS,
            SERVICE_AUTO_START, SERVICE_ERROR_NORMAL,
            cmdline, None, None, None, None, None)
        if not h:
            raise OSError('Service creation failed: %s' % ctypes.FormatError())
        advapi32.CloseServiceHandle(h)
    finally:
        advapi32.CloseServiceHandle(manager)


def win_uninstall_service(service_name):
    manager = win_OpenSCManager()
    try:
        h = advapi32.OpenServiceW(manager, service_name, DELETE)
        if not h:
            raise OSError('Could not find service %s: %s' % (
                service_name, ctypes.FormatError()))
        try:
            if not advapi32.DeleteService(h):
                raise OSError('Deletion failed: %s' % ctypes.FormatError())
        finally:
            advapi32.CloseServiceHandle(h)
    finally:
        advapi32.CloseServiceHandle(manager)


def win_service_report_event(service_name, msg, is_error=True):
    with open('C:/sshkeys/log', 'a', encoding='utf-8') as f:
        f.write(msg + '\n')

    event_log = advapi32.RegisterEventSourceW(None, service_name)
    if not event_log:
        raise OSError('Could not report event: %s' % ctypes.FormatError())

    try:
        type_id = 0x0001 if is_error else 0x0004
        event_id = 0xc0000000 if is_error else 0x40000000
        lines = _ctypes_array(LPTSTR, [msg])

        if not advapi32.ReportEventW(
                event_log, type_id, 0, event_id, None,
                len(lines), 0, lines, None):
            raise OSError('Event reporting failed: %s' % ctypes.FormatError())
    finally:
        advapi32.DeregisterEventSource(event_log)


def win_service_handler(stop_event, *args):
    try:
        raise ValueError('Handler called with args ' + repr(args))
        # TODO: handle SERVICE_CONTROL_STOP by setting stop_event
    except Exception as e:
        tb = traceback.format_exc()
        msg = str(e) + '\n' + tb
        # The handler does not receive the service name as an argument,
        # so report under the module-level SVCNAME.
        win_service_report_event(SVCNAME, msg, is_error=True)
        raise


def win_service_set_status(handle, status_code):
    svcStatus = SERVICE_STATUS()
    svcStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS
    svcStatus.dwCurrentState = status_code
    svcStatus.dwControlsAccepted = SERVICE_ACCEPT_STOP
    svcStatus.dwServiceSpecificExitCode = 0

    if not advapi32.SetServiceStatus(handle, ctypes.byref(svcStatus)):
        raise OSError('SetServiceStatus failed: %r' % ctypes.FormatError())


def win_service_main(service_name, real_main, argc, argv_raw):
    try:
        # args = [argv_raw[i].value for i in range(argc)]
        stop_event = threading.Event()
        # Note: the callable must come first in functools.partial.
        handler = HandlerEx(functools.partial(win_service_handler, stop_event))
        h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None)
        if not h:
            raise OSError('Handler registration failed: %s' %
                          ctypes.FormatError())
        # TODO: report SERVICE_STATUS_RUNNING and call real_main()
    except Exception as e:
        tb = traceback.format_exc()
        msg = str(e) + '\n' + tb
        win_service_report_event(service_name, msg, is_error=True)
        raise


def win_service_start(service_name, real_main):
    try:
        cb = START_CALLBACK(
            functools.partial(win_service_main, service_name, real_main))
        dispatch_table = _ctypes_array(SERVICE_TABLE_ENTRY, [
            SERVICE_TABLE_ENTRY(
                service_name,
                cb
            ),
            SERVICE_TABLE_ENTRY(None, ctypes.cast(None, START_CALLBACK))
        ])

        if not advapi32.StartServiceCtrlDispatcherW(dispatch_table):
            raise OSError('ctypes start failed: %s' % ctypes.FormatError())
    except Exception as e:
        tb = traceback.format_exc()
        msg = str(e) + '\n' + tb
        win_service_report_event(service_name, msg, is_error=True)
        raise


def main(args=None):
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--install',
                        action='store_const', dest='action', const='install',
                        help='Launch at Windows startup')
    parser.add_argument('-u', '--uninstall',
                        action='store_const', dest='action', const='uninstall',
                        help='Remove Windows service')
    parser.add_argument('-s', '--service',
                        action='store_const', dest='action', const='service',
                        help='Run as a Windows service')
    parser.add_argument('-b', '--bind', metavar='<host:port>',
                        action='store', default='localhost:8142',
                        help='Bind to host:port (default: %(default)s)')
    options = parser.parse_args(args=args)

    if options.action == 'install':
        fn = os.path.abspath(__file__).replace('v:', '\\\\vboxsrv\\vbox')
        cmdline = '%s %s -s -b %s' % (sys.executable, fn, options.bind)
        win_install_service(SVCNAME, cmdline)
        return

    if options.action == 'uninstall':
        win_uninstall_service(SVCNAME)
        return

    if options.action == 'service':
        win_service_start(SVCNAME, main)
        return

    host, port_str = options.bind.split(':')
    port = int(port_str)

    print('Listening on %s:%d' % (host, port))
    srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler)
    thr = threading.Thread(target=srv.serve_forever)
    thr.start()
    input('Press ENTER to shut down')
    srv.shutdown()
    thr.join()


def rmtree(path):
    for name in os.listdir(path):
        fname = os.path.join(path, name)
        if os.path.isdir(fname):
            rmtree(fname)
        else:
            os.chmod(fname, 0o666)
            os.remove(fname)
    os.rmdir(path)

#==============================================================================


class BuildError(Exception):
    def __init__(self, output, code=500):
        self.output = output
        self.code = code

    def __str__(self):
        return self.output


class HTTPError(BuildError):
    pass


class PythonBuilder(object):
    def __init__(self, **kwargs):
        pythonVersion = kwargs.pop('python', '2.7')
        try:
            key = _winreg.OpenKey(
                _winreg.HKEY_LOCAL_MACHINE,
                r'SOFTWARE\Python\PythonCore\%s\InstallPath' % pythonVersion)
            try:
                self.pythonPath, _ = _winreg.QueryValueEx(key, '')
            finally:
                _winreg.CloseKey(key)
        except Exception:
            raise BuildError('No such Python version: %s' % pythonVersion)

        super(PythonBuilder, self).__init__(**kwargs)


class GITInfoBuilder(object):
    def __init__(self, **kwargs):
        try:
            self.user, self.repoName = kwargs['path'][:2]
            self.rev = kwargs.pop('rev')
        except ValueError:
            raise BuildError('Invalid path')
        except KeyError as e:
            raise BuildError('Missing mandatory parameter "%s"' % e.args[0])

        path = os.path.join(os.environ['APPDATA'], 'Build archive',
                            self.repoName, self.user)
        if not os.path.exists(path):
            os.makedirs(path)
        self.basePath = tempfile.mkdtemp(dir=path)
        self.buildPath = os.path.join(self.basePath, 'build')

        super(GITInfoBuilder, self).__init__(**kwargs)


class GITBuilder(GITInfoBuilder):
    def build(self):
        try:
            subprocess.check_output(
                ['git', 'clone',
                 'git://github.com/%s/%s.git' % (self.user, self.repoName),
                 self.buildPath])
            subprocess.check_output(['git', 'checkout', self.rev],
                                    cwd=self.buildPath)
        except subprocess.CalledProcessError as e:
            raise BuildError(e.output)

        super(GITBuilder, self).build()


class YoutubeDLBuilder(object):
    authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile']

    def __init__(self, **kwargs):
        if self.repoName != 'youtube-dl':
            raise BuildError('Invalid repository "%s"' % self.repoName)
        if self.user not in self.authorizedUsers:
            raise HTTPError('Unauthorized user "%s"' % self.user, 401)

        super(YoutubeDLBuilder, self).__init__(**kwargs)

    def build(self):
        try:
            subprocess.check_output(
                [os.path.join(self.pythonPath, 'python.exe'),
                 'setup.py', 'py2exe'],
                cwd=self.buildPath)
        except subprocess.CalledProcessError as e:
            raise BuildError(e.output)

        super(YoutubeDLBuilder, self).build()


class DownloadBuilder(object):
    def __init__(self, **kwargs):
        self.handler = kwargs.pop('handler')
        self.srcPath = os.path.join(self.buildPath, *tuple(kwargs['path'][2:]))
        self.srcPath = os.path.abspath(os.path.normpath(self.srcPath))
        if not self.srcPath.startswith(self.buildPath):
            raise HTTPError(self.srcPath, 401)

        super(DownloadBuilder, self).__init__(**kwargs)

    def build(self):
        if not os.path.exists(self.srcPath):
            raise HTTPError('No such file', 404)
        if os.path.isdir(self.srcPath):
            raise HTTPError('Is a directory: %s' % self.srcPath, 401)

        self.handler.send_response(200)
        self.handler.send_header('Content-Type', 'application/octet-stream')
        self.handler.send_header('Content-Disposition',
                                 'attachment; filename=%s' %
                                 os.path.split(self.srcPath)[-1])
        self.handler.send_header('Content-Length',
                                 str(os.stat(self.srcPath).st_size))
        self.handler.end_headers()

        with open(self.srcPath, 'rb') as src:
            shutil.copyfileobj(src, self.handler.wfile)

        super(DownloadBuilder, self).build()


class CleanupTempDir(object):
    def build(self):
        try:
            rmtree(self.basePath)
        except Exception as e:
            print('WARNING deleting "%s": %s' % (self.basePath, e))

        super(CleanupTempDir, self).build()


class Null(object):
    def __init__(self, **kwargs):
        pass

    def start(self):
        pass

    def close(self):
        pass

    def build(self):
        pass


class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder,
              CleanupTempDir, Null):
    pass


class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
    actionDict = {'build': Builder, 'download': Builder}  # They're the same, no more caching.

    def do_GET(self):
        path = urlparse.urlparse(self.path)
        paramDict = dict([(key, value[0]) for key, value
                          in urlparse.parse_qs(path.query).items()])
        action, _, path = path.path.strip('/').partition('/')
        if path:
            path = path.split('/')
            if action in self.actionDict:
                try:
                    builder = self.actionDict[action](path=path, handler=self,
                                                      **paramDict)
                    builder.start()
                    try:
                        builder.build()
                    finally:
                        builder.close()
                # HTTPError subclasses BuildError, so it must be caught first.
                except HTTPError as e:
                    self.send_response(e.code, str(e))
                except BuildError as e:
                    self.send_response(e.code)
                    msg = str(e).encode('UTF-8')  # unicode() does not exist on Python 3
                    self.send_header('Content-Type', 'text/plain; charset=UTF-8')
                    self.send_header('Content-Length', len(msg))
                    self.end_headers()
                    self.wfile.write(msg)
            else:
                self.send_response(500, 'Unknown build method "%s"' % action)
        else:
            self.send_response(500, 'Malformed URL')

#==============================================================================

if __name__ == '__main__':
    main()
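Builder above is wired together by cooperative multiple inheritance: every mixin performs its stage of a request and then defers to super().build(), so Python's MRO walks the base-class list in declaration order until the chain terminates in Null. The pattern in isolation, as a sketch (Checkout, Compile and Pipeline are illustrative names, not part of the build server):

class Null(object):
    def build(self):
        pass  # end of the chain


class Checkout(Null):
    def build(self):
        print('checking out sources')
        super(Checkout, self).build()


class Compile(Null):
    def build(self):
        print('compiling')
        super(Compile, self).build()


class Pipeline(Checkout, Compile, Null):
    # MRO: Pipeline -> Checkout -> Compile -> Null, so a single build()
    # call runs every stage in declaration order.
    pass


Pipeline().build()  # prints 'checking out sources', then 'compiling'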
# ===== youtube-dl/devscripts/SizeOfImage.patch =====
# (binary BSDIFF40 patch for py2exe's run.exe, applied by wine-py2exe.sh;
#  raw bytes omitted)

# ===== youtube-dl/devscripts/wine-py2exe.sh =====

#!/bin/bash

# Run with as parameter a setup.py that works in the current directory
# e.g. no os.chdir()
# It will run twice, the first time will crash

set -e

SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"

if [ ! -d wine-py2exe ]; then

    sudo apt-get install wine1.3 axel bsdiff

    mkdir wine-py2exe
    cd wine-py2exe
    export WINEPREFIX=`pwd`

    axel -a "http://www.python.org/ftp/python/2.7/python-2.7.msi"
    axel -a "http://downloads.sourceforge.net/project/py2exe/py2exe/0.6.9/py2exe-0.6.9.win32-py2.7.exe"
    #axel -a "http://winetricks.org/winetricks"

    # http://appdb.winehq.org/objectManager.php?sClass=version&iId=21957
    echo "Follow python setup on screen"
    wine msiexec /i python-2.7.msi

    echo "Follow py2exe setup on screen"
    wine py2exe-0.6.9.win32-py2.7.exe

    #echo "Follow Microsoft Visual C++ 2008 Redistributable Package setup on screen"
    #bash winetricks vcrun2008

    rm py2exe-0.6.9.win32-py2.7.exe
    rm python-2.7.msi
    #rm winetricks

    # http://bugs.winehq.org/show_bug.cgi?id=3591
    mv drive_c/Python27/Lib/site-packages/py2exe/run.exe drive_c/Python27/Lib/site-packages/py2exe/run.exe.backup
    bspatch drive_c/Python27/Lib/site-packages/py2exe/run.exe.backup drive_c/Python27/Lib/site-packages/py2exe/run.exe "$SCRIPT_DIR/SizeOfImage.patch"
    mv drive_c/Python27/Lib/site-packages/py2exe/run_w.exe drive_c/Python27/Lib/site-packages/py2exe/run_w.exe.backup
    bspatch drive_c/Python27/Lib/site-packages/py2exe/run_w.exe.backup drive_c/Python27/Lib/site-packages/py2exe/run_w.exe "$SCRIPT_DIR/SizeOfImage_w.patch"

    cd -
else
    export WINEPREFIX="$( cd wine-py2exe && pwd )"
fi

wine "C:\\Python27\\python.exe" "$1" py2exe > "py2exe.log" 2>&1 || true
echo '# Copying python27.dll' >> "py2exe.log"
cp "$WINEPREFIX/drive_c/windows/system32/python27.dll" build/bdist.win32/winexe/bundle-2.7/
wine "C:\\Python27\\python.exe" "$1" py2exe >> "py2exe.log" 2>&1

# ===== youtube-dl/devscripts/posix-locale.sh =====

# source this file in your shell to get a POSIX locale (which will break many
# programs, but that's kind of the point)

export LC_ALL=POSIX
export LANG=POSIX
export LANGUAGE=POSIX

# ===== youtube-dl/devscripts/SizeOfImage_w.patch =====
# (binary BSDIFF40 patch for py2exe's run_w.exe, applied by wine-py2exe.sh;
#  raw bytes omitted)

# ===== youtube-dl/devscripts/check-porn.py =====

#!/usr/bin/env python

"""
This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
whether we are failing to 'age_limit'-tag some porn site.

A second approach relies on a list of porn domains; to activate it, pass the
list filename as the only argument.
"""

# Allow direct execution
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from test.helper import get_testcases
from youtube_dl.utils import compat_urllib_parse_urlparse
from youtube_dl.utils import compat_urllib_request

if len(sys.argv) > 1:
    METHOD = 'LIST'
    LIST = open(sys.argv[1]).read().decode('utf8').strip()
else:
    METHOD = 'HEURISTIC'

for test in get_testcases():
    if METHOD == 'HEURISTIC':
        try:
            webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
        except:
            print('\nFail: {0}'.format(test['name']))
            continue

        webpage = webpage.decode('utf8', 'replace')
        RESULT = 'porn' in webpage.lower()

    elif METHOD == 'LIST':
        domain = compat_urllib_parse_urlparse(test['url']).netloc
        if not domain:
            print('\nFail: {0}'.format(test['name']))
            continue
        domain = '.'.join(domain.split('.')[-2:])

        RESULT = ('.' + domain + '\n' in LIST or
                  '\n' + domain + '\n' in LIST)

    if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or
                   test['info_dict']['age_limit'] != 18):
        print('\nPotential missing age_limit check: {0}'.format(test['name']))
    elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and
                         test['info_dict']['age_limit'] == 18):
        print('\nPotential false negative: {0}'.format(test['name']))
    else:
        sys.stdout.write('.')
        sys.stdout.flush()

print()

# ===== youtube-dl/devscripts/bash-completion.in =====

__youtube_dl()
{
    local cur prev opts fileopts diropts keywords
    COMPREPLY=()
    cur="${COMP_WORDS[COMP_CWORD]}"
    prev="${COMP_WORDS[COMP_CWORD-1]}"
    opts="{{flags}}"
    keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
    fileopts="-a|--batch-file|--download-archive|--cookies|--load-info"
    diropts="--cache-dir"

    if [[ ${prev} =~ ${fileopts} ]]; then
        COMPREPLY=( $(compgen -f -- ${cur}) )
        return 0
    elif [[ ${prev} =~ ${diropts} ]]; then
        COMPREPLY=( $(compgen -d -- ${cur}) )
        return 0
    fi

    if [[ ${cur} =~ : ]]; then
        COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
        return 0
    elif [[ ${cur} == * ]] ; then
        COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
        return 0
    fi
}

complete -F __youtube_dl youtube-dl

# ===== youtube-dl/devscripts/bash-completion.py =====

#!/usr/bin/env python
import os
from os.path import dirname as dirn
import sys

sys.path.append(dirn(dirn(os.path.abspath(__file__))))
import youtube_dl

BASH_COMPLETION_FILE = "youtube-dl.bash-completion"
BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in"


def build_completion(opt_parser):
    opts_flag = []
    for group in opt_parser.option_groups:
        for option in group.option_list:
            # for every long flag
            opts_flag.append(option.get_opt_string())
    with open(BASH_COMPLETION_TEMPLATE) as f:
        template = f.read()
    with open(BASH_COMPLETION_FILE, "w") as f:
        # just using the special char
        filled_template = template.replace("{{flags}}", " ".join(opts_flag))
        f.write(filled_template)

parser = youtube_dl.parseOpts()[0]
build_completion(parser)
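build_completion() collects every long flag from youtube-dl's optparse parser and splices them into the {{flags}} placeholder of the template above. The templating step in isolation, with a stand-in parser (the options below are made up for the example):

import optparse

parser = optparse.OptionParser()
group = optparse.OptionGroup(parser, 'General Options')
group.add_option('--version', action='store_true')
group.add_option('-f', '--format', dest='format')
parser.add_option_group(group)

opts_flag = []
for grp in parser.option_groups:
    for option in grp.option_list:
        opts_flag.append(option.get_opt_string())  # long form, e.g. '--format'

template = 'opts="{{flags}}"'
print(template.replace('{{flags}}', ' '.join(opts_flag)))
# -> opts="--version --format"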
# ===== youtube-dl/devscripts/gh-pages/update-feed.py =====

#!/usr/bin/env python3

import datetime
import io
import json
import textwrap


atom_template = textwrap.dedent("""\
    <?xml version="1.0" encoding="utf-8"?>
    <feed xmlns="http://www.w3.org/2005/Atom">
        <title>youtube-dl releases</title>
        <id>https://yt-dl.org/feed/youtube-dl-updates-feed</id>
        <updated>@TIMESTAMP@</updated>
        @ENTRIES@
    </feed>""")

entry_template = textwrap.dedent("""
    <entry>
        <id>https://yt-dl.org/feed/youtube-dl-updates-feed/youtube-dl-@VERSION@</id>
        <title>New version @VERSION@</title>
        <content>
            Downloads available at https://yt-dl.org/downloads/@VERSION@/
        </content>
        <author>
            <name>The youtube-dl maintainers</name>
        </author>
        <updated>@TIMESTAMP@</updated>
    </entry>
    """)

now = datetime.datetime.now()
now_iso = now.isoformat() + 'Z'

atom_template = atom_template.replace('@TIMESTAMP@', now_iso)

versions_info = json.load(open('update/versions.json'))
versions = list(versions_info['versions'].keys())
versions.sort()

entries = []
for v in versions:
    fields = v.split('.')
    year, month, day = map(int, fields[:3])
    faked = 0
    patchlevel = 0
    while True:
        try:
            datetime.date(year, month, day)
        except ValueError:
            day -= 1
            faked += 1
            assert day > 0
            continue
        break
    if len(fields) >= 4:
        try:
            patchlevel = int(fields[3])
        except ValueError:
            patchlevel = 1
    timestamp = '%04d-%02d-%02dT00:%02d:%02dZ' % (year, month, day, faked, patchlevel)

    entry = entry_template.replace('@TIMESTAMP@', timestamp)
    entry = entry.replace('@VERSION@', v)
    entries.append(entry)

entries_str = textwrap.indent(''.join(entries), '\t')
atom_template = atom_template.replace('@ENTRIES@', entries_str)

with io.open('update/releases.atom', 'w', encoding='utf-8') as atom_file:
    atom_file.write(atom_template)
# ===== youtube-dl/devscripts/gh-pages/update-sites.py =====

#!/usr/bin/env python3

import sys
import os
import textwrap

# We must be able to import youtube_dl
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))

import youtube_dl


def main():
    with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
        template = tmplf.read()

    ie_htmls = []
    for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()):
        ie_html = '<b>{}</b>'.format(ie.IE_NAME)
        ie_desc = getattr(ie, 'IE_DESC', None)
        if ie_desc is False:
            continue
        elif ie_desc is not None:
            ie_html += ': {}'.format(ie.IE_DESC)
        if not ie.working():
            ie_html += ' (Currently broken)'
        ie_htmls.append('<li>{}</li>'.format(ie_html))

    template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t'))

    with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
        sitesf.write(template)


if __name__ == '__main__':
    main()

# ===== youtube-dl/devscripts/gh-pages/add-version.py =====

#!/usr/bin/env python3

import json
import sys
import hashlib
import os.path


if len(sys.argv) <= 1:
    print('Specify the version number as parameter')
    sys.exit()
version = sys.argv[1]

with open('update/LATEST_VERSION', 'w') as f:
    f.write(version)

versions_info = json.load(open('update/versions.json'))
if 'signature' in versions_info:
    del versions_info['signature']

new_version = {}

filenames = {
    'bin': 'youtube-dl',
    'exe': 'youtube-dl.exe',
    'tar': 'youtube-dl-%s.tar.gz' % version}
build_dir = os.path.join('..', '..', 'build', version)
for key, filename in filenames.items():
    url = 'https://yt-dl.org/downloads/%s/%s' % (version, filename)
    fn = os.path.join(build_dir, filename)
    with open(fn, 'rb') as f:
        data = f.read()
    if not data:
        raise ValueError('File %s is empty!' % fn)
    sha256sum = hashlib.sha256(data).hexdigest()
    new_version[key] = (url, sha256sum)

versions_info['versions'][version] = new_version
versions_info['latest'] = version

with open('update/versions.json', 'w') as jsonf:
    json.dump(versions_info, jsonf, indent=4, sort_keys=True)
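For orientation, after add-version.py runs, update/versions.json has roughly the shape below (the version number, URLs and digests are made-up placeholders; sign-versions.py adds the 'signature' field in the next step):

# Illustrative shape only; the (url, sha256) tuples stored by add-version.py
# serialize to JSON arrays.
versions_info = {
    'latest': '2014.01.30',
    'versions': {
        '2014.01.30': {
            'bin': ('https://yt-dl.org/downloads/2014.01.30/youtube-dl', '<sha256>'),
            'exe': ('https://yt-dl.org/downloads/2014.01.30/youtube-dl.exe', '<sha256>'),
            'tar': ('https://yt-dl.org/downloads/2014.01.30/youtube-dl-2014.01.30.tar.gz', '<sha256>'),
        },
    },
}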
# ===== youtube-dl/devscripts/gh-pages/update-copyright.py =====

#!/usr/bin/env python
# coding: utf-8

from __future__ import with_statement

import datetime
import glob
import io  # For Python 2 compatibility
import os
import re

year = str(datetime.datetime.now().year)
for fn in glob.glob('*.html*'):
    with io.open(fn, encoding='utf-8') as f:
        content = f.read()
    newc = re.sub(
        u'(?P<copyright>Copyright © 2006-)(?P<year>[0-9]{4})',
        u'Copyright © 2006-' + year, content)
    if content != newc:
        tmpFn = fn + '.part'
        with io.open(tmpFn, 'wt', encoding='utf-8') as outf:
            outf.write(newc)
        os.rename(tmpFn, fn)

# ===== youtube-dl/devscripts/gh-pages/sign-versions.py =====

#!/usr/bin/env python3

import rsa
import json
from binascii import hexlify

try:
    input = raw_input
except NameError:
    pass

versions_info = json.load(open('update/versions.json'))
if 'signature' in versions_info:
    del versions_info['signature']

print('Enter the PKCS1 private key, followed by a blank line:')
privkey = b''
while True:
    try:
        line = input()
    except EOFError:
        break
    if line == '':
        break
    privkey += line.encode('ascii') + b'\n'
privkey = rsa.PrivateKey.load_pkcs1(privkey)

signature = hexlify(rsa.pkcs1.sign(json.dumps(versions_info, sort_keys=True).encode('utf-8'), privkey, 'SHA-256')).decode()
print('signature: ' + signature)

versions_info['signature'] = signature
json.dump(versions_info, open('update/versions.json', 'w'), indent=4, sort_keys=True)
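sign-versions.py signs the canonical (sort_keys=True) JSON dump with PKCS#1 v1.5 over SHA-256; the updater in devscripts/transition_helper_exe/youtube-dl.py below re-implements the check by hand. With the same rsa library, the verifying side would look roughly like this sketch (the public-key value is a placeholder; the real modulus is embedded in the updater as UPDATES_RSA_KEY):

import json
from binascii import unhexlify

import rsa

versions_info = json.load(open('update/versions.json'))
signature = unhexlify(versions_info.pop('signature'))

# Placeholder key pair values; use the public half of the signing key.
pubkey = rsa.PublicKey(n=0x123456789, e=65537)

message = json.dumps(versions_info, sort_keys=True).encode('utf-8')
rsa.verify(message, signature, pubkey)  # raises rsa.VerificationError on mismatch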
# ===== youtube-dl/devscripts/gh-pages/generate-download.py =====

#!/usr/bin/env python3
import hashlib
import shutil
import subprocess
import tempfile
import urllib.request
import json

versions_info = json.load(open('update/versions.json'))
version = versions_info['latest']
URL = versions_info['versions'][version]['bin'][0]

data = urllib.request.urlopen(URL).read()

# Read template page
with open('download.html.in', 'r', encoding='utf-8') as tmplf:
    template = tmplf.read()

md5sum = hashlib.md5(data).hexdigest()
sha1sum = hashlib.sha1(data).hexdigest()
sha256sum = hashlib.sha256(data).hexdigest()
template = template.replace('@PROGRAM_VERSION@', version)
template = template.replace('@PROGRAM_URL@', URL)
template = template.replace('@PROGRAM_MD5SUM@', md5sum)
template = template.replace('@PROGRAM_SHA1SUM@', sha1sum)
template = template.replace('@PROGRAM_SHA256SUM@', sha256sum)
template = template.replace('@EXE_URL@', versions_info['versions'][version]['exe'][0])
template = template.replace('@EXE_SHA256SUM@', versions_info['versions'][version]['exe'][1])
template = template.replace('@TAR_URL@', versions_info['versions'][version]['tar'][0])
template = template.replace('@TAR_SHA256SUM@', versions_info['versions'][version]['tar'][1])
with open('download.html', 'w', encoding='utf-8') as dlf:
    dlf.write(template)

# ===== youtube-dl/devscripts/release.sh =====

#!/bin/bash

# IMPORTANT: the following assumptions are made
# * the GH repo is on the origin remote
# * the gh-pages branch is named so locally
# * the git config user.signingkey is properly set

# You will need
# pip install coverage nose rsa

# TODO
# release notes
# make hash on local files

set -e

skip_tests=false
if [ "$1" = '--skip-test' ]; then
    skip_tests=true
    shift
fi

if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
version="$1"
if [ ! -z "`git tag | grep "$version"`" ]; then echo 'ERROR: version already present'; exit 1; fi
if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: the working directory is not clean; commit or stash changes'; exit 1; fi
useless_files=$(find youtube_dl -type f -not -name '*.py')
if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $useless_files"; exit 1; fi
if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi

/bin/echo -e "\n### First of all, testing..."
make cleanall
if $skip_tests ; then
    echo 'SKIPPING TESTS'
else
    nosetests --verbose --with-coverage --cover-package=youtube_dl --cover-html test --stop || exit 1
fi

/bin/echo -e "\n### Changing version in version.py..."
sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py

/bin/echo -e "\n### Committing CHANGELOG README.md and youtube_dl/version.py..."
make README.md
git add CHANGELOG README.md youtube_dl/version.py
git commit -m "release $version"

/bin/echo -e "\n### Now tagging, signing and pushing..."
git tag -s -m "Release $version" "$version"
git show "$version"
read -p "Is it good, can I push? (y/n) " -n 1
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
echo
MASTER=$(git rev-parse --abbrev-ref HEAD)
git push origin $MASTER:master
git push origin "$version"

/bin/echo -e "\n### OK, now it is time to build the binaries..."
REV=$(git rev-parse HEAD)
make youtube-dl youtube-dl.tar.gz
read -p "VM running? (y/n) " -n 1
wget "http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
mkdir -p "build/$version"
mv youtube-dl youtube-dl.exe "build/$version"
mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"
RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
(cd build/$version/ && md5sum $RELEASE_FILES > MD5SUMS)
(cd build/$version/ && sha1sum $RELEASE_FILES > SHA1SUMS)
(cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS)
(cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
git checkout HEAD -- youtube-dl youtube-dl.exe

/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"

/bin/echo -e "\n### Now switching to gh-pages..."
git clone --branch gh-pages --single-branch . build/gh-pages
ROOT=$(pwd)
(
    set -e
    ORIGIN_URL=$(git config --get remote.origin.url)
    cd build/gh-pages
    "$ROOT/devscripts/gh-pages/add-version.py" $version
    "$ROOT/devscripts/gh-pages/update-feed.py"
    "$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem"
    "$ROOT/devscripts/gh-pages/generate-download.py"
    "$ROOT/devscripts/gh-pages/update-copyright.py"
    "$ROOT/devscripts/gh-pages/update-sites.py"
    git add *.html *.html.in update
    git commit -m "release $version"
    git push "$ROOT" gh-pages
    git push "$ORIGIN_URL" gh-pages
)
rm -rf build

make pypi-files
echo "Uploading to PyPi ..."
python setup.py sdist upload
make clean

/bin/echo -e "\n### DONE!"
# ===== youtube-dl/devscripts/make_readme.py =====

import io
import sys
import re

README_FILE = 'README.md'

helptext = sys.stdin.read()
if isinstance(helptext, bytes):
    helptext = helptext.decode('utf-8')

with io.open(README_FILE, encoding='utf-8') as f:
    oldreadme = f.read()

header = oldreadme[:oldreadme.index('# OPTIONS')]
footer = oldreadme[oldreadme.index('# CONFIGURATION'):]

options = helptext[helptext.index('  General Options:') + 19:]
options = re.sub(r'^  (\w.+)$', r'## \1', options, flags=re.M)
options = '# OPTIONS\n' + options + '\n'

with io.open(README_FILE, 'w', encoding='utf-8') as f:
    f.write(header)
    f.write(options)
    f.write(footer)
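make_readme.py rebuilds only the OPTIONS section: it slices off everything up to and including the '  General Options:' header, then promotes each remaining two-space-indented group header to a Markdown '##' heading. The transformation on a made-up help snippet:

import re

helptext = '''\
Usage: youtube-dl [OPTIONS] URL

  General Options:
    -h, --help            print this help text and exit
    --version             print program version and exit

  Video Format Options:
    -f, --format FORMAT   video format code
'''

# len('  General Options:') == 18, so +19 also skips the trailing newline.
options = helptext[helptext.index('  General Options:') + 19:]
options = re.sub(r'^  (\w.+)$', r'## \1', options, flags=re.M)
print('# OPTIONS\n' + options)
# '  Video Format Options:' becomes '## Video Format Options:'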
# ===== youtube-dl/devscripts/transition_helper_exe/youtube-dl.py =====

#!/usr/bin/env python

import sys, os
import urllib2
import json, hashlib


def rsa_verify(message, signature, key):
    # Hand-rolled PKCS#1 v1.5 signature check (SHA-256), so the frozen exe
    # does not need the third-party rsa package.
    from struct import pack
    from hashlib import sha256
    from sys import version_info

    def b(x):
        if version_info[0] == 2:
            return x
        else:
            return x.encode('latin1')
    assert(type(message) == type(b('')))

    block_size = 0
    n = key[0]
    while n:
        block_size += 1
        n >>= 8
    signature = pow(int(signature, 16), key[1], key[0])
    raw_bytes = []
    while signature:
        raw_bytes.insert(0, pack("B", signature & 0xFF))
        signature >>= 8
    signature = (block_size - len(raw_bytes)) * b('\x00') + b('').join(raw_bytes)
    if signature[0:2] != b('\x00\x01'):
        return False
    signature = signature[2:]
    if not b('\x00') in signature:
        return False
    signature = signature[signature.index(b('\x00')) + 1:]
    # DER prefix identifying a SHA-256 digest:
    if not signature.startswith(b('\x30\x31\x30\x0D\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x05\x00\x04\x20')):
        return False
    signature = signature[19:]
    if signature != sha256(message).digest():
        return False
    return True

sys.stderr.write(u'Hi! We changed distribution method and now youtube-dl needs to update itself one more time.\n')
sys.stderr.write(u'This will only happen once. Simply press enter to go on. Sorry for the trouble!\n')
sys.stderr.write(u'From now on, get the binaries from http://rg3.github.com/youtube-dl/download.html, not from the git repository.\n\n')
raw_input()

filename = sys.argv[0]

UPDATE_URL = "http://rg3.github.io/youtube-dl/update/"
VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
JSON_URL = UPDATE_URL + 'versions.json'
UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)

if not os.access(filename, os.W_OK):
    sys.exit('ERROR: no write permissions on %s' % filename)

exe = os.path.abspath(filename)
directory = os.path.dirname(exe)
if not os.access(directory, os.W_OK):
    sys.exit('ERROR: no write permissions on %s' % directory)

try:
    versions_info = urllib2.urlopen(JSON_URL).read().decode('utf-8')
    versions_info = json.loads(versions_info)
except:
    sys.exit(u'ERROR: can\'t obtain versions info. Please try again later.')
if not 'signature' in versions_info:
    sys.exit(u'ERROR: the versions file is not signed or corrupted. Aborting.')
signature = versions_info['signature']
del versions_info['signature']
if not rsa_verify(json.dumps(versions_info, sort_keys=True), signature, UPDATES_RSA_KEY):
    sys.exit(u'ERROR: the versions file signature is invalid. Aborting.')

version = versions_info['versions'][versions_info['latest']]

try:
    urlh = urllib2.urlopen(version['exe'][0])
    newcontent = urlh.read()
    urlh.close()
except (IOError, OSError):
    sys.exit('ERROR: unable to download latest version')

newcontent_hash = hashlib.sha256(newcontent).hexdigest()
if newcontent_hash != version['exe'][1]:
    sys.exit(u'ERROR: the downloaded file hash does not match. Aborting.')

try:
    with open(exe + '.new', 'wb') as outf:
        outf.write(newcontent)
except (IOError, OSError):
    sys.exit(u'ERROR: unable to write the new version')

try:
    bat = os.path.join(directory, 'youtube-dl-updater.bat')
    b = open(bat, 'w')
    # A running .exe cannot overwrite itself on Windows, so the swap is
    # delegated to a batch file that runs after this process exits.
    b.write("""
echo Updating youtube-dl...
ping 127.0.0.1 -n 5 -w 1000 > NUL
move /Y "%s.new" "%s"
del "%s"
\n""" % (exe, exe, bat))
    b.close()

    os.startfile(bat)
except (IOError, OSError):
    sys.exit('ERROR: unable to overwrite current version')

sys.stderr.write(u'Done! Now you can run youtube-dl.\n')

# ===== youtube-dl/devscripts/transition_helper_exe/setup.py =====

from distutils.core import setup
import py2exe

py2exe_options = {
    "bundle_files": 1,
    "compressed": 1,
    "optimize": 2,
    "dist_dir": '.',
    "dll_excludes": ['w9xpopen.exe']
}

setup(console=['youtube-dl.py'], options={"py2exe": py2exe_options}, zipfile=None)

# ===== youtube-dl/test/test_unicode_literals.py =====

from __future__ import unicode_literals

import io
import os
import re
import unittest

rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

IGNORED_FILES = [
    'setup.py',  # http://bugs.python.org/issue13943
]


class TestUnicodeLiterals(unittest.TestCase):
    def test_all_files(self):
        print('Skipping this test (not yet fully implemented)')
        return

        for dirpath, _, filenames in os.walk(rootDir):
            for basename in filenames:
                if not basename.endswith('.py'):
                    continue
                if basename in IGNORED_FILES:
                    continue

                fn = os.path.join(dirpath, basename)
                with io.open(fn, encoding='utf-8') as inf:
                    code = inf.read()

                if "'" not in code and '"' not in code:
                    continue
                imps = 'from __future__ import unicode_literals'
                self.assertTrue(
                    imps in code,
                    ' %s missing in %s' % (imps, fn))

                m = re.search(r'(?<=\s)u[\'"](?!\)|,|$)', code)
                if m is not None:
                    self.assertTrue(
                        m is None,
                        'u present in %s, around %s' % (
                            fn, code[m.start() - 10:m.end() + 10]))


if __name__ == '__main__':
    unittest.main()

# ===== youtube-dl/test/test_YoutubeDL.py =====

#!/usr/bin/env python
from __future__ import unicode_literals

# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from test.helper import FakeYDL
from youtube_dl import YoutubeDL
from youtube_dl.extractor import YoutubeIE


class YDL(FakeYDL):
    def __init__(self, *args, **kwargs):
        super(YDL, self).__init__(*args, **kwargs)
        self.downloaded_info_dicts = []
        self.msgs = []

    def process_info(self, info_dict):
        self.downloaded_info_dicts.append(info_dict)

    def to_screen(self, msg):
        self.msgs.append(msg)


class TestFormatSelection(unittest.TestCase):
    def test_prefer_free_formats(self):
        # Same resolution => download webm
        ydl = YDL()
        ydl.params['prefer_free_formats'] = True
        formats = [
            {'ext': 'webm', 'height': 460},
            {'ext': 'mp4', 'height': 460},
        ]
        info_dict = {'formats': formats, 'extractor': 'test'}
        yie = YoutubeIE(ydl)
        yie._sort_formats(info_dict['formats'])
        ydl.process_ie_result(info_dict)
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['ext'], 'webm')

        # Different resolution => download best quality (mp4)
        ydl = YDL()
        ydl.params['prefer_free_formats'] = True
        formats = [
            {'ext': 'webm', 'height': 720},
            {'ext': 'mp4', 'height': 1080},
        ]
        info_dict['formats'] = formats
        yie = YoutubeIE(ydl)
        yie._sort_formats(info_dict['formats'])
        ydl.process_ie_result(info_dict)
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['ext'], 'mp4')

        # No prefer_free_formats => prefer mp4 and flv for greater compatibility
        ydl = YDL()
        ydl.params['prefer_free_formats'] = False
        formats = [
            {'ext': 'webm', 'height': 720},
            {'ext': 'mp4', 'height': 720},
            {'ext': 'flv', 'height': 720},
        ]
        info_dict['formats'] = formats
        yie = YoutubeIE(ydl)
        yie._sort_formats(info_dict['formats'])
        ydl.process_ie_result(info_dict)
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['ext'], 'mp4')

        ydl = YDL()
        ydl.params['prefer_free_formats'] = False
        formats = [
            {'ext': 'flv', 'height': 720},
            {'ext': 'webm', 'height': 720},
        ]
        info_dict['formats'] = formats
        yie = YoutubeIE(ydl)
        yie._sort_formats(info_dict['formats'])
        ydl.process_ie_result(info_dict)
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['ext'], 'flv')
    def test_format_limit(self):
        formats = [
            {'format_id': 'meh', 'url': 'http://example.com/meh', 'preference': 1},
            {'format_id': 'good', 'url': 'http://example.com/good', 'preference': 2},
            {'format_id': 'great', 'url': 'http://example.com/great', 'preference': 3},
            {'format_id': 'excellent', 'url': 'http://example.com/exc', 'preference': 4},
        ]
        info_dict = {
            'formats': formats, 'extractor': 'test', 'id': 'testvid'}

        ydl = YDL()
        ydl.process_ie_result(info_dict)
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], 'excellent')

        ydl = YDL({'format_limit': 'good'})
        assert ydl.params['format_limit'] == 'good'
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], 'good')

        ydl = YDL({'format_limit': 'great', 'format': 'all'})
        ydl.process_ie_result(info_dict.copy())
        self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'meh')
        self.assertEqual(ydl.downloaded_info_dicts[1]['format_id'], 'good')
        self.assertEqual(ydl.downloaded_info_dicts[2]['format_id'], 'great')
        self.assertTrue('3' in ydl.msgs[0])

        ydl = YDL()
        ydl.params['format_limit'] = 'excellent'
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], 'excellent')

    def test_format_selection(self):
        formats = [
            {'format_id': '35', 'ext': 'mp4', 'preference': 1},
            {'format_id': '45', 'ext': 'webm', 'preference': 2},
            {'format_id': '47', 'ext': 'webm', 'preference': 3},
            {'format_id': '2', 'ext': 'flv', 'preference': 4},
        ]
        info_dict = {'formats': formats, 'extractor': 'test'}

        ydl = YDL({'format': '20/47'})
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], '47')

        ydl = YDL({'format': '20/71/worst'})
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], '35')

        ydl = YDL()
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], '2')

        ydl = YDL({'format': 'webm/mp4'})
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], '47')

        ydl = YDL({'format': '3gp/40/mp4'})
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], '35')

    def test_format_selection_audio(self):
        formats = [
            {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none'},
            {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none'},
            {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none'},
            {'format_id': 'vid', 'ext': 'mp4', 'preference': 4},
        ]
        info_dict = {'formats': formats, 'extractor': 'test'}

        ydl = YDL({'format': 'bestaudio'})
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], 'audio-high')

        ydl = YDL({'format': 'worstaudio'})
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], 'audio-low')

        formats = [
            {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1},
            {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2},
        ]
        info_dict = {'formats': formats, 'extractor': 'test'}

        ydl = YDL({'format': 'bestaudio/worstaudio/best'})
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], 'vid-high')
    def test_youtube_format_selection(self):
        order = [
            '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '36', '17', '13',
            # Apple HTTP Live Streaming
            '96', '95', '94', '93', '92', '132', '151',
            # 3D
            '85', '84', '102', '83', '101', '82', '100',
            # Dash video
            '138', '137', '248', '136', '247', '135', '246',
            '245', '244', '134', '243', '133', '242', '160',
            # Dash audio
            '141', '172', '140', '139', '171',
        ]

        for f1id, f2id in zip(order, order[1:]):
            f1 = YoutubeIE._formats[f1id].copy()
            f1['format_id'] = f1id
            f2 = YoutubeIE._formats[f2id].copy()
            f2['format_id'] = f2id

            info_dict = {'formats': [f1, f2], 'extractor': 'youtube'}
            ydl = YDL()
            yie = YoutubeIE(ydl)
            yie._sort_formats(info_dict['formats'])
            ydl.process_ie_result(info_dict)
            downloaded = ydl.downloaded_info_dicts[0]
            self.assertEqual(downloaded['format_id'], f1id)

            info_dict = {'formats': [f2, f1], 'extractor': 'youtube'}
            ydl = YDL()
            yie = YoutubeIE(ydl)
            yie._sort_formats(info_dict['formats'])
            ydl.process_ie_result(info_dict)
            downloaded = ydl.downloaded_info_dicts[0]
            self.assertEqual(downloaded['format_id'], f1id)

    def test_add_extra_info(self):
        test_dict = {
            'extractor': 'Foo',
        }
        extra_info = {
            'extractor': 'Bar',
            'playlist': 'funny videos',
        }
        YDL.add_extra_info(test_dict, extra_info)
        self.assertEqual(test_dict['extractor'], 'Foo')
        self.assertEqual(test_dict['playlist'], 'funny videos')

    def test_prepare_filename(self):
        info = {
            'id': '1234',
            'ext': 'mp4',
            'width': None,
        }

        def fname(templ):
            ydl = YoutubeDL({'outtmpl': templ})
            return ydl.prepare_filename(info)
        self.assertEqual(fname('%(id)s.%(ext)s'), '1234.mp4')
        self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
        # Replace missing fields with 'NA'
        self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4')


if __name__ == '__main__':
    unittest.main()

# ===== youtube-dl/test/test_download.py =====

#!/usr/bin/env python

# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from test.helper import (
    get_params,
    get_testcases,
    try_rm,
    md5,
    report_warning
)

import hashlib
import io
import json
import socket

import youtube_dl.YoutubeDL
from youtube_dl.utils import (
    compat_http_client,
    compat_str,
    compat_urllib_error,
    compat_HTTPError,
    DownloadError,
    ExtractorError,
    UnavailableVideoError,
)
from youtube_dl.extractor import get_info_extractor

RETRIES = 3


class YoutubeDL(youtube_dl.YoutubeDL):
    def __init__(self, *args, **kwargs):
        self.to_stderr = self.to_screen
        self.processed_info_dicts = []
        super(YoutubeDL, self).__init__(*args, **kwargs)

    def report_warning(self, message):
        # Don't accept warnings during tests
        raise ExtractorError(message)

    def process_info(self, info_dict):
        self.processed_info_dicts.append(info_dict)
        return super(YoutubeDL, self).process_info(info_dict)


def _file_md5(fn):
    with open(fn, 'rb') as f:
        return hashlib.md5(f.read()).hexdigest()

defs = get_testcases()


class TestDownload(unittest.TestCase):
    maxDiff = None

    def setUp(self):
        self.defs = defs
### Dynamically generate tests
def generator(test_case):

    def test_template(self):
        ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
        other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]

        def print_skipping(reason):
            print('Skipping %s: %s' % (test_case['name'], reason))
        if not ie.working():
            print_skipping('IE marked as not _WORKING')
            return
        if 'playlist' not in test_case:
            info_dict = test_case.get('info_dict', {})
            if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
                print_skipping('The output file cannot be known, the "file" '
                               'key is missing or the info_dict is incomplete')
                return
        if 'skip' in test_case:
            print_skipping(test_case['skip'])
            return
        for other_ie in other_ies:
            if not other_ie.working():
                print_skipping(u'test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
                return

        params = get_params(test_case.get('params', {}))

        ydl = YoutubeDL(params)
        ydl.add_default_info_extractors()
        finished_hook_called = set()

        def _hook(status):
            if status['status'] == 'finished':
                finished_hook_called.add(status['filename'])
        ydl.add_progress_hook(_hook)

        def get_tc_filename(tc):
            return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))

        test_cases = test_case.get('playlist', [test_case])

        def try_rm_tcs_files():
            for tc in test_cases:
                tc_filename = get_tc_filename(tc)
                try_rm(tc_filename)
                try_rm(tc_filename + '.part')
                try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
        try_rm_tcs_files()
        try:
            try_num = 1
            while True:
                try:
                    ydl.download([test_case['url']])
                except (DownloadError, ExtractorError) as err:
                    # Check if the exception is not a network related one
                    if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
                        raise

                    if try_num == RETRIES:
                        report_warning(u'Failed due to network errors, skipping...')
                        return

                    print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))

                    try_num += 1
                else:
                    break

            for tc in test_cases:
                tc_filename = get_tc_filename(tc)
                if not test_case.get('params', {}).get('skip_download', False):
                    self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
                    self.assertTrue(tc_filename in finished_hook_called)
                info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
                self.assertTrue(os.path.exists(info_json_fn))
                if 'md5' in tc:
                    md5_for_file = _file_md5(tc_filename)
                    self.assertEqual(md5_for_file, tc['md5'])
                with io.open(info_json_fn, encoding='utf-8') as infof:
                    info_dict = json.load(infof)
                for (info_field, expected) in tc.get('info_dict', {}).items():
                    if isinstance(expected, compat_str) and expected.startswith('md5:'):
                        got = 'md5:' + md5(info_dict.get(info_field))
                    else:
                        got = info_dict.get(info_field)
                    self.assertEqual(expected, got,
                                     u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))

                # If checkable fields are missing from the test case, print the info_dict
                test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
                                      for key, value in info_dict.items()
                                      if value and key in ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location'))
                if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()):
                    sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')

                # Check for the presence of mandatory fields
                for key in ('id', 'url', 'title', 'ext'):
                    self.assertTrue(key in info_dict.keys() and info_dict[key])
                # Check for mandatory fields that are automatically set by YoutubeDL
                for key in ['webpage_url', 'extractor', 'extractor_key']:
                    self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
        finally:
            try_rm_tcs_files()

    return test_template


### And add them to TestDownload
for n, test_case in enumerate(defs):
    test_method = generator(test_case)
    tname = 'test_' + str(test_case['name'])
    i = 1
    while hasattr(TestDownload, tname):
        tname = 'test_' + str(test_case['name']) + '_' + str(i)
        i += 1
    test_method.__name__ = tname
    setattr(TestDownload, test_method.__name__, test_method)
    del test_method


if __name__ == '__main__':
    unittest.main()
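test_download.py manufactures one unittest method per test case at import time: a factory builds a closure over the case, renames it, and setattr()s it onto the TestCase so runners discover and report each case individually. The pattern in isolation (names and the sample case below are illustrative):

import unittest


class TestDownload(unittest.TestCase):
    pass


def generator(case):
    def test_template(self):
        self.assertIn('url', case)  # stand-in for the real download logic
    return test_template

defs = [{'name': 'Example', 'url': 'http://example.com/video'}]
for case in defs:
    method = generator(case)
    method.__name__ = 'test_' + case['name']
    setattr(TestDownload, method.__name__, method)

if __name__ == '__main__':
    unittest.main()  # runs test_Example as its own test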
# ===== youtube-dl/test/test_youtube_lists.py =====

#!/usr/bin/env python

# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from test.helper import FakeYDL

from youtube_dl.extractor import (
    YoutubeUserIE,
    YoutubePlaylistIE,
    YoutubeIE,
    YoutubeChannelIE,
    YoutubeShowIE,
    YoutubeTopListIE,
)


class TestYoutubeLists(unittest.TestCase):
    def assertIsPlaylist(self, info):
        """Make sure the info has '_type' set to 'playlist'"""
        self.assertEqual(info['_type'], 'playlist')

    def test_youtube_playlist(self):
        dl = FakeYDL()
        ie = YoutubePlaylistIE(dl)
        result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
        self.assertIsPlaylist(result)
        self.assertEqual(result['title'], 'ytdl test PL')
        ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
        self.assertEqual(ytie_results, ['bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])

    def test_youtube_playlist_noplaylist(self):
        dl = FakeYDL()
        dl.params['noplaylist'] = True
        ie = YoutubePlaylistIE(dl)
        result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
        self.assertEqual(result['_type'], 'url')
        self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg')

    def test_issue_673(self):
        dl = FakeYDL()
        ie = YoutubePlaylistIE(dl)
        result = ie.extract('PLBB231211A4F62143')
        self.assertTrue(len(result['entries']) > 25)

    def test_youtube_playlist_long(self):
        dl = FakeYDL()
        ie = YoutubePlaylistIE(dl)
        result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
        self.assertIsPlaylist(result)
        self.assertTrue(len(result['entries']) >= 799)

    def test_youtube_playlist_with_deleted(self):
        #651
        dl = FakeYDL()
        ie = YoutubePlaylistIE(dl)
        result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
        ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
        self.assertFalse('pElCt5oNDuI' in ytie_results)
        self.assertFalse('KdPEApIVdWM' in ytie_results)

    def test_youtube_playlist_empty(self):
        dl = FakeYDL()
        ie = YoutubePlaylistIE(dl)
        result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')
        self.assertIsPlaylist(result)
        self.assertEqual(len(result['entries']), 0)

    def test_youtube_course(self):
        dl = FakeYDL()
        ie = YoutubePlaylistIE(dl)
        # TODO find a > 100 (paginating?) videos course
        result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
        entries = result['entries']
        self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs')
        self.assertEqual(len(entries), 25)
        self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')

    def test_youtube_channel(self):
        dl = FakeYDL()
        ie = YoutubeChannelIE(dl)
        #test paginated channel
        result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
        self.assertTrue(len(result['entries']) > 90)
        #test autogenerated channel
        result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
        self.assertTrue(len(result['entries']) >= 18)

    def test_youtube_user(self):
        dl = FakeYDL()
        ie = YoutubeUserIE(dl)
        result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
        self.assertTrue(len(result['entries']) >= 320)

    def test_youtube_safe_search(self):
        dl = FakeYDL()
        ie = YoutubePlaylistIE(dl)
        result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')
        self.assertEqual(len(result['entries']), 2)

    def test_youtube_show(self):
        dl = FakeYDL()
        ie = YoutubeShowIE(dl)
        result = ie.extract('http://www.youtube.com/show/airdisasters')
        self.assertTrue(len(result) >= 3)

    def test_youtube_mix(self):
        dl = FakeYDL()
        ie = YoutubePlaylistIE(dl)
        result = ie.extract('http://www.youtube.com/watch?v=lLJf9qJHR3E&list=RDrjFaenf1T-Y')
        entries = result['entries']
        self.assertTrue(len(entries) >= 20)
        original_video = entries[0]
        self.assertEqual(original_video['id'], 'rjFaenf1T-Y')

    def test_youtube_toptracks(self):
        dl = FakeYDL()
        ie = YoutubePlaylistIE(dl)
        result = ie.extract('https://www.youtube.com/playlist?list=MCUS')
        entries = result['entries']
        self.assertEqual(len(entries), 100)

    def test_youtube_toplist(self):
        dl = FakeYDL()
        ie = YoutubeTopListIE(dl)
        result = ie.extract('yttoplist:music:Trending')
        entries = result['entries']
        self.assertTrue(len(entries) >= 5)


if __name__ == '__main__':
    unittest.main()
# ===== youtube-dl/test/test_youtube_signature.py =====

#!/usr/bin/env python

# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import io
import re
import string

from youtube_dl.extractor import YoutubeIE
from youtube_dl.utils import compat_str, compat_urlretrieve

_TESTS = [
    (
        u'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
        u'js',
        86,
        u'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
    ),
    (
        u'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
        u'js',
        85,
        u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
    ),
    (
        u'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
        u'js',
        90,
        u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
    ),
]


class TestSignature(unittest.TestCase):
    def setUp(self):
        TEST_DIR = os.path.dirname(os.path.abspath(__file__))
        self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata')
        if not os.path.exists(self.TESTDATA_DIR):
            os.mkdir(self.TESTDATA_DIR)


def make_tfunc(url, stype, sig_length, expected_sig):
    basename = url.rpartition('/')[2]
    m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename)
    assert m, '%r should follow URL format' % basename
    test_id = m.group(1)

    def test_func(self):
        fn = os.path.join(self.TESTDATA_DIR, basename)

        if not os.path.exists(fn):
            compat_urlretrieve(url, fn)

        ie = YoutubeIE()
        if stype == 'js':
            with io.open(fn, encoding='utf-8') as testf:
                jscode = testf.read()
            func = ie._parse_sig_js(jscode)
        else:
            assert stype == 'swf'
            with open(fn, 'rb') as testf:
                swfcode = testf.read()
            func = ie._parse_sig_swf(swfcode)
        src_sig = compat_str(string.printable[:sig_length])
        got_sig = func(src_sig)
        self.assertEqual(got_sig, expected_sig)

    test_func.__name__ = str('test_signature_' + stype + '_' + test_id)
    setattr(TestSignature, test_func.__name__, test_func)


for test_spec in _TESTS:
    make_tfunc(*test_spec)


if __name__ == '__main__':
    unittest.main()
# ===== youtube-dl/test/test_execution.py =====

import unittest

import sys
import os
import subprocess

rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

try:
    _DEV_NULL = subprocess.DEVNULL
except AttributeError:
    _DEV_NULL = open(os.devnull, 'wb')


class TestExecution(unittest.TestCase):
    def test_import(self):
        subprocess.check_call([sys.executable, '-c', 'import youtube_dl'], cwd=rootDir)

    def test_module_exec(self):
        if sys.version_info >= (2, 7):  # Python 2.6 doesn't support package execution
            subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL)

    def test_main_exec(self):
        subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)


if __name__ == '__main__':
    unittest.main()

# ===== youtube-dl/test/__init__.py =====
# (empty file)

# ===== youtube-dl/test/test_playlists.py =====

#!/usr/bin/env python
# encoding: utf-8

from __future__ import unicode_literals

# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from test.helper import FakeYDL

from youtube_dl.extractor import (
    AcademicEarthCourseIE,
    DailymotionPlaylistIE,
    DailymotionUserIE,
    VimeoChannelIE,
    VimeoUserIE,
    VimeoAlbumIE,
    VimeoGroupsIE,
    UstreamChannelIE,
    SoundcloudSetIE,
    SoundcloudUserIE,
    LivestreamIE,
    NHLVideocenterIE,
    BambuserChannelIE,
    BandcampAlbumIE,
    SmotriCommunityIE,
    SmotriUserIE,
    IviCompilationIE,
    ImdbListIE,
    KhanAcademyIE,
    EveryonesMixtapeIE,
    RutubeChannelIE,
    GoogleSearchIE,
    GenericIE,
)


class TestPlaylists(unittest.TestCase):
    def assertIsPlaylist(self, info):
        """Make sure the info has '_type' set to 'playlist'"""
        self.assertEqual(info['_type'], 'playlist')

    def test_dailymotion_playlist(self):
        dl = FakeYDL()
        ie = DailymotionPlaylistIE(dl)
        result = ie.extract('http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q')
        self.assertIsPlaylist(result)
        self.assertEqual(result['title'], 'SPORT')
        self.assertTrue(len(result['entries']) > 20)

    def test_dailymotion_user(self):
        dl = FakeYDL()
        ie = DailymotionUserIE(dl)
        result = ie.extract('https://www.dailymotion.com/user/nqtv')
        self.assertIsPlaylist(result)
        self.assertEqual(result['title'], 'Rémi Gaillard')
        self.assertTrue(len(result['entries']) >= 100)

    def test_vimeo_channel(self):
        dl = FakeYDL()
        ie = VimeoChannelIE(dl)
        result = ie.extract('http://vimeo.com/channels/tributes')
        self.assertIsPlaylist(result)
        self.assertEqual(result['title'], 'Vimeo Tributes')
        self.assertTrue(len(result['entries']) > 24)

    def test_vimeo_user(self):
        dl = FakeYDL()
        ie = VimeoUserIE(dl)
        result = ie.extract('http://vimeo.com/nkistudio/videos')
        self.assertIsPlaylist(result)
        self.assertEqual(result['title'], 'Nki')
        self.assertTrue(len(result['entries']) > 65)
self.assertEqual(result['title'], 'Staff Favorites: November 2013') self.assertTrue(len(result['entries']) > 12) def test_vimeo_groups(self): dl = FakeYDL() ie = VimeoGroupsIE(dl) result = ie.extract('http://vimeo.com/groups/rolexawards') self.assertIsPlaylist(result) self.assertEqual(result['title'], 'Rolex Awards for Enterprise') self.assertTrue(len(result['entries']) > 72) def test_ustream_channel(self): dl = FakeYDL() ie = UstreamChannelIE(dl) result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty') self.assertIsPlaylist(result) self.assertEqual(result['id'], '5124905') self.assertTrue(len(result['entries']) >= 11) def test_soundcloud_set(self): dl = FakeYDL() ie = SoundcloudSetIE(dl) result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep') self.assertIsPlaylist(result) self.assertEqual(result['title'], 'The Royal Concept EP') self.assertTrue(len(result['entries']) >= 6) def test_soundcloud_user(self): dl = FakeYDL() ie = SoundcloudUserIE(dl) result = ie.extract('https://soundcloud.com/the-concept-band') self.assertIsPlaylist(result) self.assertEqual(result['id'], '9615865') self.assertTrue(len(result['entries']) >= 12) def test_livestream_event(self): dl = FakeYDL() ie = LivestreamIE(dl) result = ie.extract('http://new.livestream.com/tedx/cityenglish') self.assertIsPlaylist(result) self.assertEqual(result['title'], 'TEDCity2.0 (English)') self.assertTrue(len(result['entries']) >= 4) def test_nhl_videocenter(self): dl = FakeYDL() ie = NHLVideocenterIE(dl) result = ie.extract('http://video.canucks.nhl.com/videocenter/console?catid=999') self.assertIsPlaylist(result) self.assertEqual(result['id'], '999') self.assertEqual(result['title'], 'Highlights') self.assertEqual(len(result['entries']), 12) def test_bambuser_channel(self): dl = FakeYDL() ie = BambuserChannelIE(dl) result = ie.extract('http://bambuser.com/channel/pixelversity') self.assertIsPlaylist(result) self.assertEqual(result['title'], 'pixelversity') self.assertTrue(len(result['entries']) >= 60) def test_bandcamp_album(self): dl = FakeYDL() ie = BandcampAlbumIE(dl) result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep') self.assertIsPlaylist(result) self.assertEqual(result['title'], 'Nightmare Night EP') self.assertTrue(len(result['entries']) >= 4) def test_smotri_community(self): dl = FakeYDL() ie = SmotriCommunityIE(dl) result = ie.extract('http://smotri.com/community/video/kommuna') self.assertIsPlaylist(result) self.assertEqual(result['id'], 'kommuna') self.assertEqual(result['title'], 'КПРФ') self.assertTrue(len(result['entries']) >= 4) def test_smotri_user(self): dl = FakeYDL() ie = SmotriUserIE(dl) result = ie.extract('http://smotri.com/user/inspector') self.assertIsPlaylist(result) self.assertEqual(result['id'], 'inspector') self.assertEqual(result['title'], 'Inspector') self.assertTrue(len(result['entries']) >= 9) def test_AcademicEarthCourse(self): dl = FakeYDL() ie = AcademicEarthCourseIE(dl) result = ie.extract('http://academicearth.org/courses/building-dynamic-websites/') self.assertIsPlaylist(result) self.assertEqual(result['id'], 'building-dynamic-websites') self.assertEqual(result['title'], 'Building Dynamic Websites') self.assertEqual(result['description'], u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. 
This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.") self.assertEqual(len(result['entries']), 10) def test_ivi_compilation(self): dl = FakeYDL() ie = IviCompilationIE(dl) result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel') self.assertIsPlaylist(result) self.assertEqual(result['id'], 'dezhurnyi_angel') self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012)') self.assertTrue(len(result['entries']) >= 36) def test_ivi_compilation_season(self): dl = FakeYDL() ie = IviCompilationIE(dl) result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel/season2') self.assertIsPlaylist(result) self.assertEqual(result['id'], 'dezhurnyi_angel/season2') self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012) 2 сезон') self.assertTrue(len(result['entries']) >= 20) def test_imdb_list(self): dl = FakeYDL() ie = ImdbListIE(dl) result = ie.extract('http://www.imdb.com/list/JFs9NWw6XI0') self.assertIsPlaylist(result) self.assertEqual(result['id'], 'JFs9NWw6XI0') self.assertEqual(result['title'], 'March 23, 2012 Releases') self.assertEqual(len(result['entries']), 7) def test_khanacademy_topic(self): dl = FakeYDL() ie = KhanAcademyIE(dl) result = ie.extract('https://www.khanacademy.org/math/applied-math/cryptography') self.assertIsPlaylist(result) self.assertEqual(result['id'], 'cryptography') self.assertEqual(result['title'], 'Journey into cryptography') self.assertEqual(result['description'], 'How have humans protected their secret messages through history? 
What has changed today?') self.assertTrue(len(result['entries']) >= 3) def test_EveryonesMixtape(self): dl = FakeYDL() ie = EveryonesMixtapeIE(dl) result = ie.extract('http://everyonesmixtape.com/#/mix/m7m0jJAbMQi') self.assertIsPlaylist(result) self.assertEqual(result['id'], 'm7m0jJAbMQi') self.assertEqual(result['title'], 'Driving') self.assertEqual(len(result['entries']), 24) def test_rutube_channel(self): dl = FakeYDL() ie = RutubeChannelIE(dl) result = ie.extract('http://rutube.ru/tags/video/1409') self.assertIsPlaylist(result) self.assertEqual(result['id'], '1409') self.assertTrue(len(result['entries']) >= 34) def test_multiple_brightcove_videos(self): # https://github.com/rg3/youtube-dl/issues/2283 dl = FakeYDL() ie = GenericIE(dl) result = ie.extract('http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html') self.assertIsPlaylist(result) self.assertEqual(result['id'], 'always-never-nuclear-command-and-control') self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker') self.assertEqual(len(result['entries']), 3) def test_GoogleSearch(self): dl = FakeYDL() ie = GoogleSearchIE(dl) result = ie.extract('gvsearch15:python language') self.assertIsPlaylist(result) self.assertEqual(result['id'], 'python language') self.assertEqual(result['title'], 'python language') self.assertTrue(len(result['entries']) == 15) if __name__ == '__main__': unittest.main() youtube-dl/test/test_write_annotations.py0000644000000000000000000000500512271337464020015 0ustar rootroot#!/usr/bin/env python # coding: utf-8 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import get_params, try_rm import io import xml.etree.ElementTree import youtube_dl.YoutubeDL import youtube_dl.extractor class YoutubeDL(youtube_dl.YoutubeDL): def __init__(self, *args, **kwargs): super(YoutubeDL, self).__init__(*args, **kwargs) self.to_stderr = self.to_screen params = get_params({ 'writeannotations': True, 'skip_download': True, 'writeinfojson': False, 'format': 'flv', }) TEST_ID = 'gr51aVj-mLg' ANNOTATIONS_FILE = TEST_ID + '.flv.annotations.xml' EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label'] class TestAnnotations(unittest.TestCase): def setUp(self): # Clear old files self.tearDown() def test_info_json(self): expected = list(EXPECTED_ANNOTATIONS) #Two annotations could have the same text. ie = youtube_dl.extractor.YoutubeIE() ydl = YoutubeDL(params) ydl.add_info_extractor(ie) ydl.download([TEST_ID]) self.assertTrue(os.path.exists(ANNOTATIONS_FILE)) annoxml = None with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof: annoxml = xml.etree.ElementTree.parse(annof) self.assertTrue(annoxml is not None, 'Failed to parse annotations XML') root = annoxml.getroot() self.assertEqual(root.tag, 'document') annotationsTag = root.find('annotations') self.assertEqual(annotationsTag.tag, 'annotations') annotations = annotationsTag.findall('annotation') #Not all the annotations have TEXT children and the annotations are returned unsorted. 
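# Editor's note: the loop below performs a multiset-style comparison; a
# standalone sketch of the same idea, with hypothetical data (not taken
# from the annotations file):
#
#   expected = ['Note', 'Note', 'Title']
#   for text in ['Title', 'Note', 'Note']:   # order does not matter
#       assert text in expected
#       expected.remove(text)                # consume one occurrence per match
#   assert expected == []                    # every expected text was seen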
for a in annotations: self.assertEqual(a.tag, 'annotation') if a.get('type') == 'text': textTag = a.find('TEXT') text = textTag.text self.assertTrue(text in expected) #assertIn only added in python 2.7 #remove the first occurance, there could be more than one annotation with the same text expected.remove(text) #We should have seen (and removed) all the expected annotation texts. self.assertEqual(len(expected), 0, 'Not all expected annotations were found.') def tearDown(self): try_rm(ANNOTATIONS_FILE) if __name__ == '__main__': unittest.main() youtube-dl/test/test_subtitles.py0000644000000000000000000002425612273722437016275 0ustar rootroot#!/usr/bin/env python # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import FakeYDL, md5 from youtube_dl.extractor import ( BlipTVIE, YoutubeIE, DailymotionIE, TEDIE, VimeoIE, ) class BaseTestSubtitles(unittest.TestCase): url = None IE = None def setUp(self): self.DL = FakeYDL() self.ie = self.IE(self.DL) def getInfoDict(self): info_dict = self.ie.extract(self.url) return info_dict def getSubtitles(self): info_dict = self.getInfoDict() return info_dict['subtitles'] class TestYoutubeSubtitles(BaseTestSubtitles): url = 'QRS8MkLhQmM' IE = YoutubeIE def test_youtube_no_writesubtitles(self): self.DL.params['writesubtitles'] = False subtitles = self.getSubtitles() self.assertEqual(subtitles, None) def test_youtube_subtitles(self): self.DL.params['writesubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260') def test_youtube_subtitles_lang(self): self.DL.params['writesubtitles'] = True self.DL.params['subtitleslangs'] = ['it'] subtitles = self.getSubtitles() self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d') def test_youtube_allsubtitles(self): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(len(subtitles.keys()), 13) def test_youtube_subtitles_sbv_format(self): self.DL.params['writesubtitles'] = True self.DL.params['subtitlesformat'] = 'sbv' subtitles = self.getSubtitles() self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b') def test_youtube_subtitles_vtt_format(self): self.DL.params['writesubtitles'] = True self.DL.params['subtitlesformat'] = 'vtt' subtitles = self.getSubtitles() self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06') def test_youtube_list_subtitles(self): self.DL.expect_warning(u'Video doesn\'t have automatic captions') self.DL.params['listsubtitles'] = True info_dict = self.getInfoDict() self.assertEqual(info_dict, None) def test_youtube_automatic_captions(self): self.url = '8YoUxe5ncPo' self.DL.params['writeautomaticsub'] = True self.DL.params['subtitleslangs'] = ['it'] subtitles = self.getSubtitles() self.assertTrue(subtitles['it'] is not None) def test_youtube_nosubtitles(self): self.DL.expect_warning(u'video doesn\'t have subtitles') self.url = 'sAjKT8FhjI8' self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(len(subtitles), 0) def test_youtube_multiple_langs(self): self.url = 'QRS8MkLhQmM' self.DL.params['writesubtitles'] = True langs = ['it', 'fr', 'de'] self.DL.params['subtitleslangs'] = langs subtitles = self.getSubtitles() for lang in langs: self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang) class 
TestDailymotionSubtitles(BaseTestSubtitles): url = 'http://www.dailymotion.com/video/xczg00' IE = DailymotionIE def test_no_writesubtitles(self): subtitles = self.getSubtitles() self.assertEqual(subtitles, None) def test_subtitles(self): self.DL.params['writesubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f') def test_subtitles_lang(self): self.DL.params['writesubtitles'] = True self.DL.params['subtitleslangs'] = ['fr'] subtitles = self.getSubtitles() self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792') def test_allsubtitles(self): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(len(subtitles.keys()), 5) def test_list_subtitles(self): self.DL.expect_warning(u'Automatic Captions not supported by this server') self.DL.params['listsubtitles'] = True info_dict = self.getInfoDict() self.assertEqual(info_dict, None) def test_automatic_captions(self): self.DL.expect_warning(u'Automatic Captions not supported by this server') self.DL.params['writeautomaticsub'] = True self.DL.params['subtitleslang'] = ['en'] subtitles = self.getSubtitles() self.assertTrue(len(subtitles.keys()) == 0) def test_nosubtitles(self): self.DL.expect_warning(u'video doesn\'t have subtitles') self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv' self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(len(subtitles), 0) def test_multiple_langs(self): self.DL.params['writesubtitles'] = True langs = ['es', 'fr', 'de'] self.DL.params['subtitleslangs'] = langs subtitles = self.getSubtitles() for lang in langs: self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang) class TestTedSubtitles(BaseTestSubtitles): url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html' IE = TEDIE def test_no_writesubtitles(self): subtitles = self.getSubtitles() self.assertEqual(subtitles, None) def test_subtitles(self): self.DL.params['writesubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14') def test_subtitles_lang(self): self.DL.params['writesubtitles'] = True self.DL.params['subtitleslangs'] = ['fr'] subtitles = self.getSubtitles() self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5') def test_allsubtitles(self): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(len(subtitles.keys()), 28) def test_list_subtitles(self): self.DL.expect_warning(u'Automatic Captions not supported by this server') self.DL.params['listsubtitles'] = True info_dict = self.getInfoDict() self.assertEqual(info_dict, None) def test_automatic_captions(self): self.DL.expect_warning(u'Automatic Captions not supported by this server') self.DL.params['writeautomaticsub'] = True self.DL.params['subtitleslang'] = ['en'] subtitles = self.getSubtitles() self.assertTrue(len(subtitles.keys()) == 0) def test_multiple_langs(self): self.DL.params['writesubtitles'] = True langs = ['es', 'fr', 'de'] self.DL.params['subtitleslangs'] = langs subtitles = self.getSubtitles() for lang in langs: self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang) class TestBlipTVSubtitles(BaseTestSubtitles): url = 'http://blip.tv/a/a-6603250' IE = BlipTVIE def 
test_list_subtitles(self): self.DL.expect_warning(u'Automatic Captions not supported by this server') self.DL.params['listsubtitles'] = True info_dict = self.getInfoDict() self.assertEqual(info_dict, None) def test_allsubtitles(self): self.DL.expect_warning(u'Automatic Captions not supported by this server') self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), set(['en'])) self.assertEqual(md5(subtitles['en']), '5b75c300af65fe4476dff79478bb93e4') class TestVimeoSubtitles(BaseTestSubtitles): url = 'http://vimeo.com/76979871' IE = VimeoIE def test_no_writesubtitles(self): subtitles = self.getSubtitles() self.assertEqual(subtitles, None) def test_subtitles(self): self.DL.params['writesubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888') def test_subtitles_lang(self): self.DL.params['writesubtitles'] = True self.DL.params['subtitleslangs'] = ['fr'] subtitles = self.getSubtitles() self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8') def test_allsubtitles(self): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr'])) def test_list_subtitles(self): self.DL.expect_warning(u'Automatic Captions not supported by this server') self.DL.params['listsubtitles'] = True info_dict = self.getInfoDict() self.assertEqual(info_dict, None) def test_automatic_captions(self): self.DL.expect_warning(u'Automatic Captions not supported by this server') self.DL.params['writeautomaticsub'] = True self.DL.params['subtitleslang'] = ['en'] subtitles = self.getSubtitles() self.assertTrue(len(subtitles.keys()) == 0) def test_nosubtitles(self): self.DL.expect_warning(u'video doesn\'t have subtitles') self.url = 'http://vimeo.com/56015672' self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(len(subtitles), 0) def test_multiple_langs(self): self.DL.params['writesubtitles'] = True langs = ['es', 'fr', 'de'] self.DL.params['subtitleslangs'] = langs subtitles = self.getSubtitles() for lang in langs: self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang) if __name__ == '__main__': unittest.main() youtube-dl/test/parameters.json0000644000000000000000000000212512271337464015673 0ustar rootroot{ "consoletitle": false, "continuedl": true, "forcedescription": false, "forcefilename": false, "forceformat": false, "forcethumbnail": false, "forcetitle": false, "forceurl": false, "format": null, "format_limit": null, "ignoreerrors": false, "listformats": null, "logtostderr": false, "matchtitle": null, "max_downloads": null, "nooverwrites": false, "nopart": false, "noprogress": false, "outtmpl": "%(id)s.%(ext)s", "password": null, "playlistend": -1, "playliststart": 1, "prefer_free_formats": false, "quiet": false, "ratelimit": null, "rejecttitle": null, "retries": 10, "simulate": false, "skip_download": false, "subtitleslang": null, "subtitlesformat": "srt", "test": true, "updatetime": true, "usenetrc": false, "username": null, "verbose": true, "writedescription": false, "writeinfojson": true, "writesubtitles": false, "allsubtitles": false, "listssubtitles": false, "socket_timeout": 20 } youtube-dl/test/helper.py0000644000000000000000000000471712271337464014477 0ustar rootrootimport errno import io import hashlib 
import json
import os.path
import re
import types
import sys

import youtube_dl.extractor
from youtube_dl import YoutubeDL
from youtube_dl.utils import preferredencoding


def get_params(override=None):
    PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                   "parameters.json")
    with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
        parameters = json.load(pf)
    if override:
        parameters.update(override)
    return parameters


def try_rm(filename):
    """ Remove a file if it exists """
    try:
        os.remove(filename)
    except OSError as ose:
        if ose.errno != errno.ENOENT:
            raise


def report_warning(message):
    '''
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored
    '''
    if sys.stderr.isatty() and os.name != 'nt':
        _msg_header = u'\033[0;33mWARNING:\033[0m'
    else:
        _msg_header = u'WARNING:'
    output = u'%s %s\n' % (_msg_header, message)
    if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
        output = output.encode(preferredencoding())
    sys.stderr.write(output)


class FakeYDL(YoutubeDL):
    def __init__(self, override=None):
        # Different instances of the downloader can't share the same dictionary:
        # some tests set the "sublang" parameter, which would break the md5 checks.
        params = get_params(override=override)
        super(FakeYDL, self).__init__(params)
        self.result = []

    def to_screen(self, s, skip_eol=None):
        print(s)

    def trouble(self, s, tb=None):
        raise Exception(s)

    def download(self, x):
        self.result.append(x)

    def expect_warning(self, regex):
        # Silence an expected warning matching a regex
        old_report_warning = self.report_warning

        def report_warning(self, message):
            if re.match(regex, message):
                return
            old_report_warning(message)
        self.report_warning = types.MethodType(report_warning, self)


def get_testcases():
    for ie in youtube_dl.extractor.gen_extractors():
        t = getattr(ie, '_TEST', None)
        if t:
            t['name'] = type(ie).__name__[:-len('IE')]
            yield t
        for t in getattr(ie, '_TESTS', []):
            t['name'] = type(ie).__name__[:-len('IE')]
            yield t


md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
youtube-dl/test/test_write_info_json.py0000644000000000000000000000417512271337464017453 0ustar rootroot#!/usr/bin/env python
# coding: utf-8

# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from test.helper import get_params

import io
import json

import youtube_dl.YoutubeDL
import youtube_dl.extractor


class YoutubeDL(youtube_dl.YoutubeDL):
    def __init__(self, *args, **kwargs):
        super(YoutubeDL, self).__init__(*args, **kwargs)
        self.to_stderr = self.to_screen

params = get_params({
    'writeinfojson': True,
    'skip_download': True,
    'writedescription': True,
})

TEST_ID = 'BaW_jenozKc'
INFO_JSON_FILE = TEST_ID + '.info.json'
DESCRIPTION_FILE = TEST_ID + '.mp4.description'
EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐
test URL: https://github.com/rg3/youtube-dl/issues/1892
This is a test video for youtube-dl.
For more information, contact phihag@phihag.de .''' class TestInfoJSON(unittest.TestCase): def setUp(self): # Clear old files self.tearDown() def test_info_json(self): ie = youtube_dl.extractor.YoutubeIE() ydl = YoutubeDL(params) ydl.add_info_extractor(ie) ydl.download([TEST_ID]) self.assertTrue(os.path.exists(INFO_JSON_FILE)) with io.open(INFO_JSON_FILE, 'r', encoding='utf-8') as jsonf: jd = json.load(jsonf) self.assertEqual(jd['upload_date'], u'20121002') self.assertEqual(jd['description'], EXPECTED_DESCRIPTION) self.assertEqual(jd['id'], TEST_ID) self.assertEqual(jd['extractor'], 'youtube') self.assertEqual(jd['title'], u'''youtube-dl test video "'/\ä↭𝕐''') self.assertEqual(jd['uploader'], 'Philipp Hagemeister') self.assertTrue(os.path.exists(DESCRIPTION_FILE)) with io.open(DESCRIPTION_FILE, 'r', encoding='utf-8') as descf: descr = descf.read() self.assertEqual(descr, EXPECTED_DESCRIPTION) def tearDown(self): if os.path.exists(INFO_JSON_FILE): os.remove(INFO_JSON_FILE) if os.path.exists(DESCRIPTION_FILE): os.remove(DESCRIPTION_FILE) if __name__ == '__main__': unittest.main() youtube-dl/test/test_utils.py0000644000000000000000000002403212300361346015374 0ustar rootroot#!/usr/bin/env python # coding: utf-8 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # Various small unit tests import xml.etree.ElementTree #from youtube_dl.utils import htmlentity_transform from youtube_dl.utils import ( DateRange, encodeFilename, find_xpath_attr, fix_xml_ampersands, get_meta_content, orderedSet, PagedList, parse_duration, sanitize_filename, shell_quote, smuggle_url, str_to_int, struct_unpack, timeconvert, unescapeHTML, unified_strdate, unsmuggle_url, url_basename, xpath_with_ns, ) if sys.version_info < (3, 0): _compat_str = lambda b: b.decode('unicode-escape') else: _compat_str = lambda s: s class TestUtil(unittest.TestCase): def test_timeconvert(self): self.assertTrue(timeconvert('') is None) self.assertTrue(timeconvert('bougrg') is None) def test_sanitize_filename(self): self.assertEqual(sanitize_filename('abc'), 'abc') self.assertEqual(sanitize_filename('abc_d-e'), 'abc_d-e') self.assertEqual(sanitize_filename('123'), '123') self.assertEqual('abc_de', sanitize_filename('abc/de')) self.assertFalse('/' in sanitize_filename('abc/de///')) self.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de')) self.assertEqual('xxx', sanitize_filename('xxx/<>\\*|')) self.assertEqual('yes no', sanitize_filename('yes? 
no')) self.assertEqual('this - that', sanitize_filename('this: that')) self.assertEqual(sanitize_filename('AT&T'), 'AT&T') aumlaut = _compat_str('\xe4') self.assertEqual(sanitize_filename(aumlaut), aumlaut) tests = _compat_str('\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430') self.assertEqual(sanitize_filename(tests), tests) forbidden = '"\0\\/' for fc in forbidden: for fbc in forbidden: self.assertTrue(fbc not in sanitize_filename(fc)) def test_sanitize_filename_restricted(self): self.assertEqual(sanitize_filename('abc', restricted=True), 'abc') self.assertEqual(sanitize_filename('abc_d-e', restricted=True), 'abc_d-e') self.assertEqual(sanitize_filename('123', restricted=True), '123') self.assertEqual('abc_de', sanitize_filename('abc/de', restricted=True)) self.assertFalse('/' in sanitize_filename('abc/de///', restricted=True)) self.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de', restricted=True)) self.assertEqual('xxx', sanitize_filename('xxx/<>\\*|', restricted=True)) self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True)) self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True)) tests = _compat_str('a\xe4b\u4e2d\u56fd\u7684c') self.assertEqual(sanitize_filename(tests, restricted=True), 'a_b_c') self.assertTrue(sanitize_filename(_compat_str('\xf6'), restricted=True) != '') # No empty filename forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#' for fc in forbidden: for fbc in forbidden: self.assertTrue(fbc not in sanitize_filename(fc, restricted=True)) # Handle a common case more neatly self.assertEqual(sanitize_filename(_compat_str('\u5927\u58f0\u5e26 - Song'), restricted=True), 'Song') self.assertEqual(sanitize_filename(_compat_str('\u603b\u7edf: Speech'), restricted=True), 'Speech') # .. but make sure the file name is never empty self.assertTrue(sanitize_filename('-', restricted=True) != '') self.assertTrue(sanitize_filename(':', restricted=True) != '') def test_sanitize_ids(self): self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw') self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw') self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI') def test_ordered_set(self): self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7]) self.assertEqual(orderedSet([]), []) self.assertEqual(orderedSet([1]), [1]) #keep the list ordered self.assertEqual(orderedSet([135, 1, 1, 1]), [135, 1]) def test_unescape_html(self): self.assertEqual(unescapeHTML(_compat_str('%20;')), _compat_str('%20;')) def test_daterange(self): _20century = DateRange("19000101","20000101") self.assertFalse("17890714" in _20century) _ac = DateRange("00010101") self.assertTrue("19690721" in _ac) _firstmilenium = DateRange(end="10000101") self.assertTrue("07110427" in _firstmilenium) def test_unified_dates(self): self.assertEqual(unified_strdate('December 21, 2010'), '20101221') self.assertEqual(unified_strdate('8/7/2009'), '20090708') self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214') self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011') self.assertEqual(unified_strdate('1968-12-10'), '19681210') def test_find_xpath_attr(self): testxml = u''' ''' doc = xml.etree.ElementTree.fromstring(testxml) self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None) self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1]) self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2]) def test_meta_parser(self): testhtml = u''' ''' 
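# Editor's note: the testhtml fixture above lost its markup during text
# extraction; the assertions below only require a <head> containing two
# <meta> tags. A hypothetical fixture of that shape (an assumption
# reconstructed from the assertions, not the original bytes):
#
#   testhtml = u'''<head>
#       <meta name="description" content="foo &amp; bar">
#       <meta content='Plato' name='author'>
#   </head>'''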
get_meta = lambda name: get_meta_content(name, testhtml) self.assertEqual(get_meta('description'), u'foo & bar') self.assertEqual(get_meta('author'), 'Plato') def test_xpath_with_ns(self): testxml = u''' The Author http://server.com/download.mp3 ''' doc = xml.etree.ElementTree.fromstring(testxml) find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'})) self.assertTrue(find('media:song') is not None) self.assertEqual(find('media:song/media:author').text, u'The Author') self.assertEqual(find('media:song/url').text, u'http://server.com/download.mp3') def test_smuggle_url(self): data = {u"ö": u"ö", u"abc": [3]} url = 'https://foo.bar/baz?x=y#a' smug_url = smuggle_url(url, data) unsmug_url, unsmug_data = unsmuggle_url(smug_url) self.assertEqual(url, unsmug_url) self.assertEqual(data, unsmug_data) res_url, res_data = unsmuggle_url(url) self.assertEqual(res_url, url) self.assertEqual(res_data, None) def test_shell_quote(self): args = ['ffmpeg', '-i', encodeFilename(u'ñ€ß\'.mp4')] self.assertEqual(shell_quote(args), u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""") def test_str_to_int(self): self.assertEqual(str_to_int('123,456'), 123456) self.assertEqual(str_to_int('123.456'), 123456) def test_url_basename(self): self.assertEqual(url_basename(u'http://foo.de/'), u'') self.assertEqual(url_basename(u'http://foo.de/bar/baz'), u'baz') self.assertEqual(url_basename(u'http://foo.de/bar/baz?x=y'), u'baz') self.assertEqual(url_basename(u'http://foo.de/bar/baz#x=y'), u'baz') self.assertEqual(url_basename(u'http://foo.de/bar/baz/'), u'baz') self.assertEqual( url_basename(u'http://media.w3.org/2010/05/sintel/trailer.mp4'), u'trailer.mp4') def test_parse_duration(self): self.assertEqual(parse_duration(None), None) self.assertEqual(parse_duration('1'), 1) self.assertEqual(parse_duration('1337:12'), 80232) self.assertEqual(parse_duration('9:12:43'), 33163) self.assertEqual(parse_duration('12:00'), 720) self.assertEqual(parse_duration('00:01:01'), 61) self.assertEqual(parse_duration('x:y'), None) self.assertEqual(parse_duration('3h11m53s'), 11513) self.assertEqual(parse_duration('62m45s'), 3765) self.assertEqual(parse_duration('6m59s'), 419) self.assertEqual(parse_duration('49s'), 49) self.assertEqual(parse_duration('0h0m0s'), 0) self.assertEqual(parse_duration('0m0s'), 0) self.assertEqual(parse_duration('0s'), 0) def test_fix_xml_ampersands(self): self.assertEqual( fix_xml_ampersands('"&x=y&z=a'), '"&x=y&z=a') self.assertEqual( fix_xml_ampersands('"&x=y&wrong;&z=a'), '"&x=y&wrong;&z=a') self.assertEqual( fix_xml_ampersands('&'><"'), '&'><"') self.assertEqual( fix_xml_ampersands('Ӓ᪼'), 'Ӓ᪼') self.assertEqual(fix_xml_ampersands('&#&#'), '&#&#') def test_paged_list(self): def testPL(size, pagesize, sliceargs, expected): def get_page(pagenum): firstid = pagenum * pagesize upto = min(size, pagenum * pagesize + pagesize) for i in range(firstid, upto): yield i pl = PagedList(get_page, pagesize) got = pl.getslice(*sliceargs) self.assertEqual(got, expected) testPL(5, 2, (), [0, 1, 2, 3, 4]) testPL(5, 2, (1,), [1, 2, 3, 4]) testPL(5, 2, (2,), [2, 3, 4]) testPL(5, 2, (4,), [4]) testPL(5, 2, (0, 3), [0, 1, 2]) testPL(5, 2, (1, 4), [1, 2, 3]) testPL(5, 2, (2, 99), [2, 3, 4]) testPL(5, 2, (20, 99), []) def test_struct_unpack(self): self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,)) if __name__ == '__main__': unittest.main() youtube-dl/test/test_all_urls.py0000644000000000000000000001520112275470533016061 0ustar rootroot#!/usr/bin/env python from __future__ import unicode_literals # Allow direct execution import os 
import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import get_testcases from youtube_dl.extractor import ( FacebookIE, gen_extractors, JustinTVIE, PBSIE, YoutubeIE, ) class TestAllURLsMatching(unittest.TestCase): def setUp(self): self.ies = gen_extractors() def matching_ies(self, url): return [ie.IE_NAME for ie in self.ies if ie.suitable(url) and ie.IE_NAME != 'generic'] def assertMatch(self, url, ie_list): self.assertEqual(self.matching_ies(url), ie_list) def test_youtube_playlist_matching(self): assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist']) assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') #585 assertPlaylist('PL63F0C78739B09958') assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668 self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M')) # Top tracks assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101') def test_youtube_matching(self): self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M')) self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668 self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube']) self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube']) def test_youtube_channel_matching(self): assertChannel = lambda url: self.assertMatch(url, ['youtube:channel']) assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM') assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec') assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') def test_youtube_user_matching(self): self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user']) def test_youtube_feeds(self): self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watch_later']) self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions']) self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended']) self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites']) def test_youtube_show_matching(self): self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show']) def test_justin_tv_channelid_matching(self): self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv")) self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv")) self.assertTrue(JustinTVIE.suitable(u"www.justin.tv/vanillatv")) self.assertTrue(JustinTVIE.suitable(u"www.twitch.tv/vanillatv")) self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv")) self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv")) self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv/")) self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/")) def test_justintv_videoid_matching(self): self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/b/328087483")) def test_justin_tv_chapterid_matching(self): self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361")) def test_youtube_extract(self): assertExtractId = lambda 
url, id: self.assertEqual(YoutubeIE.extract_id(url), id) assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc') assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc') assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc') assertExtractId('BaW_jenozKc', 'BaW_jenozKc') def test_facebook_matching(self): self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268')) def test_no_duplicates(self): ies = gen_extractors() for tc in get_testcases(): url = tc['url'] for ie in ies: if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'): self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url)) else: self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url)) def test_keywords(self): self.assertMatch(':ytsubs', ['youtube:subscriptions']) self.assertMatch(':ytsubscriptions', ['youtube:subscriptions']) self.assertMatch(':ythistory', ['youtube:history']) self.assertMatch(':thedailyshow', ['ComedyCentralShows']) self.assertMatch(':tds', ['ComedyCentralShows']) self.assertMatch(':colbertreport', ['ComedyCentralShows']) self.assertMatch(':cr', ['ComedyCentralShows']) def test_vimeo_matching(self): self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel']) self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user']) self.assertMatch('http://vimeo.com/user7108434/videos', ['vimeo:user']) self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review']) # https://github.com/rg3/youtube-dl/issues/1930 def test_soundcloud_not_matching_sets(self): self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set']) def test_tumblr(self): self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr']) self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr']) def test_pbs(self): # https://github.com/rg3/youtube-dl/issues/2350 self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS']) if __name__ == '__main__': unittest.main() youtube-dl/test/test_age_restriction.py0000644000000000000000000000274612271337464017440 0ustar rootroot#!/usr/bin/env python # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import try_rm from youtube_dl import YoutubeDL def _download_restricted(url, filename, age): """ Returns true iff the file has been downloaded """ params = { 'age_limit': age, 'skip_download': True, 'writeinfojson': True, "outtmpl": "%(id)s.%(ext)s", } ydl = YoutubeDL(params) ydl.add_default_info_extractors() json_filename = os.path.splitext(filename)[0] + '.info.json' try_rm(json_filename) ydl.download([url]) res = os.path.exists(json_filename) try_rm(json_filename) return res class TestAgeRestriction(unittest.TestCase): def _assert_restricted(self, url, filename, age, old_age=None): self.assertTrue(_download_restricted(url, filename, old_age)) self.assertFalse(_download_restricted(url, filename, age)) def test_youtube(self): self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10) def test_youporn(self): self._assert_restricted( 
'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', '505835.mp4', 2, old_age=25) def test_pornotube(self): self._assert_restricted( 'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing', '1689755.flv', 13) if __name__ == '__main__': unittest.main() youtube-dl/youtube_dl/0000755000000000000000000000000012300362551014015 5ustar rootrootyoutube-dl/youtube_dl/FileDownloader.py0000644000000000000000000000104712271337464017303 0ustar rootroot# Legacy file for backwards compatibility, use youtube_dl.downloader instead! from .downloader import FileDownloader as RealFileDownloader from .downloader import get_suitable_downloader # This class reproduces the old behaviour of FileDownloader class FileDownloader(RealFileDownloader): def _do_download(self, filename, info_dict): real_fd = get_suitable_downloader(info_dict)(self.ydl, self.params) for ph in self._progress_hooks: real_fd.add_progress_hook(ph) return real_fd.download(filename, info_dict) youtube-dl/youtube_dl/YoutubeDL.py0000644000000000000000000015147512272152330016260 0ustar rootroot#!/usr/bin/env python # -*- coding: utf-8 -*- from __future__ import absolute_import, unicode_literals import collections import errno import io import json import os import platform import re import shutil import subprocess import socket import sys import time import traceback if os.name == 'nt': import ctypes from .utils import ( compat_cookiejar, compat_http_client, compat_str, compat_urllib_error, compat_urllib_request, ContentTooShortError, date_from_str, DateRange, determine_ext, DownloadError, encodeFilename, ExtractorError, format_bytes, formatSeconds, get_term_width, locked_file, make_HTTPS_handler, MaxDownloadsReached, PagedList, PostProcessingError, platform_name, preferredencoding, SameFileError, sanitize_filename, subtitles_filename, takewhile_inclusive, UnavailableVideoError, url_basename, write_json_file, write_string, YoutubeDLHandler, prepend_extension, ) from .extractor import get_info_extractor, gen_extractors from .downloader import get_suitable_downloader from .postprocessor import FFmpegMergerPP from .version import __version__ class YoutubeDL(object): """YoutubeDL class. YoutubeDL objects are the ones responsible of downloading the actual video file and writing it to disk if the user has requested it, among some other tasks. In most cases there should be one per program. As, given a video URL, the downloader doesn't know how to extract all the needed information, task that InfoExtractors do, it has to pass the URL to one of them. For this, YoutubeDL objects have a method that allows InfoExtractors to be registered in a given order. When it is passed a URL, the YoutubeDL object handles it to the first InfoExtractor it finds that reports being able to handle it. The InfoExtractor extracts all the information about the video or videos the URL refers to, and YoutubeDL process the extracted information, possibly using a File Downloader to download the video. YoutubeDL objects accept a lot of parameters. In order not to saturate the object constructor with arguments, it receives a dictionary of options instead. These options are available through the params attribute for the InfoExtractors to use. The YoutubeDL also registers itself as the downloader in charge for the InfoExtractors that are added to it, so this is a "mutual registration". Available options: username: Username for authentication purposes. password: Password for authentication purposes. videopassword: Password for acces a video. 
usenetrc: Use netrc for authentication instead. verbose: Print additional info to stdout. quiet: Do not print messages to stdout. forceurl: Force printing final URL. forcetitle: Force printing title. forceid: Force printing ID. forcethumbnail: Force printing thumbnail URL. forcedescription: Force printing description. forcefilename: Force printing final filename. forceduration: Force printing duration. forcejson: Force printing info_dict as JSON. simulate: Do not download the video files. format: Video format code. format_limit: Highest quality format to try. outtmpl: Template for output names. restrictfilenames: Do not allow "&" and spaces in file names ignoreerrors: Do not stop on download errors. nooverwrites: Prevent overwriting files. playliststart: Playlist item to start at. playlistend: Playlist item to end at. matchtitle: Download only matching titles. rejecttitle: Reject downloads for matching titles. logger: Log messages to a logging.Logger instance. logtostderr: Log messages to stderr instead of stdout. writedescription: Write the video description to a .description file writeinfojson: Write the video description to a .info.json file writeannotations: Write the video annotations to a .annotations.xml file writethumbnail: Write the thumbnail image to a file writesubtitles: Write the video subtitles to a file writeautomaticsub: Write the automatic subtitles to a file allsubtitles: Downloads all the subtitles of the video (requires writesubtitles or writeautomaticsub) listsubtitles: Lists all available subtitles for the video subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt) subtitleslangs: List of languages of the subtitles to download keepvideo: Keep the video file after post-processing daterange: A DateRange object, download only if the upload_date is in the range. skip_download: Skip the actual download of the video file cachedir: Location of the cache files in the filesystem. None to disable filesystem cache. noplaylist: Download single video instead of a playlist if in doubt. age_limit: An integer representing the user's age in years. Unsuitable videos for the given age are skipped. min_views: An integer representing the minimum view count the video must have in order to not be skipped. Videos without view count information are always downloaded. None for no limit. max_views: An integer representing the maximum view count. Videos that are more popular than that are not downloaded. Videos without view count information are always downloaded. None for no limit. download_archive: File name of a file where all downloads are recorded. Videos already present in the file are not downloaded again. cookiefile: File name where cookies should be read from and dumped to. nocheckcertificate:Do not verify SSL certificates proxy: URL of the proxy server to use socket_timeout: Time to wait for unresponsive hosts, in seconds bidi_workaround: Work around buggy terminals without bidirectional text support, using fridibi debug_printtraffic:Print out sent and received HTTP traffic include_ads: Download ads as well default_search: Prepend this string if an input url is not valid. 
'auto' for elaborate guessing The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle The following options are used by the post processors: prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available, otherwise prefer avconv. """ params = None _ies = [] _pps = [] _download_retcode = None _num_downloads = None _screen_file = None def __init__(self, params=None): """Create a FileDownloader object with the given options.""" if params is None: params = {} self._ies = [] self._ies_instances = {} self._pps = [] self._progress_hooks = [] self._download_retcode = 0 self._num_downloads = 0 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self._err_file = sys.stderr self.params = params if params.get('bidi_workaround', False): try: import pty master, slave = pty.openpty() width = get_term_width() if width is None: width_args = [] else: width_args = ['-w', str(width)] sp_kwargs = dict( stdin=subprocess.PIPE, stdout=slave, stderr=self._err_file) try: self._output_process = subprocess.Popen( ['bidiv'] + width_args, **sp_kwargs ) except OSError: self._output_process = subprocess.Popen( ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs) self._output_channel = os.fdopen(master, 'rb') except OSError as ose: if ose.errno == 2: self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.') else: raise if (sys.version_info >= (3,) and sys.platform != 'win32' and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and not params['restrictfilenames']): # On Python 3, the Unicode filesystem API will throw errors (#1474) self.report_warning( 'Assuming --restrict-filenames since file system encoding ' 'cannot encode all charactes. ' 'Set the LC_ALL environment variable to fix this.') self.params['restrictfilenames'] = True if '%(stitle)s' in self.params.get('outtmpl', ''): self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') self._setup_opener() def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" self._ies.append(ie) self._ies_instances[ie.ie_key()] = ie ie.set_downloader(self) def get_info_extractor(self, ie_key): """ Get an instance of an IE with name ie_key, it will try to get one from the _ies list, if there's no instance it will create a new one and add it to the extractor list. 
""" ie = self._ies_instances.get(ie_key) if ie is None: ie = get_info_extractor(ie_key)() self.add_info_extractor(ie) return ie def add_default_info_extractors(self): """ Add the InfoExtractors returned by gen_extractors to the end of the list """ for ie in gen_extractors(): self.add_info_extractor(ie) def add_post_processor(self, pp): """Add a PostProcessor object to the end of the chain.""" self._pps.append(pp) pp.set_downloader(self) def add_progress_hook(self, ph): """Add the progress hook (currently only for the file downloader)""" self._progress_hooks.append(ph) def _bidi_workaround(self, message): if not hasattr(self, '_output_channel'): return message assert hasattr(self, '_output_process') assert type(message) == type('') line_count = message.count('\n') + 1 self._output_process.stdin.write((message + '\n').encode('utf-8')) self._output_process.stdin.flush() res = ''.join(self._output_channel.readline().decode('utf-8') for _ in range(line_count)) return res[:-len('\n')] def to_screen(self, message, skip_eol=False): """Print message to stdout if not in quiet mode.""" return self.to_stdout(message, skip_eol, check_quiet=True) def to_stdout(self, message, skip_eol=False, check_quiet=False): """Print message to stdout if not in quiet mode.""" if self.params.get('logger'): self.params['logger'].debug(message) elif not check_quiet or not self.params.get('quiet', False): message = self._bidi_workaround(message) terminator = ['\n', ''][skip_eol] output = message + terminator write_string(output, self._screen_file) def to_stderr(self, message): """Print message to stderr.""" assert type(message) == type('') if self.params.get('logger'): self.params['logger'].error(message) else: message = self._bidi_workaround(message) output = message + '\n' write_string(output, self._err_file) def to_console_title(self, message): if not self.params.get('consoletitle', False): return if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow(): # c_wchar_p() might not be necessary if `message` is # already of type unicode() ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) elif 'TERM' in os.environ: write_string('\033]0;%s\007' % message, self._screen_file) def save_console_title(self): if not self.params.get('consoletitle', False): return if 'TERM' in os.environ: # Save the title on stack write_string('\033[22;0t', self._screen_file) def restore_console_title(self): if not self.params.get('consoletitle', False): return if 'TERM' in os.environ: # Restore the title from stack write_string('\033[23;0t', self._screen_file) def __enter__(self): self.save_console_title() return self def __exit__(self, *args): self.restore_console_title() if self.params.get('cookiefile') is not None: self.cookiejar.save() def trouble(self, message=None, tb=None): """Determine action to take when a download problem appears. Depending on if the downloader has been configured to ignore download errors or not, this method may throw an exception or not when errors are found, after printing the message. tb, if given, is additional traceback information. 
""" if message is not None: self.to_stderr(message) if self.params.get('verbose'): if tb is None: if sys.exc_info()[0]: # if .trouble has been called from an except block tb = '' if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info)) tb += compat_str(traceback.format_exc()) else: tb_data = traceback.format_list(traceback.extract_stack()) tb = ''.join(tb_data) self.to_stderr(tb) if not self.params.get('ignoreerrors', False): if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: exc_info = sys.exc_info()[1].exc_info else: exc_info = sys.exc_info() raise DownloadError(message, exc_info) self._download_retcode = 1 def report_warning(self, message): ''' Print the message to stderr, it will be prefixed with 'WARNING:' If stderr is a tty file the 'WARNING:' will be colored ''' if self._err_file.isatty() and os.name != 'nt': _msg_header = '\033[0;33mWARNING:\033[0m' else: _msg_header = 'WARNING:' warning_message = '%s %s' % (_msg_header, message) self.to_stderr(warning_message) def report_error(self, message, tb=None): ''' Do the same as trouble, but prefixes the message with 'ERROR:', colored in red if stderr is a tty file. ''' if self._err_file.isatty() and os.name != 'nt': _msg_header = '\033[0;31mERROR:\033[0m' else: _msg_header = 'ERROR:' error_message = '%s %s' % (_msg_header, message) self.trouble(error_message, tb) def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" try: self.to_screen('[download] %s has already been downloaded' % file_name) except UnicodeEncodeError: self.to_screen('[download] The file has already been downloaded') def prepare_filename(self, info_dict): """Generate the output filename.""" try: template_dict = dict(info_dict) template_dict['epoch'] = int(time.time()) autonumber_size = self.params.get('autonumber_size') if autonumber_size is None: autonumber_size = 5 autonumber_templ = '%0' + str(autonumber_size) + 'd' template_dict['autonumber'] = autonumber_templ % self._num_downloads if template_dict.get('playlist_index') is not None: template_dict['playlist_index'] = '%05d' % template_dict['playlist_index'] sanitize = lambda k, v: sanitize_filename( compat_str(v), restricted=self.params.get('restrictfilenames'), is_id=(k == 'id')) template_dict = dict((k, sanitize(k, v)) for k, v in template_dict.items() if v is not None) template_dict = collections.defaultdict(lambda: 'NA', template_dict) tmpl = os.path.expanduser(self.params['outtmpl']) filename = tmpl % template_dict return filename except ValueError as err: self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')') return None def _match_entry(self, info_dict): """ Returns None iff the file should be downloaded """ video_title = info_dict.get('title', info_dict.get('id', 'video')) if 'title' in info_dict: # This can happen when we're just evaluating the playlist title = info_dict['title'] matchtitle = self.params.get('matchtitle', False) if matchtitle: if not re.search(matchtitle, title, re.IGNORECASE): return '"' + title + '" title did not match pattern "' + matchtitle + '"' rejecttitle = self.params.get('rejecttitle', False) if rejecttitle: if re.search(rejecttitle, title, re.IGNORECASE): return '"' + title + '" title matched reject pattern "' + rejecttitle + '"' date = info_dict.get('upload_date', None) if date is not None: dateRange = self.params.get('daterange', 
DateRange()) if date not in dateRange: return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) view_count = info_dict.get('view_count', None) if view_count is not None: min_views = self.params.get('min_views') if min_views is not None and view_count < min_views: return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views) max_views = self.params.get('max_views') if max_views is not None and view_count > max_views: return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) age_limit = self.params.get('age_limit') if age_limit is not None: if age_limit < info_dict.get('age_limit', 0): return 'Skipping "' + title + '" because it is age restricted' if self.in_download_archive(info_dict): return '%s has already been recorded in archive' % video_title return None @staticmethod def add_extra_info(info_dict, extra_info): '''Set the keys from extra_info in info dict if they are missing''' for key, value in extra_info.items(): info_dict.setdefault(key, value) def extract_info(self, url, download=True, ie_key=None, extra_info={}, process=True): ''' Returns a list with a dictionary for each video we find. If 'download', also downloads the videos. extra_info is a dict containing the extra values to add to each result ''' if ie_key: ies = [self.get_info_extractor(ie_key)] else: ies = self._ies for ie in ies: if not ie.suitable(url): continue if not ie.working(): self.report_warning('The program functionality for this site has been marked as broken, ' 'and will probably not work.') try: ie_result = ie.extract(url) if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here) break if isinstance(ie_result, list): # Backwards compatibility: old IE result format ie_result = { '_type': 'compat_list', 'entries': ie_result, } self.add_extra_info(ie_result, { 'extractor': ie.IE_NAME, 'webpage_url': url, 'webpage_url_basename': url_basename(url), 'extractor_key': ie.ie_key(), }) if process: return self.process_ie_result(ie_result, download, extra_info) else: return ie_result except ExtractorError as de: # An error we somewhat expected self.report_error(compat_str(de), de.format_traceback()) break except MaxDownloadsReached: raise except Exception as e: if self.params.get('ignoreerrors', False): self.report_error(compat_str(e), tb=compat_str(traceback.format_exc())) break else: raise else: self.report_error('no suitable InfoExtractor: %s' % url) def process_ie_result(self, ie_result, download=True, extra_info={}): """ Take the result of the ie(may be modified) and resolve all unresolved references (URLs, playlist items). It will also download the videos if 'download'. Returns the resolved ie_result. 
""" result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system if result_type == 'video': self.add_extra_info(ie_result, extra_info) return self.process_video_result(ie_result, download=download) elif result_type == 'url': # We have to add extra_info to the results because it may be # contained in a playlist return self.extract_info(ie_result['url'], download, ie_key=ie_result.get('ie_key'), extra_info=extra_info) elif result_type == 'url_transparent': # Use the information from the embedding page info = self.extract_info( ie_result['url'], ie_key=ie_result.get('ie_key'), extra_info=extra_info, download=False, process=False) def make_result(embedded_info): new_result = ie_result.copy() for f in ('_type', 'url', 'ext', 'player_url', 'formats', 'entries', 'ie_key', 'duration', 'subtitles', 'annotations', 'format', 'thumbnail', 'thumbnails'): if f in new_result: del new_result[f] if f in embedded_info: new_result[f] = embedded_info[f] return new_result new_result = make_result(info) assert new_result.get('_type') != 'url_transparent' if new_result.get('_type') == 'compat_list': new_result['entries'] = [ make_result(e) for e in new_result['entries']] return self.process_ie_result( new_result, download=download, extra_info=extra_info) elif result_type == 'playlist': # We process each entry in the playlist playlist = ie_result.get('title', None) or ie_result.get('id', None) self.to_screen('[download] Downloading playlist: %s' % playlist) playlist_results = [] playliststart = self.params.get('playliststart', 1) - 1 playlistend = self.params.get('playlistend', None) # For backwards compatibility, interpret -1 as whole list if playlistend == -1: playlistend = None if isinstance(ie_result['entries'], list): n_all_entries = len(ie_result['entries']) entries = ie_result['entries'][playliststart:playlistend] n_entries = len(entries) self.to_screen( "[%s] playlist %s: Collected %d video ids (downloading %d of them)" % (ie_result['extractor'], playlist, n_all_entries, n_entries)) else: assert isinstance(ie_result['entries'], PagedList) entries = ie_result['entries'].getslice( playliststart, playlistend) n_entries = len(entries) self.to_screen( "[%s] playlist %s: Downloading %d videos" % (ie_result['extractor'], playlist, n_entries)) for i, entry in enumerate(entries, 1): self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries)) extra = { 'playlist': playlist, 'playlist_index': i + playliststart, 'extractor': ie_result['extractor'], 'webpage_url': ie_result['webpage_url'], 'webpage_url_basename': url_basename(ie_result['webpage_url']), 'extractor_key': ie_result['extractor_key'], } reason = self._match_entry(entry) if reason is not None: self.to_screen('[download] ' + reason) continue entry_result = self.process_ie_result(entry, download=download, extra_info=extra) playlist_results.append(entry_result) ie_result['entries'] = playlist_results return ie_result elif result_type == 'compat_list': def _fixup(r): self.add_extra_info(r, { 'extractor': ie_result['extractor'], 'webpage_url': ie_result['webpage_url'], 'webpage_url_basename': url_basename(ie_result['webpage_url']), 'extractor_key': ie_result['extractor_key'], }) return r ie_result['entries'] = [ self.process_ie_result(_fixup(r), download, extra_info) for r in ie_result['entries'] ] return ie_result else: raise Exception('Invalid result type: %s' % result_type) def select_format(self, format_spec, available_formats): if format_spec == 'best' or format_spec is None: return 
available_formats[-1] elif format_spec == 'worst': return available_formats[0] elif format_spec == 'bestaudio': audio_formats = [ f for f in available_formats if f.get('vcodec') == 'none'] if audio_formats: return audio_formats[-1] elif format_spec == 'worstaudio': audio_formats = [ f for f in available_formats if f.get('vcodec') == 'none'] if audio_formats: return audio_formats[0] else: extensions = ['mp4', 'flv', 'webm', '3gp'] if format_spec in extensions: filter_f = lambda f: f['ext'] == format_spec else: filter_f = lambda f: f['format_id'] == format_spec matches = list(filter(filter_f, available_formats)) if matches: return matches[-1] return None def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' if 'playlist' not in info_dict: # It isn't part of a playlist info_dict['playlist'] = None info_dict['playlist_index'] = None # These extractors handle format selection themselves if info_dict['extractor'] in ['Youku']: if download: self.process_info(info_dict) return info_dict # We now pick which formats have to be downloaded if info_dict.get('formats') is None: # There's only one format available formats = [info_dict] else: formats = info_dict['formats'] # We check that all the formats have the format and format_id fields for (i, format) in enumerate(formats): if format.get('format_id') is None: format['format_id'] = compat_str(i) if format.get('format') is None: format['format'] = '{id} - {res}{note}'.format( id=format['format_id'], res=self.format_resolution(format), note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '', ) # Automatically determine file extension if missing if 'ext' not in format: format['ext'] = determine_ext(format['url']) format_limit = self.params.get('format_limit', None) if format_limit: formats = list(takewhile_inclusive( lambda f: f['format_id'] != format_limit, formats )) # TODO Central sorting goes here if formats[0] is not info_dict: # only set the 'formats' field if the original info_dict lists them # otherwise we end up with a circular reference, the first (and unique) # element in the 'formats' field in info_dict is info_dict itself, # which can't be exported to json info_dict['formats'] = formats if self.params.get('listformats', None): self.list_formats(info_dict) return req_format = self.params.get('format') if req_format is None: req_format = 'best' formats_to_download = [] # The -1 is for supporting YoutubeIE if req_format in ('-1', 'all'): formats_to_download = formats else: # We can accept formats requested in the form 34/5/best: we pick # the first one that is available, starting from the left req_formats = req_format.split('/') for rf in req_formats: if re.match(r'.+?\+.+?', rf) is not None: # Two formats have been requested like '137+139' format_1, format_2 = rf.split('+') formats_info = (self.select_format(format_1, formats), self.select_format(format_2, formats)) if all(formats_info): selected_format = { 'requested_formats': formats_info, 'format': rf, 'ext': formats_info[0]['ext'], } else: selected_format = None else: selected_format = self.select_format(rf, formats) if selected_format is not None: formats_to_download = [selected_format] break if not formats_to_download: raise ExtractorError('requested format not available', expected=True) if download: if len(formats_to_download) > 1: self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download))) for format in formats_to_download: new_info = dict(info_dict)
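# Each format to download gets its own shallow copy of the video's info
# dict, with the per-format fields overlaid before process_info() runs.
# A hedged sketch with made-up values (not from a real extractor):
#   info_dict = {'id': '42', 'title': 'clip', 'ext': 'flv'}
#   fmt = {'format_id': '22', 'ext': 'mp4', 'url': 'http://host/v.mp4'}
#   new_info = dict(info_dict)
#   new_info.update(fmt)
#   new_info['ext'] == 'mp4'  # format fields win over the base metadata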
new_info.update(format) self.process_info(new_info) # We update the info dict with the best quality format (backwards compatibility) info_dict.update(formats_to_download[-1]) return info_dict def process_info(self, info_dict): """Process a single resolved IE result.""" assert info_dict.get('_type', 'video') == 'video' max_downloads = self.params.get('max_downloads') if max_downloads is not None: if self._num_downloads >= int(max_downloads): raise MaxDownloadsReached() info_dict['fulltitle'] = info_dict['title'] if len(info_dict['title']) > 200: info_dict['title'] = info_dict['title'][:197] + '...' # Keep for backwards compatibility info_dict['stitle'] = info_dict['title'] if not 'format' in info_dict: info_dict['format'] = info_dict['ext'] reason = self._match_entry(info_dict) if reason is not None: self.to_screen('[download] ' + reason) return self._num_downloads += 1 filename = self.prepare_filename(info_dict) # Forced printings if self.params.get('forcetitle', False): self.to_stdout(info_dict['fulltitle']) if self.params.get('forceid', False): self.to_stdout(info_dict['id']) if self.params.get('forceurl', False): # For RTMP URLs, also include the playpath self.to_stdout(info_dict['url'] + info_dict.get('play_path', '')) if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None: self.to_stdout(info_dict['thumbnail']) if self.params.get('forcedescription', False) and info_dict.get('description') is not None: self.to_stdout(info_dict['description']) if self.params.get('forcefilename', False) and filename is not None: self.to_stdout(filename) if self.params.get('forceduration', False) and info_dict.get('duration') is not None: self.to_stdout(formatSeconds(info_dict['duration'])) if self.params.get('forceformat', False): self.to_stdout(info_dict['format']) if self.params.get('forcejson', False): info_dict['_filename'] = filename self.to_stdout(json.dumps(info_dict)) # Do nothing else if in simulate mode if self.params.get('simulate', False): return if filename is None: return try: dn = os.path.dirname(encodeFilename(filename)) if dn != '' and not os.path.exists(dn): os.makedirs(dn) except (OSError, IOError) as err: self.report_error('unable to create directory ' + compat_str(err)) return if self.params.get('writedescription', False): descfn = filename + '.description' if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)): self.to_screen('[info] Video description is already present') else: try: self.to_screen('[info] Writing video description to: ' + descfn) with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: descfile.write(info_dict['description']) except (KeyError, TypeError): self.report_warning('There\'s no description to write.') except (OSError, IOError): self.report_error('Cannot write description file ' + descfn) return if self.params.get('writeannotations', False): annofn = filename + '.annotations.xml' if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)): self.to_screen('[info] Video annotations are already present') else: try: self.to_screen('[info] Writing video annotations to: ' + annofn) with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: annofile.write(info_dict['annotations']) except (KeyError, TypeError): self.report_warning('There are no annotations to write.') except (OSError, IOError): self.report_error('Cannot write annotations file: ' + annofn) return subtitles_are_requested = any([self.params.get('writesubtitles', False), 
self.params.get('writeautomaticsub')]) if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']: # subtitle download errors are already handled as troubles in the relevant IE, # so extraction silently goes on when the IE doesn't support subtitles subtitles = info_dict['subtitles'] sub_format = self.params.get('subtitlesformat', 'srt') for sub_lang in subtitles.keys(): sub = subtitles[sub_lang] if sub is None: continue try: sub_filename = subtitles_filename(filename, sub_lang, sub_format) if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)): self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format)) else: self.to_screen('[info] Writing video subtitles to: ' + sub_filename) with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: subfile.write(sub) except (OSError, IOError): self.report_error('Cannot write subtitles file ' + sub_filename) return if self.params.get('writeinfojson', False): infofn = os.path.splitext(filename)[0] + '.info.json' if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)): self.to_screen('[info] Video description metadata is already present') else: self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn) try: write_json_file(info_dict, encodeFilename(infofn)) except (OSError, IOError): self.report_error('Cannot write metadata to JSON file ' + infofn) return if self.params.get('writethumbnail', False): if info_dict.get('thumbnail') is not None: thumb_format = determine_ext(info_dict['thumbnail'], 'jpg') thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)): self.to_screen('[%s] %s: Thumbnail is already present' % (info_dict['extractor'], info_dict['id'])) else: self.to_screen('[%s] %s: Downloading thumbnail ...' % (info_dict['extractor'], info_dict['id'])) try: uf = compat_urllib_request.urlopen(info_dict['thumbnail']) with open(thumb_filename, 'wb') as thumbf: shutil.copyfileobj(uf, thumbf) self.to_screen('[%s] %s: Writing thumbnail to: %s' % (info_dict['extractor'], info_dict['id'], thumb_filename)) except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self.report_warning('Unable to download thumbnail "%s": %s' % (info_dict['thumbnail'], compat_str(err))) if not self.params.get('skip_download', False): if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)): success = True else: try: def dl(name, info): fd = get_suitable_downloader(info)(self, self.params) for ph in self._progress_hooks: fd.add_progress_hook(ph) return fd.download(name, info) if info_dict.get('requested_formats') is not None: downloaded = [] success = True merger = FFmpegMergerPP(self) if not merger._get_executable(): postprocessors = [] self.report_warning('You have requested multiple ' 'formats but ffmpeg or avconv are not installed.'
' The formats won\'t be merged') else: postprocessors = [merger] for f in info_dict['requested_formats']: new_info = dict(info_dict) new_info.update(f) fname = self.prepare_filename(new_info) fname = prepend_extension(fname, 'f%s' % f['format_id']) downloaded.append(fname) partial_success = dl(fname, new_info) success = success and partial_success info_dict['__postprocessors'] = postprocessors info_dict['__files_to_merge'] = downloaded else: # Just a single file success = dl(filename, info_dict) except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self.report_error('unable to download video data: %s' % str(err)) return except (OSError, IOError) as err: raise UnavailableVideoError(err) except (ContentTooShortError, ) as err: self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) return if success: try: self.post_process(filename, info_dict) except (PostProcessingError) as err: self.report_error('postprocessing: %s' % str(err)) return self.record_download_archive(info_dict) def download(self, url_list): """Download a given list of URLs.""" if (len(url_list) > 1 and '%' not in self.params['outtmpl'] and self.params.get('max_downloads') != 1): raise SameFileError(self.params['outtmpl']) for url in url_list: try: #It also downloads the videos self.extract_info(url) except UnavailableVideoError: self.report_error('unable to download video') except MaxDownloadsReached: self.to_screen('[info] Maximum number of downloaded files reached.') raise return self._download_retcode def download_with_info_file(self, info_filename): with io.open(info_filename, 'r', encoding='utf-8') as f: info = json.load(f) try: self.process_ie_result(info, download=True) except DownloadError: webpage_url = info.get('webpage_url') if webpage_url is not None: self.report_warning('The info failed to download, trying with "%s"' % webpage_url) return self.download([webpage_url]) else: raise return self._download_retcode def post_process(self, filename, ie_info): """Run all the postprocessors on the given file.""" info = dict(ie_info) info['filepath'] = filename keep_video = None pps_chain = [] if ie_info.get('__postprocessors') is not None: pps_chain.extend(ie_info['__postprocessors']) pps_chain.extend(self._pps) for pp in pps_chain: try: keep_video_wish, new_info = pp.run(info) if keep_video_wish is not None: if keep_video_wish: keep_video = keep_video_wish elif keep_video is None: # No clear decision yet, let IE decide keep_video = keep_video_wish except PostProcessingError as e: self.report_error(e.msg) if keep_video is False and not self.params.get('keepvideo', False): try: self.to_screen('Deleting original file %s (pass -k to keep)' % filename) os.remove(encodeFilename(filename)) except (IOError, OSError): self.report_warning('Unable to remove downloaded video file') def _make_archive_id(self, info_dict): # Future-proof against any change in case # and backwards compatibility with prior versions extractor = info_dict.get('extractor_key') if extractor is None: if 'id' in info_dict: extractor = info_dict.get('ie_key') # key in a playlist if extractor is None: return None # Incomplete video information return extractor.lower() + ' ' + info_dict['id'] def in_download_archive(self, info_dict): fn = self.params.get('download_archive') if fn is None: return False vid_id = self._make_archive_id(info_dict) if vid_id is None: return False # Incomplete video information try: with locked_file(fn, 'r', encoding='utf-8') as archive_file: for line 
in archive_file: if line.strip() == vid_id: return True except IOError as ioe: if ioe.errno != errno.ENOENT: raise return False def record_download_archive(self, info_dict): fn = self.params.get('download_archive') if fn is None: return vid_id = self._make_archive_id(info_dict) assert vid_id with locked_file(fn, 'a', encoding='utf-8') as archive_file: archive_file.write(vid_id + '\n') @staticmethod def format_resolution(format, default='unknown'): if format.get('vcodec') == 'none': return 'audio only' if format.get('resolution') is not None: return format['resolution'] if format.get('height') is not None: if format.get('width') is not None: res = '%sx%s' % (format['width'], format['height']) else: res = '%sp' % format['height'] elif format.get('width') is not None: res = '?x%d' % format['width'] else: res = default return res def list_formats(self, info_dict): def format_note(fdict): res = '' if fdict.get('ext') in ['f4f', 'f4m']: res += '(unsupported) ' if fdict.get('format_note') is not None: res += fdict['format_note'] + ' ' if fdict.get('tbr') is not None: res += '%4dk ' % fdict['tbr'] if fdict.get('container') is not None: if res: res += ', ' res += '%s container' % fdict['container'] if (fdict.get('vcodec') is not None and fdict.get('vcodec') != 'none'): if res: res += ', ' res += fdict['vcodec'] if fdict.get('vbr') is not None: res += '@' elif fdict.get('vbr') is not None and fdict.get('abr') is not None: res += 'video@' if fdict.get('vbr') is not None: res += '%4dk' % fdict['vbr'] if fdict.get('acodec') is not None: if res: res += ', ' if fdict['acodec'] == 'none': res += 'video only' else: res += '%-5s' % fdict['acodec'] elif fdict.get('abr') is not None: if res: res += ', ' res += 'audio' if fdict.get('abr') is not None: res += '@%3dk' % fdict['abr'] if fdict.get('asr') is not None: res += ' (%5dHz)' % fdict['asr'] if fdict.get('filesize') is not None: if res: res += ', ' res += format_bytes(fdict['filesize']) return res def line(format, idlen=20): return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % ( format['format_id'], format['ext'], self.format_resolution(format), format_note(format), )) formats = info_dict.get('formats', [info_dict]) idlen = max(len('format code'), max(len(f['format_id']) for f in formats)) formats_s = [line(f, idlen) for f in formats] if len(formats) > 1: formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)' formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)' header_line = line({ 'format_id': 'format code', 'ext': 'extension', 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen) self.to_screen('[info] Available formats for %s:\n%s\n%s' % (info_dict['id'], header_line, '\n'.join(formats_s))) def urlopen(self, req): """ Start an HTTP download """ return self._opener.open(req) def print_debug_header(self): if not self.params.get('verbose'): return write_string('[debug] youtube-dl version ' + __version__ + '\n') try: sp = subprocess.Popen( ['git', 'rev-parse', '--short', 'HEAD'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=os.path.dirname(os.path.abspath(__file__))) out, err = sp.communicate() out = out.decode().strip() if re.match('[0-9a-f]+', out): write_string('[debug] Git HEAD: ' + out + '\n') except: try: sys.exc_clear() except: pass write_string('[debug] Python version %s - %s' % (platform.python_version(), platform_name()) + '\n') proxy_map = {} for handler in self._opener.handlers: if hasattr(handler, 'proxies'): proxy_map.update(handler.proxies) write_string('[debug] Proxy map: ' + 
compat_str(proxy_map) + '\n') def _setup_opener(self): timeout_val = self.params.get('socket_timeout') timeout = 600 if timeout_val is None else float(timeout_val) opts_cookiefile = self.params.get('cookiefile') opts_proxy = self.params.get('proxy') if opts_cookiefile is None: self.cookiejar = compat_cookiejar.CookieJar() else: self.cookiejar = compat_cookiejar.MozillaCookieJar( opts_cookiefile) if os.access(opts_cookiefile, os.R_OK): self.cookiejar.load() cookie_processor = compat_urllib_request.HTTPCookieProcessor( self.cookiejar) if opts_proxy is not None: if opts_proxy == '': proxies = {} else: proxies = {'http': opts_proxy, 'https': opts_proxy} else: proxies = compat_urllib_request.getproxies() # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805) if 'http' in proxies and 'https' not in proxies: proxies['https'] = proxies['http'] proxy_handler = compat_urllib_request.ProxyHandler(proxies) debuglevel = 1 if self.params.get('debug_printtraffic') else 0 https_handler = make_HTTPS_handler( self.params.get('nocheckcertificate', False), debuglevel=debuglevel) ydlh = YoutubeDLHandler(debuglevel=debuglevel) opener = compat_urllib_request.build_opener( https_handler, proxy_handler, cookie_processor, ydlh) # Delete the default user-agent header, which would otherwise apply in # cases where our custom HTTP handler doesn't come into play # (See https://github.com/rg3/youtube-dl/issues/1309 for details) opener.addheaders = [] self._opener = opener # TODO remove this global modification compat_urllib_request.install_opener(opener) socket.setdefaulttimeout(timeout) youtube-dl/youtube_dl/extractor/0000755000000000000000000000000012300361701016024 5ustar rootrootyoutube-dl/youtube_dl/extractor/ninegag.py0000644000000000000000000000274412272013344020022 0ustar rootrootfrom __future__ import unicode_literals import json import re from .common import InfoExtractor class NineGagIE(InfoExtractor): IE_NAME = '9gag' _VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P[0-9]+)' _TEST = { "url": "http://9gag.tv/v/1912", "file": "1912.mp4", "info_dict": { "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. 
(Thanks, Dino!)", "title": "\"People Are Awesome 2013\" Is Absolutely Awesome" }, 'add_ie': ['Youtube'] } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) data_json = self._html_search_regex(r'''(?x) .*)' _TESTS = [{ 'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', 'file': '1812978515.mp3', 'md5': 'c557841d5e50261777a6585648adf439', 'info_dict': { "title": "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad", "duration": 10, }, '_skip': 'There is a limit of 200 free downloads / month for the test song' }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) title = mobj.group('title') webpage = self._download_webpage(url, title) # We get the link to the free download page m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage) if m_download is None: m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage) if m_trackinfo: json_code = m_trackinfo.group(1) data = json.loads(json_code) d = data[0] duration = int(round(d['duration'])) formats = [] for format_id, format_url in d['file'].items(): ext, _, abr_str = format_id.partition('-') formats.append({ 'format_id': format_id, 'url': format_url, 'ext': format_id.partition('-')[0], 'vcodec': 'none', 'acodec': format_id.partition('-')[0], 'abr': int(format_id.partition('-')[2]), }) self._sort_formats(formats) return { 'id': compat_str(d['id']), 'title': d['title'], 'formats': formats, 'duration': duration, } else: raise ExtractorError('No free songs found') download_link = m_download.group(1) video_id = re.search( r'var TralbumData = {(.*?)id: (?P\d*?)$', webpage, re.MULTILINE | re.DOTALL).group('id') download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page') # We get the dictionary of the track from some javascrip code info = re.search(r'items: (.*?),$', download_webpage, re.MULTILINE).group(1) info = json.loads(info)[0] # We pick mp3-320 for now, until format selection can be easily implemented. 
mp3_info = info['downloads']['mp3-320'] # If we try to use this url it says the link has expired initial_url = mp3_info['url'] re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$' m_url = re.match(re_url, initial_url) # We build the url we will use to get the final track url # This url is built by Bandcamp in the script download_bunde_*.js request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts')) final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url') # If we could correctly generate the .rand field the url would be # in the "download_url" key final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1) return { 'id': video_id, 'title': info['title'], 'ext': 'mp3', 'vcodec': 'none', 'url': final_url, 'thumbnail': info.get('thumb_url'), 'uploader': info.get('artist'), } class BandcampAlbumIE(InfoExtractor): IE_NAME = 'Bandcamp:album' _VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)' _TEST = { 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', 'playlist': [ { 'file': '1353101989.mp3', 'md5': '39bc1eded3476e927c724321ddf116cf', 'info_dict': { 'title': 'Intro', } }, { 'file': '38097443.mp3', 'md5': '1a2c32e2691474643e912cc6cd4bffaa', 'info_dict': { 'title': 'Kero One - Keep It Alive (Blazo remix)', } }, ], 'params': { 'playlistend': 2 }, 'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test' } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) title = mobj.group('title') webpage = self._download_webpage(url, title) tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage) if not tracks_paths: raise ExtractorError('The page doesn\'t contain any tracks') entries = [ self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key()) for t_path in tracks_paths] title = self._search_regex(r'album_title : "(.*?)"', webpage, 'title') return { '_type': 'playlist', 'title': title, 'entries': entries, } youtube-dl/youtube_dl/extractor/cinemassacre.py0000644000000000000000000000650012271337464021054 0ustar rootroot# encoding: utf-8 import re from .common import InfoExtractor from ..utils import ( ExtractorError, ) class CinemassacreIE(InfoExtractor): _VALID_URL = r'(?:http://)?(?:www\.)?(?P<url>cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?)(?:[/?].*)?'
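# Worked example of the date groups, using the first test URL below:
#   http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/
# matches date_Y='2012', date_m='11', date_d='10'; _real_extract
# concatenates them into the upload_date '20121110'.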
_TESTS = [{ u'url': u'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', u'file': u'19911.flv', u'info_dict': { u'upload_date': u'20121110', u'title': u'“Angry Video Game Nerd: The Movie” – Trailer', u'description': u'md5:fb87405fcb42a331742a0dce2708560b', }, u'params': { # rtmp download u'skip_download': True, }, }, { u'url': u'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', u'file': u'521be8ef82b16.flv', u'info_dict': { u'upload_date': u'20131002', u'title': u'The Mummy’s Hand (1940)', }, u'params': { # rtmp download u'skip_download': True, }, }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) webpage_url = u'http://' + mobj.group('url') webpage = self._download_webpage(webpage_url, None) # Don't know video id yet video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage) if not mobj: raise ExtractorError(u'Can\'t extract embed url and video id') playerdata_url = mobj.group(u'embed_url') video_id = mobj.group(u'video_id') video_title = self._html_search_regex(r'<title>(?P<title>.+?)\|', webpage, u'title') video_description = self._html_search_regex(r'<div class="entry-content">(?P<description>.+?)</div>', webpage, u'description', flags=re.DOTALL, fatal=False) if not video_description: video_description = None playerdata = self._download_webpage(playerdata_url, video_id) url = self._html_search_regex(r'\'streamer\': \'(?P<url>[^\']+)\'', playerdata, u'url') sd_file = self._html_search_regex(r'\'file\': \'(?P<sd_file>[^\']+)\'', playerdata, u'sd_file') hd_file = self._html_search_regex(r'\'?file\'?: "(?P<hd_file>[^"]+)"', playerdata, u'hd_file') video_thumbnail = self._html_search_regex(r'\'image\': \'(?P<thumbnail>[^\']+)\'', playerdata, u'thumbnail', fatal=False) formats = [ { 'url': url, 'play_path': 'mp4:' + sd_file, 'rtmp_live': True, # workaround 'ext': 'flv', 'format': 'sd', 'format_id': 'sd', }, { 'url': url, 'play_path': 'mp4:' + hd_file, 'rtmp_live': True, # workaround 'ext': 'flv', 'format': 'hd', 'format_id': 'hd', }, ] return { 'id': video_id, 'title': video_title, 'formats': formats, 'description': video_description, 'upload_date': video_date, 'thumbnail': video_thumbnail, } youtube-dl/youtube_dl/extractor/videopremium.py0000644000000000000000000000304612271337464021126 0ustar rootrootimport re import random from .common import InfoExtractor class VideoPremiumIE(InfoExtractor): _VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.(?:tv|me)/(?P<id>\w+)(?:/.*)?'
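# Illustrative match for the pattern above (taken from the test below):
#   http://videopremium.tv/4w7oadjsf156  ->  id = '4w7oadjsf156'
# _real_extract reuses the id for the page fetch and the rtmp play path.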
_TEST = { u'url': u'http://videopremium.tv/4w7oadjsf156', u'file': u'4w7oadjsf156.f4v', u'info_dict': { u"title": u"youtube-dl_test_video____a_________-BaW_jenozKc.mp4.mp4" }, u'params': { u'skip_download': True, }, u'skip': u'Test file has been deleted.', } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') webpage_url = 'http://videopremium.tv/' + video_id webpage = self._download_webpage(webpage_url, video_id) if re.match(r"^<html><head><script[^>]*>window.location\s*=", webpage): # Download again, we need a cookie webpage = self._download_webpage( webpage_url, video_id, note=u'Downloading webpage again (with cookie)') video_title = self._html_search_regex( r'<h2(?:.*?)>\s*(.+?)\s*<', webpage, u'video title') return { 'id': video_id, 'url': "rtmp://e%d.md.iplay.md/play" % random.randint(1, 16), 'play_path': "mp4:%s.f4v" % video_id, 'page_url': "http://videopremium.tv/" + video_id, 'player_url': "http://videopremium.tv/uplayer/uppod.swf", 'ext': 'f4v', 'title': video_title, } youtube-dl/youtube_dl/extractor/elpais.py0000644000000000000000000000416512275000667017675 0ustar rootroot# coding: utf-8 from __future__ import unicode_literals import re from .common import InfoExtractor from ..utils import unified_strdate class ElPaisIE(InfoExtractor): _VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])' IE_DESC = 'El País' _TEST = { 'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html', 'md5': '98406f301f19562170ec071b83433d55', 'info_dict': { 'id': 'tiempo-nuevo-recetas-viejas', 'ext': 'mp4', 'title': 'Tiempo nuevo, recetas viejas', 'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.', 'upload_date': '20140206', } } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) prefix = self._html_search_regex( r'var url_cache = "([^"]+)";', webpage, 'URL prefix') video_suffix = self._search_regex( r"URLMediaFile = url_cache \+ '([^']+)'", webpage, 'video URL') video_url = prefix + video_suffix thumbnail_suffix = self._search_regex( r"URLMediaStill = url_cache \+ '([^']+)'", webpage, 'thumbnail URL', fatal=False) thumbnail = ( None if thumbnail_suffix is None else prefix + thumbnail_suffix) title = self._html_search_regex( '<h2 class="entry-header entry-title.*?>(.*?)</h2>', webpage, 'title') date_str = self._search_regex( r'<p class="date-header date-int updated"\s+title="([^"]+)">', webpage, 'upload date', fatal=False) upload_date = (None if
date_str is None else unified_strdate(date_str)) return { 'id': video_id, 'url': video_url, 'title': title, 'description': self._og_search_description(webpage), 'thumbnail': thumbnail, 'upload_date': upload_date, } youtube-dl/youtube_dl/extractor/vice.py0000644000000000000000000000241012271337464017341 0ustar rootrootimport re from .common import InfoExtractor from .ooyala import OoyalaIE from ..utils import ExtractorError class ViceIE(InfoExtractor): _VALID_URL = r'http://www\.vice\.com/.*?/(?P<name>.+)' _TEST = { u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1', u'file': u'43cW1mYzpia9IlestBjVpd23Yu3afAfp.mp4', u'info_dict': { u'title': u'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov', }, u'params': { # Requires ffmpeg (m3u8 manifest) u'skip_download': True, }, } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) name = mobj.group('name') webpage = self._download_webpage(url, name) try: ooyala_url = self._og_search_video_url(webpage) except ExtractorError: try: embed_code = self._search_regex( r'OO.Player.create\(\'ooyalaplayer\', \'(.+?)\'', webpage, u'ooyala embed code') ooyala_url = OoyalaIE._url_for_embed_code(embed_code) except ExtractorError: raise ExtractorError(u'The page doesn\'t contain a video', expected=True) return self.url_result(ooyala_url, ie='Ooyala') youtube-dl/youtube_dl/extractor/dotsub.py0000644000000000000000000000356112277162042017715 0ustar rootrootfrom __future__ import unicode_literals import re import time from .common import InfoExtractor class DotsubIE(InfoExtractor): _VALID_URL = r'http://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)' _TEST = { 'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27', 'md5': '0914d4d69605090f623b7ac329fea66e', 'info_dict': { 'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27', 'ext': 'flv', 'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary', 'uploader': '4v4l0n42', 'description': 'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism
and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com', 'thumbnail': 'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p', 'upload_date': '20101213', } } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') info_url = "https://dotsub.com/api/media/%s/metadata" % video_id info = self._download_json(info_url, video_id) date = time.gmtime(info['dateCreated']/1000) # The timestamp is in milliseconds return { 'id': video_id, 'url': info['mediaURI'], 'ext': 'flv', 'title': info['title'], 'thumbnail': info['screenshotURI'], 'description': info['description'], 'uploader': info['user'], 'view_count': info['numberOfViews'], 'upload_date': '%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday), } youtube-dl/youtube_dl/extractor/ivi.py0000644000000000000000000001513112275167510017203 0ustar rootroot# encoding: utf-8 from __future__ import unicode_literals import re import json from .common import InfoExtractor from ..utils import ( compat_urllib_request, ExtractorError, ) class IviIE(InfoExtractor): IE_DESC = 'ivi.ru' IE_NAME = 'ivi' _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)' _TESTS = [ # Single movie { 'url': 'http://www.ivi.ru/watch/53141', 'md5': '6ff5be2254e796ed346251d117196cf4', 'info_dict': { 'id': '53141', 'ext': 'mp4', 'title': 'Иван Васильевич меняет профессию', 'description': 'md5:b924063ea1677c8fe343d8a72ac2195f', 'duration': 5498, 'thumbnail': 'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg', }, 'skip': 'Only works from Russia', }, # Series episode { 'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791', 'md5': '3e6cc9a848c1d2ebcc6476444967baa9', 'info_dict': { 'id': '74791', 'ext': 'mp4', 'title': 'Дежурный ангел - 1 серия', 'duration': 2490, 'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg', }, 'skip': 'Only works from Russia', } ] # Sorted by quality _known_formats = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ'] # Sorted by size _known_thumbnails = ['Thumb-120x90', 'Thumb-160', 'Thumb-640x480'] def _extract_description(self, html): m = re.search(r'<meta name="description" content="(?P<description>[^"]+)"/>', html) return m.group('description') if m is not None else None def _extract_comment_count(self, html): m = re.search('(?s)<a href="#" id="view-comments" class="action-button dim gradient">\s*Комментарии:\s*(?P<commentcount>\d+)\s*</a>', html) return int(m.group('commentcount')) if m is not None else 0 def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('videoid') api_url = 'http://api.digitalaccess.ru/api/json/' data = {'method': 'da.content.get', 'params': [video_id,
{'site': 's183', 'referrer': 'http://www.ivi.ru/watch/%s' % video_id, 'contentid': video_id } ] } request = compat_urllib_request.Request(api_url, json.dumps(data)) video_json_page = self._download_webpage(request, video_id, 'Downloading video JSON') video_json = json.loads(video_json_page) if 'error' in video_json: error = video_json['error'] if error['origin'] == 'NoRedisValidData': raise ExtractorError('Video %s does not exist' % video_id, expected=True) raise ExtractorError('Unable to download video %s: %s' % (video_id, error['message']), expected=True) result = video_json['result'] formats = [{ 'url': x['url'], 'format_id': x['content_format'], 'preference': self._known_formats.index(x['content_format']), } for x in result['files'] if x['content_format'] in self._known_formats] self._sort_formats(formats) if not formats: raise ExtractorError('No media links available for %s' % video_id) duration = result['duration'] compilation = result['compilation'] title = result['title'] title = '%s - %s' % (compilation, title) if compilation is not None else title previews = result['preview'] previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt['content_format'])) thumbnail = previews[-1]['url'] if len(previews) > 0 else None video_page = self._download_webpage(url, video_id, 'Downloading video page') description = self._extract_description(video_page) comment_count = self._extract_comment_count(video_page) return { 'id': video_id, 'title': title, 'thumbnail': thumbnail, 'description': description, 'duration': duration, 'comment_count': comment_count, 'formats': formats, } class IviCompilationIE(InfoExtractor): IE_DESC = 'ivi.ru compilations' IE_NAME = 'ivi:compilation' _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$' def _extract_entries(self, html, compilation_id): return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi') for serie in re.findall(r'<strong><a href="/watch/%s/(\d+)">(?:[^<]+)</a></strong>' % compilation_id, html)] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) compilation_id = mobj.group('compilationid') season_id = mobj.group('seasonid') if season_id is not None: # Season link season_page = self._download_webpage(url, compilation_id, 'Downloading season %s web page' % season_id) playlist_id = '%s/season%s' % (compilation_id, season_id) playlist_title = self._html_search_meta('title', season_page, 'title') entries = self._extract_entries(season_page, compilation_id) else: # Compilation link compilation_page = self._download_webpage(url, compilation_id, 'Downloading compilation web page') playlist_id = compilation_id playlist_title = self._html_search_meta('title', compilation_page, 'title') seasons = re.findall(r'<a href="/watch/%s/season(\d+)">[^<]+</a>' % compilation_id, compilation_page) if len(seasons) == 0: # No seasons in this compilation entries = self._extract_entries(compilation_page, compilation_id) else: entries = [] for season_id in seasons: season_page = self._download_webpage( 'http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id), compilation_id, 'Downloading season %s web page' % season_id) entries.extend(self._extract_entries(season_page, compilation_id)) return self.playlist_result(entries, playlist_id, 
playlist_title) youtube-dl/youtube_dl/extractor/rutube.py0000644000000000000000000001050512271613266017722 0ustar rootroot# encoding: utf-8 from __future__ import unicode_literals import re import json import itertools from .common import InfoExtractor from ..utils import ( compat_str, unified_strdate, ExtractorError, ) class RutubeIE(InfoExtractor): IE_NAME = 'rutube' IE_DESC = 'Rutube videos' _VALID_URL = r'https?://rutube\.ru/video/(?P<id>[\da-z]{32})' _TEST = { 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', 'file': '3eac3b4561676c17df9132a9a1e62e3e.mp4', 'info_dict': { 'title': 'Раненный кенгуру забежал в аптеку', 'description': 'http://www.ntdtv.ru ', 'duration': 80, 'uploader': 'NTDRussian', 'uploader_id': '29790', 'upload_date': '20131016', }, 'params': { # It requires ffmpeg (m3u8 download) 'skip_download': True, }, } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') api_response = self._download_webpage('http://rutube.ru/api/video/%s/?format=json' % video_id, video_id, 'Downloading video JSON') video = json.loads(api_response) api_response = self._download_webpage('http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id, video_id, 'Downloading trackinfo JSON') trackinfo = json.loads(api_response) # Some videos don't have the author field author = trackinfo.get('author') or {} m3u8_url = trackinfo['video_balancer'].get('m3u8') if m3u8_url is None: raise ExtractorError('Couldn\'t find m3u8 manifest url') return { 'id': video['id'], 'title': video['title'], 'description': video['description'], 'duration': video['duration'], 'view_count': video['hits'], 'url': m3u8_url, 'ext': 'mp4', 'thumbnail': video['thumbnail_url'], 'uploader': author.get('name'), 'uploader_id': compat_str(author['id']) if author else None, 'upload_date': unified_strdate(video['created_ts']), 'age_limit': 18 if video['is_adult'] else 0, } class RutubeChannelIE(InfoExtractor): IE_NAME = 'rutube:channel' IE_DESC = 'Rutube channels' _VALID_URL = r'http://rutube\.ru/tags/video/(?P<id>\d+)' _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json' def _extract_videos(self, channel_id, channel_title=None): entries = [] for pagenum in itertools.count(1): api_response = self._download_webpage( self._PAGE_TEMPLATE % (channel_id, pagenum), channel_id, 'Downloading page %s' % pagenum) page = json.loads(api_response) results = page['results'] if not results: break entries.extend(self.url_result(result['video_url'], 'Rutube') for result in results) if not page['has_next']: break return self.playlist_result(entries, channel_id, channel_title) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) channel_id =
mobj.group('id') return self._extract_videos(channel_id) class RutubeMovieIE(RutubeChannelIE): IE_NAME = 'rutube:movie' IE_DESC = 'Rutube movies' _VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)' _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json' _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) movie_id = mobj.group('id') api_response = self._download_webpage( self._MOVIE_TEMPLATE % movie_id, movie_id, 'Downloading movie JSON') movie = json.loads(api_response) movie_name = movie['name'] return self._extract_videos(movie_id, movie_name) class RutubePersonIE(RutubeChannelIE): IE_NAME = 'rutube:person' IE_DESC = 'Rutube person videos' _VALID_URL = r'http://rutube\.ru/video/person/(?P<id>\d+)' _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' youtube-dl/youtube_dl/extractor/myvideo.py0000644000000000000000000001530612271337464020077 0ustar rootrootimport binascii import base64 import hashlib import re import json from .common import InfoExtractor from ..utils import ( compat_ord, compat_urllib_parse, compat_urllib_request, ExtractorError, ) class MyVideoIE(InfoExtractor): """Information Extractor for myvideo.de.""" _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*' IE_NAME = u'myvideo' _TEST = { u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win', u'file': u'8229274.flv', u'md5': u'2d2753e8130479ba2cb7e0a37002053e', u'info_dict': { u"title": u"bowling-fail-or-win" } } # Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git # Released into the Public Domain by Tristan Fischer on 2013-05-19 # https://github.com/rg3/youtube-dl/pull/842 def __rc4crypt(self, data, key): x = 0 box = list(range(256)) for i in list(range(256)): x = (x + box[i] + compat_ord(key[i % len(key)])) % 256 box[i], box[x] = box[x], box[i] x = 0 y = 0 out = '' for char in data: x = (x + 1) % 256 y = (y + box[x]) % 256 box[x], box[y] = box[y], box[x] out += chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256]) return out def __md5(self, s): return hashlib.md5(s).hexdigest().encode() def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: raise ExtractorError(u'invalid URL: %s' % url) video_id = mobj.group(1) GK = ( b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt' b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3' b'TnpsbA0KTVRkbU1tSTRNdz09' ) # Get video webpage webpage_url = 'http://www.myvideo.de/watch/%s' % video_id webpage = self._download_webpage(webpage_url, video_id) mobj = re.search('source src=\'(.+?)[.]([^.]+)\'', webpage) if mobj is not None: self.report_extraction(video_id) video_url = mobj.group(1) + '.flv' video_title = self._html_search_regex('<title>([^<]+)', webpage, u'title') video_ext = self._search_regex('[.](.+?)$', video_url, u'extension') return [{ 'id': video_id, 'url':
video_url, 'uploader': None, 'upload_date': None, 'title': video_title, 'ext': video_ext, }] mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage) if mobj is not None: request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '') response = self._download_webpage(request, video_id, u'Downloading video info') info = json.loads(base64.b64decode(response).decode('utf-8')) return {'id': video_id, 'title': info['title'], 'url': info['streaming_url'].replace('rtmpe', 'rtmpt'), 'play_path': info['filename'], 'ext': 'flv', 'thumbnail': info['thumbnail'][0]['url'], } # try encxml mobj = re.search('var flashvars={(.+?)}', webpage) if mobj is None: raise ExtractorError(u'Unable to extract video') params = {} encxml = '' sec = mobj.group(1) for (a, b) in re.findall('(.+?):\'(.+?)\',?', sec): if a != '_encxml': params[a] = b else: encxml = compat_urllib_parse.unquote(b) if not params.get('domain'): params['domain'] = 'www.myvideo.de' xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params)) if 'flash_playertype=MTV' in xmldata_url: self._downloader.report_warning(u'avoiding MTV player') xmldata_url = ( 'http://www.myvideo.de/dynamic/get_player_video_xml.php' '?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes' ) % video_id # get enc data enc_data = self._download_webpage(xmldata_url, video_id).split('=')[1] enc_data_b = binascii.unhexlify(enc_data) sk = self.__md5( base64.b64decode(base64.b64decode(GK)) + self.__md5( str(video_id).encode('utf-8') ) ) dec_data = self.__rc4crypt(enc_data_b, sk) # extracting infos self.report_extraction(video_id) video_url = None mobj = re.search('connectionurl=\'(.*?)\'', dec_data) if mobj: video_url = compat_urllib_parse.unquote(mobj.group(1)) if 'myvideo2flash' in video_url: self.report_warning( u'Rewriting URL to use unencrypted rtmp:// ...', video_id) video_url = video_url.replace('rtmpe://', 'rtmp://') if not video_url: # extract non rtmp videos mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data) if mobj is None: raise ExtractorError(u'unable to extract url') video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2)) video_file = self._search_regex('source=\'(.*?)\'', dec_data, u'video file') video_file = compat_urllib_parse.unquote(video_file) if not video_file.endswith('f4m'): ppath, prefix = video_file.split('.') video_playpath = '%s:%s' % (prefix, ppath) video_hls_playlist = '' else: video_playpath = '' video_hls_playlist = ( video_file ).replace('.f4m', '.m3u8') video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, u'swfobj') video_swfobj = compat_urllib_parse.unquote(video_swfobj) video_title = self._html_search_regex("<title>(.*?)</title>", webpage, u'title') return [{ 'id': video_id, 'url': video_url, 'tc_url': video_url, 'uploader': None, 'upload_date': None, 'title': video_title, 'ext': u'flv', 'play_path': video_playpath, 'video_file': video_file, 'video_hls_playlist': video_hls_playlist, 'player_url': video_swfobj, }] youtube-dl/youtube_dl/extractor/vk.py0000644000000000000000000000747712300361346017042 0ustar rootroot# encoding: utf-8 from __future__ import unicode_literals import re import json from .common import InfoExtractor from ..utils import ( ExtractorError, compat_urllib_request, compat_urllib_parse, compat_str, unescapeHTML, ) class VKIE(InfoExtractor): IE_NAME = 'vk.com' _VALID_URL = r'https?://vk\.com/(?:videos.*?\?.*?z=)?video(?P<id>.*?)(?:\?|%2F|$)' _NETRC_MACHINE = 'vk' _TESTS = [
{ 'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521', 'md5': '0deae91935c54e00003c2a00646315f0', 'info_dict': { 'id': '162222515', 'ext': 'flv', 'title': 'ProtivoGunz - Хуёвая песня', 'uploader': 'Noize MC', 'duration': 195, }, }, { 'url': 'http://vk.com/video4643923_163339118', 'md5': 'f79bccb5cd182b1f43502ca5685b2b36', 'info_dict': { 'id': '163339118', 'ext': 'mp4', 'uploader': 'Elvira Dzhonik', 'title': 'Dream Theater - Hollow Years Live at Budokan 720*', 'duration': 558, } }, { 'url': 'http://vk.com/video-8871596_164049491', 'md5': 'a590bcaf3d543576c9bd162812387666', 'note': 'Only available for registered users', 'info_dict': { 'id': '164049491', 'ext': 'mp4', 'uploader': 'Триллеры', 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]\u00a0', 'duration': 8352, }, 'skip': 'Requires vk account credentials', } ] def _login(self): (username, password) = self._get_login_info() if username is None: return login_form = { 'act': 'login', 'role': 'al_frame', 'expire': '1', 'email': username, 'pass': password, } request = compat_urllib_request.Request('https://login.vk.com/?act=login', compat_urllib_parse.urlencode(login_form).encode('utf-8')) login_page = self._download_webpage(request, None, note='Logging in as %s' % username) if re.search(r'onLoginFailed', login_page): raise ExtractorError('Unable to login, incorrect username and/or password', expected=True) def _real_initialize(self): self._login() def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id info_page = self._download_webpage(info_url, video_id) if re.search(r'Please log in or <', info_page): raise ExtractorError('This video is only available for registered users, ' 'use --username and --password options to provide account credentials.', expected=True) m_yt = re.search(r'src="(http://www.youtube.com/.*?)"', info_page) if m_yt is not None: self.to_screen(u'Youtube video detected') return self.url_result(m_yt.group(1), 'Youtube') data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars') data = json.loads(data_json) formats = [{ 'format_id': k, 'url': v, 'width': int(k[len('url'):]), } for k, v in data.items() if k.startswith('url')] self._sort_formats(formats) return { 'id': compat_str(data['vid']), 'formats': formats, 'title': unescapeHTML(data['md_title']), 'thumbnail': data.get('jpg'), 'uploader': data.get('md_author'), 'duration': data.get('duration') } youtube-dl/youtube_dl/extractor/ebaumsworld.py0000644000000000000000000000215612271337464020746 0ustar rootrootimport re from .common import InfoExtractor from ..utils import determine_ext class EbaumsWorldIE(InfoExtractor): _VALID_URL = r'https?://www\.ebaumsworld\.com/video/watch/(?P<id>\d+)' _TEST = { u'url': u'http://www.ebaumsworld.com/video/watch/83367677/', u'file': u'83367677.mp4', u'info_dict': { u'title': u'A Giant Python Opens The Door', u'description': u'This is how nightmares start...', u'uploader': u'jihadpizza', }, } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') config = self._download_xml( 'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id) video_url = config.find('file').text return { 'id': video_id, 'title': config.find('title').text, 'url': video_url, 'ext': determine_ext(video_url), 'description': config.find('description').text, 'thumbnail': config.find('image').text, 'uploader': config.find('username').text, }
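# A minimal usage sketch for an extractor like the one above, driven by the
# YoutubeDL core defined earlier in this archive (the parameter values are
# illustrative assumptions, not a documented recipe):
#   import youtube_dl
#   ydl = youtube_dl.YoutubeDL({'skip_download': True, 'outtmpl': '%(id)s.%(ext)s'})
#   info = ydl.extract_info('http://www.ebaumsworld.com/video/watch/83367677/')
#   print(info['title'])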
youtube-dl/youtube_dl/extractor/stanfordoc.py0000644000000000000000000001005212271337464020556 0ustar rootrootimport re from .common import InfoExtractor from ..utils import ( ExtractorError, orderedSet, unescapeHTML, ) class StanfordOpenClassroomIE(InfoExtractor): IE_NAME = u'stanfordoc' IE_DESC = u'Stanford Open ClassRoom' _VALID_URL = r'^(?:https?://)?openclassroom\.stanford\.edu(?P/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P[^&]+)(&video=(?P