pax_global_header00006660000000000000000000000064142775524370014532gustar00rootroot0000000000000052 comment=b76e9cedb33d23f21060281596f7443750f67758 yt-dlp-2022.08.19/000077500000000000000000000000001427755243700133505ustar00rootroot00000000000000yt-dlp-2022.08.19/.editorconfig000066400000000000000000000002061427755243700160230ustar00rootroot00000000000000root = true [**.py] charset = utf-8 indent_size = 4 indent_style = space trim_trailing_whitespace = true insert_final_newline = true yt-dlp-2022.08.19/.gitattributes000066400000000000000000000001531427755243700162420ustar00rootroot00000000000000* text=auto Makefile* text whitespace=-tab-in-indent *.sh text eol=lf *.md diff=markdown *.py diff=python yt-dlp-2022.08.19/.github/000077500000000000000000000000001427755243700147105ustar00rootroot00000000000000yt-dlp-2022.08.19/.github/FUNDING.yml000066400000000000000000000013201427755243700165210ustar00rootroot00000000000000# These are supported funding model platforms github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] patreon: # Replace with a single Patreon username open_collective: # Replace with a single Open Collective username ko_fi: # Replace with a single Ko-fi username tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry liberapay: # Replace with a single Liberapay username issuehunt: # Replace with a single IssueHunt username otechie: # Replace with a single Otechie username custom: ['https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators'] yt-dlp-2022.08.19/.github/ISSUE_TEMPLATE/000077500000000000000000000000001427755243700170735ustar00rootroot00000000000000yt-dlp-2022.08.19/.github/ISSUE_TEMPLATE/1_broken_site.yml000066400000000000000000000101521427755243700223410ustar00rootroot00000000000000name: Broken site description: Report broken or misfunctioning site labels: [triage, site-bug] body: - type: checkboxes attributes: label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE description: Fill all fields even if you think it is irrelevant for the issue options: - label: I understand that I will be **blocked** if I remove or skip any mandatory\* field required: true - type: checkboxes id: checklist attributes: label: Checklist description: | Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: options: - label: I'm reporting a broken site required: true - label: I've verified that I'm running yt-dlp version **2022.08.19** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/ytdl-org/youtube-dl#video-url-contains-an-ampersand-and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command) required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. 
DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required - type: input id: region attributes: label: Region description: Enter the country/region that the site is accessible from placeholder: India - type: textarea id: description attributes: label: Provide a description that is worded well enough to be understood description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient) placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible validations: required: true - type: checkboxes id: verbose attributes: label: Provide verbose output that clearly demonstrates the problem options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) required: true - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below required: true - type: textarea id: log attributes: label: Complete Verbose Output description: | It should start like this: placeholder: | [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] yt-dlp version 2022.08.19 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest Latest version: 2022.08.19, Current version: 2022.08.19 yt-dlp is up to date (2022.08.19) render: shell validations: required: true yt-dlp-2022.08.19/.github/ISSUE_TEMPLATE/2_site_support_request.yml000066400000000000000000000111401427755243700243440ustar00rootroot00000000000000name: Site support request description: Request support for a new site labels: [triage, site-request] body: - type: checkboxes attributes: label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE description: Fill all fields even if you think it is irrelevant for the issue options: - label: I understand that I will be **blocked** if I remove or skip any mandatory\* field required: true - type: checkboxes id: checklist attributes: label: Checklist description: | Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: options: - label: I'm reporting a new site support request required: true - label: I've verified that I'm running yt-dlp version **2022.08.19** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true - label: I've checked that none of provided URLs [violate any 
copyrights](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free) or contain any [DRM](https://en.wikipedia.org/wiki/Digital_rights_management) to the best of my knowledge required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and am willing to share it if required - type: input id: region attributes: label: Region description: Enter the country/region that the site is accessible from placeholder: India - type: textarea id: example-urls attributes: label: Example URLs description: | Provide all kinds of example URLs for which support should be added placeholder: | - Single video: https://www.youtube.com/watch?v=BaW_jenozKc - Single video: https://youtu.be/BaW_jenozKc - Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc validations: required: true - type: textarea id: description attributes: label: Provide a description that is worded well enough to be understood description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient) placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible validations: required: true - type: checkboxes id: verbose attributes: label: Provide verbose output that clearly demonstrates the problem options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) required: true - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below required: true - type: textarea id: log attributes: label: Complete Verbose Output description: | It should start like this: placeholder: | [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] yt-dlp version 2022.08.19 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest Latest version: 2022.08.19, Current version: 2022.08.19 yt-dlp is up to date (2022.08.19) render: shell validations: required: true yt-dlp-2022.08.19/.github/ISSUE_TEMPLATE/3_site_feature_request.yml000066400000000000000000000101731427755243700242710ustar00rootroot00000000000000name: Site feature request description: Request a new functionality for a supported site labels: [triage, site-enhancement] body: - type: checkboxes attributes: label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE description: Fill all fields even if you think it is irrelevant for the issue options: - 
label: I understand that I will be **blocked** if I remove or skip any mandatory\* field required: true - type: checkboxes id: checklist attributes: label: Checklist description: | Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: options: - label: I'm requesting a site-specific feature required: true - label: I've verified that I'm running yt-dlp version **2022.08.19** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required - type: input id: region attributes: label: Region description: Enter the country/region that the site is accessible from placeholder: India - type: textarea id: example-urls attributes: label: Example URLs description: | Example URLs that can be used to demonstrate the requested feature placeholder: | https://www.youtube.com/watch?v=BaW_jenozKc validations: required: true - type: textarea id: description attributes: label: Provide a description that is worded well enough to be understood description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient) placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible validations: required: true - type: checkboxes id: verbose attributes: label: Provide verbose output that clearly demonstrates the problem options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) required: true - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below required: true - type: textarea id: log attributes: label: Complete Verbose Output description: | It should start like this: placeholder: | [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] yt-dlp version 2022.08.19 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest Latest version: 2022.08.19, Current version: 2022.08.19 yt-dlp is up to date (2022.08.19) render: shell validations: required: true yt-dlp-2022.08.19/.github/ISSUE_TEMPLATE/4_bug_report.yml000066400000000000000000000074161427755243700222210ustar00rootroot00000000000000name: Bug report description: Report a bug 
unrelated to any particular site or extractor labels: [triage, bug] body: - type: checkboxes attributes: label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE description: Fill all fields even if you think it is irrelevant for the issue options: - label: I understand that I will be **blocked** if I remove or skip any mandatory\* field required: true - type: checkboxes id: checklist attributes: label: Checklist description: | Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: options: - label: I'm reporting a bug unrelated to a specific site required: true - label: I've verified that I'm running yt-dlp version **2022.08.19** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/ytdl-org/youtube-dl#video-url-contains-an-ampersand-and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command) required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true - type: textarea id: description attributes: label: Provide a description that is worded well enough to be understood description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient) placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible validations: required: true - type: checkboxes id: verbose attributes: label: Provide verbose output that clearly demonstrates the problem options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) required: true - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below required: true - type: textarea id: log attributes: label: Complete Verbose Output description: | It should start like this: placeholder: | [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] yt-dlp version 2022.08.19 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest Latest version: 2022.08.19, Current version: 2022.08.19 yt-dlp is up to date (2022.08.19) render: shell validations: required: true yt-dlp-2022.08.19/.github/ISSUE_TEMPLATE/5_feature_request.yml000066400000000000000000000066031427755243700232520ustar00rootroot00000000000000name: Feature request description: Request a new functionality unrelated to any 
particular site or extractor labels: [triage, enhancement] body: - type: checkboxes attributes: label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE description: Fill all fields even if you think it is irrelevant for the issue options: - label: I understand that I will be **blocked** if I remove or skip any mandatory\* field required: true - type: checkboxes id: checklist attributes: label: Checklist description: | Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: options: - label: I'm requesting a feature unrelated to a specific site required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - label: I've verified that I'm running yt-dlp version **2022.08.19** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true - type: textarea id: description attributes: label: Provide a description that is worded well enough to be understood description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient) placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible validations: required: true - type: checkboxes id: verbose attributes: label: Provide verbose output that clearly demonstrates the problem options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below - type: textarea id: log attributes: label: Complete Verbose Output description: | It should start like this: placeholder: | [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] yt-dlp version 2022.08.19 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest Latest version: 2022.08.19, Current version: 2022.08.19 yt-dlp is up to date (2022.08.19) render: shell yt-dlp-2022.08.19/.github/ISSUE_TEMPLATE/6_question.yml000066400000000000000000000072461427755243700217230ustar00rootroot00000000000000name: Ask question description: Ask yt-dlp related question labels: [question] body: - type: checkboxes attributes: label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE description: Fill all fields even if you think it is irrelevant for the issue options: - label: I understand that I will be **blocked** if I remove or skip any mandatory\* field required: true - type: markdown attributes: value: | ### Make sure you are **only** asking a question and not reporting a bug or 
requesting a feature. If your question contains "isn't working" or "can you add", this is most likely the wrong template. If you are in doubt whether this is the right template, **USE ANOTHER TEMPLATE**! - type: checkboxes id: checklist attributes: label: Checklist description: | Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: options: - label: I'm asking a question and **not** reporting a bug or requesting a feature required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - label: I've verified that I'm running yt-dlp version **2022.08.19** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true - type: textarea id: question attributes: label: Please make sure the question is worded well enough to be understood description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient) placeholder: Provide any additional information and as much context and examples as possible validations: required: true - type: checkboxes id: verbose attributes: label: Provide verbose output that clearly demonstrates the problem options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below - type: textarea id: log attributes: label: Complete Verbose Output description: | It should start like this: placeholder: | [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] yt-dlp version 2022.08.19 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest Latest version: 2022.08.19, Current version: 2022.08.19 yt-dlp is up to date (2022.08.19) render: shell yt-dlp-2022.08.19/.github/ISSUE_TEMPLATE/config.yml000066400000000000000000000005271427755243700210670ustar00rootroot00000000000000blank_issues_enabled: false contact_links: - name: Get help from the community on Discord url: https://discord.gg/H5MNcFW63r about: Join the yt-dlp Discord for community-powered support! 
- name: Matrix Bridge to the Discord server url: https://matrix.to/#/#yt-dlp:matrix.org about: For those who do not want to use Discord yt-dlp-2022.08.19/.github/ISSUE_TEMPLATE_tmpl/000077500000000000000000000000001427755243700201275ustar00rootroot00000000000000yt-dlp-2022.08.19/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml000066400000000000000000000044071427755243700234030ustar00rootroot00000000000000name: Broken site description: Report broken or misfunctioning site labels: [triage, site-bug] body: %(no_skip)s - type: checkboxes id: checklist attributes: label: Checklist description: | Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: options: - label: I'm reporting a broken site required: true - label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/ytdl-org/youtube-dl#video-url-contains-an-ampersand-and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command) required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required - type: input id: region attributes: label: Region description: Enter the country/region that the site is accessible from placeholder: India - type: textarea id: description attributes: label: Provide a description that is worded well enough to be understood description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient) placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible validations: required: true %(verbose)s yt-dlp-2022.08.19/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml000066400000000000000000000053751427755243700254150ustar00rootroot00000000000000name: Site support request description: Request support for a new site labels: [triage, site-request] body: %(no_skip)s - type: checkboxes id: checklist attributes: label: Checklist description: | Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: options: - label: I'm reporting a new site support request required: true - label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true - label: I've checked that none of provided URLs [violate any copyrights](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free) or contain any 
[DRM](https://en.wikipedia.org/wiki/Digital_rights_management) to the best of my knowledge required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and am willing to share it if required - type: input id: region attributes: label: Region description: Enter the country/region that the site is accessible from placeholder: India - type: textarea id: example-urls attributes: label: Example URLs description: | Provide all kinds of example URLs for which support should be added placeholder: | - Single video: https://www.youtube.com/watch?v=BaW_jenozKc - Single video: https://youtu.be/BaW_jenozKc - Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc validations: required: true - type: textarea id: description attributes: label: Provide a description that is worded well enough to be understood description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient) placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible validations: required: true %(verbose)s yt-dlp-2022.08.19/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml000066400000000000000000000044301427755243700253240ustar00rootroot00000000000000name: Site feature request description: Request a new functionality for a supported site labels: [triage, site-enhancement] body: %(no_skip)s - type: checkboxes id: checklist attributes: label: Checklist description: | Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: options: - label: I'm requesting a site-specific feature required: true - label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. 
DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required - type: input id: region attributes: label: Region description: Enter the country/region that the site is accessible from placeholder: India - type: textarea id: example-urls attributes: label: Example URLs description: | Example URLs that can be used to demonstrate the requested feature placeholder: | https://www.youtube.com/watch?v=BaW_jenozKc validations: required: true - type: textarea id: description attributes: label: Provide a description that is worded well enough to be understood description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient) placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible validations: required: true %(verbose)s yt-dlp-2022.08.19/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml000066400000000000000000000036531427755243700232540ustar00rootroot00000000000000name: Bug report description: Report a bug unrelated to any particular site or extractor labels: [triage, bug] body: %(no_skip)s - type: checkboxes id: checklist attributes: label: Checklist description: | Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: options: - label: I'm reporting a bug unrelated to a specific site required: true - label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/ytdl-org/youtube-dl#video-url-contains-an-ampersand-and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command) required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. 
DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true - type: textarea id: description attributes: label: Provide a description that is worded well enough to be understood description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient) placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible validations: required: true %(verbose)s yt-dlp-2022.08.19/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml000066400000000000000000000032011427755243700242750ustar00rootroot00000000000000name: Feature request description: Request a new functionality unrelated to any particular site or extractor labels: [triage, enhancement] body: %(no_skip)s - type: checkboxes id: checklist attributes: label: Checklist description: | Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: options: - label: I'm requesting a feature unrelated to a specific site required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true - type: textarea id: description attributes: label: Provide a description that is worded well enough to be understood description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient) placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible validations: required: true %(verbose_optional)s yt-dlp-2022.08.19/.github/ISSUE_TEMPLATE_tmpl/6_question.yml000066400000000000000000000036441427755243700227550ustar00rootroot00000000000000name: Ask question description: Ask yt-dlp related question labels: [question] body: %(no_skip)s - type: markdown attributes: value: | ### Make sure you are **only** asking a question and not reporting a bug or requesting a feature. If your question contains "isn't working" or "can you add", this is most likely the wrong template. If you are in doubt whether this is the right template, **USE ANOTHER TEMPLATE**! - type: checkboxes id: checklist attributes: label: Checklist description: | Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: options: - label: I'm asking a question and **not** reporting a bug or requesting a feature required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. 
DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true - type: textarea id: question attributes: label: Please make sure the question is worded well enough to be understood description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient) placeholder: Provide any additional information and as much context and examples as possible validations: required: true %(verbose_optional)s yt-dlp-2022.08.19/.github/PULL_REQUEST_TEMPLATE.md000066400000000000000000000045111427755243700205120ustar00rootroot00000000000000**IMPORTANT**: PRs without the template will be CLOSED ### Description of your *pull request* and other information ADD DESCRIPTION HERE Fixes #
Template ### Before submitting a *pull request* make sure you have: - [ ] At least skimmed through [contributing guidelines](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) including [yt-dlp coding conventions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#yt-dlp-coding-conventions) - [ ] [Searched](https://github.com/yt-dlp/yt-dlp/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests - [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8) and [ran relevant tests](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) ### In order to be accepted and merged into yt-dlp each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options: - [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/) - [ ] I am not the original author of this code but it is in public domain or released under [Unlicense](http://unlicense.org/) (provide reliable evidence) ### What is the purpose of your *pull request*? - [ ] Fix or improvement to an extractor (Make sure to add/update tests) - [ ] New extractor ([Piracy websites will not be accepted](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-website-primarily-used-for-piracy)) - [ ] Core bug fix/improvement - [ ] New feature (It is strongly [recommended to open an issue first](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#adding-new-feature-or-making-overarching-changes)) yt-dlp-2022.08.19/.github/banner.svg000066400000000000000000000603101427755243700166760ustar00rootroot00000000000000 yt-dlp-2022.08.19/.github/workflows/000077500000000000000000000000001427755243700167455ustar00rootroot00000000000000yt-dlp-2022.08.19/.github/workflows/build.yml000066400000000000000000000275671427755243700206100ustar00rootroot00000000000000name: Build on: workflow_dispatch jobs: prepare: runs-on: ubuntu-latest outputs: version_suffix: ${{ steps.version_suffix.outputs.version_suffix }} ytdlp_version: ${{ steps.bump_version.outputs.ytdlp_version }} head_sha: ${{ steps.push_release.outputs.head_sha }} steps: - uses: actions/checkout@v3 with: fetch-depth: 0 - uses: actions/setup-python@v4 with: python-version: '3.10' - name: Set version suffix id: version_suffix env: PUSH_VERSION_COMMIT: ${{ secrets.PUSH_VERSION_COMMIT }} if: "env.PUSH_VERSION_COMMIT == ''" run: echo ::set-output name=version_suffix::$(date -u +"%H%M%S") - name: Bump version id: bump_version run: | python devscripts/update-version.py ${{ steps.version_suffix.outputs.version_suffix }} make issuetemplates - name: Push to release id: push_release run: | git config --global user.name github-actions git config --global user.email github-actions@example.com git add -u git commit -m "[version] update" -m "Created by: ${{ github.event.sender.login }}" -m ":ci skip all :ci run dl" git push origin --force ${{ github.event.ref }}:release echo ::set-output name=head_sha::$(git rev-parse HEAD) - name: Update master env: PUSH_VERSION_COMMIT: ${{ secrets.PUSH_VERSION_COMMIT }} if: "env.PUSH_VERSION_COMMIT != ''" run: git push origin ${{ github.event.ref }} build_unix: needs: prepare runs-on: ubuntu-18.04 # Standalone executable should be built on minimum supported OS steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: python-version: '3.10' - name: Install Requirements run: | sudo apt-get -y install zip pandoc man python -m pip install 
--upgrade pip setuptools wheel twine python -m pip install Pyinstaller -r requirements.txt - name: Prepare run: | python devscripts/update-version.py ${{ needs.prepare.outputs.version_suffix }} python devscripts/make_lazy_extractors.py - name: Build Unix executables run: | make all tar python pyinst.py --onedir (cd ./dist/yt-dlp_linux && zip -r ../yt-dlp_linux.zip .) python pyinst.py - name: Get SHA2-SUMS id: get_sha run: | - name: Upload artifacts uses: actions/upload-artifact@v3 with: path: | yt-dlp yt-dlp.tar.gz dist/yt-dlp_linux dist/yt-dlp_linux.zip - name: Build and publish on PyPi env: TWINE_USERNAME: __token__ TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} if: "env.TWINE_PASSWORD != ''" run: | rm -rf dist/* python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update" python setup.py sdist bdist_wheel twine upload dist/* - name: Install SSH private key for Homebrew env: BREW_TOKEN: ${{ secrets.BREW_TOKEN }} if: "env.BREW_TOKEN != ''" uses: yt-dlp/ssh-agent@v0.5.3 with: ssh-private-key: ${{ env.BREW_TOKEN }} - name: Update Homebrew Formulae env: BREW_TOKEN: ${{ secrets.BREW_TOKEN }} if: "env.BREW_TOKEN != ''" run: | git clone git@github.com:yt-dlp/homebrew-taps taps/ python devscripts/update-formulae.py taps/Formula/yt-dlp.rb "${{ needs.prepare.outputs.ytdlp_version }}" git -C taps/ config user.name github-actions git -C taps/ config user.email github-actions@example.com git -C taps/ commit -am 'yt-dlp: ${{ needs.prepare.outputs.ytdlp_version }}' git -C taps/ push build_macos: runs-on: macos-11 needs: prepare steps: - uses: actions/checkout@v3 # NB: In order to create a universal2 application, the version of python3 in /usr/bin has to be used - name: Install Requirements run: | brew install coreutils /usr/bin/python3 -m pip install -U --user pip Pyinstaller -r requirements.txt - name: Prepare run: | /usr/bin/python3 devscripts/update-version.py ${{ needs.prepare.outputs.version_suffix }} /usr/bin/python3 devscripts/make_lazy_extractors.py - name: Build run: | /usr/bin/python3 pyinst.py --target-architecture universal2 --onedir (cd ./dist/yt-dlp_macos && zip -r ../yt-dlp_macos.zip .) /usr/bin/python3 pyinst.py --target-architecture universal2 - name: Upload artifacts uses: actions/upload-artifact@v3 with: path: | dist/yt-dlp_macos dist/yt-dlp_macos.zip build_macos_legacy: runs-on: macos-latest needs: prepare steps: - uses: actions/checkout@v3 - name: Install Python # We need the official Python, because the GA ones only support newer macOS versions env: PYTHON_VERSION: 3.10.5 MACOSX_DEPLOYMENT_TARGET: 10.9 # Used up by the Python build tools run: | # Hack to get the latest patch version. 
Uncomment if needed #brew install python@3.10 #export PYTHON_VERSION=$( $(brew --prefix)/opt/python@3.10/bin/python3 --version | cut -d ' ' -f 2 ) curl https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-macos11.pkg -o "python.pkg" sudo installer -pkg python.pkg -target / python3 --version - name: Install Requirements run: | brew install coreutils python3 -m pip install -U --user pip Pyinstaller -r requirements.txt - name: Prepare run: | python3 devscripts/update-version.py ${{ needs.prepare.outputs.version_suffix }} python3 devscripts/make_lazy_extractors.py - name: Build run: | python3 pyinst.py mv dist/yt-dlp_macos dist/yt-dlp_macos_legacy - name: Upload artifacts uses: actions/upload-artifact@v3 with: path: | dist/yt-dlp_macos_legacy build_windows: runs-on: windows-latest needs: prepare steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: # 3.8 is used for Win7 support python-version: '3.8' - name: Install Requirements run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds python -m pip install --upgrade pip setuptools wheel py2exe pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.2-py3-none-any.whl" -r requirements.txt - name: Prepare run: | python devscripts/update-version.py ${{ needs.prepare.outputs.version_suffix }} python devscripts/make_lazy_extractors.py - name: Build run: | python setup.py py2exe Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe python pyinst.py python pyinst.py --onedir Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip - name: Upload artifacts uses: actions/upload-artifact@v3 with: path: | dist/yt-dlp.exe dist/yt-dlp_min.exe dist/yt-dlp_win.zip build_windows32: runs-on: windows-latest needs: prepare steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: # 3.7 is used for Vista support. 
See https://github.com/yt-dlp/yt-dlp/issues/390 python-version: '3.7' architecture: 'x86' - name: Install Requirements run: | python -m pip install --upgrade pip setuptools wheel pip install "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.2-py3-none-any.whl" -r requirements.txt - name: Prepare run: | python devscripts/update-version.py ${{ needs.prepare.outputs.version_suffix }} python devscripts/make_lazy_extractors.py - name: Build run: | python pyinst.py - name: Upload artifacts uses: actions/upload-artifact@v3 with: path: | dist/yt-dlp_x86.exe publish_release: runs-on: ubuntu-latest needs: [prepare, build_unix, build_windows, build_windows32, build_macos, build_macos_legacy] steps: - uses: actions/checkout@v3 - uses: actions/download-artifact@v3 - name: Get Changelog run: | changelog=$(grep -oPz '(?s)(?<=### ${{ needs.prepare.outputs.ytdlp_version }}\n{2}).+?(?=\n{2,3}###)' Changelog.md) || true echo "changelog<> $GITHUB_ENV echo "$changelog" >> $GITHUB_ENV echo "EOF" >> $GITHUB_ENV - name: Make Update spec run: | echo "# This file is used for regulating self-update" >> _update_spec echo "lock 2022.07.18 .+ Python 3.6" >> _update_spec - name: Make SHA2-SUMS files run: | sha256sum artifact/yt-dlp | awk '{print $1 " yt-dlp"}' >> SHA2-256SUMS sha256sum artifact/yt-dlp.tar.gz | awk '{print $1 " yt-dlp.tar.gz"}' >> SHA2-256SUMS sha256sum artifact/yt-dlp.exe | awk '{print $1 " yt-dlp.exe"}' >> SHA2-256SUMS sha256sum artifact/yt-dlp_win.zip | awk '{print $1 " yt-dlp_win.zip"}' >> SHA2-256SUMS sha256sum artifact/yt-dlp_min.exe | awk '{print $1 " yt-dlp_min.exe"}' >> SHA2-256SUMS sha256sum artifact/yt-dlp_x86.exe | awk '{print $1 " yt-dlp_x86.exe"}' >> SHA2-256SUMS sha256sum artifact/yt-dlp_macos | awk '{print $1 " yt-dlp_macos"}' >> SHA2-256SUMS sha256sum artifact/yt-dlp_macos.zip | awk '{print $1 " yt-dlp_macos.zip"}' >> SHA2-256SUMS sha256sum artifact/yt-dlp_macos_legacy | awk '{print $1 " yt-dlp_macos_legacy"}' >> SHA2-256SUMS sha256sum artifact/dist/yt-dlp_linux | awk '{print $1 " yt-dlp_linux"}' >> SHA2-256SUMS sha256sum artifact/dist/yt-dlp_linux.zip | awk '{print $1 " yt-dlp_linux.zip"}' >> SHA2-256SUMS sha512sum artifact/yt-dlp | awk '{print $1 " yt-dlp"}' >> SHA2-512SUMS sha512sum artifact/yt-dlp.tar.gz | awk '{print $1 " yt-dlp.tar.gz"}' >> SHA2-512SUMS sha512sum artifact/yt-dlp.exe | awk '{print $1 " yt-dlp.exe"}' >> SHA2-512SUMS sha512sum artifact/yt-dlp_win.zip | awk '{print $1 " yt-dlp_win.zip"}' >> SHA2-512SUMS sha512sum artifact/yt-dlp_min.exe | awk '{print $1 " yt-dlp_min.exe"}' >> SHA2-512SUMS sha512sum artifact/yt-dlp_x86.exe | awk '{print $1 " yt-dlp_x86.exe"}' >> SHA2-512SUMS sha512sum artifact/yt-dlp_macos | awk '{print $1 " yt-dlp_macos"}' >> SHA2-512SUMS sha512sum artifact/yt-dlp_macos.zip | awk '{print $1 " yt-dlp_macos.zip"}' >> SHA2-512SUMS sha512sum artifact/yt-dlp_macos_legacy | awk '{print $1 " yt-dlp_macos_legacy"}' >> SHA2-512SUMS sha512sum artifact/dist/yt-dlp_linux | awk '{print $1 " yt-dlp_linux"}' >> SHA2-512SUMS sha512sum artifact/dist/yt-dlp_linux.zip | awk '{print $1 " yt-dlp_linux.zip"}' >> SHA2-512SUMS - name: Publish Release uses: yt-dlp/action-gh-release@v1 with: tag_name: ${{ needs.prepare.outputs.ytdlp_version }} name: yt-dlp ${{ needs.prepare.outputs.ytdlp_version }} target_commitish: ${{ needs.prepare.outputs.head_sha }} body: | #### [A description of the various files]((https://github.com/yt-dlp/yt-dlp#release-files)) are in the README ---

          <details open><summary><h3>Changelog</h3></summary>
          <p>

          ${{ env.changelog }}

          </p>
          </details>
files: | SHA2-256SUMS SHA2-512SUMS artifact/yt-dlp artifact/yt-dlp.tar.gz artifact/yt-dlp.exe artifact/yt-dlp_win.zip artifact/yt-dlp_min.exe artifact/yt-dlp_x86.exe artifact/yt-dlp_macos artifact/yt-dlp_macos.zip artifact/yt-dlp_macos_legacy artifact/dist/yt-dlp_linux artifact/dist/yt-dlp_linux.zip _update_spec yt-dlp-2022.08.19/.github/workflows/core.yml000066400000000000000000000020451427755243700204210ustar00rootroot00000000000000name: Core Tests on: [push, pull_request] jobs: tests: name: Core Tests if: "!contains(github.event.head_commit.message, 'ci skip')" runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ubuntu-latest] # CPython 3.9 is in quick-test python-version: ['3.7', '3.10', 3.11-dev, pypy-3.7, pypy-3.8] run-tests-ext: [sh] include: # atleast one of each CPython/PyPy tests must be in windows - os: windows-latest python-version: '3.8' run-tests-ext: bat - os: windows-latest python-version: pypy-3.9 run-tests-ext: bat steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install pytest run: pip install pytest - name: Run tests continue-on-error: False run: ./devscripts/run_tests.${{ matrix.run-tests-ext }} core # Linter is in quick-test yt-dlp-2022.08.19/.github/workflows/download.yml000066400000000000000000000026621427755243700213050ustar00rootroot00000000000000name: Download Tests on: [push, pull_request] jobs: quick: name: Quick Download Tests if: "contains(github.event.head_commit.message, 'ci run dl')" runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Set up Python uses: actions/setup-python@v4 with: python-version: 3.9 - name: Install test requirements run: pip install pytest - name: Run tests continue-on-error: true run: ./devscripts/run_tests.sh download full: name: Full Download Tests if: "contains(github.event.head_commit.message, 'ci run dl all')" runs-on: ${{ matrix.os }} strategy: fail-fast: true matrix: os: [ubuntu-latest] python-version: ['3.7', '3.10', 3.11-dev, pypy-3.7, pypy-3.8] run-tests-ext: [sh] include: # atleast one of each CPython/PyPy tests must be in windows - os: windows-latest python-version: '3.8' run-tests-ext: bat - os: windows-latest python-version: pypy-3.9 run-tests-ext: bat steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install pytest run: pip install pytest - name: Run tests continue-on-error: true run: ./devscripts/run_tests.${{ matrix.run-tests-ext }} download yt-dlp-2022.08.19/.github/workflows/quick-test.yml000066400000000000000000000016261427755243700215660ustar00rootroot00000000000000name: Quick Test on: [push, pull_request] jobs: tests: name: Core Test if: "!contains(github.event.head_commit.message, 'ci skip all')" runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Set up Python uses: actions/setup-python@v4 with: python-version: 3.9 - name: Install test requirements run: pip install pytest pycryptodomex - name: Run tests run: ./devscripts/run_tests.sh core flake8: name: Linter if: "!contains(github.event.head_commit.message, 'ci skip all')" runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Set up Python uses: actions/setup-python@v4 with: python-version: 3.9 - name: Install flake8 run: pip install flake8 - name: Make lazy extractors run: python devscripts/make_lazy_extractors.py - name: Run flake8 run: flake8 . 
yt-dlp-2022.08.19/.gitignore000066400000000000000000000023441427755243700153430ustar00rootroot00000000000000# Config *.conf cookies *cookies.txt .netrc # Downloaded *.annotations.xml *.aria2 *.description *.dump *.frag *.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl .cache/ *.3gp *.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.jpeg *.jpg *.m4a *.mpga *.m4v *.mhtml *.mkv *.mov *.mp3 *.mp4 *.ogg *.opus *.png *.sbv *.srt *.swf *.swp *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp # Allow config/media files in testdata !test/** # Python *.pyc *.pyo .pytest_cache wine-py2exe/ py2exe.log build/ dist/ zip/ tmp/ venv/ completions/ # Misc *~ *.DS_Store *.kate-swp MANIFEST test/local_parameters.json .coverage cover/ secrets/ updates_key.pem *.egg-info .tox *.class *.isorted # Generated AUTHORS README.txt .mailmap *.1 *.bash-completion *.fish *.tar.gz *.zsh *.spec test/testdata/sigs/player-*.js # Binary /youtube-dl /youtube-dlc /yt-dlp yt-dlp.zip *.exe # Text Editor / IDE .idea *.iml .vscode *.sublime-* *.code-workspace # Lazy extractors */extractor/lazy_extractors.py # Plugins ytdlp_plugins/extractor/* !ytdlp_plugins/extractor/__init__.py !ytdlp_plugins/extractor/sample.py ytdlp_plugins/postprocessor/* !ytdlp_plugins/postprocessor/__init__.py !ytdlp_plugins/postprocessor/sample.py yt-dlp-2022.08.19/CONTRIBUTING.md000066400000000000000000001032641427755243700156070ustar00rootroot00000000000000# CONTRIBUTING TO YT-DLP - [OPENING AN ISSUE](#opening-an-issue) - [Is the description of the issue itself sufficient?](#is-the-description-of-the-issue-itself-sufficient) - [Are you using the latest version?](#are-you-using-the-latest-version) - [Is the issue already documented?](#is-the-issue-already-documented) - [Why are existing options not enough?](#why-are-existing-options-not-enough) - [Have you read and understood the changes, between youtube-dl and yt-dlp](#have-you-read-and-understood-the-changes-between-youtube-dl-and-yt-dlp) - [Is there enough context in your bug report?](#is-there-enough-context-in-your-bug-report) - [Does the issue involve one problem, and one problem only?](#does-the-issue-involve-one-problem-and-one-problem-only) - [Is anyone going to need the feature?](#is-anyone-going-to-need-the-feature) - [Is your question about yt-dlp?](#is-your-question-about-yt-dlp) - [Are you willing to share account details if needed?](#are-you-willing-to-share-account-details-if-needed) - [Is the website primarily used for piracy](#is-the-website-primarily-used-for-piracy) - [DEVELOPER INSTRUCTIONS](#developer-instructions) - [Adding new feature or making overarching changes](#adding-new-feature-or-making-overarching-changes) - [Adding support for a new site](#adding-support-for-a-new-site) - [yt-dlp coding conventions](#yt-dlp-coding-conventions) - [Mandatory and optional metafields](#mandatory-and-optional-metafields) - [Provide fallbacks](#provide-fallbacks) - [Regular expressions](#regular-expressions) - [Long lines policy](#long-lines-policy) - [Quotes](#quotes) - [Inline values](#inline-values) - [Collapse fallbacks](#collapse-fallbacks) - [Trailing parentheses](#trailing-parentheses) - [Use convenience conversion and parsing functions](#use-convenience-conversion-and-parsing-functions) - [My pull request is labeled pending-fixes](#my-pull-request-is-labeled-pending-fixes) - [EMBEDDING YT-DLP](README.md#embedding-yt-dlp) # OPENING AN ISSUE Bugs and suggestions should be reported at: 
[yt-dlp/yt-dlp/issues](https://github.com/yt-dlp/yt-dlp/issues). Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in our [Discord server](https://discord.gg/H5MNcFW63r).

**Please include the full output of yt-dlp when run with `-vU`**, i.e. **add** the `-vU` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
```
$ yt-dlp -vU
[debug] Command-line config: ['-v', 'demo.com']
[debug] Encodings: locale UTF-8, fs utf-8, out utf-8, pref UTF-8
[debug] yt-dlp version 2021.09.25 (zip)
[debug] Python version 3.8.10 (CPython 64bit) - Linux-5.4.0-74-generic-x86_64-with-glibc2.29
[debug] exe versions: ffmpeg 4.2.4, ffprobe 4.2.4
[debug] Proxy map: {}
Current Build Hash 25cc412d1d3c0725a1f2f5b7e4682f6fb40e6d15f7024e96f7afd572e9919535
yt-dlp is up to date (2021.09.25)
...
```
**Do not post screenshots of verbose logs; only plain text is acceptable.**

The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore will be closed as `incomplete`.

The templates provided for the issues should be completed and **not removed**; this helps aid the resolution of the issue.

Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):

### Is the description of the issue itself sufficient?

We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources.

So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious

- What the problem is
- How it could be fixed
- What your proposed solution would look like

If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. We often get frustrated by these issues, since the only possible way for us to move forward on them is to ask for clarification over and over.

For bug reports, this means that your report should contain the **complete** output of yt-dlp when called with the `-vU` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.

If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--write-pages` and upload the `.dump` files you get [somewhere](https://gist.github.com).

**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.

### Are you using the latest version?

Before reporting any issue, type `yt-dlp -U`. This should report that you're up-to-date. This goes for feature requests as well.
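For example, an up-to-date install reports something like the following (the output format is taken from the verbose logs shown in the issue templates of this repository; the version shown will differ):

```
$ yt-dlp -U
Latest version: 2022.08.19, Current version: 2022.08.19
yt-dlp is up to date (2022.08.19)
```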
Search at the top of the window or browse the [GitHub Issues](https://github.com/yt-dlp/yt-dlp/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2021.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.

It is also helpful to check whether the issue has already been documented in the [youtube-dl issue tracker](https://github.com/ytdl-org/youtube-dl/issues). If similar issues have already been reported in youtube-dl (but not in our issue tracker), links to them can be included in your issue report here.

### Why are existing options not enough?

Before requesting a new feature, please have a quick peek at [the list of supported options](README.md#usage-and-options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.

### Have you read and understood the changes between youtube-dl and yt-dlp

There are many changes between youtube-dl and yt-dlp [(changes to default behavior)](README.md#differences-in-default-behavior), and some of the options available have a different behaviour in yt-dlp, or have been removed altogether [(list of changes to options)](README.md#deprecated-options). Make sure you have read and understood the differences in the options and how this may impact your downloads before opening an issue.

### Is there enough context in your bug report?

People want to solve problems, and often think they do us a favor by breaking down their larger problems (e.g. wanting to skip already downloaded files) to a specific request (e.g. requesting us to look whether the file exists before downloading the info page). However, what often happens is that they break down the problem into two steps: one simple, and one impossible (or extremely complicated).

We are then presented with a very complicated request when the original problem could be solved far easier, e.g. by recording the downloaded video IDs in a separate file. To avoid this, you must include the greater context where it is non-obvious. In particular, every feature request that does not consist of adding support for a new site should contain a use case scenario that explains in what situation the missing feature would be useful.

### Does the issue involve one problem, and one problem only?

Some of our users seem to think there is a limit on the number of issues they can or should open. There is no such limit. While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones.

In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, White house podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of yt-dlp that are not immediately related to the feature at hand.
Do not post reports of a network error alongside the request for a new video service.

### Is anyone going to need the feature?

Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.

### Is your question about yt-dlp?

Some bug reports are completely unrelated to yt-dlp and relate to a different, or even the reporter's own, application. Please make sure that you are actually using yt-dlp. If you are using a UI for yt-dlp, report the bug to the maintainer of the actual application providing the UI. In general, if you are unable to provide the verbose log, you should not be opening the issue here.

If the issue is with `youtube-dl` (the project yt-dlp is forked from) and not with yt-dlp, the issue should be raised in the youtube-dl project.

### Are you willing to share account details if needed?

The maintainers and potential contributors of the project often do not have an account for the website you are asking support for. So any developer interested in solving your issue may ask you for account details. It is your personal discretion whether you are willing to share the account in order for the developer to try and solve your issue. However, if you are unwilling or unable to provide details, they obviously cannot work on the issue and it cannot be solved unless some developer who both has an account and is willing/able to contribute decides to solve it.

By sharing an account with anyone, you agree to bear all risks associated with it. The maintainers and yt-dlp can't be held responsible for any misuse of the credentials.

While these steps won't necessarily ensure that no misuse of the account takes place, these are still some good practices to follow:

- Look for people with `Member` (maintainers of the project) or `Contributor` (people who have previously contributed code) tag on their messages.
- Change the password before sharing the account to something random (use [this](https://passwordsgenerator.net/) if you don't have a random password generator).
- Change the password after receiving the account back.

### Is the website primarily used for piracy?

We follow [youtube-dl's policy](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free) of not supporting services that are primarily used for infringing copyright. Additionally, it has been decided not to support porn sites that specialize in deepfakes. We also cannot support any service that serves only [DRM protected content](https://en.wikipedia.org/wiki/Digital_rights_management).

# DEVELOPER INSTRUCTIONS

Most users do not need to build yt-dlp and can [download the builds](https://github.com/yt-dlp/yt-dlp/releases) or get them via [the other installation methods](README.md#installation).

To run yt-dlp as a developer, you don't need to build anything either. Simply execute

    python -m yt_dlp

To run the tests, simply invoke your favorite test runner, or execute a test file directly; any of the following work:

    python -m unittest discover
    python test/test_download.py
    nosetests
    pytest

See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.

If you want to create a build of yt-dlp yourself, you can follow the instructions [here](README.md#compile).

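For quick experiments while developing, you can also drive yt-dlp from Python directly (a minimal sketch using the public `YoutubeDL` API; see [README.md#embedding-yt-dlp](README.md#embedding-yt-dlp) for the full documentation — the URL below is just the example URL used earlier in this guide):

```python
from yt_dlp import YoutubeDL

# 'simulate' makes yt-dlp extract metadata without writing any file
with YoutubeDL({'simulate': True}) as ydl:
    info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc')
    print(info['title'])
```
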
## Adding new feature or making overarching changes

Before you start writing code for implementing a new feature, open an issue explaining your feature request and at least one use case. This allows the maintainers to decide whether such a feature is desired for the project in the first place, and will provide an avenue to discuss some implementation details. If you open a pull request for a new feature without discussing with us first, do not be surprised when we ask for large changes to the code, or even reject it outright.

The same applies to changes to the documentation, code style, or overarching changes to the architecture.

## Adding support for a new site

If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](https://www.github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. yt-dlp does **not support** such sites, thus pull requests adding support for them **will be rejected**.

After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):

1. [Fork this repository](https://github.com/yt-dlp/yt-dlp/fork)
1. Check out the source code with:

        git clone git@github.com:YOUR_GITHUB_USERNAME/yt-dlp.git

1. Start a new git branch with

        cd yt-dlp
        git checkout -b yourextractor

1. Start with this simple template and save it to `yt_dlp/extractor/yourextractor.py`:

    ```python
    from .common import InfoExtractor


    class YourExtractorIE(InfoExtractor):
        _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
        _TESTS = [{
            'url': 'https://yourextractor.com/watch/42',
            'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
            'info_dict': {
                'id': '42',
                'ext': 'mp4',
                'title': 'Video title goes here',
                'thumbnail': r're:^https?://.*\.jpg$',
                # TODO more properties, either as:
                # * A value
                # * MD5 checksum; start the string with md5:
                # * A regular expression; start the string with re:
                # * Any Python type, e.g. int or float
            }
        }]

        def _real_extract(self, url):
            video_id = self._match_id(url)
            webpage = self._download_webpage(url, video_id)

            # TODO more code goes here, for example ...
            title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')

            return {
                'id': video_id,
                'title': title,
                'description': self._og_search_description(webpage),
                'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
                # TODO more properties (see yt_dlp/extractor/common.py)
            }
    ```

1. Add an import in [`yt_dlp/extractor/_extractors.py`](yt_dlp/extractor/_extractors.py). Note that the class name must end with `IE`.
1. Run `python test/test_download.py TestDownload.test_YourExtractor` (note that `YourExtractor` doesn't end with `IE`). This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, the tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not counted. You can also run all the tests in one go with `TestDownload.test_YourExtractor_all`
1. Make sure you have at least one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running.
1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L91-L426). Add tests and code for as many as you want.
1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):

        $ flake8 yt_dlp/extractor/yourextractor.py

1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython and PyPy for Python 3.7 and above. Backward compatibility is not required for even older versions of Python.
1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:

        $ git add yt_dlp/extractor/_extractors.py
        $ git add yt_dlp/extractor/yourextractor.py
        $ git commit -m '[yourextractor] Add extractor'
        $ git push origin yourextractor

1. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.

In any case, thank you very much for your contributions!

**Tip:** To test extractors that require login information, create a file `test/local_parameters.json` and add `"usenetrc": true` or your username and password in it:
```json
{
    "username": "your user name",
    "password": "your password"
}
```

## yt-dlp coding conventions

This section introduces guidelines for writing idiomatic, robust and future-proof extractor code.

Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old yt-dlp versions working.
Even though this breakage issue may be easily fixed by a new version of yt-dlp, this could take some time, during which the extractor will remain broken.

### Mandatory and optional metafields

For extraction to work yt-dlp relies on metadata your extractor extracts and provides to yt-dlp expressed by an [information dictionary](yt_dlp/extractor/common.py#L91-L426) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by yt-dlp:

 - `id` (media identifier)
 - `title` (media title)
 - `url` (media download URL) or `formats`

The aforementioned metafields are the critical data that the extraction does not make any sense without, and if any of them fail to be extracted, the extractor is considered completely broken. While all extractors must return a `title`, they must also allow its extraction to be non-fatal.

For pornographic sites, an appropriate `age_limit` must also be returned.

The extractor is allowed to return the info dict without url or formats in some special cases if it allows the user to extract useful information with `--ignore-no-formats-error` - e.g. when the video is a live stream that has not started yet.

[Any field](yt_dlp/extractor/common.py#L219-L426) apart from the aforementioned ones is considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.

#### Example

Say you have some source dictionary `meta` that you've fetched as JSON with HTTP request and it has a key `summary`:

```python
meta = self._download_json(url, video_id)
```

Assume at this point `meta`'s layout is:

```python
{
    "summary": "some fancy summary text",
    "user": {
        "name": "uploader name"
    },
    ...
}
```

Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional meta field you should be prepared for this key to be missing from the `meta` dict, so you should extract it like:

```python
description = meta.get('summary')  # correct
```

and not like:

```python
description = meta['summary']  # incorrect
```

The latter will break the extraction process with `KeyError` if `summary` disappears from `meta` at some later time but with the former approach extraction will just go ahead with `description` set to `None` which is perfectly fine (remember `None` is equivalent to the absence of data).

If the data is nested, do not use `.get` chains, but instead make use of `traverse_obj`.

Considering the above `meta` again, assume you want to extract `["user"]["name"]` and put it in the resulting info dict as `uploader`:

```python
uploader = traverse_obj(meta, ('user', 'name'))  # correct
```

and not like:

```python
uploader = meta['user']['name']  # incorrect
```
or
```python
uploader = meta.get('user', {}).get('name')  # incorrect
```
or
```python
uploader = try_get(meta, lambda x: x['user']['name'])  # old utility
```

Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:

```python
description = self._search_regex(
    r'<div[^>]+id="title"[^>]*>([^<]+)<',
    webpage, 'description', fatal=False)
```

With `fatal` set to `False`, if `_search_regex` fails to extract `description` it will emit a warning and continue extraction.

You can also pass `default=<some fallback value>`, for example:

```python
description = self._search_regex(
    r'<div[^>]+id="title"[^>]*>([^<]+)<',
    webpage, 'description', default=None)
```

On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present.

Another thing to remember is not to try to iterate over `None`.

Say you extracted a list of thumbnails into `thumbnail_data` and want to iterate over them:

```python
thumbnail_data = data.get('thumbnails') or []
thumbnails = [{
    'url': item['url']
} for item in thumbnail_data]  # correct
```

and not like:

```python
thumbnail_data = data.get('thumbnails')
thumbnails = [{
    'url': item['url']
} for item in thumbnail_data]  # incorrect
```

In this case, `thumbnail_data` will be `None` if the field was not found and this will cause the loop `for item in thumbnail_data` to raise a fatal error. Using `or []` avoids this error and results in setting an empty list in `thumbnails` instead.

### Provide fallbacks

When extracting metadata try to do so from multiple sources. For example if `title` is present in several places, try extracting from at least some of them. This makes it more future-proof in case some of the sources become unavailable.

#### Example

Say `meta` from the previous example has a `title` and you are about to extract it like:

```python
title = meta.get('title')
```

If `title` disappears from `meta` in future due to some changes on the hoster's side the title extraction would fail.

Assume that you have some other source you can extract `title` from, for example the `og:title` HTML meta tag of a `webpage`. In this case you can provide a fallback like:

```python
title = meta.get('title') or self._og_search_title(webpage)
```

This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`, making the extractor more robust.

### Regular expressions

#### Don't capture groups you don't use

A capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be made non-capturing.

##### Example

Don't capture the id attribute name here since you can't use it for anything anyway.

Correct:

```python
r'(?:id|ID)=(?P<id>\d+)'
```

Incorrect:

```python
r'(id|ID)=(?P<id>\d+)'
```

#### Make regular expressions relaxed and flexible

When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.

##### Example

Say you need to extract `title` from the following HTML code:

```html
<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">some fancy title</span>
```

The code for that task should look similar to:

```python
title = self._search_regex(  # correct
    r'<span[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
```

which tolerates potential changes in the `style` attribute's value. Or even better:

```python
title = self._search_regex(  # correct
    r'<span[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)',
    webpage, 'title', group='title')
```

which also handles single quotes in addition to double quotes.

The code definitely should not look like:

```python
title = self._search_regex(  # incorrect
    r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>',
    webpage, 'title', group='title')
```

or even

```python
title = self._search_regex(  # incorrect
    r'<span style=".*?" class="title">(.*?)</span>',
    webpage, 'title', group='title')
```

Here the presence or absence of other attributes including `style` is irrelevant for the data we need, and so the regex must not depend on it.

#### Keep the regular expressions as simple as possible, but no simpler

Since many extractors deal with unstructured data provided by websites, we will often need to use very complex regular expressions. You should try to use the *simplest* regex that can accomplish what you want. In other words, each part of the regex must have a reason for existing. If you can take out a symbol and the functionality does not change, the symbol should not be there.

##### Example

Correct:

```python
_VALID_URL = r'https?://(?:www\.)?website\.com/(?:[^/]+/){3,4}(?P<display_id>[^/]+)_(?P<id>\d+)'
```

Incorrect:

```python
_VALID_URL = r'https?:\/\/(?:www\.)?website\.com\/[^\/]+/[^\/]+/[^\/]+(?:\/[^\/]+)?\/(?P<display_id>[^\/]+)_(?P<id>\d+)'
```

#### Do not misuse `.` and use the correct quantifiers (`+*?`)

Avoid creating regexes that over-match because of wrong use of quantifiers. Also try to avoid non-greedy matching (`?`) where possible since it could easily result in [catastrophic backtracking](https://www.regular-expressions.info/catastrophic.html)

Correct:

```python
title = self._search_regex(r'<span\b[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
```

Incorrect:

```python
title = self._search_regex(r'<span\b.*class="title".*>(.+?)<', webpage, 'title')
```

### Long lines policy

There is a soft limit to keep lines of code under 100 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse. Sometimes, it may be reasonable to go up to 120 characters and sometimes even 80 can be unreadable. Keep in mind that this is not a hard limit and is just one of many tools to make the code more readable.

For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit. Conversely, don't unnecessarily split small lines further. As a rule of thumb, if removing the line split keeps the code under 80 characters, it should be a single line.

##### Examples

Correct:

```python
'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
```

Incorrect:

```python
'https://www.youtube.com/watch?v=FqZTN594JQw&list='
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
```

Correct:

```python
uploader = traverse_obj(info, ('uploader', 'name'), ('author', 'fullname'))
```

Incorrect:

```python
uploader = traverse_obj(
    info, ('uploader', 'name'), ('author', 'fullname'))
```

Correct:

```python
formats = self._extract_m3u8_formats(
    m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls',
    note='Downloading HD m3u8 information', errnote='Unable to download HD m3u8 information')
```

Incorrect:

```python
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', note='Downloading HD m3u8 information', errnote='Unable to download HD m3u8 information')
```

### Quotes

Always use single quotes for strings (even if the string has `'`) and double quotes for docstrings. Use `'''` only for multi-line strings. An exception can be made if a string has multiple single quotes in it and escaping makes it *significantly* harder to read. For f-strings, you can use double quotes on the inside. But avoid f-strings that have too many quotes inside.

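To illustrate (a hypothetical snippet following these conventions; `describe_format` and the sample field values are illustrative, not code from the codebase):

```python
def describe_format(fmt):
    """Return a short human-readable note for a format dict"""  # double quotes for docstrings
    note = 'what\'s extracted'  # single quotes, even though the string contains a '
    return f'{fmt["format_id"]}: {note}'  # double quotes are fine inside an f-string
```
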
### Inline values

Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult.

#### Examples

Correct:

```python
return {
    'title': self._html_search_regex(r'<h1>([^<]+)</h1>', webpage, 'title'),
    # ...some lines of code...
}
```

Incorrect:

```python
TITLE_RE = r'<h1>([^<]+)</h1>'
# ...some lines of code...
title = self._html_search_regex(TITLE_RE, webpage, 'title')
# ...some lines of code...
return {
    'title': title,
    # ...some lines of code...
}
```

### Collapse fallbacks

Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns.

#### Example

Good:

```python
description = self._html_search_meta(
    ['og:description', 'description', 'twitter:description'],
    webpage, 'description', default=None)
```

Unwieldy:

```python
description = (
    self._og_search_description(webpage, default=None)
    or self._html_search_meta('description', webpage, default=None)
    or self._html_search_meta('twitter:description', webpage, default=None))
```

Methods supporting list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`.

### Trailing parentheses

Always move trailing parentheses used for grouping/functions after the last argument. On the other hand, a multi-line literal list/tuple/dict/set should be closed on a new line. Generators and list/dict comprehensions may use either style.

#### Examples

Correct:

```python
url = traverse_obj(info, (
    'context', 'dispatcher', 'stores', 'VideoTitlePageStore', 'data', 'video', 0, 'VideoUrlSet', 'VideoUrl'), list)
```
Correct:
```python
url = traverse_obj(
    info,
    ('context', 'dispatcher', 'stores', 'VideoTitlePageStore', 'data', 'video', 0, 'VideoUrlSet', 'VideoUrl'),
    list)
```

Incorrect:

```python
url = traverse_obj(
    info,
    ('context', 'dispatcher', 'stores', 'VideoTitlePageStore', 'data', 'video', 0, 'VideoUrlSet', 'VideoUrl'),
    list
)
```

Correct:

```python
f = {
    'url': url,
    'format_id': format_id,
}
```

Incorrect:

```python
f = {'url': url,
     'format_id': format_id}
```

Correct:

```python
formats = [process_formats(f) for f in format_data
           if f.get('type') in ('hls', 'dash', 'direct') and f.get('downloadable')]
```

Correct:

```python
formats = [
    process_formats(f) for f in format_data
    if f.get('type') in ('hls', 'dash', 'direct') and f.get('downloadable')
]
```

### Use convenience conversion and parsing functions

Wrap all extracted numeric data into safe functions from [`yt_dlp/utils.py`](yt_dlp/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. Use `url_or_none` for safe URL processing.

Use `traverse_obj` and `try_call` (supersedes `dict_get` and `try_get`) for safe metadata extraction from parsed JSON.

Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.

Explore [`yt_dlp/utils.py`](yt_dlp/utils.py) for more useful convenience functions.
#### Examples

```python
description = traverse_obj(response, ('result', 'video', 'summary'), expected_type=str)
thumbnails = traverse_obj(response, ('result', 'thumbnails', ..., 'url'), expected_type=url_or_none)
video = traverse_obj(response, ('result', 'video', 0), default={}, expected_type=dict)
duration = float_or_none(video.get('durationMs'), scale=1000)
view_count = int_or_none(video.get('views'))
```

# My pull request is labeled pending-fixes

The `pending-fixes` label is added when there are changes requested to a PR. When the necessary changes are made, the label should be removed. However, despite our best efforts, it may sometimes happen that the maintainer did not see the changes or forgot to remove the label. If your PR is still marked as `pending-fixes` a few days after all requested changes have been made, feel free to ping the maintainer who labeled your issue and ask them to re-review and remove the label.

# EMBEDDING YT-DLP

See [README.md#embedding-yt-dlp](README.md#embedding-yt-dlp) for instructions on how to embed yt-dlp in another Python program.

yt-dlp-2022.08.19/CONTRIBUTORS

pukkandan (owner) shirt-dev (collaborator) coletdjnz/colethedj (collaborator) Ashish0804 (collaborator) nao20010128nao/Lesmiscore (collaborator) h-h-h-h pauldubois98 nixxo GreyAlien502 kyuyeunk siikamiika jbruchon alexmerkel glenn-slayden Unrud wporr mariuszskon ohnonot samiksome alxnull FelixFrog Zocker1999NET kurumigi bbepis animelover1984/horahoradev Pccode66 RobinD42 hseg DennyDai codeasashu teesid kevinoconnor7 damianoamatruda 2ShedsJackson CXwudi xtkoba llacb47 hheimbuerger B0pol lkho fstirlitz Lamieur tsukumijima Hadi0609 b5eff52 craftingmod tpikonen tripulse king-millez alex-gedeon hhirtz louie-github MinePlayersPE olifre rhsmachine/zenerdi0de nihil-admirari krichbanana ohmybahgosh nyuszika7h blackjack4494 pyx TpmKranz mzbaulhaque zackmark29 mbway zerodytrash wesnm pento rigstot dirkf funniray Jessecar96 jhwgh1968 kikuyan max-te nchilada pgaig PSlava stdedos u-spec-png Sipherdrakon kidonng smege1001 tandy1000 IONECarter capntrips mrfade ParadoxGBB wlritchi NeroBurner mahanstreamer alerikaisattera Derkades BunnyHelp i6t std-move Chocobozzz ouwou korli octotherp CeruleanSky zootedb0t chao813 ChillingPepper ConquerorDopy dalanmiller DigitalDJ f4pp3rk1ng gesa Jules-A makeworld-the-better-one MKSherbini mrx23dot poschi3 raphaeldore renalid sleaux-meaux sulyi tmarki Vangelis66 AjaxGb ajj8 jakubadamw jfogelman timethrow sarnoud Bojidarist 18928172992817182/gustaf nixklai smplayer-dev Zirro CrypticSignal flashdagger fractalf frafra kaz-us ozburo rhendric sdomi selfisekai stanoarn 0xA7404A/Aurora 4a1e2y5 aarubui chio0hai cntrl-s Deer-Spangle DEvmIb Grabien/MaximVol j54vc1bk mpeter50 mrpapersonic pabs3
staubichsauger xenova Yakabuff zulaport ehoogeveen-medweb PilzAdam zmousm iw0nderhow unit193 TwoThousandHedgehogs/KathrynElrod Jertzukka cypheron Hyeeji bwildenhain C0D3D3V kebianizao Lapin0t abdullah-if DavidSkrundz mkubecek raleeper YuenSzeHong Sematre jaller94 r5d julien-hadleyjack git-anony-mouse mdawar trassshhub foghawk k3ns1n teridon mozlima timendum ischmidt20 CreaValix sian1468 arkamar hyano KiberInfinity tejing1 Bricio lazypete365 Aniruddh-J blackgear CplPwnies cyberfox1691 FestplattenSchnitzel hatienl0i261299 iphoting jakeogh lukasfink1 lyz-code marieell mdpauley Mipsters mxmehl ofkz P-reducible pycabbage regarten Ronnnny schn0sch s0u1h MrRawes cffswb danielyli 1-Byte mehq dzek69 aaearon panatexxa kmark un-def goggle Soebb Fam0r bohwaz dodrian vvto33 ca-za connercsbn diegorodriguezv ekangmonyet elyse0 evansp GiedriusS HE7086 JordanWeatherby m4tu4g MarwenDallel nevack putnam rand-net vertan Wikidepia Yipten moench-tegeder christoph-heinrich HobbyistDev LunarFang416 sbor23 aurelg adamanldo gamer191 vkorablin Burve mnn ZhymabekRoman mozbugbox aejdl ping sqrtNOT bubbleguuum darkxex miseran StefanLobbenmeier crazymoose77756 nomevi Brett824 pingiun dosy4ev EhtishamSabir Ferdi265 FirefoxMetzger ftk lamby llamasblade lockmatrix misaelaguayo odo2063 pritam20ps05 scy sheerluck AxiosDeminence DjesonPV eren-kemer freezboltz Galiley haobinliang Mehavoid winterbird-code yashkc2025 aldoridhoni bashonly jacobtruman masta79 palewire

yt-dlp-2022.08.19/Changelog.md

# Changelog

<!--
# Instructions for creating release

* Run `make doc`
* Update Changelog.md and CONTRIBUTORS
* Change "Based on ytdl" version in Readme.md if needed
* Commit as `Release <version>` and push to master
* Dispatch the workflow https://github.com/yt-dlp/yt-dlp/actions/workflows/build.yml on master
-->

### 2022.08.19

* Fix bug in `--download-archive`
* [jsinterp] **Fix for new youtube players** and related improvements by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
* [phantomjs] Add function to execute JS without a DOM by [MinePlayersPE](https://github.com/MinePlayersPE), [pukkandan](https://github.com/pukkandan)
* [build] Exclude devscripts from installs by [Lesmiscore](https://github.com/Lesmiscore)
* [cleanup] Misc fixes and cleanup
* [extractor/youtube] **Add fallback to phantomjs** for nsig
* [extractor/youtube] Fix error reporting of "Incomplete data"
* [extractor/youtube] Improve format sorting for IOS formats
* [extractor/youtube] Improve signature caching
* [extractor/instagram] Fix extraction by
[bashonly](https://github.com/bashonly), [pritam20ps05](https://github.com/pritam20ps05)
* [extractor/rai] Minor fix by [nixxo](https://github.com/nixxo)
* [extractor/rtbf] Fix stream extractor by [elyse0](https://github.com/elyse0)
* [extractor/SovietsCloset] Fix extractor by [ChillingPepper](https://github.com/ChillingPepper)
* [extractor/zattoo] Fix Zattoo resellers by [goggle](https://github.com/goggle)

### 2022.08.14

* Merge youtube-dl: Upto [commit/d231b56](https://github.com/ytdl-org/youtube-dl/commit/d231b56)
* [jsinterp] Handle **new youtube signature functions**
* [jsinterp] Truncate error messages
* [extractor] Fix format sorting of `channels`
* [ffmpeg] Disable avconv unless `--prefer-avconv`
* [ffmpeg] Smarter detection of ffprobe filename
* [embedthumbnail] Detect `libatomicparsley.so`
* [ThumbnailsConvertor] Fix conversion after `fixup_webp`
* [utils] Fix `get_compatible_ext`
* [build] Fix changelog
* [update] Set executable bit-mask by [pukkandan](https://github.com/pukkandan), [Lesmiscore](https://github.com/Lesmiscore)
* [devscripts] Fix import
* [docs] Consistent use of `e.g.` by [Lesmiscore](https://github.com/Lesmiscore)
* [cleanup] Misc fixes and cleanup
* [extractor/moview] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/parler] Add extractor by [palewire](https://github.com/palewire)
* [extractor/patreon] Ignore erroneous media attachments by [coletdjnz](https://github.com/coletdjnz)
* [extractor/truth] Add extractor by [palewire](https://github.com/palewire)
* [extractor/aenetworks] Add formats parameter by [jacobtruman](https://github.com/jacobtruman)
* [extractor/crunchyroll] Improve `_VALID_URL`s
* [extractor/doodstream] Add `wf` domain by [aldoridhoni](https://github.com/aldoridhoni)
* [extractor/facebook] Add reel support by [bashonly](https://github.com/bashonly)
* [extractor/MLB] New extractor by [ischmidt20](https://github.com/ischmidt20)
* [extractor/rai] Misc fixes by [nixxo](https://github.com/nixxo)
* [extractor/toggo] Improve `_VALID_URL` by [masta79](https://github.com/masta79)
* [extractor/tubitv] Extract additional formats by [shirt-dev](https://github.com/shirt-dev)
* [extractor/zattoo] Potential fix for resellers

### 2022.08.08

* **Remove Python 3.6 support**
* Determine merge container better by [pukkandan](https://github.com/pukkandan), [selfisekai](https://github.com/selfisekai)
* Framework for embed detection by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* Merge youtube-dl: Upto [commit/adb5294](https://github.com/ytdl-org/youtube-dl/commit/adb5294)
* `--compat-option no-live-chat` should disable danmaku
* Fix misleading DRM message
* Import ctypes only when necessary
* Minor bugfixes
* Reject entire playlists faster with `--match-filter`
* Remove filtered entries from `-J`
* Standardize retry mechanism
* Validate `--merge-output-format`
* [downloader] Add average speed to final progress line
* [extractor] Add field `audio_channels`
* [extractor] Support multiple archive ids for one video
* [ffmpeg] Set `ffmpeg_location` in a contextvar
* [FFmpegThumbnailsConvertor] Fix conversion from GIF
* [MetadataParser] Don't set `None` when the field didn't match
* [outtmpl] Smarter replacing of unsupported characters
* [outtmpl] Treat empty values as None in filenames
* [utils] sanitize_open: Allow any IO stream as stdout
* [build, devscripts] Add devscript to set a build variant
* [build] Improve build process by [shirt-dev](https://github.com/shirt-dev)
* [build] Update pyinstaller
* [devscripts] Create `utils` and refactor
* [docs] Clarify `best*`
* [docs] Fix bug report issue template
* [docs] Fix capitalization in references by [christoph-heinrich](https://github.com/christoph-heinrich)
* [cleanup, mhtml] Use imghdr
* [cleanup, utils] Consolidate known media extensions
* [cleanup] Misc fixes and cleanup
* [extractor/angel] Add extractor by [AxiosDeminence](https://github.com/AxiosDeminence)
* [extractor/dplay] Add MotorTrend extractor by [Sipherdrakon](https://github.com/Sipherdrakon)
* [extractor/harpodeon] Add extractor by [eren-kemer](https://github.com/eren-kemer)
* [extractor/holodex] Add extractor by [pukkandan](https://github.com/pukkandan), [sqrtNOT](https://github.com/sqrtNOT)
* [extractor/kompas] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/rai] Add raisudtirol extractor by [nixxo](https://github.com/nixxo)
* [extractor/tempo] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/youtube] **Fixes for third party client detection** by [coletdjnz](https://github.com/coletdjnz)
* [extractor/youtube] Add `live_status=post_live` by [lazypete365](https://github.com/lazypete365)
* [extractor/youtube] Extract more format info
* [extractor/youtube] Parse translated subtitles only when requested
* [extractor/youtube, extractor/twitch] Allow waiting for channels to become live
* [extractor/youtube, webvtt] Extract auto-subs from livestream VODs by [fstirlitz](https://github.com/fstirlitz), [pukkandan](https://github.com/pukkandan)
* [extractor/AbemaTVTitle] Implement paging by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/archiveorg] Improve handling of formats by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [extractor/arte] Fix title extraction
* [extractor/arte] **Move to v2 API** by [fstirlitz](https://github.com/fstirlitz), [pukkandan](https://github.com/pukkandan)
* [extractor/bbc] Fix news articles by [ajj8](https://github.com/ajj8)
* [extractor/camtasia] Separate into own extractor by [coletdjnz](https://github.com/coletdjnz)
* [extractor/cloudflarestream] Fix video_id padding by [haobinliang](https://github.com/haobinliang)
* [extractor/crunchyroll] Fix conversion of thumbnail from GIF
* [extractor/crunchyroll] Handle missing metadata correctly by [Burve](https://github.com/Burve), [pukkandan](https://github.com/pukkandan)
* [extractor/crunchyroll:beta] Extract timestamp and fix tests by [tejing1](https://github.com/tejing1)
* [extractor/crunchyroll:beta] Use streams API by [tejing1](https://github.com/tejing1)
* [extractor/doodstream] Support more domains by [Galiley](https://github.com/Galiley)
* [extractor/ESPN] Extract duration by [ischmidt20](https://github.com/ischmidt20)
* [extractor/FIFA] Change API endpoint by [Bricio](https://github.com/Bricio), [yashkc2025](https://github.com/yashkc2025)
* [extractor/globo:article] Remove false positives by [Bricio](https://github.com/Bricio)
* [extractor/Go] Extract timestamp by [ischmidt20](https://github.com/ischmidt20)
* [extractor/hidive] Fix cookie login when netrc is also given by [winterbird-code](https://github.com/winterbird-code)
* [extractor/html5] Separate into own extractor by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [extractor/ina] Improve extractor by [elyse0](https://github.com/elyse0)
* [extractor/NaverNow] Change endpoint by [ping](https://github.com/ping)
* [extractor/ninegag] Extract uploader by [DjesonPV](https://github.com/DjesonPV)
* [extractor/NovaPlay] Fix extractor by [Bojidarist](https://github.com/Bojidarist)
* [extractor/orf:radio] Rewrite extractors
* [extractor/patreon] Fix and improve extractors by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [extractor/rai] Fix RaiNews extraction by [nixxo](https://github.com/nixxo)
* [extractor/redbee] Unify and update extractors by [elyse0](https://github.com/elyse0)
* [extractor/stripchat] Fix _VALID_URL by [freezboltz](https://github.com/freezboltz)
* [extractor/tubi] Exclude playlists from playlist entries by [sqrtNOT](https://github.com/sqrtNOT)
* [extractor/tviplayer] Improve `_VALID_URL` by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/twitch] Extract chapters for single chapter VODs by [mpeter50](https://github.com/mpeter50)
* [extractor/vgtv] Support tv.vg.no by [sqrtNOT](https://github.com/sqrtNOT)
* [extractor/vidio] Support embed link by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/vk] Fix extractor by [Mehavoid](https://github.com/Mehavoid)
* [extractor/WASDTV:record] Fix `_VALID_URL`
* [extractor/xfileshare] Add Referer by [Galiley](https://github.com/Galiley)
* [extractor/YahooJapanNews] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/yandexmusic] Extract higher quality format
* [extractor/zee5] Update Device ID by [m4tu4g](https://github.com/m4tu4g)

### 2022.07.18

* Allow users to specify encoding in each config files by [Lesmiscore](https://github.com/Lesmiscore)
* Discard infodict from memory if no longer needed
* Do not allow extractors to return `None`
* Do not load system certificates when `certifi` is used
* Fix rounding of integers in format table
* Improve chapter sanitization
* Skip some fixup if remux/recode is needed by [Lesmiscore](https://github.com/Lesmiscore)
* Support `--no-progress` for `--wait-for-video`
* Fix bug in [612f2be](https://github.com/yt-dlp/yt-dlp/commit/612f2be5d3924540158dfbe5f25d841f04cff8c6)
* [outtmpl] Add alternate form `h` for HTML escaping
* [aes] Add multiple padding modes in CBC by [elyse0](https://github.com/elyse0)
* [extractor/common] Passthrough `errnote=False` to parsers
* [extractor/generic] Remove HEAD request
* [http] Ensure the file handle is always closed
* [ModifyChapters] Modify duration in infodict
* [options] Fix aliases to `--config-location`
* [utils] Fix `get_domain`
* [build] Consistent order for lazy extractors by [lamby](https://github.com/lamby)
* [build] Fix architecture suffix of executables by [odo2063](https://github.com/odo2063)
* [build] Improve `setup.py`
* [update] Do not check `_update_spec` when up to date
* [update] Prepare to remove Python 3.6 support
* [compat] Let PyInstaller detect _legacy module
* [devscripts/update-formulae] Do not change dependency section
* [test] Split download tests so they can be more easily run in CI
* [docs] Improve docstring of `download_ranges` by [FirefoxMetzger](https://github.com/FirefoxMetzger)
* [docs] Improve issue templates
* [build] Fix bug in [6d916fe](https://github.com/yt-dlp/yt-dlp/commit/6d916fe709a38e8c4c69b73843acf170b5165931)
* [cleanup, utils] Refactor parse_codecs
* [cleanup] Misc fixes and cleanup
* [extractor/acfun] Add extractors by [lockmatrix](https://github.com/lockmatrix)
* [extractor/Audiodraft] Add extractors by [Ashish0804](https://github.com/Ashish0804), [fstirlitz](https://github.com/fstirlitz)
* [extractor/cellebrite] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/detik] Add extractor by
[HobbyistDev](https://github.com/HobbyistDev)
* [extractor/hytale] Add extractor by [llamasblade](https://github.com/llamasblade), [pukkandan](https://github.com/pukkandan)
* [extractor/liputan6] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/mocha] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/rtl.lu] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/rtvsl] Add extractor by [iw0nderhow](https://github.com/iw0nderhow), [pukkandan](https://github.com/pukkandan)
* [extractor/StarTrek] Add extractor by [scy](https://github.com/scy)
* [extractor/syvdk] Add extractor by [misaelaguayo](https://github.com/misaelaguayo)
* [extractor/theholetv] Add extractor by [dosy4ev](https://github.com/dosy4ev)
* [extractor/TubeTuGraz] Add extractor by [Ferdi265](https://github.com/Ferdi265), [pukkandan](https://github.com/pukkandan)
* [extractor/tviplayer] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/wetv] Add extractors by [elyse0](https://github.com/elyse0)
* [extractor/wikimedia] Add extractor by [EhtishamSabir](https://github.com/EhtishamSabir), [pukkandan](https://github.com/pukkandan)
* [extractor/youtube] Fix duration check for post-live manifestless mode
* [extractor/youtube] More metadata for storyboards by [ftk](https://github.com/ftk)
* [extractor/bigo] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/BiliIntl] Fix subtitle extraction by [MinePlayersPE](https://github.com/MinePlayersPE)
* [extractor/crunchyroll] Improve `_VALID_URL`
* [extractor/fifa] Fix extractor by [ischmidt20](https://github.com/ischmidt20)
* [extractor/instagram] Fix post/story extractors by [pritam20ps05](https://github.com/pritam20ps05), [pukkandan](https://github.com/pukkandan)
* [extractor/iq] Set language correctly for Korean subtitles
* [extractor/MangoTV] Fix subtitle languages
* [extractor/Netverse] Improve playlist extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/philharmoniedeparis] Fix extractor by [sqrtNOT](https://github.com/sqrtNOT)
* [extractor/Trovo] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
* [extractor/twitch] Support storyboards for VODs by [ftk](https://github.com/ftk)
* [extractor/WatchESPN] Improve `_VALID_URL` by [IONECarter](https://github.com/IONECarter), [dirkf](https://github.com/dirkf)
* [extractor/WSJArticle] Fix video id extraction by [sqrtNOT](https://github.com/sqrtNOT)
* [extractor/Ximalaya] Fix extractors by [lockmatrix](https://github.com/lockmatrix)
* [cleanup, extractor/youtube] Fix tests by [sheerluck](https://github.com/sheerluck)

### 2022.06.29

* Fix `--downloader native`
* Fix `section_end` of clips
* Fix playlist error handling
* Sanitize `chapters`
* [extractor] Fix `_create_request` when headers is None
* [extractor] Fix empty `BaseURL` in MPD
* [ffmpeg] Write full output to debug on error
* [hls] Warn user when trying to download live HLS
* [options] Fix `parse_known_args` for `--`
* [utils] Fix inconsistent default handling between HTTP and HTTPS requests by [coletdjnz](https://github.com/coletdjnz)
* [build] Draft release until complete
* [build] Fix release tag commit
* [build] Standalone x64 builds for MacOS 10.9 by [StefanLobbenmeier](https://github.com/StefanLobbenmeier)
* [update] Ability to set a maximum version for specific variants
* [compat] Fix `compat.WINDOWS_VT_MODE`
* [compat] Remove deprecated functions from core code
* [compat] Remove more functions
* [cleanup, extractor] Reduce direct
use of `_downloader`
* [cleanup] Consistent style for file heads
* [cleanup] Fix some typos by [crazymoose77756](https://github.com/crazymoose77756)
* [cleanup] Misc fixes and cleanup
* [extractor/Scrolller] Add extractor by [LunarFang416](https://github.com/LunarFang416)
* [extractor/ViMP] Add playlist extractor by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel)
* [extractor/fuyin] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/livestreamfails] Add extractor by [nomevi](https://github.com/nomevi)
* [extractor/premiershiprugby] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/steam] Add broadcast extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/youtube] Mark videos as fully watched by [Brett824](https://github.com/Brett824)
* [extractor/CWTV] Extract thumbnail by [ischmidt20](https://github.com/ischmidt20)
* [extractor/ViMP] Add thumbnail and support more sites by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel)
* [extractor/dropout] Support cookies and login only as needed by [pingiun](https://github.com/pingiun), [pukkandan](https://github.com/pukkandan)
* [extractor/ertflix] Improve `_VALID_URL`
* [extractor/lbry] Use HEAD request for redirect URL by [flashdagger](https://github.com/flashdagger)
* [extractor/mediaset] Improve `_VALID_URL`
* [extractor/npr] Implement [e50c350](https://github.com/yt-dlp/yt-dlp/commit/e50c3500b43d80e4492569c4b4523c4379c6fbb2) differently
* [extractor/tennistv] Rewrite extractor by [pukkandan](https://github.com/pukkandan), [zenerdi0de](https://github.com/zenerdi0de)

### 2022.06.22.1

* [build] Fix updating homebrew formula

### 2022.06.22

* [**Deprecate support for Python 3.6**](https://github.com/yt-dlp/yt-dlp/issues/3764#issuecomment-1154051119)
* **Add option `--download-sections` to download video partially**
    * Chapter regex and time ranges are accepted, e.g.
`--download-sections *1:10-2:20`
* Add option `--alias`
* Add option `--lazy-playlist` to process entries as they are received
* Add option `--retry-sleep`
* Add slicing notation to `--playlist-items`
    * Adds support for negative indices and step
    * Add `-I` as alias for `--playlist-index`
    * Makes `--playlist-start`, `--playlist-end`, `--playlist-reverse`, `--no-playlist-reverse` redundant
* `--config-location -` to provide options interactively
* [build] Add Linux standalone builds
* [update] Self-restart after update
* Merge youtube-dl: Upto [commit/8a158a9](https://github.com/ytdl-org/youtube-dl/commit/8a158a9)
* Add `--no-update`
* Allow extractors to specify section_start/end for clips
* Do not print progress to `stderr` with `-q`
* Ensure pre-processor errors do not block video download
* Fix `--simulate --max-downloads`
* Improve error handling of bad config files
* Return an error code if update fails
* Fix bug in [3a408f9](https://github.com/yt-dlp/yt-dlp/commit/3a408f9d199127ca2626359e21a866a09ab236b3)
* [ExtractAudio] Allow conditional conversion
* [ModifyChapters] Fix repeated removal of small segments
* [ThumbnailsConvertor] Allow conditional conversion
* [cookies] Detect profiles for cygwin/BSD by [moench-tegeder](https://github.com/moench-tegeder)
* [dash] Show fragment count with `--live-from-start` by [flashdagger](https://github.com/flashdagger)
* [extractor] Add `_search_json` by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [extractor] Add `default` parameter to `_search_json` by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [extractor] Add dev option `--load-pages`
* [extractor] Handle `json_ld` with multiple `@type`s
* [extractor] Import `_ALL_CLASSES` lazily
* [extractor] Recognize `src` attribute from HTML5 media elements by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/generic] Revert e6ae51c123897927eb3c9899923d8ffd31c7f85d
* [f4m] Bugfix
* [ffmpeg] Check version lazily
* [jsinterp] Some optimizations and refactoring by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
* [utils] Improve performance using `functools.cache`
* [utils] Send HTTP/1.1 ALPN extension by [coletdjnz](https://github.com/coletdjnz)
* [utils] `ExtractorError`: Fix `exc_info`
* [utils] `ISO3166Utils`: Add `EU` and `AP`
* [utils] `Popen`: Refactor to use contextmanager
* [utils] `locked_file`: Fix for PyPy on Windows
* [update] Expose more functionality to API
* [update] Use `.git` folder to distinguish `source`/`unknown`
* [compat] Add `functools.cached_property`
* [test] Fix `FakeYDL` signatures by [coletdjnz](https://github.com/coletdjnz)
* [docs] Improvements
* [cleanup, ExtractAudio] Refactor
* [cleanup, downloader] Refactor `report_progress`
* [cleanup, extractor] Refactor `_download_...` methods
* [cleanup, extractor] Rename `extractors.py` to `_extractors.py`
* [cleanup, utils] Don't use kwargs for `format_field`
* [cleanup, build] Refactor
* [cleanup, docs] Re-indent "Usage and Options" section
* [cleanup] Deprecate `YoutubeDL.parse_outtmpl`
* [cleanup] Misc fixes and cleanup by [Lesmiscore](https://github.com/Lesmiscore), [MrRawes](https://github.com/MrRawes), [christoph-heinrich](https://github.com/christoph-heinrich), [flashdagger](https://github.com/flashdagger), [gamer191](https://github.com/gamer191), [kwconder](https://github.com/kwconder), [pukkandan](https://github.com/pukkandan)
* [extractor/DailyWire] Add extractors by
[HobbyistDev](https://github.com/HobbyistDev), [pukkandan](https://github.com/pukkandan)
* [extractor/fourzerostudio] Add extractors by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/GoogleDrive] Add folder extractor by [evansp](https://github.com/evansp), [pukkandan](https://github.com/pukkandan)
* [extractor/MirrorCoUK] Add extractor by [LunarFang416](https://github.com/LunarFang416), [pukkandan](https://github.com/pukkandan)
* [extractor/atscaleconfevent] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [extractor/freetv] Add extractor by [elyse0](https://github.com/elyse0)
* [extractor/ixigua] Add Extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/kicker.de] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/netverse] Add extractors by [HobbyistDev](https://github.com/HobbyistDev), [pukkandan](https://github.com/pukkandan)
* [extractor/playsuisse] Add extractor by [pukkandan](https://github.com/pukkandan), [sbor23](https://github.com/sbor23)
* [extractor/substack] Add extractor by [elyse0](https://github.com/elyse0)
* [extractor/youtube] **Support downloading clips**
* [extractor/youtube] Add `innertube_host` and `innertube_key` extractor args by [coletdjnz](https://github.com/coletdjnz)
* [extractor/youtube] Add warning for PostLiveDvr
* [extractor/youtube] Bring back `_extract_chapters_from_description`
* [extractor/youtube] Extract `comment_count` from webpage
* [extractor/youtube] Fix `:ytnotifications` extractor by [coletdjnz](https://github.com/coletdjnz)
* [extractor/youtube] Fix initial player response extraction by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [extractor/youtube] Fix live chat for videos with content warning by [coletdjnz](https://github.com/coletdjnz)
* [extractor/youtube] Make signature extraction non-fatal
* [extractor/youtube:tab] Detect `videoRenderer` in `_post_thread_continuation_entries`
* [extractor/BiliIntl] Fix metadata extraction
* [extractor/BiliIntl] Fix subtitle extraction by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/FranceCulture] Fix extractor by [aurelg](https://github.com/aurelg), [pukkandan](https://github.com/pukkandan)
* [extractor/PokemonSoundLibrary] Remove extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/StreamCZ] Fix extractor by [adamanldo](https://github.com/adamanldo), [dirkf](https://github.com/dirkf)
* [extractor/WatchESPN] Support free videos and BAM_DTC by [ischmidt20](https://github.com/ischmidt20)
* [extractor/animelab] Remove extractor by [gamer191](https://github.com/gamer191)
* [extractor/bloomberg] Change playback endpoint by [m4tu4g](https://github.com/m4tu4g)
* [extractor/ccc] Extract view_count by [vkorablin](https://github.com/vkorablin)
* [extractor/crunchyroll:beta] Fix extractor after API change by [Burve](https://github.com/Burve), [tejing1](https://github.com/tejing1)
* [extractor/curiositystream] Get `auth_token` from cookie by [mnn](https://github.com/mnn)
* [extractor/digitalconcerthall] Fix extractor by [ZhymabekRoman](https://github.com/ZhymabekRoman)
* [extractor/dropbox] Extract the correct `mountComponent`
* [extractor/dropout] Login is not mandatory
* [extractor/duboku] Fix for hostname change by [mozbugbox](https://github.com/mozbugbox)
* [extractor/espn] Add `WatchESPN` extractor by [ischmidt20](https://github.com/ischmidt20), [pukkandan](https://github.com/pukkandan)
* [extractor/expressen] Fix extractor by [aejdl](https://github.com/aejdl)
* [extractor/foxnews] Update embed extraction by [elyse0](https://github.com/elyse0)
* [extractor/ina] Fix extractor by [elyse0](https://github.com/elyse0)
* [extractor/iwara:user] Make paging better by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/jwplatform] Look for `data-video-jw-id`
* [extractor/lbry] Update livestream API by [flashdagger](https://github.com/flashdagger)
* [extractor/mediaset] Improve `_VALID_URL`
* [extractor/naver] Add `navernow` extractor by [ping](https://github.com/ping)
* [extractor/niconico:series] Fix extractor by [sqrtNOT](https://github.com/sqrtNOT)
* [extractor/npr] Use stream url from json-ld by [r5d](https://github.com/r5d)
* [extractor/pornhub] Extract `uploader_id` field by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/radiofrance] Add more radios by [bubbleguuum](https://github.com/bubbleguuum)
* [extractor/rumble] Detect JS embed
* [extractor/rumble] Extract subtitles by [fstirlitz](https://github.com/fstirlitz)
* [extractor/southpark] Add `southpark.lat` extractor by [darkxex](https://github.com/darkxex)
* [extractor/spotify:show] Fix extractor
* [extractor/tiktok] Detect embeds
* [extractor/tiktok] Extract `SIGI_STATE` by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan), [sulyi](https://github.com/sulyi)
* [extractor/tver] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/vevo] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/yahoo:gyao] Fix extractor
* [extractor/zattoo] Fix live streams by [miseran](https://github.com/miseran)
* [extractor/zdf] Improve format sorting by [elyse0](https://github.com/elyse0)

### 2022.05.18

* Add support for SSL client certificate authentication by [coletdjnz](https://github.com/coletdjnz), [dirkf](https://github.com/dirkf)
    * Adds `--client-certificate`, `--client-certificate-key`, `--client-certificate-password`
* Add `--match-filter -` to interactively ask for each video
* `--max-downloads` should obey `--break-per-input`
* Allow use of weaker ciphers with `--legacy-server-connect`
* Don't imply `-s` for later stages of `-O`
* Fix `--date today`
* Fix `--skip-unavailable-fragments`
* Fix color in `-q -F`
* Fix redirect HTTP method handling by [coletdjnz](https://github.com/coletdjnz)
* Improve `--clean-infojson`
* Remove warning for videos with an empty title
* Run `FFmpegFixupM3u8PP` for live-streams if needed
* Show name of downloader in verbose log
* [cookies] Allow `cookiefile` to be a text stream
* [cookies] Report progress when importing cookies
* [downloader/ffmpeg] Specify headers for each URL by [elyse0](https://github.com/elyse0)
* [fragment] Do not change chunk-size when `--test`
* [fragment] Make single thread download work for `--live-from-start` by [Lesmiscore](https://github.com/Lesmiscore)
* [hls] Fix `byte_range` for `EXT-X-MAP` fragment by [fstirlitz](https://github.com/fstirlitz)
* [http] Fix retrying on read timeout by [coletdjnz](https://github.com/coletdjnz)
* [ffmpeg] Fix features detection
* [EmbedSubtitle] Enable for more video extensions
* [EmbedThumbnail] Disable thumbnail conversion for mkv by [evansp](https://github.com/evansp)
* [EmbedThumbnail] Do not obey `-k`
* [EmbedThumbnail] Do not remove id3v1 tags
* [FFmpegMetadata] Remove `\0` from metadata
* [FFmpegMetadata] Remove filename from attached info-json
* [FixupM3u8] Obey `--hls-prefer-mpegts`
* [Sponsorblock] Don't crash when duration is unknown
* [XAttrMetadata] Refactor and document dependencies
* [extractor] Document netrc machines
* [extractor] Update `manifest_url`s after redirect by [elyse0](https://github.com/elyse0)
* [extractor] Update dash `manifest_url` after redirects by [elyse0](https://github.com/elyse0)
* [extractor] Use `classmethod`/`property` where possible
* [generic] Refactor `_extract_rss`
* [utils] `is_html`: Handle double BOM
* [utils] `locked_file`: Ignore illegal seek on `truncate` by [jakeogh](https://github.com/jakeogh)
* [utils] `sanitize_path`: Fix when path is empty string
* [utils] `write_string`: Workaround newline issue in `conhost`
* [utils] `certifi`: Make sure the pem file exists
* [utils] Fix `WebSocketsWrapper`
* [utils] `locked_file`: Do not give executable bits for newly created files by [Lesmiscore](https://github.com/Lesmiscore)
* [utils] `YoutubeDLCookieJar`: Detect and reject JSON file by [Lesmiscore](https://github.com/Lesmiscore)
* [test] Convert warnings into errors and fix some existing warnings by [fstirlitz](https://github.com/fstirlitz)
* [dependencies] Create module with all dependency imports
* [compat] Split into sub-modules by [fstirlitz](https://github.com/fstirlitz), [pukkandan](https://github.com/pukkandan)
* [compat] Implement `compat.imghdr`
* [build] Add `make uninstall` by [MrRawes](https://github.com/MrRawes)
* [build] Avoid use of `install -D`
* [build] Fix `Makefile` by [putnam](https://github.com/putnam)
* [build] Fix `--onedir` on macOS
* [build] Add more test-runners
* [cleanup] Deprecate some compat vars by [fstirlitz](https://github.com/fstirlitz), [pukkandan](https://github.com/pukkandan)
* [cleanup] Remove unused code paths, extractors, scripts and tests by [fstirlitz](https://github.com/fstirlitz)
* [cleanup] Upgrade syntax (`pyupgrade`) and sort imports (`isort`)
* [cleanup, docs, build] Misc fixes
* [BilibiliLive] Add extractor by [HE7086](https://github.com/HE7086), [pukkandan](https://github.com/pukkandan)
* [Fifa] Add Extractor by [Bricio](https://github.com/Bricio)
* [goodgame] Add extractor by [nevack](https://github.com/nevack)
* [gronkh] Add playlist extractors by [hatienl0i261299](https://github.com/hatienl0i261299)
* [icareus] Add extractor by [tpikonen](https://github.com/tpikonen), [pukkandan](https://github.com/pukkandan)
* [iwara] Add playlist extractors by [i6t](https://github.com/i6t)
* [Likee] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
* [masters] Add extractor by [m4tu4g](https://github.com/m4tu4g)
* [nebula] Add support for subscriptions by [hheimbuerger](https://github.com/hheimbuerger)
* [Podchaser] Add extractors by [connercsbn](https://github.com/connercsbn)
* [rokfin:search] Add extractor by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
* [youtube] Add `:ytnotifications` extractor by [krichbanana](https://github.com/krichbanana)
* [youtube] Add YoutubeStoriesIE (`ytstories:<channel UCID>`) by [coletdjnz](https://github.com/coletdjnz)
* [ZingMp3] Add chart and user extractors by [hatienl0i261299](https://github.com/hatienl0i261299)
* [adn] Update AES key by [elyse0](https://github.com/elyse0)
* [adobepass] Allow cookies for authenticating MSO
* [bandcamp] Exclude merch links by [Yipten](https://github.com/Yipten)
* [chingari] Fix archiving and tests
* [DRTV] Improve `_VALID_URL` by [vertan](https://github.com/vertan)
* [facebook] Improve thumbnail extraction by [Wikidepia](https://github.com/Wikidepia)
* [fc2] Stop heartbeating once FFmpeg finishes by [Lesmiscore](https://github.com/Lesmiscore)
* [Gofile] Fix extraction and support password-protected links by [mehq](https://github.com/mehq)
* [hotstar, cleanup] Refactor extractors
* [InfoQ] Don't fail on missing audio format by [evansp](https://github.com/evansp)
* [Jamendo] Extract more metadata by [evansp](https://github.com/evansp)
* [kaltura] Update API calls by [flashdagger](https://github.com/flashdagger)
* [KhanAcademy] Fix extractor by [rand-net](https://github.com/rand-net)
* [LCI] Fix extractor by [MarwenDallel](https://github.com/MarwenDallel)
* [lrt] Support livestreams by [GiedriusS](https://github.com/GiedriusS)
* [niconico] Set `expected_protocol` to a public field
* [Niconico] Support 2FA by [ekangmonyet](https://github.com/ekangmonyet)
* [Olympics] Fix format extension
* [openrec:movie] Enable fallback for /movie/ URLs
* [PearVideo] Add fallback for formats by [hatienl0i261299](https://github.com/hatienl0i261299)
* [radiko] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [rai] Add `release_year`
* [reddit] Prevent infinite loop
* [rokfin] Implement login by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
* [ruutu] Support hs.fi embeds by [tpikonen](https://github.com/tpikonen), [pukkandan](https://github.com/pukkandan)
* [spotify] Detect iframe embeds by [fstirlitz](https://github.com/fstirlitz)
* [telegram] Fix metadata extraction
* [tmz, cleanup] Update tests by [diegorodriguezv](https://github.com/diegorodriguezv)
* [toggo] Fix `_VALID_URL` by [ca-za](https://github.com/ca-za)
* [trovo] Update to new API by [nyuszika7h](https://github.com/nyuszika7h)
* [TVer] Improve extraction by [Lesmiscore](https://github.com/Lesmiscore)
* [twitcasting] Pass headers for each format by [Lesmiscore](https://github.com/Lesmiscore)
* [VideocampusSachsen] Improve extractor by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel)
* [vimeo] Fix extractors
* [wat] Fix extraction of multi-language videos and subtitles by [elyse0](https://github.com/elyse0)
* [wistia] Fix `_VALID_URL` by [dirkf](https://github.com/dirkf)
* [youtube, cleanup] Minor refactoring by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [youtube] Add piped instance URLs by [JordanWeatherby](https://github.com/JordanWeatherby)
* [youtube] Deprioritize auto-generated thumbnails
* [youtube] Deprioritize format 22 (often damaged)
* [youtube] Fix episode metadata extraction
* [zee5] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
* [zingmp3, cleanup] Refactor extractors
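
A minimal embedding sketch of the new client-certificate options. The `YoutubeDL` parameter names mirror the command-line flags, but verify them against the `YoutubeDL` docstring of your installed version; the file paths and password are placeholders.

```python
# Sketch: SSL client certificate authentication via the Python API.
# CLI equivalent:
#   yt-dlp --client-certificate cert.pem --client-certificate-key key.pem URL
from yt_dlp import YoutubeDL

ydl_opts = {
    'client_certificate': 'cert.pem',          # client certificate in PEM format (placeholder path)
    'client_certificate_key': 'key.pem',       # private key, if not bundled in the certificate
    'client_certificate_password': 'hunter2',  # only needed for an encrypted key (placeholder)
}

with YoutubeDL(ydl_opts) as ydl:
    ydl.download(['URL'])  # replace with a real URL
```
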

### 2022.04.08

* Use certificates from `certifi` if installed by [coletdjnz](https://github.com/coletdjnz)
* Treat multiple `--match-filters` as OR
* File locking improvements:
    * Do not lock downloading file on Windows
    * Do not prevent download if locking is unsupported
    * Do not truncate files before locking by [jakeogh](https://github.com/jakeogh), [pukkandan](https://github.com/pukkandan)
    * Fix non-blocking non-exclusive lock
* De-prioritize automatic-subtitles when no `--sub-lang` is given
* Exit after `--dump-user-agent`
* Fallback to video-only format when selecting by extension
* Fix `--abort-on-error` for subtitles
* Fix `--no-overwrite` for playlist infojson
* Fix `--print` with `--ignore-no-formats` when url is `None` by [flashdagger](https://github.com/flashdagger)
* Fix `--sleep-interval`
* Fix `--throttled-rate`
* Fix `autonumber`
* Fix case of `http_headers`
* Fix filepath sanitization in `--print-to-file`
* Handle float in `--wait-for-video`
* Ignore `mhtml` formats from `-f mergeall`
* Ignore format-specific fields in initial pass of `--match-filter`
* Protect stdout from unexpected progress and console-title
* Remove `Accept-Encoding` header from `std_headers` by [coletdjnz](https://github.com/coletdjnz)
* Remove incorrect warning for `--dateafter`
* Show warning when all media formats have DRM
* [downloader] Fix invocation of `HttpieFD`
* [http] Fix #3215
* [http] Reject broken range before request by [Lesmiscore](https://github.com/Lesmiscore), [Jules-A](https://github.com/Jules-A), [pukkandan](https://github.com/pukkandan)
* [fragment] Read downloaded fragments only when needed by [Lesmiscore](https://github.com/Lesmiscore)
* [http] Retry on more errors by [coletdjnz](https://github.com/coletdjnz)
* [mhtml] Fix fragments with absolute urls by [coletdjnz](https://github.com/coletdjnz)
* [extractor] Add `_perform_login` function
* [extractor] Allow control characters inside json
* [extractor] Support merging subtitles with data by [coletdjnz](https://github.com/coletdjnz)
* [generic] Extract subtitles from video.js by [Lesmiscore](https://github.com/Lesmiscore)
* [ffmpeg] Cache version data
* [FFmpegConcat] Ensure final directory exists
* [FfmpegMetadata] Write id3v1 tags
* [FFmpegVideoConvertor] Add more formats to `--remux-video`
* [FFmpegVideoConvertor] Ensure all streams are copied
* [MetadataParser] Validate outtmpl early
* [outtmpl] Fix replacement/default when used with alternate
* [outtmpl] Limit changes during sanitization
* [phantomjs] Fix bug
* [test] Add `test_locked_file`
* [utils] `format_decimal_suffix`: Fix for very large numbers by [s0u1h](https://github.com/s0u1h)
* [utils] `traverse_obj`: Allow filtering by value (see the sketch at the end of this section)
* [utils] Add `filter_dict`, `get_first`, `try_call`
* [utils] ExtractorError: Fix for older python versions
* [utils] WebSocketsWrapper: Allow omitting `__enter__` invocation by [Lesmiscore](https://github.com/Lesmiscore)
* [docs] Add an `.editorconfig` file by [fstirlitz](https://github.com/fstirlitz)
* [docs] Clarify the exact `BSD` license of dependencies by [MrRawes](https://github.com/MrRawes)
* [docs] Minor improvements by [pukkandan](https://github.com/pukkandan), [cffswb](https://github.com/cffswb), [danielyli](https://github.com/danielyli)
* [docs] Remove readthedocs
* [build] Add `requirements.txt` to pip distributions
* [cleanup, postprocessor] Create `_download_json`
* [cleanup, vimeo] Fix tests
* [cleanup] Misc fixes and minor cleanup
* [cleanup] Use `_html_extract_title`
* [AfreecaTV] Add `AfreecaTVUserIE` by [hatienl0i261299](https://github.com/hatienl0i261299)
* [arte] Add `format_note` to m3u8 formats
* [azmedien] Add TVO Online to supported hosts by [1-Byte](https://github.com/1-Byte)
* [BanBye] Add extractor by [mehq](https://github.com/mehq)
* [bilibili] Fix extraction of title with quotes by [dzek69](https://github.com/dzek69)
* [Craftsy] Add extractor by [Bricio](https://github.com/Bricio)
* [Cybrary] Add extractor by [aaearon](https://github.com/aaearon)
* [Huya] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
* [ITProTV] Add extractor by [aaearon](https://github.com/aaearon)
* [Jable] Add extractors by [mehq](https://github.com/mehq)
* [LastFM] Add extractors by [mehq](https://github.com/mehq)
* [Moviepilot] Add extractor by [panatexxa](https://github.com/panatexxa)
* [panopto] Add extractors by [coletdjnz](https://github.com/coletdjnz), [kmark](https://github.com/kmark)
* [PokemonSoundLibrary] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [WasdTV] Add extractor by [un-def](https://github.com/un-def), [hatienl0i261299](https://github.com/hatienl0i261299)
* [adobepass] Fix Suddenlink MSO by [CplPwnies](https://github.com/CplPwnies)
* [afreecatv] Match new vod url by [wlritchi](https://github.com/wlritchi)
* [AZMedien] Support `tv.telezueri.ch` by [goggle](https://github.com/goggle)
* [BiliIntl] Support user-generated videos by [wlritchi](https://github.com/wlritchi)
* [BRMediathek] Fix VALID_URL
* [crunchyroll:playlist] Implement beta API by [tejing1](https://github.com/tejing1)
* [crunchyroll] Fix inheritance
* [daftsex] Fix extractor by [Soebb](https://github.com/Soebb)
* [dailymotion] Support `geo.dailymotion.com` by [hatienl0i261299](https://github.com/hatienl0i261299)
* [ellentube] Extract subtitles from manifest
* [elonet] Rewrite extractor by [Fam0r](https://github.com/Fam0r), [pukkandan](https://github.com/pukkandan)
* [fptplay] Fix metadata extraction by [hatienl0i261299](https://github.com/hatienl0i261299)
* [FranceCulture] Support playlists by [bohwaz](https://github.com/bohwaz)
* [go, viu] Extract subtitles from the m3u8 manifest by [fstirlitz](https://github.com/fstirlitz)
* [Imdb] Improve extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
* [MangoTV] Improve extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
* [Nebula] Fix bug in 52efa4b31200119adaa8acf33e50b84fcb6948f0
* [niconico] Fix extraction of thumbnails and uploader (#3266)
* [niconico] Rewrite NiconicoIE by [Lesmiscore](https://github.com/Lesmiscore)
* [nitter] Minor fixes and update instance list by [foghawk](https://github.com/foghawk)
* [NRK] Extract timestamp by [hatienl0i261299](https://github.com/hatienl0i261299)
* [openrec] Download archived livestreams by [Lesmiscore](https://github.com/Lesmiscore)
* [openrec] Refactor extractors by [Lesmiscore](https://github.com/Lesmiscore)
* [panopto] Improve subtitle extraction and support slides by [coletdjnz](https://github.com/coletdjnz)
* [ParamountPlus, CBS] Change VALID_URL by [Sipherdrakon](https://github.com/Sipherdrakon)
* [ParamountPlusSeries] Support multiple pages by [dodrian](https://github.com/dodrian)
* [Piapro] Extract description with break lines by [Lesmiscore](https://github.com/Lesmiscore)
* [rai] Fix extraction of http formats by [nixxo](https://github.com/nixxo)
* [rumble] Unescape title
* [RUTV] Fix format sorting by [Lesmiscore](https://github.com/Lesmiscore)
* [ruutu] Detect embeds by [tpikonen](https://github.com/tpikonen)
* [tenplay] Improve extractor by [aarubui](https://github.com/aarubui)
* [TikTok] Fix URLs with user id by [hatienl0i261299](https://github.com/hatienl0i261299)
* [TikTokVM] Fix redirect to user URL
* [TVer] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [TVer] Support landing page by [vvto33](https://github.com/vvto33)
* [twitcasting] Don't return multi_video for archive with single hls manifest by [Lesmiscore](https://github.com/Lesmiscore)
* [veo] Fix `_VALID_URL`
* [Veo] Fix extractor by [i6t](https://github.com/i6t)
* [viki] Don't attempt to modify URLs with signature by [nyuszika7h](https://github.com/nyuszika7h)
* [viu] Fix bypass for preview by [zackmark29](https://github.com/zackmark29)
* [viu] Fix extractor by [zackmark29](https://github.com/zackmark29), [pukkandan](https://github.com/pukkandan)
* [web.archive:youtube] Make CDX API requests non-fatal by [coletdjnz](https://github.com/coletdjnz)
* [wget] Fix proxy by [kikuyan](https://github.com/kikuyan), [coletdjnz](https://github.com/coletdjnz)
* [xnxx] Add `xnxx3.com` by [rozari0](https://github.com/rozari0)
* [youtube] **Add new age-gate bypass** by [zerodytrash](https://github.com/zerodytrash), [pukkandan](https://github.com/pukkandan)
* [youtube] Add extractor-arg to skip auto-translated subs
* [youtube] Avoid false positives when detecting damaged formats
* [youtube] Detect DRM better by [shirt](https://github.com/shirt-dev)
* [youtube] Fix auto-translated automatic captions
* [youtube] Fix pagination of `membership` tab
* [youtube] Fix uploader for collaborative playlists by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Improve video upload date handling by [coletdjnz](https://github.com/coletdjnz)
* [youtube:api] Prefer minified JSON response by [coletdjnz](https://github.com/coletdjnz)
* [youtube:search] Support hashtag entries by [coletdjnz](https://github.com/coletdjnz)
* [youtube:tab] Fix duration extraction for shorts by [coletdjnz](https://github.com/coletdjnz)
* [youtube:tab] Minor improvements
* [youtube:tab] Return shorts url if video is a short by [coletdjnz](https://github.com/coletdjnz)
* [Zattoo] Fix extractors by [goggle](https://github.com/goggle)
* [Zingmp3] Fix signature by [hatienl0i261299](https://github.com/hatienl0i261299)
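
To illustrate `traverse_obj` filtering by value, a sketch assuming the current semantics, where a callable in the path receives `(key, value)` and acts as a predicate:

```python
from yt_dlp.utils import traverse_obj

info = {'formats': [
    {'format_id': '18', 'height': 360},
    {'format_id': '22', 'height': 720},
]}

# Keep only the format dicts whose height is at least 720;
# the callable is applied to each (index, value) pair of the list
hd = traverse_obj(info, ('formats', lambda _, fmt: fmt.get('height', 0) >= 720))
print(hd)  # [{'format_id': '22', 'height': 720}]
```
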

### 2022.03.08.1

* [cleanup] Refactor `__init__.py`
* [build] Fix bug

### 2022.03.08

* Merge youtube-dl: Upto [commit/6508688](https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a) (except NDR)
* Add regex operator and quoting to format filters by [lukasfink1](https://github.com/lukasfink1) (see the sketch at the end of this section)
* Add brotli content-encoding support by [coletdjnz](https://github.com/coletdjnz)
* Add pre-processor stage `after_filter`
* Better error message when no `--live-from-start` format
* Create necessary directories for `--print-to-file`
* Fill more fields for playlists by [Lesmiscore](https://github.com/Lesmiscore)
* Fix `-all` for `--sub-langs`
* Fix doubling of `video_id` in `ExtractorError`
* Fix for when stdout/stderr encoding is `None`
* Handle negative duration from extractor
* Implement `--add-header` without modifying `std_headers`
* Obey `--abort-on-error` for "ffmpeg not installed"
* Set `webpage_url_...` from `webpage_url` and not input URL
* Tolerate failure to `--write-link` due to unknown URL
* [aria2c] Add `--http-accept-gzip=true`
* [build] Update pyinstaller to 4.10 by [shirt](https://github.com/shirt-dev)
* [cookies] Update MacOS12 `Cookies.binarycookies` location by [mdpauley](https://github.com/mdpauley)
* [devscripts] Improve `prepare_manpage`
* [downloader] Do not use aria2c for non-native `m3u8`
* [downloader] Obey `--file-access-retries` when deleting/renaming by [ehoogeveen-medweb](https://github.com/ehoogeveen-medweb)
* [extractor] Allow `http_headers` to be specified for `thumbnails`
* [extractor] Extract subtitles from manifests for vimeo, globo, kaltura, svt by [fstirlitz](https://github.com/fstirlitz)
* [extractor] Fix for manifests without period duration by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
* [extractor] Support `--mark-watched` without `_NETRC_MACHINE` by [coletdjnz](https://github.com/coletdjnz)
* [FFmpegConcat] Abort on `--simulate`
* [FormatSort] Consider `acodec`=`ogg` as `vorbis`
* [fragment] Fix bugs around resuming with Range by [Lesmiscore](https://github.com/Lesmiscore)
* [fragment] Improve `--live-from-start` for YouTube livestreams by [Lesmiscore](https://github.com/Lesmiscore)
* [generic] Pass referer to extracted formats
* [generic] Set rss `guid` as video id by [Bricio](https://github.com/Bricio)
* [options] Better ambiguous option resolution
* [options] Rename `--clean-infojson` to `--clean-info-json`
* [SponsorBlock] Fixes for highlight and "full video labels" by [nihil-admirari](https://github.com/nihil-admirari)
* [Sponsorblock] Minor fixes by [nihil-admirari](https://github.com/nihil-admirari)
* [utils] Better traceback for `ExtractorError`
* [utils] Fix file locking for AOSP by [jakeogh](https://github.com/jakeogh)
* [utils] Improve file locking
* [utils] OnDemandPagedList: Do not download pages after error
* [utils] render_table: Fix character calculation for removing extra gap by [Lesmiscore](https://github.com/Lesmiscore)
* [utils] Use `locked_file` for `sanitize_open` by [jakeogh](https://github.com/jakeogh)
* [utils] Validate `DateRange` input
* [utils] WebSockets wrapper for non-async functions by [Lesmiscore](https://github.com/Lesmiscore)
* [cleanup] Don't pass protocol to `_extract_m3u8_formats` for live videos
* [cleanup] Remove extractors for some dead websites by [marieell](https://github.com/marieell)
* [cleanup, docs] Misc cleanup
* [AbemaTV] Add extractors by [Lesmiscore](https://github.com/Lesmiscore)
* [adobepass] Add Suddenlink MSO by [CplPwnies](https://github.com/CplPwnies)
* [ant1newsgr] Add extractor by [zmousm](https://github.com/zmousm)
* [bigo] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [Caltrans] Add extractor by [Bricio](https://github.com/Bricio)
* [daystar] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
* [fc2:live] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [fptplay] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
* [murrtube] Add extractor by [cyberfox1691](https://github.com/cyberfox1691)
* [nfb] Add extractor by [ofkz](https://github.com/ofkz)
* [niconico] Add playlist extractors and refactor by [Lesmiscore](https://github.com/Lesmiscore)
* [peekvids] Add extractor by [schn0sch](https://github.com/schn0sch)
* [piapro] Add extractor by [pycabbage](https://github.com/pycabbage), [Lesmiscore](https://github.com/Lesmiscore)
* [rokfin] Add extractor by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
* [rokfin] Add stack and channel extractors by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
* [ruv.is] Add extractor by [iw0nderhow](https://github.com/iw0nderhow)
* [telegram] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
* [VideocampusSachsen] Add extractors by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel)
* [xinpianchang] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
* [abc] Support 1080p by [Ronnnny](https://github.com/Ronnnny)
* [afreecatv] Support password-protected livestreams by [wlritchi](https://github.com/wlritchi)
* [ard] Fix valid URL
* [ATVAt] Detect geo-restriction by [marieell](https://github.com/marieell)
* [bandcamp] Detect acodec
* [bandcamp] Fix user URLs by [lyz-code](https://github.com/lyz-code)
* [bbc] Fix extraction of news articles by [ajj8](https://github.com/ajj8)
* [beeg] Fix extractor by [Bricio](https://github.com/Bricio)
* [bigo] Fix extractor to not use `form_params`
* [Bilibili] Pass referer for all formats by [blackgear](https://github.com/blackgear)
* [Biqle] Fix extractor by [Bricio](https://github.com/Bricio)
* [ccma] Fix timestamp parsing by [nyuszika7h](https://github.com/nyuszika7h)
* [crunchyroll] Better error reporting on login failure by [tejing1](https://github.com/tejing1)
* [cspan] Support C-Span congress videos by [Grabien](https://github.com/Grabien)
* [dropbox] Fix regex by [zenerdi0de](https://github.com/zenerdi0de)
* [fc2] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore)
* [fujitv] Extract resolution for free sources by [YuenSzeHong](https://github.com/YuenSzeHong)
* [Gettr] Add `GettrStreamingIE` by [i6t](https://github.com/i6t)
* [Gettr] Fix formats order by [i6t](https://github.com/i6t)
* [Gettr] Improve extractor by [i6t](https://github.com/i6t)
* [globo] Expand valid URL by [Bricio](https://github.com/Bricio)
* [lbry] Fix `--ignore-no-formats-error`
* [manyvids] Extract `uploader` by [regarten](https://github.com/regarten)
* [mildom] Fix linter
* [mildom] Rework extractors by [Lesmiscore](https://github.com/Lesmiscore)
* [mirrativ] Cleanup extractor code by [Lesmiscore](https://github.com/Lesmiscore)
* [nhk] Add support for NHK for School by [Lesmiscore](https://github.com/Lesmiscore)
* [niconico:tag] Add support for searching tags
* [nrk] Add fallback API
* [peekvids] Use JSON-LD by [schn0sch](https://github.com/schn0sch)
* [peertube] Add media.fsfe.org by [mxmehl](https://github.com/mxmehl)
* [rtvs] Fix extractor by [Bricio](https://github.com/Bricio)
* [spiegel] Fix `_VALID_URL`
* [ThumbnailsConvertor] Support `webp`
* [tiktok] Fix `vm.tiktok`/`vt.tiktok` URLs
* [tubitv] Fix/improve TV series extraction by [bbepis](https://github.com/bbepis)
* [tumblr] Fix extractor by [foghawk](https://github.com/foghawk)
* [twitcasting] Add fallback for finding running live by [Lesmiscore](https://github.com/Lesmiscore)
* [TwitCasting] Check for password protection by [Lesmiscore](https://github.com/Lesmiscore)
* [twitcasting] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore)
* [twitch] Fix field name of `view_count`
* [twitter] Fix for private videos by [iphoting](https://github.com/iphoting)
* [washingtonpost] Fix extractor by [Bricio](https://github.com/Bricio)
* [youtube:tab] Add `approximate_date` extractor-arg
* [youtube:tab] Follow redirect to regional channel by [coletdjnz](https://github.com/coletdjnz)
* [youtube:tab] Reject webpage data if redirected to home page
* [youtube] De-prioritize potentially damaged formats
* [youtube] Differentiate descriptive audio by language code
* [youtube] Ensure subtitle urls are absolute by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Escape possible `$` in `_extract_n_function_name` regex by [Lesmiscore](https://github.com/Lesmiscore)
* [youtube] Fix automatic captions
* [youtube] Fix n-sig extraction for phone player JS by [MinePlayersPE](https://github.com/MinePlayersPE)
* [youtube] Further de-prioritize 3gp format
* [youtube] Label original auto-subs
* [youtube] Prefer UTC upload date for videos by [coletdjnz](https://github.com/coletdjnz)
* [zaq1] Remove dead extractor by [marieell](https://github.com/marieell)
* [zee5] Support web-series by [Aniruddh-J](https://github.com/Aniruddh-J)
* [zingmp3] Fix extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
* [zoom] Add support for screen cast by [Mipsters](https://github.com/Mipsters)
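
The regex operator added above combines with the existing format filtering syntax; a sketch (the filter string itself is illustrative):

```python
# Sketch: select formats using the new regex operator (~=) and quoting.
# CLI equivalent: yt-dlp -f "bv*[format_note~='(?i)\bpremium\b']+ba/b" URL
from yt_dlp import YoutubeDL

ydl_opts = {
    # best video whose format_note matches the regex, plus best audio,
    # falling back to the overall best format if nothing matches
    'format': r"bv*[format_note~='(?i)\bpremium\b']+ba/b",
}

with YoutubeDL(ydl_opts) as ydl:
    ydl.download(['URL'])  # replace with a real URL
```
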

### 2022.02.04

* [youtube:search] Fix extractor by [coletdjnz](https://github.com/coletdjnz)
* [youtube:search] Add tests
* [twitcasting] Enforce UTF-8 for POST payload by [Lesmiscore](https://github.com/Lesmiscore)
* [mediaset] Fix extractor by [nixxo](https://github.com/nixxo)
* [websocket] Make syntax error in `websockets` module non-fatal

### 2022.02.03

* Merge youtube-dl: Upto [commit/78ce962](https://github.com/ytdl-org/youtube-dl/commit/78ce962f4fe020994c216dd2671546fbe58a5c67)
* Add option `--print-to-file`
* Make nested `--config-locations` relative to parent file
* Ensure `_type` is present in `info.json`
* Fix `--compat-options list-formats`
* Fix/improve `InAdvancePagedList`
* [downloader/ffmpeg] Handle unknown formats better
* [outtmpl] Handle `-o ""` better
* [outtmpl] Handle hard-coded file extension better
* [extractor] Add convenience function `_yes_playlist`
* [extractor] Allow non-fatal `title` extraction
* [extractor] Extract video inside `Article` json_ld
* [generic] Allow further processing of json_ld URL
* [cookies] Fix keyring selection for unsupported desktops
* [utils] Strip double spaces in `clean_html` by [dirkf](https://github.com/dirkf)
* [aes] Add `unpad_pkcs7` (see the sketch at the end of this section)
* [test] Fix `test_youtube_playlist_noplaylist`
* [docs,cleanup] Misc cleanup
* [dplay] Add extractors for site changes by [Sipherdrakon](https://github.com/Sipherdrakon)
* [ertgr] Add extractors by [zmousm](https://github.com/zmousm), [dirkf](https://github.com/dirkf)
* [Musicdex] Add extractors by [Ashish0804](https://github.com/Ashish0804)
* [YandexVideoPreview] Add extractor by [KiberInfinity](https://github.com/KiberInfinity)
* [youtube] Add extractor `YoutubeMusicSearchURLIE`
* [archive.org] Ignore unnecessary files
* [Bilibili] Add 8k support by [u-spec-png](https://github.com/u-spec-png)
* [bilibili] Fix extractor, make anthology title non-fatal
* [CAM4] Add thumbnail extraction by [alerikaisattera](https://github.com/alerikaisattera)
* [cctv] De-prioritize sample format
* [crunchyroll:beta] Add cookies support by [tejing1](https://github.com/tejing1)
* [crunchyroll] Fix login by [tejing1](https://github.com/tejing1)
* [doodstream] Fix extractor
* [fc2] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore)
* [FFmpegConcat] Abort on `--skip-download` and download errors
* [Fujitv] Extract metadata and support premium by [YuenSzeHong](https://github.com/YuenSzeHong)
* [globo] Fix extractor by [Bricio](https://github.com/Bricio)
* [glomex] Simplify embed detection
* [GoogleSearch] Fix extractor
* [Instagram] Fix extraction when logged in by [MinePlayersPE](https://github.com/MinePlayersPE)
* [iq.com] Add VIP support by [MinePlayersPE](https://github.com/MinePlayersPE)
* [mildom] Fix extractor by [lazypete365](https://github.com/lazypete365)
* [MySpass] Fix video url processing by [trassshhub](https://github.com/trassshhub)
* [Odnoklassniki] Improve embedded players extraction by [KiberInfinity](https://github.com/KiberInfinity)
* [orf:tvthek] Lazy playlist extraction and obey `--no-playlist`
* [Pladform] Fix redirection to external player by [KiberInfinity](https://github.com/KiberInfinity)
* [ThisOldHouse] Improve Premium URL check by [Ashish0804](https://github.com/Ashish0804)
* [TikTok] Iterate through app versions by [MinePlayersPE](https://github.com/MinePlayersPE)
* [tumblr] Fix 403 errors and handle vimeo embeds by [foghawk](https://github.com/foghawk)
* [viki] Fix "Bad request" for manifest by [nyuszika7h](https://github.com/nyuszika7h)
* [Vimm] Add recording extractor by [alerikaisattera](https://github.com/alerikaisattera)
* [web.archive:youtube] Add `ytarchive:` prefix and misc cleanup
* [youtube:api] Do not use seek when reading HTTPError response by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Fix n-sig for player e06dea74
* [youtube, cleanup] Misc fixes and cleanup
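
A small sketch of the new `unpad_pkcs7` helper, assuming it strips standard PKCS#7 padding from a block-aligned buffer:

```python
from yt_dlp.aes import unpad_pkcs7

# 12 bytes of data padded to a 16-byte block with four 0x04 bytes
padded = b'hello world!\x04\x04\x04\x04'
print(unpad_pkcs7(padded))  # b'hello world!'
```
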

### 2022.01.21

* Add option `--concat-playlist` to **concat videos in a playlist**
* Allow **multiple and nested configuration files**
* Add more post-processing stages (`after_video`, `playlist`)
* Allow `--exec` to be run at any post-processing stage (Deprecates `--exec-before-download`)
* Allow `--print` to be run at any post-processing stage
* Allow listing formats, thumbnails, subtitles using `--print` by [pukkandan](https://github.com/pukkandan), [Zirro](https://github.com/Zirro)
* Add fields `video_autonumber`, `modified_date`, `modified_timestamp`, `playlist_count`, `channel_follower_count` (see the sketch at the end of this section)
* Add key `requested_downloads` in the root `info_dict`
* Write `download_archive` only after all formats are downloaded
* [FfmpegMetadata] Allow setting metadata of individual streams using `meta<n>_` prefix
* Add option `--legacy-server-connect` by [xtkoba](https://github.com/xtkoba)
* Allow escaped `,` in `--extractor-args`
* Allow unicode characters in `info.json`
* Check for existing thumbnail/subtitle in final directory
* Don't treat empty containers as `None` in `sanitize_info`
* Fix `-s --ignore-no-formats --force-write-archive`
* Fix live title for multiple formats
* List playlist thumbnails in `--list-thumbnails`
* Raise error if subtitle download fails
* [cookies] Fix bug when keyring is unspecified
* [ffmpeg] Ignore unknown streams, standardize use of `-map 0`
* [outtmpl] Alternate form for `D` and fix suffix's case
* [utils] Add `Sec-Fetch-Mode` to `std_headers`
* [utils] Fix `format_bytes` output for Bytes by [pukkandan](https://github.com/pukkandan), [mdawar](https://github.com/mdawar)
* [utils] Handle `ss:xxx` in `parse_duration`
* [utils] Improve parsing for nested HTML elements by [zmousm](https://github.com/zmousm), [pukkandan](https://github.com/pukkandan)
* [utils] Use key `None` in `traverse_obj` to return as-is
* [extractor] Detect more subtitle codecs in MPD manifests by [fstirlitz](https://github.com/fstirlitz)
* [extractor] Extract chapters from JSON-LD by [iw0nderhow](https://github.com/iw0nderhow), [pukkandan](https://github.com/pukkandan)
* [extractor] Extract thumbnails from JSON-LD by [nixxo](https://github.com/nixxo)
* [extractor] Improve `url_result` and related
* [generic] Improve KVS player extraction by [trassshhub](https://github.com/trassshhub)
* [build] Reduce dependency on third party workflows
* [extractor,cleanup] Use `_search_nextjs_data`, `format_field`
* [cleanup] Minor fixes and cleanup
* [docs] Improvements
* [test] Fix TestVerboseOutput
* [afreecatv] Add livestreams extractor by [wlritchi](https://github.com/wlritchi)
* [callin] Add extractor by [foghawk](https://github.com/foghawk)
* [CrowdBunker] Add extractors by [Ashish0804](https://github.com/Ashish0804)
* [daftsex] Add extractors by [k3ns1n](https://github.com/k3ns1n)
* [digitalconcerthall] Add extractor by [teridon](https://github.com/teridon)
* [Drooble] Add extractor by [u-spec-png](https://github.com/u-spec-png)
* [EuropeanTour] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [iq.com] Add extractors by [MinePlayersPE](https://github.com/MinePlayersPE)
* [KelbyOne] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [LnkIE] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [MainStreaming] Add extractor by [coletdjnz](https://github.com/coletdjnz)
* [megatvcom] Add extractors by [zmousm](https://github.com/zmousm)
* [Newsy] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [noodlemagazine] Add extractor by [trassshhub](https://github.com/trassshhub)
* [PokerGo] Add extractors by [Ashish0804](https://github.com/Ashish0804)
* [Pornez] Add extractor by [mozlima](https://github.com/mozlima)
* [PRX] Add Extractors by [coletdjnz](https://github.com/coletdjnz)
* [RTNews] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [Rule34video] Add extractor by [trassshhub](https://github.com/trassshhub)
* [tvopengr] Add extractors by [zmousm](https://github.com/zmousm)
* [Vimm] Add extractor by [alerikaisattera](https://github.com/alerikaisattera)
* [glomex] Add extractors by [zmousm](https://github.com/zmousm)
* [instagram] Add story/highlight extractor by [u-spec-png](https://github.com/u-spec-png)
* [openrec] Add movie extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [rai] Add Raiplaysound extractors by [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan)
* [aparat] Fix extractor
* [ard] Extract subtitles by [fstirlitz](https://github.com/fstirlitz)
* [BiliIntl] Add login by [MinePlayersPE](https://github.com/MinePlayersPE)
* [CeskaTelevize] Use `http` for manifests
* [CTVNewsIE] Add fallback for video search by [Ashish0804](https://github.com/Ashish0804)
* [dplay] Migrate DiscoveryPlusItaly to DiscoveryPlus by [timendum](https://github.com/timendum)
* [dplay] Re-structure DiscoveryPlus extractors
* [Dropbox] Support password protected files and more formats by [zenerdi0de](https://github.com/zenerdi0de)
* [facebook] Fix extraction from groups
* [facebook] Improve title and uploader extraction
* [facebook] Parse dash manifests
* [fox] Extract m3u8 from preview by [ischmidt20](https://github.com/ischmidt20)
* [funk] Support origin URLs
* [gfycat] Fix `uploader`
* [gfycat] Support embeds by [coletdjnz](https://github.com/coletdjnz)
* [hotstar] Add extractor args to ignore tags by [Ashish0804](https://github.com/Ashish0804)
* [hrfernsehen] Fix ardloader extraction by [CreaValix](https://github.com/CreaValix)
* [instagram] Fix username extraction for stories and highlights by [nyuszika7h](https://github.com/nyuszika7h)
* [kakao] Detect geo-restriction
* [line] Remove `tv.line.me` by [sian1468](https://github.com/sian1468)
* [mixch] Add `MixchArchiveIE` by [Lesmiscore](https://github.com/Lesmiscore)
* [mixcloud] Detect restrictions by [llacb47](https://github.com/llacb47)
* [NBCSports] Fix extraction of platform URLs by [ischmidt20](https://github.com/ischmidt20)
* [Nexx] Extract more metadata by [MinePlayersPE](https://github.com/MinePlayersPE)
* [Nexx] Support 3q CDN by [MinePlayersPE](https://github.com/MinePlayersPE)
* [pbs] De-prioritize AD formats
* [PornHub,YouTube] Refresh onion addresses by [unit193](https://github.com/unit193)
* [RedBullTV] Parse subtitles from manifest by [Ashish0804](https://github.com/Ashish0804)
* [streamcz] Fix extractor by [arkamar](https://github.com/arkamar), [pukkandan](https://github.com/pukkandan)
* [Ted] Rewrite extractor by [pukkandan](https://github.com/pukkandan), [trassshhub](https://github.com/trassshhub)
* [Theta] Fix valid URL by [alerikaisattera](https://github.com/alerikaisattera)
* [ThisOldHouseIE] Add support for premium videos by [Ashish0804](https://github.com/Ashish0804)
* [TikTok] Fix extraction for sigi-based webpages, add API fallback by [MinePlayersPE](https://github.com/MinePlayersPE)
* [TikTok] Pass cookies to formats, and misc fixes by [MinePlayersPE](https://github.com/MinePlayersPE)
* [TikTok] Extract captions, user thumbnail by [MinePlayersPE](https://github.com/MinePlayersPE)
* [TikTok] Change app version by [MinePlayersPE](https://github.com/MinePlayersPE), [llacb47](https://github.com/llacb47)
* [TVer] Extract message for unaired live by [Lesmiscore](https://github.com/Lesmiscore)
* [twitcasting] Refactor extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [twitter] Fix video in quoted tweets
* [veoh] Improve extractor by [foghawk](https://github.com/foghawk)
* [vk] Capture `clip` URLs
* [vk] Fix VKUserVideosIE by [Ashish0804](https://github.com/Ashish0804)
* [vk] Improve `_VALID_URL` by [k3ns1n](https://github.com/k3ns1n)
* [VrtNU] Handle empty title by [pgaig](https://github.com/pgaig)
* [XVideos] Check HLS formats by [MinePlayersPE](https://github.com/MinePlayersPE)
* [yahoo:gyao] Improved playlist handling by [hyano](https://github.com/hyano)
* [youtube:tab] Extract more playlist metadata by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [youtube:tab] Raise error on tab redirect by [krichbanana](https://github.com/krichbanana), [coletdjnz](https://github.com/coletdjnz)
* [youtube] Update Innertube clients by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Detect live-stream embeds
* [youtube] Do not return `upload_date` for playlists
* [youtube] Extract channel subscriber count by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Make invalid storyboard URL non-fatal
* [youtube] Enforce UTC, update innertube clients and tests by [coletdjnz](https://github.com/coletdjnz)
* [zdf] Add chapter extraction by [iw0nderhow](https://github.com/iw0nderhow)
* [zee5] Add geo-bypass
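
The new fields listed above are usable anywhere the output template is; a sketch (the template layout is just an illustration):

```python
# Sketch: use the new `channel_follower_count` field in an output template.
# CLI equivalent:
#   yt-dlp -o '%(channel)s (%(channel_follower_count)s followers)/%(title)s.%(ext)s' URL
from yt_dlp import YoutubeDL

ydl_opts = {
    'outtmpl': '%(channel)s (%(channel_follower_count)s followers)/%(title)s.%(ext)s',
}

with YoutubeDL(ydl_opts) as ydl:
    ydl.download(['URL'])  # replace with a real URL
```
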

### 2021.12.27

* Avoid recursion error when re-extracting info
* [ffmpeg] Fix position of `--ppa`
* [aria2c] Don't show progress when `--no-progress`
* [cookies] Support other keyrings by [mbway](https://github.com/mbway)
* [EmbedThumbnail] Prefer AtomicParsley over ffmpeg if available
* [generic] Fix HTTP KVS Player by [git-anony-mouse](https://github.com/git-anony-mouse)
* [ThumbnailsConvertor] Fix for when there are no thumbnails
* [docs] Add examples for using `TYPES:` in `-P`/`-o` (see the sketch at the end of this section)
* [PixivSketch] Add extractors by [nao20010128nao](https://github.com/nao20010128nao)
* [tiktok] Add music, sticker and tag IEs by [MinePlayersPE](https://github.com/MinePlayersPE)
* [BiliIntl] Fix extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
* [CBC] Fix URL regex
* [tiktok] Fix `extractor_key` used in archive
* [youtube] **End `live-from-start` properly when stream ends with 403**
* [Zee5] Fix VALID_URL for tv-shows by [Ashish0804](https://github.com/Ashish0804)
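
For reference, the `TYPES:` syntax mentioned above maps to dict-valued `paths`/`outtmpl` in the embedded API; a sketch (the directories are placeholders):

```python
# Sketch: per-type paths and templates, mirroring
#   yt-dlp -P ~/Videos -P 'thumbnail:~/Videos/thumbs' -o '%(title)s.%(ext)s'
from yt_dlp import YoutubeDL

ydl_opts = {
    'paths': {
        'home': '~/Videos',              # default download location
        'thumbnail': '~/Videos/thumbs',  # thumbnails go to their own directory
    },
    'outtmpl': {'default': '%(title)s.%(ext)s'},
}

with YoutubeDL(ydl_opts) as ydl:
    ydl.download(['URL'])  # replace with a real URL
```
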

### 2021.12.25

* [dash,youtube] **Download live from start to end** by [nao20010128nao](https://github.com/nao20010128nao), [pukkandan](https://github.com/pukkandan)
    * Add option `--live-from-start` to enable downloading live videos from start (see the sketch at the end of this section)
    * Add key `is_from_start` in formats to identify formats (of live videos) that download from start
    * [dash] Create protocol `http_dash_segments_generator` that allows a function to be passed instead of fragments
    * [fragment] Allow multiple live dash formats to download simultaneously
    * [youtube] Implement fragment re-fetching for the live dash formats
    * [youtube] Re-extract dash manifest every 5 hours (manifest expires in 6hrs)
    * [postprocessor/ffmpeg] Add `FFmpegFixupDuplicateMoovPP` to fixup duplicated moov atoms
    * Known issues:
        * Ctrl+C doesn't work on Windows when downloading multiple formats
        * If video becomes private, download hangs
* [SponsorBlock] Add `Filler` and `Highlight` categories by [nihil-admirari](https://github.com/nihil-admirari), [pukkandan](https://github.com/pukkandan)
    * Change `--sponsorblock-cut all` to `--sponsorblock-cut default` if you do not want filler sections to be removed
* Add field `webpage_url_domain`
* Add interactive format selection with `-f -`
* Add option `--file-access-retries` by [ehoogeveen-medweb](https://github.com/ehoogeveen-medweb)
* [outtmpl] Add alternate forms `S`, `D` and improve `id` detection
* [outtmpl] Add operator `&` for replacement text by [PilzAdam](https://github.com/PilzAdam)
* [EmbedSubtitle] Disable duration check temporarily
* [extractor] Add `_search_nuxt_data` by [nao20010128nao](https://github.com/nao20010128nao)
* [extractor] Ignore errors in comment extraction when `-i` is given
* [extractor] Standardize `_live_title`
* [FormatSort] Prevent incorrect deprecation warning
* [generic] Extract m3u8 formats from JSON-LD
* [postprocessor/ffmpeg] Always add `faststart`
* [utils] Fix parsing `YYYYMMDD` dates in Nov/Dec by [wlritchi](https://github.com/wlritchi)
* [utils] Improve `parse_count`
* [utils] Update `std_headers` by [kikuyan](https://github.com/kikuyan), [fstirlitz](https://github.com/fstirlitz)
* [lazy_extractors] Fix for search IEs
* [extractor] Support default implicit graph in JSON-LD by [zmousm](https://github.com/zmousm)
* Allow `--no-write-thumbnail` to override `--write-all-thumbnails`
* Fix `--throttled-rate`
* Fix control characters being printed to `--console-title`
* Fix PostProcessor hooks not registered for some PPs
* Pre-process when using `--flat-playlist`
* Remove known invalid thumbnails from `info_dict`
* Add warning when using `-f best`
* Use `parse_duration` for `--wait-for-video` and some minor fixes
* [test/download] Add more fields
* [test/download] Ignore field `webpage_url_domain` by [std-move](https://github.com/std-move)
* [compat] Suppress errors in enabling VT mode
* [docs] Improve manpage format by [iw0nderhow](https://github.com/iw0nderhow), [pukkandan](https://github.com/pukkandan)
* [docs,cleanup] Minor fixes and cleanup
* [cleanup] Fix some typos by [unit193](https://github.com/unit193)
* [ABC:iview] Add show extractor by [pabs3](https://github.com/pabs3)
* [dropout] Add extractor by [TwoThousandHedgehogs](https://github.com/TwoThousandHedgehogs), [pukkandan](https://github.com/pukkandan)
* [GameJolt] Add extractors by [MinePlayersPE](https://github.com/MinePlayersPE)
* [gofile] Add extractor by [Jertzukka](https://github.com/Jertzukka), [Ashish0804](https://github.com/Ashish0804)
* [hse] Add extractors by [cypheron](https://github.com/cypheron), [pukkandan](https://github.com/pukkandan)
* [NateTV] Add NateIE and NateProgramIE by [Ashish0804](https://github.com/Ashish0804), [Hyeeji](https://github.com/Hyeeji)
* [OpenCast] Add extractors by [bwildenhain](https://github.com/bwildenhain), [C0D3D3V](https://github.com/C0D3D3V)
* [rtve] Add `RTVEAudioIE` by [kebianizao](https://github.com/kebianizao)
* [Rutube] Add RutubeChannelIE by [Ashish0804](https://github.com/Ashish0804)
* [skeb] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
* [soundcloud] Add related tracks extractor by [Lapin0t](https://github.com/Lapin0t)
* [toggo] Add extractor by [nyuszika7h](https://github.com/nyuszika7h)
* [TrueID] Add extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
* [audiomack] Update album and song VALID_URL by [abdullah-if](https://github.com/abdullah-if), [dirkf](https://github.com/dirkf)
* [CBC Gem] Extract 1080p formats by [DavidSkrundz](https://github.com/DavidSkrundz)
* [ceskatelevize] Fetch iframe from nextJS data by [mkubecek](https://github.com/mkubecek)
* [crackle] Look for non-DRM formats by [raleeper](https://github.com/raleeper)
* [dplay] Temporary fix for `discoveryplus.com/it`
* [DiscoveryPlusShowBaseIE] Yield actual video id by [Ashish0804](https://github.com/Ashish0804)
* [Facebook] Handle redirect URLs
* [fujitv] Extract 1080p from `tv_android` m3u8 by [YuenSzeHong](https://github.com/YuenSzeHong)
* [gronkh] Support new URL pattern by [Sematre](https://github.com/Sematre)
* [instagram] Expand valid URL by [u-spec-png](https://github.com/u-spec-png)
* [Instagram] Try bypassing login wall with embed page by [MinePlayersPE](https://github.com/MinePlayersPE)
* [Jamendo] Fix use of `_VALID_URL_RE` by [jaller94](https://github.com/jaller94)
* [LBRY] Support livestreams by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan)
* [NJPWWorld] Extract formats from m3u8 by [aarubui](https://github.com/aarubui)
* [NovaEmbed] Update player regex by [std-move](https://github.com/std-move)
* [npr] Make SMIL extraction non-fatal by [r5d](https://github.com/r5d)
* [ntvcojp] Extract NUXT data by [nao20010128nao](https://github.com/nao20010128nao)
* [ok.ru] Add mobile fallback by [nao20010128nao](https://github.com/nao20010128nao)
* [olympics] Add uploader and cleanup by [u-spec-png](https://github.com/u-spec-png)
* [ondemandkorea] Update `jw_config` regex by [julien-hadleyjack](https://github.com/julien-hadleyjack)
* [PlutoTV] Expand `_VALID_URL`
* [RaiNews] Fix extractor by [nixxo](https://github.com/nixxo)
* [RCTIPlusSeries] Lazy extraction and video type selection by [MinePlayersPE](https://github.com/MinePlayersPE)
* [redtube] Handle formats delivered inside a JSON by [dirkf](https://github.com/dirkf), [nixxo](https://github.com/nixxo)
* [SonyLiv] Add OTP login support by [Ashish0804](https://github.com/Ashish0804)
* [Steam] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
* [TikTok] Pass cookies to mobile API by [MinePlayersPE](https://github.com/MinePlayersPE)
* [trovo] Fix inheritance of `TrovoChannelBaseIE`
* [TVer] Extract better thumbnails by [YuenSzeHong](https://github.com/YuenSzeHong)
* [vimeo] Extract chapters
* [web.archive:youtube] Improve metadata extraction by [coletdjnz](https://github.com/coletdjnz)
* [youtube:comments] Add more options for limiting number of comments extracted by [coletdjnz](https://github.com/coletdjnz)
* [youtube:tab] Extract more metadata from feeds/channels/playlists by [coletdjnz](https://github.com/coletdjnz)
* [youtube:tab] Extract video thumbnails from playlist by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [youtube:tab] Ignore query when redirecting channel to playlist and cleanup of related code
* [youtube] Fix `ytsearchdate`
* [zdf] Support videos with different ptmd location by [iw0nderhow](https://github.com/iw0nderhow)
* [zee5] Support /episodes in URL
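
A minimal sketch of downloading a livestream from its start through the Python API (the option name mirrors the flag; verify it against your version's `YoutubeDL` docstring):

```python
# Sketch: equivalent of `yt-dlp --live-from-start URL`
from yt_dlp import YoutubeDL

ydl_opts = {'live_from_start': True}

with YoutubeDL(ydl_opts) as ydl:
    ydl.download(['URL of a currently running live stream'])  # placeholder
```
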

### 2021.12.01

* **Add option `--wait-for-video` to wait for scheduled streams**
* Add option `--break-per-input` to apply `--break-on...` to each input URL
* Add option `--embed-info-json` to embed info.json in mkv
* Add compat-option `embed-metadata`
* Allow using a custom format selector through API (see the sketch at the end of this section)
* [AES] Add ECB mode by [nao20010128nao](https://github.com/nao20010128nao)
* [build] Fix MacOS Build
* [build] Save Git HEAD at release alongside version info
* [build] Use `workflow_dispatch` for release
* [downloader/ffmpeg] Fix for direct videos inside mpd manifests
* [downloader] Add colors to download progress
* [EmbedSubtitles] Slightly relax duration check and related cleanup
* [ExtractAudio] Fix conversion to `wav` and `vorbis`
* [ExtractAudio] Support `alac`
* [extractor] Extract `average_rating` from JSON-LD
* [FixupM3u8] Fixup MPEG-TS in MP4 container
* [generic] Support mpd manifests without extension by [shirt](https://github.com/shirt-dev)
* [hls] Better FairPlay DRM detection by [nyuszika7h](https://github.com/nyuszika7h)
* [jsinterp] Fix splice to handle float (for youtube js player f1ca6900)
* [utils] Allow alignment in `render_table` and add tests
* [utils] Fix `PagedList`
* [utils] Fix error when copying `LazyList`
* Clarify video/audio-only formats in `-F`
* Ensure directory exists when checking formats
* Ensure path for link files exists by [Zirro](https://github.com/Zirro)
* Ensure same config file is not loaded multiple times
* Fix `postprocessor_hooks`
* Fix `--break-on-archive` when pre-checking
* Fix `--check-formats` for `mhtml`
* Fix `--load-info-json` of playlists with failed entries
* Fix `--trim-filename` when filename has `.`
* Fix bug in parsing `--add-header`
* Fix error in `report_unplayable_conflict` by [shirt](https://github.com/shirt-dev)
* Fix writing playlist infojson with `--no-clean-infojson`
* Validate `--get-bypass-country`
* [blogger] Add extractor by [pabs3](https://github.com/pabs3)
* [breitbart] Add extractor by [Grabien](https://github.com/Grabien)
* [CableAV] Add extractor by [j54vc1bk](https://github.com/j54vc1bk)
* [CanalAlpha] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [CozyTV] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [CPTwentyFour] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [DiscoveryPlus] Add `DiscoveryPlusItalyShowIE` by [Ashish0804](https://github.com/Ashish0804)
* [ESPNCricInfo] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [LinkedIn] Add extractor by [u-spec-png](https://github.com/u-spec-png)
* [mixch] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
* [nebula] Add `NebulaCollectionIE` and rewrite extractor by [hheimbuerger](https://github.com/hheimbuerger)
* [OneFootball] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [peer.tv] Add extractor by [u-spec-png](https://github.com/u-spec-png)
* [radiozet] Add extractor by [0xA7404A](https://github.com/0xA7404A) (Aurora)
* [redgifs] Add extractor by [chio0hai](https://github.com/chio0hai)
* [RedGifs] Add Search and User extractors by [Deer-Spangle](https://github.com/Deer-Spangle)
* [rtrfm] Add extractor by [pabs3](https://github.com/pabs3)
* [Streamff] Add extractor by [cntrl-s](https://github.com/cntrl-s)
* [Stripchat] Add extractor by [zulaport](https://github.com/zulaport)
* [Aljazeera] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
* [AmazonStoreIE] Fix regex to not match vdp urls by [Ashish0804](https://github.com/Ashish0804)
* [ARDBetaMediathek] Handle new URLs
* [bbc] Get all available formats by [nyuszika7h](https://github.com/nyuszika7h)
* [Bilibili] Fix title extraction by [u-spec-png](https://github.com/u-spec-png)
* [CBC Gem] Fix for shows that don't have all seasons by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
* [curiositystream] Add more metadata
* [CuriosityStream] Fix series
* [DiscoveryPlus] Rewrite extractors by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan)
* [HotStar] Set language field from tags by [Ashish0804](https://github.com/Ashish0804)
* [instagram, cleanup] Refactor extractors
* [Instagram] Display more login errors by [MinePlayersPE](https://github.com/MinePlayersPE)
* [itv] Fix extractor by [staubichsauger](https://github.com/staubichsauger), [pukkandan](https://github.com/pukkandan)
* [mediaklikk] Expand valid URL
* [MTV] Improve mgid extraction by [Sipherdrakon](https://github.com/Sipherdrakon), [kikuyan](https://github.com/kikuyan)
* [nexx] Better error message for unsupported format
* [NovaEmbed] Fix extractor by [pukkandan](https://github.com/pukkandan), [std-move](https://github.com/std-move)
* [PatreonUser] Do not capture RSS URLs
* [Reddit] Add support for 1080p videos by [xenova](https://github.com/xenova)
* [RoosterTeethSeries] Fix for multiple pages by [MinePlayersPE](https://github.com/MinePlayersPE)
* [sbs] Fix for movies and livestreams
* [Senate.gov] Add SenateGovIE and fix SenateISVPIE by [Grabien](https://github.com/Grabien), [pukkandan](https://github.com/pukkandan)
* [soundcloud:search] Fix pagination
* [tiktok:user] Set `webpage_url` correctly
* [Tokentube] Fix description by [u-spec-png](https://github.com/u-spec-png)
* [trovo] Fix extractor by [nyuszika7h](https://github.com/nyuszika7h)
* [tv2] Expand valid URL
* [Tvplayhome] Fix extractor by [pukkandan](https://github.com/pukkandan), [18928172992817182](https://github.com/18928172992817182)
* [Twitch:vod] Add chapters by [mpeter50](https://github.com/mpeter50)
* [twitch:vod] Extract live status by [DEvmIb](https://github.com/DEvmIb)
* [VidLii] Add 720p support by [mrpapersonic](https://github.com/mrpapersonic)
* [vimeo] Add fallback for config URL
* [vimeo] Sort http formats higher
* [WDR] Expand valid URL
* [willow] Add extractor by [aarubui](https://github.com/aarubui)
* [xvideos] Detect embed URLs by [4a1e2y5](https://github.com/4a1e2y5)
* [xvideos] Fix extractor by [Yakabuff](https://github.com/Yakabuff)
* [youtube, cleanup] Reorganize Tab and Search extractor inheritances
* [youtube:search_url] Add playlist/channel support
* [youtube] Add `default` player client by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Add storyboard formats
* [youtube] Decrypt n-sig for URLs with `ratebypass`
* [youtube] Minor improvement to format sorting
* [cleanup] Add deprecation warnings
* [cleanup] Refactor `JSInterpreter._seperate`
* [Cleanup] Remove some unnecessary groups in regexes by [Ashish0804](https://github.com/Ashish0804)
* [cleanup] Misc cleanup
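
The custom format selector mentioned above can be any callable passed as `format`; a sketch modeled on the README's embedding example:

```python
from yt_dlp import YoutubeDL

def format_selector(ctx):
    """Yield the best format that already has both audio and video."""
    formats = ctx['formats']
    yield next(
        f for f in reversed(formats)  # formats are sorted worst to best
        if f.get('vcodec') != 'none' and f.get('acodec') != 'none'
    )

with YoutubeDL({'format': format_selector}) as ydl:
    ydl.download(['URL'])  # replace with a real URL
```
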

### 2021.11.10.1

* Temporarily disable MacOS Build

### 2021.11.10

* [youtube] **Fix throttling by decrypting n-sig**
* Merging extractors from [haruhi-dl](https://git.sakamoto.pl/laudom/haruhi-dl) by [selfisekai](https://github.com/selfisekai)
    * [extractor] Add `_search_nextjs_data`
    * [tvp] Fix extractors
    * [tvp] Add TVPStreamIE
    * [wppilot] Add extractors
    * [polskieradio] Add extractors
    * [radiokapital] Add extractors
    * [polsatgo] Add extractor by [selfisekai](https://github.com/selfisekai), [sdomi](https://github.com/sdomi)
* Separate `--check-all-formats` from `--check-formats`
* Approximate filesize from bitrate
* Don't create console in `windows_enable_vt_mode`
* Fix bug in `--load-infojson` of playlists
* [minicurses] Add colors to `-F` and standardize color-printing code
* [outtmpl] Add type `link` for internet shortcut files
* [outtmpl] Add alternate forms for `q` and `j`
* [outtmpl] Do not traverse `None`
* [fragment] Fix progress display in fragmented downloads
* [downloader/ffmpeg] Fix vtt download with ffmpeg
* [ffmpeg] Detect presence of setts and libavformat version
* [ExtractAudio] Rescale `--audio-quality` correctly by [CrypticSignal](https://github.com/CrypticSignal), [pukkandan](https://github.com/pukkandan)
* [ExtractAudio] Use `libfdk_aac` if available by [CrypticSignal](https://github.com/CrypticSignal)
* [FormatSort] `eac3` is better than `ac3`
* [FormatSort] Fix some fields' defaults
* [generic] Detect more json_ld
* [generic] Parse jwplayer with only the json URL
* [extractor] Add keyword automatically to SearchIE descriptions
* [extractor] Fix some errors being converted to `ExtractorError`
* [utils] Add `join_nonempty` (see the sketch at the end of this section)
* [utils] Add `jwt_decode_hs256` by [Ashish0804](https://github.com/Ashish0804)
* [utils] Create `DownloadCancelled` exception
* [utils] Parse `vp09` as vp9
* [utils] Sanitize URL when determining protocol
* [test/download] Fallback test to `bv`
* [docs] Minor documentation improvements
* [cleanup] Improvements to error and debug messages
* [cleanup] Minor fixes and cleanup
* [3speak] Add extractors by [Ashish0804](https://github.com/Ashish0804)
* [AmazonStore] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [Gab] Add extractor by [u-spec-png](https://github.com/u-spec-png)
* [mediaset] Add playlist support by [nixxo](https://github.com/nixxo)
* [MLSSoccer] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [N1] Add support for nova.rs by [u-spec-png](https://github.com/u-spec-png)
* [PlanetMarathi] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [RaiplayRadio] Add extractors by [frafra](https://github.com/frafra)
* [roosterteeth] Add series extractor
* [sky] Add `SkyNewsStoryIE` by [ajj8](https://github.com/ajj8)
* [youtube] Fix sorting for some videos
* [youtube] Populate `thumbnail` with the best "known" thumbnail
* [youtube] Refactor itag processing
* [youtube] Remove unnecessary no-playlist warning
* [youtube:tab] Add Invidious list for playlists/channels by [rhendric](https://github.com/rhendric)
* [Bilibili:comments] Fix infinite loop by [u-spec-png](https://github.com/u-spec-png)
* [ceskatelevize] Fix extractor by [flashdagger](https://github.com/flashdagger)
* [Coub] Fix media format identification by [wlritchi](https://github.com/wlritchi)
* [crunchyroll] Add extractor-args `language` and `hardsub`
* [DiscoveryPlus] Allow language codes in URL
* [imdb] Fix thumbnail by [ozburo](https://github.com/ozburo)
* [instagram] Add IOS URL support by [u-spec-png](https://github.com/u-spec-png)
* [instagram] Improve login code by [u-spec-png](https://github.com/u-spec-png)
* [Instagram] Improve metadata extraction by [u-spec-png](https://github.com/u-spec-png)
* [iPrima] Fix extractor by [stanoarn](https://github.com/stanoarn)
* [itv] Add support for ITV News by [ajj8](https://github.com/ajj8)
* [la7] Fix extractor by [nixxo](https://github.com/nixxo)
* [linkedin] Don't login multiple times
* [mtv] Fix some videos by [Sipherdrakon](https://github.com/Sipherdrakon)
* [Newgrounds] Fix description by [u-spec-png](https://github.com/u-spec-png)
* [Nrk] Minor fixes by [fractalf](https://github.com/fractalf)
* [Olympics] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
* [piksel] Fix sorting
* [twitter] Do not sort by codec
* [viewlift] Add cookie-based login and series support by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan)
* [vimeo] Detect source extension and misc cleanup by [flashdagger](https://github.com/flashdagger)
* [vimeo] Fix ondemand videos and direct URLs with hash
* [vk] Fix login and add subtitles by [kaz-us](https://github.com/kaz-us)
* [VLive] Add upload_date and thumbnail by [Ashish0804](https://github.com/Ashish0804)
* [VRT] Fix login by [pgaig](https://github.com/pgaig)
* [Vupload] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
* [wakanim] Add support for MPD manifests by [nyuszika7h](https://github.com/nyuszika7h)
* [wakanim] Detect geo-restriction by [nyuszika7h](https://github.com/nyuszika7h)
* [ZenYandex] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
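
A quick sketch of the new `join_nonempty` utility, which joins only the truthy values:

```python
from yt_dlp.utils import join_nonempty

# None and empty strings are skipped; the rest are joined with the delimiter
print(join_nonempty('1080p', None, '', 'dash', delim='-'))  # 1080p-dash
```
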

### 2021.10.22

* [build] Improvements:
    * Build standalone MacOS packages by [smplayer-dev](https://github.com/smplayer-dev)
    * Release Windows exe built with `py2exe`
    * Enable lazy-extractors in releases
        * Set env var `YTDLP_NO_LAZY_EXTRACTORS` to forcefully disable this (experimental)
    * Clean up error reporting in update
    * Refactor `pyinst.py`, misc cleanup and improve docs
* [docs] Migrate issues to use forms by [Ashish0804](https://github.com/Ashish0804)
* [downloader] **Fix slow progress hooks**
    * This was causing HLS/DASH downloads to be extremely slow in some situations
* [downloader/ffmpeg] Improve simultaneous download and merge
* [EmbedMetadata] Allow overwriting all default metadata with `meta_default` key
* [ModifyChapters] Add ability for `--remove-chapters` to remove sections by timestamp
* [utils] Allow duration strings in `--match-filter` (see the sketch at the end of this section)
* Add HDR information to formats
* Add negative option `--no-batch-file` by [Zirro](https://github.com/Zirro)
* Calculate more fields for merged formats
* Do not verify thumbnail URLs unless `--check-formats` is specified
* Don't create console for subprocesses on Windows
* Fix `--restrict-filename` when used with default template
* Fix `check_formats` output being written to stdout when `-qv`
* Fix bug in storyboards
* Fix conflict between id and ext in format selection
* Fix verbose head not showing custom configs
* Load archive only after printing verbose head
* Make `duration_string` and `resolution` available in `--match-filter`
* Re-implement deprecated option `--id`
* Reduce default `--socket-timeout`
* Write verbose header to logger
* [outtmpl] Fix bug in expanding environment variables
* [cookies] Local State should be opened as utf-8
* [extractor,utils] Detect more codecs/mimetypes
* [extractor] Detect `EXT-X-KEY` Apple FairPlay
* [utils] Use `importlib` to load plugins by [sulyi](https://github.com/sulyi)
* [http] Retry on socket timeout and show the last encountered error
* [fragment] Print error message when skipping fragment
* [aria2c] Fix `--skip-unavailable-fragment`
* [SponsorBlock] Obey `extractor-retries` and `sleep-requests`
* [Merger] Do not add `aac_adtstoasc` to non-hls audio
* [ModifyChapters] Do not mutate original chapters by [nihil-admirari](https://github.com/nihil-admirari)
* [devscripts/run_tests] Use markers to filter tests by [sulyi](https://github.com/sulyi)
* [7plus] Add cookie based authentication by [nyuszika7h](https://github.com/nyuszika7h)
* [AdobePass] Fix RCN MSO by [jfogelman](https://github.com/jfogelman)
* [CBC] Fix Gem livestream by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
* [CBC] Support CBC Gem member content by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
* [crunchyroll] Add season to flat-playlist
* [crunchyroll] Add support for `beta.crunchyroll` URLs and fix series URLs with language code
* [EUScreen] Add Extractor by [Ashish0804](https://github.com/Ashish0804)
* [Gronkh] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [hidive] Fix typo
* [Hotstar] Mention Dynamic Range in `format_id` by [Ashish0804](https://github.com/Ashish0804)
* [Hotstar] Raise appropriate error for DRM
* [instagram] Add login by [u-spec-png](https://github.com/u-spec-png)
* [instagram] Show appropriate error when login is needed
* [microsoftstream] Add extractor by [damianoamatruda](https://github.com/damianoamatruda), [nixklai](https://github.com/nixklai)
* [on24] Add extractor by [damianoamatruda](https://github.com/damianoamatruda)
* [patreon] Fix vimeo player regex by [zenerdi0de](https://github.com/zenerdi0de)
* [SkyNewsAU] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [tagesschau] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
* [tbs] Add tbs live streams by [llacb47](https://github.com/llacb47)
* [tiktok] Fix typo and update tests
* [trovo] Support channel clips and VODs by [Ashish0804](https://github.com/Ashish0804)
* [Viafree] Add support for Finland by [18928172992817182](https://github.com/18928172992817182)
* [vimeo] Fix embedded `player.vimeo`
* [vlive:channel] Fix extraction by [kikuyan](https://github.com/kikuyan), [pukkandan](https://github.com/pukkandan)
* [youtube] Add auto-translated subtitles
* [youtube] Expose different formats with same itag
* [youtube:comments] Fix for new layout by [coletdjnz](https://github.com/coletdjnz)
* [cleanup] Cleanup bilibili code by [pukkandan](https://github.com/pukkandan), [u-spec-png](https://github.com/u-spec-png)
* [cleanup] Remove broken youtube login code
* [cleanup] Standardize timestamp formatting code
* [cleanup] Generalize `getcomments` implementation for extractors
* [cleanup] Simplify search extractors code
* [cleanup] Misc
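
A sketch of duration strings in match filters via the embedded API, assuming `match_filter_func` accepts the same expressions as `--match-filter`:

```python
# Sketch: skip anything longer than 10 minutes, using a duration string.
# CLI equivalent: yt-dlp --match-filter "duration < 10:00" URL
from yt_dlp import YoutubeDL
from yt_dlp.utils import match_filter_func

ydl_opts = {'match_filter': match_filter_func('duration < 10:00')}

with YoutubeDL(ydl_opts) as ydl:
    ydl.download(['URL'])  # replace with a real URL
```
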

### 2021.10.10

* [downloader/ffmpeg] Fix bug in initializing `FFmpegPostProcessor`
* [minicurses] Fix when printing to file
* [downloader] Fix throttledratelimit
* [francetv] Fix extractor by [fstirlitz](https://github.com/fstirlitz), [sarnoud](https://github.com/sarnoud)
* [NovaPlay] Add extractor by [Bojidarist](https://github.com/Bojidarist)
* [ffmpeg] Revert "Set max probesize" - No longer needed
* [docs] Remove incorrect dependency on VC++10
* [build] Allow releasing without a changelog

### 2021.10.09

* Improved progress reporting (see example below)
    * Separate `--console-title` and `--no-progress`
    * Add option `--progress` to show progress-bar even in quiet mode
    * Fix and refactor `minicurses` and use it for all progress reporting
    * Standardize use of terminal sequences and enable color support for windows 10
    * Add option `--progress-template` to customize progress-bar and console-title
    * Add postprocessor hooks and progress reporting
* [postprocessor] Add plugin support with option `--use-postprocessor`
* [extractor] Extract storyboards from SMIL manifests by [fstirlitz](https://github.com/fstirlitz)
* [outtmpl] Alternate form of format type `l` for `\n` delimited list
* [outtmpl] Format type `U` for unicode normalization
* [outtmpl] Allow empty output template to skip a type of file
* Merge webm formats into mkv if thumbnails are to be embedded
* [adobepass] Add RCN as MSO by [jfogelman](https://github.com/jfogelman)
* [ciscowebex] Add extractor by [damianoamatruda](https://github.com/damianoamatruda)
* [Gettr] Add extractor by [i6t](https://github.com/i6t)
* [GoPro] Add extractor by [i6t](https://github.com/i6t)
* [N1] Add extractor by [u-spec-png](https://github.com/u-spec-png)
* [Theta] Add video extractor by [alerikaisattera](https://github.com/alerikaisattera)
* [Veo] Add extractor by [i6t](https://github.com/i6t)
* [Vupload] Add extractor by [u-spec-png](https://github.com/u-spec-png)
* [bbc] Extract better quality videos by [ajj8](https://github.com/ajj8)
* [Bilibili] Add subtitle converter by [u-spec-png](https://github.com/u-spec-png)
* [CBC] Cleanup tests by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
* [Douyin] Rewrite extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
* [Funimation] Fix for /v/ urls by [pukkandan](https://github.com/pukkandan), [Jules-A](https://github.com/Jules-A)
* [Funimation] Sort formats according to the relevant extractor-args
* [Hidive] Fix duplicate and incorrect formats
* [HotStarSeries] Fix cookies by [Ashish0804](https://github.com/Ashish0804)
* [LinkedInLearning] Add subtitles by [Ashish0804](https://github.com/Ashish0804)
* [Mediaite] Relax valid url by [coletdjnz](https://github.com/coletdjnz)
* [Newgrounds] Add age_limit and fix duration by [u-spec-png](https://github.com/u-spec-png)
* [Newgrounds] Fix view count on songs by [u-spec-png](https://github.com/u-spec-png)
* [parliamentlive.tv] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
* [PolskieRadio] Fix extractors by [jakubadamw](https://github.com/jakubadamw), [u-spec-png](https://github.com/u-spec-png)
* [reddit] Add embedded url by [u-spec-png](https://github.com/u-spec-png)
* [reddit] Fix 429 by generating a random `reddit_session` by [AjaxGb](https://github.com/AjaxGb)
* [Rumble] Add RumbleChannelIE by [Ashish0804](https://github.com/Ashish0804)
* [soundcloud:playlist] Detect last page correctly
* [SovietsCloset] Add duration from m3u8 by [ChillingPepper](https://github.com/ChillingPepper)
* [Streamable] Add codecs by [u-spec-png](https://github.com/u-spec-png)
* [vidme] Remove extractor by [alerikaisattera](https://github.com/alerikaisattera)
* [youtube:tab] Fallback to API when webpage fails to download by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Fix non-fatal errors in fetching player
* Fix `--flat-playlist` when neither IE nor id is known
* Fix `-f mp4` behaving differently from youtube-dl
* Workaround for bug in `ssl.SSLContext.load_default_certs`
* [aes] Improve performance slightly by [sulyi](https://github.com/sulyi)
* [cookies] Fix keyring fallback by [mbway](https://github.com/mbway)
* [embedsubtitle] Fix error when duration is unknown
* [ffmpeg] Fix error when subtitle file is missing
* [ffmpeg] Set max probesize to workaround AAC HLS stream issues by [shirt](https://github.com/shirt-dev)
* [FixupM3u8] Remove redundant run if merged is needed
* [hls] Fix decryption issues by [shirt](https://github.com/shirt-dev), [pukkandan](https://github.com/pukkandan)
* [http] Respect user-provided chunk size over extractor's
* [utils] Let traverse_obj accept functions as keys
* [docs] Add note about our custom ffmpeg builds
* [docs] Write embedding and contributing documentation by [pukkandan](https://github.com/pukkandan), [timethrow](https://github.com/timethrow)
* [update] Check for new version even if not updateable
* [build] Add more files to the tarball
* [build] Allow building with py2exe (and misc fixes)
* [build] Use pycryptodomex by [shirt](https://github.com/shirt-dev), [pukkandan](https://github.com/pukkandan)
* [cleanup] Some minor refactoring, improve docs and misc cleanup
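
A sketch of `--progress-template` from the progress-reporting rework above. The fields are exposed under a `progress.` prefix; the specific names used here (`_percent_str`, `_speed_str`) are the internal progress-hook fields and are an assumption:

```
# Render the progress line from the hook's pre-formatted string fields
yt-dlp --progress-template "%(progress._percent_str)s at %(progress._speed_str)s" URL
```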

### 2021.09.25

* Add new option `--netrc-location`
* [outtmpl] Allow alternate fields using `,`
* [outtmpl] Add format type `B` to treat the value as bytes, e.g. to limit the filename to a certain number of bytes (see example below)
* Separate the options `--ignore-errors` and `--no-abort-on-error`
* Basic framework for simultaneous download of multiple formats by [nao20010128nao](https://github.com/nao20010128nao)
* [17live] Add 17.live extractor by [nao20010128nao](https://github.com/nao20010128nao)
* [bilibili] Add BiliIntlIE and BiliIntlSeriesIE by [Ashish0804](https://github.com/Ashish0804)
* [CAM4] Add extractor by [alerikaisattera](https://github.com/alerikaisattera)
* [Chingari] Add extractors by [Ashish0804](https://github.com/Ashish0804)
* [CGTN] Add extractor by [chao813](https://github.com/chao813)
* [damtomo] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
* [gotostage] Add extractor by [poschi3](https://github.com/poschi3)
* [Koo] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [Mediaite] Add Extractor by [Ashish0804](https://github.com/Ashish0804)
* [Mediaklikk] Add Extractor by [tmarki](https://github.com/tmarki), [mrx23dot](https://github.com/mrx23dot), [coletdjnz](https://github.com/coletdjnz)
* [MuseScore] Add Extractor by [Ashish0804](https://github.com/Ashish0804)
* [Newgrounds] Add NewgroundsUserIE and improve extractor by [u-spec-png](https://github.com/u-spec-png)
* [nzherald] Add NZHeraldIE by [coletdjnz](https://github.com/coletdjnz)
* [Olympics] Add replay extractor by [Ashish0804](https://github.com/Ashish0804)
* [Peertube] Add channel and playlist extractors by [u-spec-png](https://github.com/u-spec-png)
* [radlive] Add extractor by [nyuszika7h](https://github.com/nyuszika7h)
* [SovietsCloset] Add extractor by [ChillingPepper](https://github.com/ChillingPepper)
* [Streamanity] Add Extractor by [alerikaisattera](https://github.com/alerikaisattera)
* [Theta] Add extractor by [alerikaisattera](https://github.com/alerikaisattera)
* [Yandex] Add ZenYandexIE and ZenYandexChannelIE by [Ashish0804](https://github.com/Ashish0804)
* [9Now] handle episodes of series by [dalanmiller](https://github.com/dalanmiller)
* [AnimalPlanet] Fix extractor by [Sipherdrakon](https://github.com/Sipherdrakon)
* [Arte] Improve description extraction by [renalid](https://github.com/renalid)
* [atv.at] Use jwt for API by [NeroBurner](https://github.com/NeroBurner)
* [brightcove] Extract subtitles from manifests
* [CBC] Fix CBC Gem extractors by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
* [cbs] Report appropriate error for DRM
* [comedycentral] Support `collection-playlist` by [nixxo](https://github.com/nixxo)
* [DIYNetwork] Support new format by [Sipherdrakon](https://github.com/Sipherdrakon)
* [downloader/niconico] Pass custom headers by [nao20010128nao](https://github.com/nao20010128nao)
* [dw] Fix extractor
* [Fancode] Fix live streams by [zenerdi0de](https://github.com/zenerdi0de)
* [funimation] Fix for locations outside US by [Jules-A](https://github.com/Jules-A), [pukkandan](https://github.com/pukkandan)
* [globo] Fix GloboIE by [Ashish0804](https://github.com/Ashish0804)
* [HiDive] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
* [Hotstar] Add referer for subs by [Ashish0804](https://github.com/Ashish0804)
* [itv] Fix extractor, add subtitles and thumbnails by [coletdjnz](https://github.com/coletdjnz), [sleaux-meaux](https://github.com/sleaux-meaux), [Vangelis66](https://github.com/Vangelis66)
* [lbry] Show error message from API response
* [Mxplayer] Use mobile API by [Ashish0804](https://github.com/Ashish0804)
* [NDR] Rewrite NDRIE by [Ashish0804](https://github.com/Ashish0804)
* [Nuvid] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
* [Oreilly] Handle new web url by [MKSherbini](https://github.com/MKSherbini)
* [pbs] Fix subtitle extraction by [coletdjnz](https://github.com/coletdjnz), [gesa](https://github.com/gesa), [raphaeldore](https://github.com/raphaeldore)
* [peertube] Update instances by [u-spec-png](https://github.com/u-spec-png)
* [plutotv] Fix extractor for URLs with `/en`
* [reddit] Workaround for 429 by redirecting to old.reddit.com
* [redtube] Fix exts
* [soundcloud] Make playlist extraction lazy
* [soundcloud] Retry playlist pages on `502` error and update `_CLIENT_ID`
* [southpark] Fix SouthParkDE by [coletdjnz](https://github.com/coletdjnz)
* [SovietsCloset] Fix playlists for games with only named categories by [ConquerorDopy](https://github.com/ConquerorDopy)
* [SpankBang] Fix uploader by [f4pp3rk1ng](https://github.com/f4pp3rk1ng), [coletdjnz](https://github.com/coletdjnz)
* [tiktok] Use API to fetch higher quality video by [MinePlayersPE](https://github.com/MinePlayersPE), [llacb47](https://github.com/llacb47)
* [TikTokUser] Fix extractor using mobile API by [MinePlayersPE](https://github.com/MinePlayersPE), [llacb47](https://github.com/llacb47)
* [videa] Fix some extraction errors by [nyuszika7h](https://github.com/nyuszika7h)
* [VrtNU] Handle login errors by [llacb47](https://github.com/llacb47)
* [vrv] Don't raise error when thumbnails are missing
* [youtube] Cleanup authentication code by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Fix `--mark-watched` with `--cookies-from-browser`
* [youtube] Improvements to JS player extraction and add extractor-args to skip it by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Retry on 'Unknown Error' by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Return full URL instead of just ID
* [youtube] Warn when trying to download clips
* [zdf] Improve format sorting
* [zype] Extract subtitles from the m3u8 manifest by [fstirlitz](https://github.com/fstirlitz)
* Allow `--force-write-archive` to work with `--flat-playlist`
* Download subtitles in order of `--sub-langs`
* Allow `0` in `--playlist-items`
* Handle more playlist errors with `-i`
* Fix `--no-get-comments`
* Fix `extra_info` being reused across runs
* Fix compat options `no-direct-merge` and `playlist-index`
* Dump files should obey `--trim-filename` by [sulyi](https://github.com/sulyi)
* [aes] Add `aes_gcm_decrypt_and_verify` by [sulyi](https://github.com/sulyi), [pukkandan](https://github.com/pukkandan)
* [aria2c] Fix IV for some AES-128 streams by [shirt](https://github.com/shirt-dev)
* [compat] Don't ignore `HOME` (if set) on windows
* [cookies] Make browser names case insensitive
* [cookies] Print warning for cookie decoding error only once
* [extractor] Fix root-relative URLs in MPD by [DigitalDJ](https://github.com/DigitalDJ)
* [ffmpeg] Add `aac_adtstoasc` when merging if needed
* [fragment,aria2c] Generalize and refactor some code
* [fragment] Avoid repeated request for AES key
* [fragment] Fix range header when using `-N` and media sequence by [shirt](https://github.com/shirt-dev)
* [hls,aes] Fallback to native implementation for AES-CBC and detect `Cryptodome` in addition to `Crypto`
* [hls] Byterange + AES128 is supported by native downloader
* [ModifyChapters] Improve sponsor chapter merge algorithm by [nihil-admirari](https://github.com/nihil-admirari)
* [ModifyChapters] Minor fixes
* [WebVTT] Adjust parser to accommodate PBS subtitles
* [utils] Improve `extract_timezone` by [dirkf](https://github.com/dirkf)
* [options] Fix `--no-config` and refactor reading of config files
* [options] Strip spaces and ignore empty entries in list-like switches
* [test/cookies] Improve logging
* [build] Automate more of the release process by [animelover1984](https://github.com/animelover1984), [pukkandan](https://github.com/pukkandan)
* [build] Fix sha256 by [nihil-admirari](https://github.com/nihil-admirari)
* [build] Bring back brew taps by [nao20010128nao](https://github.com/nao20010128nao)
* [build] Provide `--onedir` zip for windows
* [cleanup,docs] Add deprecation warning in docs for some counter-intuitive behaviour
* [cleanup] Fix line endings for `nebula.py` by [glenn-slayden](https://github.com/glenn-slayden)
* [cleanup] Improve `make clean-test` by [sulyi](https://github.com/sulyi)
* [cleanup] Misc
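
A sketch of the `B` format type added above; byte-limiting is useful for filesystems that cap name length in bytes rather than characters:

```
# Keep the title part of the filename within 200 bytes, not 200 characters
yt-dlp -o "%(title).200B [%(id)s].%(ext)s" URL
```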

### 2021.09.02

* **Native SponsorBlock** implementation by [nihil-admirari](https://github.com/nihil-admirari), [pukkandan](https://github.com/pukkandan) (see example below)
    * `--sponsorblock-remove CATS` removes specified chapters from file
    * `--sponsorblock-mark CATS` marks the specified sponsor sections as chapters
    * `--sponsorblock-chapter-title TMPL` to specify sponsor chapter template
    * `--sponsorblock-api URL` to use a different API
    * No re-encoding is done unless `--force-keyframes-at-cuts` is used
    * The fetched sponsor sections are written to the infojson
    * Deprecates: `--sponskrub`, `--no-sponskrub`, `--sponskrub-cut`, `--no-sponskrub-cut`, `--sponskrub-force`, `--no-sponskrub-force`, `--sponskrub-location`, `--sponskrub-args`
* Split `--embed-chapters` from `--embed-metadata` (it still implies the former by default)
* Add option `--remove-chapters` to remove arbitrary chapters by [nihil-admirari](https://github.com/nihil-admirari), [pukkandan](https://github.com/pukkandan)
* Add option `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters by [nihil-admirari](https://github.com/nihil-admirari)
* Let `--match-filter` reject entries early
    * Makes redundant: `--match-title`, `--reject-title`, `--min-views`, `--max-views`
* [lazy_extractor] Improvements (It now passes all tests)
    * Bugfix for when plugin directory doesn't exist by [kidonng](https://github.com/kidonng)
    * Create instance only after pre-checking archive
    * Import actual class if an attribute is accessed
    * Fix `suitable` and add flake8 test
* [downloader/ffmpeg] Experimental support for DASH manifests (including live)
    * Your ffmpeg must have [this patch](https://github.com/FFmpeg/FFmpeg/commit/3249c757aed678780e22e99a1a49f4672851bca9) applied for YouTube DASH to work
* [downloader/ffmpeg] Allow passing custom arguments before `-i`
* [BannedVideo] Add extractor by [smege1001](https://github.com/smege1001), [blackjack4494](https://github.com/blackjack4494), [pukkandan](https://github.com/pukkandan)
* [bilibili] Add category extractor by [animelover1984](https://github.com/animelover1984)
* [Epicon] Add extractors by [Ashish0804](https://github.com/Ashish0804)
* [filmmodu] Add extractor by [mzbaulhaque](https://github.com/mzbaulhaque)
* [GabTV] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [Hungama] Fix `HungamaSongIE` and add `HungamaAlbumPlaylistIE` by [Ashish0804](https://github.com/Ashish0804)
* [ManotoTV] Add new extractors by [tandy1000](https://github.com/tandy1000)
* [Niconico] Add Search extractors by [animelover1984](https://github.com/animelover1984), [pukkandan](https://github.com/pukkandan)
* [Patreon] Add `PatreonUserIE` by [zenerdi0de](https://github.com/zenerdi0de)
* [peloton] Add extractor by [IONECarter](https://github.com/IONECarter), [capntrips](https://github.com/capntrips), [pukkandan](https://github.com/pukkandan)
* [ProjectVeritas] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [radiko] Add extractors by [nao20010128nao](https://github.com/nao20010128nao)
* [StarTV] Add extractor for `startv.com.tr` by [mrfade](https://github.com/mrfade), [coletdjnz](https://github.com/coletdjnz)
* [tiktok] Add `TikTokUserIE` by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan)
* [Tokentube] Add extractor by [u-spec-png](https://github.com/u-spec-png)
* [TV2Hu] Fix `TV2HuIE` and add `TV2HuSeriesIE` by [Ashish0804](https://github.com/Ashish0804)
* [voicy] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
* [adobepass] Fix Verizon SAML login by [nyuszika7h](https://github.com/nyuszika7h), [ParadoxGBB](https://github.com/ParadoxGBB)
* [afreecatv] Fix adult VODs by [wlritchi](https://github.com/wlritchi)
* [afreecatv] Tolerate failure to parse date string by [wlritchi](https://github.com/wlritchi)
* [aljazeera] Fix extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
* [ATV.at] Fix extractor for ATV.at by [NeroBurner](https://github.com/NeroBurner), [coletdjnz](https://github.com/coletdjnz)
* [bitchute] Fix test by [mahanstreamer](https://github.com/mahanstreamer)
* [camtube] Remove obsolete extractor by [alerikaisattera](https://github.com/alerikaisattera)
* [CDA] Add more formats by [u-spec-png](https://github.com/u-spec-png)
* [eroprofile] Fix page skipping in albums by [jhwgh1968](https://github.com/jhwgh1968)
* [facebook] Fix format sorting
* [facebook] Fix metadata extraction by [kikuyan](https://github.com/kikuyan)
* [facebook] Update onion URL by [Derkades](https://github.com/Derkades)
* [HearThisAtIE] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
* [instagram] Add referrer to prevent throttling by [u-spec-png](https://github.com/u-spec-png), [kikuyan](https://github.com/kikuyan)
* [iwara.tv] Extract more metadata by [BunnyHelp](https://github.com/BunnyHelp)
* [iwara] Add thumbnail by [i6t](https://github.com/i6t)
* [kakao] Fix extractor
* [mediaset] Fix extraction for some videos by [nyuszika7h](https://github.com/nyuszika7h)
* [Motherless] Fix extractor by [coletdjnz](https://github.com/coletdjnz)
* [Nova] fix extractor by [std-move](https://github.com/std-move)
* [ParamountPlus] Fix geo verification by [shirt](https://github.com/shirt-dev)
* [peertube] handle new video URL format by [Chocobozzz](https://github.com/Chocobozzz)
* [pornhub] Separate and fix playlist extractor by [mzbaulhaque](https://github.com/mzbaulhaque)
* [reddit] Fix for quarantined subreddits by [ouwou](https://github.com/ouwou)
* [ShemarooMe] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
* [soundcloud] Refetch `client_id` on 403
* [tiktok] Fix metadata extraction
* [TV2] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
* [tv5mondeplus] Fix extractor by [korli](https://github.com/korli)
* [VH1,TVLand] Fix extractors by [Sipherdrakon](https://github.com/Sipherdrakon)
* [Viafree] Fix extractor and extract subtitles by [coletdjnz](https://github.com/coletdjnz)
* [XHamster] Extract `uploader_id` by [octotherp](https://github.com/octotherp)
* [youtube] Add `shorts` to `_VALID_URL`
* [youtube] Add av01 itags to known formats list by [blackjack4494](https://github.com/blackjack4494)
* [youtube] Extract error messages from HTTPError response by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Fix subtitle names
* [youtube] Prefer audio stream that YouTube considers default
* [youtube] Remove annotations and deprecate `--write-annotations` by [coletdjnz](https://github.com/coletdjnz)
* [Zee5] Fix extractor and add subtitles by [Ashish0804](https://github.com/Ashish0804)
* [aria2c] Obey `--rate-limit`
* [EmbedSubtitle] Continue even if some files are missing
* [extractor] Better error message for DRM
* [extractor] Common function `_match_valid_url`
* [extractor] Show video id in error messages if possible
* [FormatSort] Remove priority of `lang`
* [options] Add `_set_from_options_callback`
* [SubtitleConvertor] Fix bug during subtitle conversion
* [utils] Add `parse_qs`
* [webvtt] Fix timestamp overflow adjustment by [fstirlitz](https://github.com/fstirlitz)
* Bugfix for `--replace-in-metadata`
* Don't try to merge with final extension
* Fix `--force-overwrites` when using `-k`
* Fix `--no-prefer-free-formats` by [CeruleanSky](https://github.com/CeruleanSky)
* Fix `-F` for extractors that directly return url
* Fix `-J` when there are failed videos
* Fix `extra_info` being reused across runs
* Fix `playlist_index` not obeying `playlist_start` and add tests
* Fix resuming of single formats when using `--no-part`
* Revert erroneous use of the `Content-Length` header by [fstirlitz](https://github.com/fstirlitz)
* Use `os.replace` where applicable by [paulwrubel](https://github.com/paulwrubel)
* [build] Add homebrew taps `yt-dlp/taps/yt-dlp` by [nao20010128nao](https://github.com/nao20010128nao)
* [build] Fix bug in making `yt-dlp.tar.gz`
* [docs] Fix some typos by [pukkandan](https://github.com/pukkandan), [zootedb0t](https://github.com/zootedb0t)
* [cleanup] Replace improper use of tab in trovo by [glenn-slayden](https://github.com/glenn-slayden)
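
A minimal sketch of the native SponsorBlock options above, using the standard `sponsor` and `intro` categories:

```
# Remove sponsor segments and mark intros as chapters; no re-encoding happens
yt-dlp --sponsorblock-remove sponsor --sponsorblock-mark intro URL
```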

### 2021.08.10

* Add option `--replace-in-metadata` (see example below)
* Add option `--no-simulate` to not simulate even when `--print` or `--list...` are used - Deprecates `--print-json`
* Allow entire infodict to be printed using `%()s` - makes `--dump-json` redundant
* Allow multiple `--exec` and `--exec-before-download`
* Add regex to `--match-filter`
* Add all format filtering operators also to `--match-filter` by [max-te](https://github.com/max-te)
* Add compat-option `no-keep-subs`
* [adobepass] Add MSO Cablevision by [Jessecar96](https://github.com/Jessecar96)
* [BandCamp] Add BandcampMusicIE by [Ashish0804](https://github.com/Ashish0804)
* [blackboardcollaborate] Add new extractor by [mzbaulhaque](https://github.com/mzbaulhaque)
* [eroprofile] Add album downloader by [jhwgh1968](https://github.com/jhwgh1968)
* [mirrativ] Add extractors by [nao20010128nao](https://github.com/nao20010128nao)
* [openrec] Add extractors by [nao20010128nao](https://github.com/nao20010128nao)
* [nbcolympics:stream] Fix extractor by [nchilada](https://github.com/nchilada), [pukkandan](https://github.com/pukkandan)
* [nbcolympics] Update extractor for 2020 olympics by [wesnm](https://github.com/wesnm)
* [paramountplus] Separate extractor and fix some titles by [shirt](https://github.com/shirt-dev), [pukkandan](https://github.com/pukkandan)
* [RCTIPlus] Support events and TV by [MinePlayersPE](https://github.com/MinePlayersPE)
* [Newgrounds] Improve extractor and fix playlist by [u-spec-png](https://github.com/u-spec-png)
* [aenetworks] Update `_THEPLATFORM_KEY` and `_THEPLATFORM_SECRET` by [wesnm](https://github.com/wesnm)
* [crunchyroll] Fix thumbnail by [funniray](https://github.com/funniray)
* [HotStar] Use API for metadata and extract subtitles by [Ashish0804](https://github.com/Ashish0804)
* [instagram] Fix comments extraction by [u-spec-png](https://github.com/u-spec-png)
* [peertube] Fix videos without description by [u-spec-png](https://github.com/u-spec-png)
* [twitch:clips] Extract `display_id` by [dirkf](https://github.com/dirkf)
* [viki] Print error message from API request
* [Vine] Remove invalid formats by [u-spec-png](https://github.com/u-spec-png)
* [VrtNU] Fix XSRF token by [pgaig](https://github.com/pgaig)
* [vrv] Fix thumbnail extraction by [funniray](https://github.com/funniray)
* [youtube] Add extractor-arg `include-live-dash` to show live dash formats
* [youtube] Improve signature function detection by [PSlava](https://github.com/PSlava)
* [youtube] Raise appropriate error when API pages can't be downloaded
* Ensure `_write_ytdl_file` closes file handle on error
* Fix `--compat-options filename` by [stdedos](https://github.com/stdedos)
* Fix issues with infodict sanitization
* Fix resuming when using `--no-part`
* Fix wrong extension for intermediate files
* Handle `BrokenPipeError` by [kikuyan](https://github.com/kikuyan)
* Show libraries present in verbose head
* [extractor] Detect `sttp` as subtitles in MPD by [fstirlitz](https://github.com/fstirlitz)
* [extractor] Reset non-repeating warnings per video
* [ffmpeg] Fix streaming `mp4` to `stdout`
* [ffmpeg] Allow `--ffmpeg-location` to be a file with different name
* [utils] Fix `InAdvancePagedList.__getitem__`
* [utils] Fix `traverse_obj` depth when `is_user_input`
* [webvtt] Merge daisy-chained duplicate cues by [fstirlitz](https://github.com/fstirlitz)
* [build] Use custom build of `pyinstaller` by [shirt](https://github.com/shirt-dev)
* [tests:download] Add batch testing for extractors (`test_YourExtractor_all`)
* [docs] Document which fields `--add-metadata` adds to the file
* [docs] Fix some mistakes and improve doc
* [cleanup] Misc code cleanup
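
A sketch of `--replace-in-metadata`, which takes the field list, a regex and the replacement:

```
# Replace spaces and underscores with "-" in title and uploader
yt-dlp --replace-in-metadata "title,uploader" "[ _]" "-" URL
```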

### 2021.08.02

* Add logo, banner and donate links
* [outtmpl] Expand and escape environment variables
* [outtmpl] Add format types `j` (json), `l` (comma delimited list), `q` (quoted for terminal) (see example below)
* [downloader] Allow streaming some unmerged formats to stdout using ffmpeg
* [youtube] **Age-gate bypass**
    * Add `agegate` clients by [pukkandan](https://github.com/pukkandan), [MinePlayersPE](https://github.com/MinePlayersPE)
    * Add `thirdParty` to agegate clients to bypass more videos
    * Simplify client definitions, expose `embedded` clients
    * Improve age-gate detection by [coletdjnz](https://github.com/coletdjnz)
    * Fix default global API key by [coletdjnz](https://github.com/coletdjnz)
    * Add `creator` clients for age-gate bypass using unverified accounts by [zerodytrash](https://github.com/zerodytrash), [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [adobepass] Add MSO Sling TV by [wesnm](https://github.com/wesnm)
* [CBS] Add ParamountPlusSeriesIE by [Ashish0804](https://github.com/Ashish0804)
* [dplay] Add `ScienceChannelIE` by [Sipherdrakon](https://github.com/Sipherdrakon)
* [UtreonIE] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [youtube] Add `mweb` client by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Add `player_client=all`
* [youtube] Force `hl=en` for comments by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Fix format sorting when using alternate clients
* [youtube] Misc cleanup by [pukkandan](https://github.com/pukkandan), [coletdjnz](https://github.com/coletdjnz)
* [youtube] Extract SAPISID only once
* [CBS] Add fallback by [llacb47](https://github.com/llacb47), [pukkandan](https://github.com/pukkandan)
* [Hotstar] Support cookies by [Ashish0804](https://github.com/Ashish0804)
* [HotStarSeriesIE] Fix regex by [Ashish0804](https://github.com/Ashish0804)
* [bilibili] Improve `_VALID_URL`
* [mediaset] Fix extraction by [nixxo](https://github.com/nixxo)
* [Mxplayer] Add h265 formats by [Ashish0804](https://github.com/Ashish0804)
* [RCTIPlus] Remove PhantomJS dependency by [MinePlayersPE](https://github.com/MinePlayersPE)
* [tenplay] Add MA15+ age limit by [pento](https://github.com/pento)
* [vidio] Fix login error detection by [MinePlayersPE](https://github.com/MinePlayersPE)
* [vimeo] Better extraction of original file by [Ashish0804](https://github.com/Ashish0804)
* [generic] Support KVS player (replaces ThisVidIE) by [rigstot](https://github.com/rigstot)
* Add compat-option `no-clean-infojson`
* Remove `asr` appearing twice in `-F`
* Set `home:` as the default key for `-P`
* [utils] Fix slicing of reversed `LazyList`
* [FormatSort] Fix bug for audio with unknown codec
* [test:download] Support testing with `ignore_no_formats_error`
* [cleanup] Refactor some code
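
A sketch of the new format types, assuming `--print` may be repeated on one command line:

```
# `l`: comma-delimited list; `j`: JSON; `q`: quoted for the terminal
yt-dlp --print "%(tags)l" --print "%(formats)j" --print "%(title)q" URL
```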

### 2021.07.24

* [youtube:tab] Extract video duration early
* [downloader] Pass `info_dict` to `progress_hook`s
* [youtube] Fix age-gated videos for API clients when cookies are supplied by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Disable `get_video_info` age-gate workaround
    * This endpoint seems to be completely dead
* [youtube] Try all clients even if age-gated
* [youtube] Fix subtitles only being extracted from the first client
* [youtube] Simplify `_get_text`
* [cookies] bugfix for microsoft edge on macOS
* [cookies] Handle `sqlite` `ImportError` gracefully by [mbway](https://github.com/mbway)
* [cookies] Handle errors when importing `keyring`

### 2021.07.21

* **Add option `--cookies-from-browser`** to load cookies from a browser by [mbway](https://github.com/mbway) (see example below)
    * Usage: `--cookies-from-browser BROWSER[:PROFILE_NAME_OR_PATH]`
    * Also added `--no-cookies-from-browser`
    * To decrypt chromium cookies, `keyring` is needed for UNIX and `pycryptodome` for Windows
* Add option `--exec-before-download`
* Add field `live_status`
* [FFmpegMetadata] Add language of each stream and some refactoring
* [douyin] Add extractor by [pukkandan](https://github.com/pukkandan), [pyx](https://github.com/pyx)
* [pornflip] Add extractor by [mzbaulhaque](https://github.com/mzbaulhaque)
* **[youtube] Extract data from multiple clients** by [pukkandan](https://github.com/pukkandan), [coletdjnz](https://github.com/coletdjnz)
    * `player_client` now accepts multiple clients
    * Default `player_client` = `android,web`
        * This uses twice as many requests, but avoids throttling for most videos while also not losing any formats
    * Music clients can be specifically requested and are enabled by default for `music.youtube.com` URLs
    * Added `player_client=ios` (Known issue: formats from ios are not sorted correctly)
    * Add age-gate bypass for android and ios clients
* [youtube] Extract more thumbnails
    * The thumbnail URLs are hard-coded and their actual existence is tested lazily
    * Added option `--no-check-formats` to not test them
* [youtube] Misc fixes
    * Improve extraction of livestream metadata by [pukkandan](https://github.com/pukkandan), [krichbanana](https://github.com/krichbanana)
    * Hide live dash formats since they can't be downloaded anyway
    * Fix authentication when using multiple accounts by [coletdjnz](https://github.com/coletdjnz)
    * Fix controversial videos when requested via API by [coletdjnz](https://github.com/coletdjnz)
    * Fix session index extraction and headers for non-web player clients by [coletdjnz](https://github.com/coletdjnz)
    * Make `--extractor-retries` work for more errors
    * Fix sorting of 3gp format
    * Sanity check `chapters` (and refactor related code)
    * Make `parse_time_text` and `_extract_chapters` non-fatal
    * Misc cleanup and bug fixes by [coletdjnz](https://github.com/coletdjnz)
* [youtube:tab] Fix channels tab
* [youtube:tab] Extract playlist availability by [coletdjnz](https://github.com/coletdjnz)
* **[youtube:comments] Move comment extraction to new API** by [coletdjnz](https://github.com/coletdjnz)
    * Adds extractor-args `comment_sort` (`top`/`new`), `max_comments`, `max_comment_depth`
* [youtube:comments] Fix `is_favorited`, improve `like_count` parsing by [coletdjnz](https://github.com/coletdjnz)
* [BravoTV] Improve metadata extraction by [kevinoconnor7](https://github.com/kevinoconnor7)
* [crunchyroll:playlist] Force http
* [yahoo:gyao:player] Relax `_VALID_URL` by [nao20010128nao](https://github.com/nao20010128nao)
* [nebula] Authentication via tokens from cookie jar by [hheimbuerger](https://github.com/hheimbuerger), [TpmKranz](https://github.com/TpmKranz)
* [RTP] Fix extraction and add subtitles by [fstirlitz](https://github.com/fstirlitz)
* [viki] Rewrite extractors and add extractor-arg `video_types` to `vikichannel` by [zackmark29](https://github.com/zackmark29), [pukkandan](https://github.com/pukkandan)
* [vlive] Extract thumbnail directly in addition to the one from Naver
* [generic] Extract previously missed subtitles by [fstirlitz](https://github.com/fstirlitz)
* [generic] Extract everything in the SMIL manifest and detect discarded subtitles by [fstirlitz](https://github.com/fstirlitz)
* [embedthumbnail] Fix `_get_thumbnail_resolution`
* [metadatafromfield] Do not detect numbers as field names
* Fix selectors `all`, `mergeall` and add tests
* Errors in playlist extraction should obey `--ignore-errors`
* Fix bug where `original_url` was not propagated when `_type`=`url`
* Revert "Merge webm formats into mkv if thumbnails are to be embedded (#173)"
    * This was wrongly checking for `write_thumbnail`
* Improve `extractor_args` parsing
* Rename `NOTE` in `-F` to `MORE INFO` since it's often confused to be the same as `format_note`
* Add `only_once` param for `write_debug` and `report_warning`
* [extractor] Allow extracting multiple groups in `_search_regex` by [fstirlitz](https://github.com/fstirlitz)
* [utils] Improve `traverse_obj`
* [utils] Add `variadic`
* [utils] Improve `js_to_json` comment regex by [fstirlitz](https://github.com/fstirlitz)
* [webvtt] Fix timestamps
* [compat] Remove unnecessary code
* [docs] fix default of multistreams
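
Minimal usage of `--cookies-from-browser` as described above:

```
# Default Firefox profile; or a named Chrome profile (quote names with spaces)
yt-dlp --cookies-from-browser firefox URL
yt-dlp --cookies-from-browser "chrome:Profile 1" URL
```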

### 2021.07.07

* Merge youtube-dl: Upto [commit/a803582](https://github.com/ytdl-org/youtube-dl/commit/a8035827177d6b59aca03bd717acb6a9bdd75ada)
* Add `--extractor-args` to pass some extractor-specific arguments. See [readme](https://github.com/yt-dlp/yt-dlp#extractor-arguments) (see example below)
    * Add extractor option `skip` for `youtube`, e.g. `--extractor-args youtube:skip=hls,dash`
    * Deprecates `--youtube-skip-dash-manifest`, `--youtube-skip-hls-manifest`, `--youtube-include-dash-manifest`, `--youtube-include-hls-manifest`
* Allow `--list...` options to work with `--print`, `--quiet` and other `--list...` options
* [youtube] Use `player` API for additional video extraction requests by [coletdjnz](https://github.com/coletdjnz)
    * **Fixes youtube premium music** (format 141) extraction
    * Adds extractor option `player_client` = `web`/`android`
        * **`--extractor-args youtube:player_client=android` works around the throttling** for the time-being
    * Adds extractor option `player_skip=config`
    * Adds age-gate fallback using embedded client
* [youtube] Choose correct Live chat API for upcoming streams by [krichbanana](https://github.com/krichbanana)
* [youtube] Fix subtitle names for age-gated videos
* [youtube:comments] Fix error handling and add `itct` to params by [coletdjnz](https://github.com/coletdjnz)
* [youtube_live_chat] Fix download with cookies by [siikamiika](https://github.com/siikamiika)
* [youtube_live_chat] use `clickTrackingParams` by [siikamiika](https://github.com/siikamiika)
* [Funimation] Rewrite extractor
    * Add `FunimationShowIE` by [Mevious](https://github.com/Mevious)
    * **Treat the different versions of an episode as different formats of a single video**
        * This changes the video `id` and will break existing archives
    * Compat option `seperate-video-versions` to fall back to old behavior including using the old video ids
    * Support direct `/player/` URL
    * Extractor options `language` and `version` to pre-select them during extraction
        * These options may be removed in the future if we can extract all formats without additional network requests
        * Do not rely on these for format selection and use `-f` filters instead
* [AdobePass] Add Spectrum MSO by [kevinoconnor7](https://github.com/kevinoconnor7), [ohmybahgosh](https://github.com/ohmybahgosh)
* [facebook] Extract description and fix title
* [fancode] Fix extraction, support live and allow login with refresh token by [zenerdi0de](https://github.com/zenerdi0de)
* [plutotv] Improve `_VALID_URL`
* [RCTIPlus] Add extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
* [Soundcloud] Allow login using oauth token by [blackjack4494](https://github.com/blackjack4494)
* [TBS] Support livestreams by [llacb47](https://github.com/llacb47)
* [videa] Fix extraction by [nyuszika7h](https://github.com/nyuszika7h)
* [yahoo] Fix extraction by [llacb47](https://github.com/llacb47), [pukkandan](https://github.com/pukkandan)
* Process videos when using `--ignore-no-formats-error` by [krichbanana](https://github.com/krichbanana)
* Fix `--throttled-rate` when using `--load-info-json`
* Fix `--flat-playlist` when entry has no `ie_key`
* Fix `check_formats` catching `ExtractorError` instead of `DownloadError`
* Fix deprecated option `--list-formats-old`
* [downloader/ffmpeg] Fix `--ppa` when using simultaneous download
* [extractor] Prevent unnecessary download of hls manifests and refactor `hls_split_discontinuity`
* [fragment] Handle status of download and errors in threads correctly; and minor refactoring
* [thumbnailsconvertor] Treat `jpeg` as `jpg`
* [utils] Fix issues with `LazyList` reversal
* [extractor] Allow extractors to set their own login hint
* [cleanup] Simplify format selector code with `LazyList` and `yield from`
* [cleanup] Clean `extractor.common._merge_subtitles` signature
* [cleanup] Fix some typos
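
A sketch combining the extractor options described above; the `;` separator between different arguments is an assumption:

```
# Use the android client to avoid throttling, and skip the HLS/DASH manifests
yt-dlp --extractor-args "youtube:player_client=android;skip=hls,dash" URL
```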

### 2021.06.23

* Merge youtube-dl: Upto [commit/379f52a](https://github.com/ytdl-org/youtube-dl/commit/379f52a4954013767219d25099cce9e0f9401961)
* **Add option `--throttled-rate`** below which video data is re-extracted (see example below)
* [fragment] **Merge during download for `-N`**, and refactor `hls`/`dash`
* [websockets] Add `WebSocketFragmentFD` by [nao20010128nao](https://github.com/nao20010128nao), [pukkandan](https://github.com/pukkandan)
* Allow `images` formats in addition to video/audio
* [downloader/mhtml] Add new downloader for slideshows/storyboards by [fstirlitz](https://github.com/fstirlitz)
* [youtube] Temporary **fix for age-gate**
* [youtube] Support ongoing live chat by [siikamiika](https://github.com/siikamiika)
* [youtube] Improve SAPISID cookie handling by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Login is not needed for `:ytrec`
* [youtube] Non-fatal alert reporting for unavailable videos page by [coletdjnz](https://github.com/coletdjnz)
* [twitcasting] Websocket support by [nao20010128nao](https://github.com/nao20010128nao)
* [mediasite] Extract slides by [fstirlitz](https://github.com/fstirlitz)
* [funimation] Extract subtitles
* [pornhub] Extract `cast`
* [hotstar] Use server time for authentication instead of local time
* [EmbedThumbnail] Fix for already downloaded thumbnail
* [EmbedThumbnail] Add compat-option `embed-thumbnail-atomicparsley`
* Expand `--check-formats` to thumbnails
* Fix id sanitization in filenames
* Skip fixup of existing files and add `--fixup force` to force it
* Better error handling of syntax errors in `-f`
* Use `NamedTemporaryFile` for `--check-formats`
* [aria2c] Lower `--min-split-size` for HTTP downloads
* [options] Rename `--add-metadata` to `--embed-metadata`
* [utils] Improve `LazyList` and add tests
* [build] Build Windows x86 version with py3.7 and remove redundant tests by [pukkandan](https://github.com/pukkandan), [shirt](https://github.com/shirt-dev)
* [docs] Clarify that `--embed-metadata` embeds chapter markers
* [cleanup] Refactor fixup
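
A sketch combining `--throttled-rate` with concurrent fragment downloads:

```
# Re-extract the video data if speed drops below 100K; use 4 fragment threads
yt-dlp --throttled-rate 100K -N 4 URL
```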

### 2021.06.09

* Fix bug where `%(field)d` in filename template throws error
* [outtmpl] Improve offset parsing
* [test] More rigorous tests for `prepare_filename`

### 2021.06.08

* Remove support for obsolete Python versions: Only 3.6+ is now supported
* Merge youtube-dl: Upto [commit/c2350ca](https://github.com/ytdl-org/youtube-dl/commit/c2350cac243ba1ec1586fe85b0d62d1b700047a2)
* [hls] Fix decryption for multithreaded downloader
* [extractor] Fix pre-checking archive for some extractors
* [extractor] Fix FourCC fallback when parsing ISM by [fstirlitz](https://github.com/fstirlitz)
* [twitcasting] Add TwitCastingUserIE, TwitCastingLiveIE by [pukkandan](https://github.com/pukkandan), [nao20010128nao](https://github.com/nao20010128nao)
* [vidio] Add VidioPremierIE and VidioLiveIE by [MinePlayersPE](https://github.com/MinePlayersPE)
* [viki] Fix extraction from [ytdl-org/youtube-dl@59e583f](https://github.com/ytdl-org/youtube-dl/commit/59e583f7e8530ca92776c866897d895c072e2a82)
* [youtube] Support shorts URL
* [zoom] Extract transcripts as subtitles
* Add field `original_url` with the user-inputted URL
* Fix and refactor `prepare_outtmpl`
* Make more fields available for `--print` when used with `--flat-playlist`
* [utils] Generalize `traverse_dict` to `traverse_obj`
* [downloader/ffmpeg] Hide FFmpeg banner unless in verbose mode by [fstirlitz](https://github.com/fstirlitz)
* [build] Release `yt-dlp.tar.gz`
* [build,update] Add GNU-style SHA512 and prepare updater for similar SHA256 by [nihil-admirari](https://github.com/nihil-admirari)
* [pyinst] Show Python version in exe metadata by [nihil-admirari](https://github.com/nihil-admirari)
* [docs] Improve documentation of dependencies
* [cleanup] Mark unused files
* [cleanup] Point all shebang to `python3` by [fstirlitz](https://github.com/fstirlitz)
* [cleanup] Remove duplicate file `trovolive.py`

### 2021.06.01

* Merge youtube-dl: Upto [commit/d495292](https://github.com/ytdl-org/youtube-dl/commit/d495292852b6c2f1bd58bc2141ff2b0265c952cf)
* Pre-check archive and filters during playlist extraction
* Handle Basic Auth `user:pass` in URLs by [hhirtz](https://github.com/hhirtz) and [pukkandan](https://github.com/pukkandan)
* [archiveorg] Add YoutubeWebArchiveIE by [coletdjnz](https://github.com/coletdjnz) and [alex-gedeon](https://github.com/alex-gedeon)
* [fancode] Add extractor by [rhsmachine](https://github.com/rhsmachine)
* [patreon] Support vimeo embeds by [rhsmachine](https://github.com/rhsmachine)
* [Saitosan] Add new extractor by [llacb47](https://github.com/llacb47)
* [ShemarooMe] Add extractor by [Ashish0804](https://github.com/Ashish0804) and [pukkandan](https://github.com/pukkandan)
* [telemundo] Add extractor by [king-millez](https://github.com/king-millez)
* [SonyLIV] Add SonyLIVSeriesIE and subtitle support by [Ashish0804](https://github.com/Ashish0804)
* [Hotstar] Add HotStarSeriesIE by [Ashish0804](https://github.com/Ashish0804)
* [Voot] Add VootSeriesIE by [Ashish0804](https://github.com/Ashish0804)
* [vidio] Support login and premium videos by [MinePlayersPE](https://github.com/MinePlayersPE)
* [fragment] When using `-N`, do not keep the fragment content in memory
* [ffmpeg] Download and merge in a single step if possible
* [ThumbnailsConvertor] Support conversion to `png` and make it the default by [louie-github](https://github.com/louie-github)
* [VideoConvertor] Generalize with remuxer and allow conditional recoding
* [EmbedThumbnail] Embed in `mp4`/`m4a` using mutagen by [tripulse](https://github.com/tripulse) and [pukkandan](https://github.com/pukkandan)
* [EmbedThumbnail] Embed if any thumbnail was downloaded, not just the best
* [EmbedThumbnail] Correctly escape filename
* [update] replace self without launching a subprocess in windows
* [update] Block further update for unsupported systems
* Refactor `__process_playlist` by creating `LazyList`
* Write messages to `stderr` when both `quiet` and `verbose`
* Sanitize and sort playlist thumbnails
* Remove `None` values from `info.json`
* [extractor] Always prefer native hls downloader by default
* [extractor] Skip subtitles without URI in m3u8 manifests by [hheimbuerger](https://github.com/hheimbuerger)
* [extractor] Functions to parse `socket.io` response as `json` by [pukkandan](https://github.com/pukkandan) and [llacb47](https://github.com/llacb47)
* [extractor] Allow `note=False` when extracting manifests
* [utils] Escape URLs in `sanitized_Request`, not `sanitize_url`
* [hls] Disable external downloader for `webvtt`
* [youtube] `/live` URLs should raise error if channel is not live
* [youtube] Bug fixes
* [zee5] Fix m3u8 formats' extension
* [ard] Allow URLs without `-` before id by [olifre](https://github.com/olifre)
* [cleanup] `YoutubeDL._match_entry`
* [cleanup] Refactor updater
* [cleanup] Refactor ffmpeg convertors
* [cleanup] setup.py

### 2021.05.20

* **Youtube improvements**:
    * Support youtube music `MP`, `VL` and `browse` pages
    * Extract more formats for youtube music by [craftingmod](https://github.com/craftingmod), [coletdjnz](https://github.com/coletdjnz) and [pukkandan](https://github.com/pukkandan)
    * Extract multiple subtitles in same language by [pukkandan](https://github.com/pukkandan) and [tpikonen](https://github.com/tpikonen)
    * Redirect channels that don't have a `videos` tab to their `UU` playlists
    * Support in-channel search
    * Sort audio-only formats correctly
    * Always extract `maxresdefault` thumbnail
    * Extract audio language
    * Add subtitle language names by [nixxo](https://github.com/nixxo) and [tpikonen](https://github.com/tpikonen)
    * Show alerts only from the final webpage
    * Add `html5=1` param to `get_video_info` page requests by [coletdjnz](https://github.com/coletdjnz)
    * Better message when login required
* **Add option `--print`**: to print any field/template (see example below)
    * Makes redundant: `--get-description`, `--get-duration`, `--get-filename`, `--get-format`, `--get-id`, `--get-thumbnail`, `--get-title`, `--get-url`
* Field `additional_urls` to download additional videos from metadata using [`--parse-metadata`](https://github.com/yt-dlp/yt-dlp#modifying-metadata)
* Merge youtube-dl: Upto [commit/dfbbe29](https://github.com/ytdl-org/youtube-dl/commit/dfbbe2902fc67f0f93ee47a8077c148055c67a9b)
* Write thumbnail of playlist and add `pl_thumbnail` outtmpl key
* [embedthumbnail] Add `flac` support and refactor `mutagen` code by [pukkandan](https://github.com/pukkandan) and [tripulse](https://github.com/tripulse)
* [audius:artist] Add extractor by [king-millez](https://github.com/king-millez)
* [parlview] Add extractor by [king-millez](https://github.com/king-millez)
* [tenplay] Fix extractor by [king-millez](https://github.com/king-millez)
* [rmcdecouverte] Generalize `_VALID_URL`
* Add compat-option `no-attach-infojson`
* Add field `name` for subtitles
* Ensure `post_extract` and `pre_process` only run once
* Fix `--check-formats` when there is network error
* Standardize `write_debug` and `get_param`
* [options] Alias `--write-comments`, `--no-write-comments`
* [options] Refactor callbacks
* [test:download] Only extract enough videos for `playlist_mincount`
* [extractor] bugfix for when `compat_opts` is not given
* [build] Fix x86 build by [shirt](https://github.com/shirt-dev)
* [cleanup] code formatting, youtube tests and readme
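
A sketch of `--print` and of feeding `additional_urls` via `--parse-metadata`; the regex here is a simplified, hypothetical pattern:

```
# Print a composed template without downloading
yt-dlp --print "%(title)s [%(id)s]" URL
# Queue any URL found in the description for download
yt-dlp --parse-metadata "description:(?P<additional_urls>https?://\S+)" URL
```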

### 2021.05.11

* **Deprecate support for python versions < 3.6**
* **Subtitle extraction from manifests** by [fstirlitz](https://github.com/fstirlitz). See [be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details
* **Improve output template:** (see example below)
    * Allow slicing lists/strings using `field.start:end:step`
    * A field can also be used as offset like `field1+num+field2`
    * A default value can be given using `field|default`
    * Prevent invalid fields from causing errors
* **Merge youtube-dl**: Upto [commit/a726009](https://github.com/ytdl-org/youtube-dl/commit/a7260099873acc6dc7d76cafad2f6b139087afd0)
* **Remove options** `-l`, `-t`, `-A` completely and disable `--auto-number`, `--title`, `--literal`, `--id`
* [Plugins] Prioritize plugins over standard extractors and prevent plugins from overwriting the standard extractor classes
* [downloader] Fix `quiet` and `to_stderr`
* [fragment] Ensure the file is closed on error
* [fragment] Make sure first segment is not skipped
* [aria2c] Fix whitespace being stripped off
* [embedthumbnail] Fix bug where jpeg thumbnails were converted again
* [FormatSort] Fix for when some formats have quality and others don't
* [utils] Add `network_exceptions`
* [utils] Escape URL while sanitizing
* [ukcolumn] Add Extractor
* [whowatch] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
* [CBS] Improve `_VALID_URL` to support movies
* [crackle] Improve extraction
* [curiositystream] Fix collections
* [francetvinfo] Improve video id extraction
* [generic] Respect the encoding in manifest
* [limelight] Obey `allow_unplayable_formats`
* [mediasite] Generalize URL pattern by [fstirlitz](https://github.com/fstirlitz)
* [mxplayer] Add MxplayerShowIE by [Ashish0804](https://github.com/Ashish0804)
* [nebula] Move to nebula.app by [Lamieur](https://github.com/Lamieur)
* [niconico] Fix HLS formats by [CXwudi](https://github.com/CXwudi), [tsukumijima](https://github.com/tsukumijima), [nao20010128nao](https://github.com/nao20010128nao) and [pukkandan](https://github.com/pukkandan)
* [niconico] Fix title and thumbnail extraction by [CXwudi](https://github.com/CXwudi)
* [plutotv] Extract subtitles from manifests
* [plutotv] Fix format extraction for some urls
* [rmcdecouverte] Improve `_VALID_URL`
* [sonyliv] Fix `title` and `series` extraction by [Ashish0804](https://github.com/Ashish0804)
* [tubi] Raise "no video formats" error when video url is empty
* [youtube:tab] Detect playlists inside community posts
* [youtube] Add `oembed` to reserved names
* [zee5] Fix extraction for some URLs by [Hadi0609](https://github.com/Hadi0609)
* [zee5] Fix py2 compatibility
* Fix `playlist_index` and add `playlist_autonumber`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details
* Add experimental option `--check-formats` to test the URLs before format selection
* Option `--compat-options` to revert [some of yt-dlp's changes](https://github.com/yt-dlp/yt-dlp#differences-in-default-behavior)
    * Deprecates `--list-formats-as-table`, `--list-formats-old`
* Fix number of digits in `%(playlist_index)s`
* Fix case sensitivity of format selector
* Revert "[core] be able to hand over id and title using url_result"
* Do not strip out whitespaces in `-o` and `-P`
* Fix `preload_download_archive` writing verbose message to `stdout`
* Move option warnings to `YoutubeDL` so that they obey `--no-warnings` and can output colors
* Py2 compatibility for `FileNotFoundError`
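
A sketch of the template improvements above; the exact slice spelling (`title.:30`) follows the `field.start:end:step` form and is an assumption:

```
# First 30 characters of the title; fall back when uploader is missing
yt-dlp -o "%(title.:30)s - %(uploader|Unknown)s.%(ext)s" URL
```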

### 2021.04.22

* **Improve output template:**
    * Objects can be traversed like `%(field.key1.key2)s`
    * An offset can be added to numeric fields as `%(field+N)s`
    * Deprecates `--autonumber-start`
* **Improve `--sub-langs`:** (see example below)
    * Treat `--sub-langs` entries as regex
    * `all` can be used to refer to all the subtitles
    * Language codes can be prefixed with `-` to exclude them
    * Deprecates `--all-subs`
* Add option `--ignore-no-formats-error` to ignore the "no video format" and similar errors
* Add option `--skip-playlist-after-errors` to skip the rest of a playlist after a given number of errors are encountered
* Merge youtube-dl: Upto [commit/7e8b3f9](https://github.com/ytdl-org/youtube-dl/commit/7e8b3f9439ebefb3a3a4e5da9c0bd2b595976438)
* [downloader] Fix bug in downloader selection
* [BilibiliChannel] Fix pagination by [nao20010128nao](https://github.com/nao20010128nao) and [pukkandan](https://github.com/pukkandan)
* [rai] Add support for http formats by [nixxo](https://github.com/nixxo)
* [TubiTv] Add TubiTvShowIE by [Ashish0804](https://github.com/Ashish0804)
* [twitcasting] Fix extractor
* [viu:ott] Fix extractor and support series by [lkho](https://github.com/lkho) and [pukkandan](https://github.com/pukkandan)
* [youtube:tab] Show unavailable videos in playlists by [coletdjnz](https://github.com/coletdjnz)
* [youtube:tab] Reload with unavailable videos for all playlists
* [youtube] Ignore invalid stretch ratio
* [youtube] Improve channel syncid extraction to support ytcfg by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Standardize API calls for tabs, mixes and search by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Bugfix in `_extract_ytcfg`
* [mildom:user:vod] Download only necessary amount of pages
* [mildom] Remove proxy completely by [fstirlitz](https://github.com/fstirlitz)
* [go] Fix `_VALID_URL`
* [MetadataFromField] Improve regex and add tests
* [Exec] Ensure backward compatibility when the command contains `%`
* [extractor] Fix inconsistent use of `report_warning`
* Ensure `mergeall` selects best format when multistreams are disabled
* Improve the yt-dlp.sh script by [fstirlitz](https://github.com/fstirlitz)
* [lazy_extractor] Do not load plugins
* [ci] Disable fail-fast
* [docs] Clarify which deprecated options still work
* [docs] Fix typos
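
Minimal usage of the improved `--sub-langs`:

```
# All English subtitle tracks (regex), excluding live chat
yt-dlp --write-subs --sub-langs "en.*,-live_chat" URL
```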

### 2021.04.11

* Add option `--convert-thumbnails` (only jpg currently supported)
* Format selector `mergeall` to download and merge all formats
* Pass any field to `--exec` using similar syntax to output template
* Choose downloader for each protocol using `--downloader PROTO:NAME` (see example below)
    * Alias `--downloader` for `--external-downloader`
    * Added `native` as an option for the downloader
* Merge youtube-dl: Upto [commit/4fb25ff](https://github.com/ytdl-org/youtube-dl/commit/4fb25ff5a3be5206bb72e5c4046715b1529fb2c7) (except vimeo)
* [DiscoveryPlusIndia] Add DiscoveryPlusIndiaShowIE by [Ashish0804](https://github.com/Ashish0804)
* [NFHSNetwork] Add extractor by [llacb47](https://github.com/llacb47)
* [nebula] Add extractor (watchnebula.com) by [hheimbuerger](https://github.com/hheimbuerger)
* [nitter] Fix extraction of reply tweets and update instance list by [B0pol](https://github.com/B0pol)
* [nitter] Fix thumbnails by [B0pol](https://github.com/B0pol)
* [youtube] Fix thumbnail URL
* [youtube] Parse API parameters from initial webpage by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Extract comments' approximate timestamp by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Fix alert extraction
* [bilibili] Fix uploader
* [utils] Add `datetime_from_str` and `datetime_add_months` by [coletdjnz](https://github.com/coletdjnz)
* Run some `postprocessors` before actual download
* Improve argument parsing for `-P`, `-o`, `-S`
* Fix some `m3u8` not obeying `--allow-unplayable-formats`
* Fix default of `dynamic_mpd`
* Deprecate `--all-formats`, `--include-ads`, `--hls-prefer-native`, `--hls-prefer-ffmpeg`
* [docs] Improvements
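
Minimal usage of per-protocol downloader selection:

```
# aria2c for plain http(s), native downloader for DASH/HLS fragments
yt-dlp --downloader aria2c --downloader "dash,m3u8:native" URL
```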
* Use headers and cookies when downloading subtitles by [damianoamatruda](https://github.com/damianoamatruda)
* Parse resolution in info dictionary by [damianoamatruda](https://github.com/damianoamatruda)
* More consistent warning messages by [damianoamatruda](https://github.com/damianoamatruda) and [pukkandan](https://github.com/pukkandan)
* [docs] Add deprecated options and aliases in readme
* [docs] Fix some minor mistakes
* [niconico] Partial fix adapted from [animelover1984/youtube-dl@b5eff52](https://github.com/animelover1984/youtube-dl/commit/b5eff52dd9ed5565672ea1694b38c9296db3fade) (login and smile formats still don't work)
* [niconico] Add user extractor by [animelover1984](https://github.com/animelover1984)
* [bilibili] Add anthology support by [animelover1984](https://github.com/animelover1984)
* [amcnetworks] Fix extractor by [2ShedsJackson](https://github.com/2ShedsJackson)
* [stitcher] Merge from youtube-dl by [nixxo](https://github.com/nixxo)
* [rcs] Improved extraction by [nixxo](https://github.com/nixxo)
* [linuxacadamy] Improve regex
* [youtube] Show if video is `private`, `unlisted` etc in info (`availability`) by [coletdjnz](https://github.com/coletdjnz) and [pukkandan](https://github.com/pukkandan)
* [youtube] Bugfix for channel playlist extraction
* [nbc] Improve metadata extraction by [2ShedsJackson](https://github.com/2ShedsJackson)

### 2021.03.15
* **Split video by chapters**: using option `--split-chapters`
    * The output file of the split files can be set with `-o`/`-P` using the prefix `chapter:`
    * Additional keys `section_title`, `section_number`, `section_start`, `section_end` are available in the output template
* **Parallel fragment downloads** by [shirt](https://github.com/shirt-dev)
    * Use option `--concurrent-fragments` (`-N`) to set the number of threads (default 1)
* Merge youtube-dl: Up to [commit/3be0980](https://github.com/ytdl-org/youtube-dl/commit/3be098010f667b14075e3dfad1e74e5e2becc8ea)
* [zee5] Add Show Extractor by [Ashish0804](https://github.com/Ashish0804) and [pukkandan](https://github.com/pukkandan)
* [rai] Fix drm check by [nixxo](https://github.com/nixxo)
* [wimtv] Add extractor by [nixxo](https://github.com/nixxo)
* [mtv] Add mtv.it and extract series metadata by [nixxo](https://github.com/nixxo)
* [pluto.tv] Add extractor by [kevinoconnor7](https://github.com/kevinoconnor7)
* [youtube] Rewrite comment extraction by [coletdjnz](https://github.com/coletdjnz)
* [embedthumbnail] Set mtime correctly
* Refactor some postprocessor/downloader code by [pukkandan](https://github.com/pukkandan) and [shirt](https://github.com/shirt-dev)

### 2021.03.07
* [youtube] Fix history, mixes, community pages and trending by [pukkandan](https://github.com/pukkandan) and [coletdjnz](https://github.com/coletdjnz)
* [youtube] Fix private feeds/playlists on multi-channel accounts by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Extract alerts from continuation by [coletdjnz](https://github.com/coletdjnz)
* [cbs] Add support for ParamountPlus by [shirt](https://github.com/shirt-dev)
* [mxplayer] Rewrite extractor with show support by [pukkandan](https://github.com/pukkandan) and [Ashish0804](https://github.com/Ashish0804)
* [gedi] Improvements from youtube-dl by [nixxo](https://github.com/nixxo)
* [vimeo] Fix videos with password by [teesid](https://github.com/teesid)
* [lbry] Support `lbry://` url by [nixxo](https://github.com/nixxo)
* [bilibili] Change `Accept` header by [pukkandan](https://github.com/pukkandan) and [animelover1984](https://github.com/animelover1984)
* [trovo] Pass origin header
* [rai] Check for DRM by [nixxo](https://github.com/nixxo)
* [downloader] Fix bug for `ffmpeg`/`httpie`
* [update] Fix updater removing the executable bit on some UNIX distros
* [update] Fix current build hash for UNIX
* [docs] Include wget/curl/aria2c install instructions for Unix by [Ashish0804](https://github.com/Ashish0804)
* Fix some videos downloading with `m3u8` extension
* Remove "fixup is ignored" warning when fixup wasn't passed by user

### 2021.03.03.2
* [build] Fix bug

### 2021.03.03
* [youtube] Use new browse API for continuation page extraction by [coletdjnz](https://github.com/coletdjnz) and [pukkandan](https://github.com/pukkandan)
* Fix HLS playlist downloading by [shirt](https://github.com/shirt-dev)
* Merge youtube-dl: Up to [2021.03.03](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.03.03)
* [mtv] Fix extractor
* [nick] Fix extractor by [DennyDai](https://github.com/DennyDai)
* [mxplayer] Add new extractor by [codeasashu](https://github.com/codeasashu)
* [youtube] Throw error when `--extractor-retries` are exhausted
* Reduce default of `--extractor-retries` to 3
* Fix packaging bugs by [hseg](https://github.com/hseg)

### 2021.03.01
* Allow specifying path in `--external-downloader`
* Add option `--sleep-requests` to sleep between requests
* Add option `--extractor-retries` to retry on known extractor errors
* Extract comments only when needed
* `--get-comments` doesn't imply `--write-info-json` if `-J`, `-j` or `--print-json` are used
* Fix `get_executable_path` by [shirt](https://github.com/shirt-dev)
* [youtube] Retry on more known errors than just HTTP-5xx
* [youtube] Fix inconsistent `webpage_url`
* [tennistv] Fix format sorting
* [bilibiliaudio] Recognize the file as audio-only
* [hrfensehen] Fix wrong import
* [viki] Fix viki play pass authentication by [RobinD42](https://github.com/RobinD42)
* [readthedocs] Improvements by [shirt](https://github.com/shirt-dev)
* [hls] Fix bug with m3u8 format extraction
* [hls] Enable `--hls-use-mpegts` by default when downloading live-streams
* [embedthumbnail] Fix bug with deleting original thumbnail
* [build] Fix completion paths, zsh pip completion install by [hseg](https://github.com/hseg)
* [ci] Disable download tests unless specifically invoked
* Cleanup some code and fix typos

### 2021.02.24
* Moved project to an organization [yt-dlp](https://github.com/yt-dlp)
* **Completely changed project name to yt-dlp** by [Pccode66](https://github.com/Pccode66) and [pukkandan](https://github.com/pukkandan)
    * Also, `youtube-dlc` config files are no longer loaded
* Merge youtube-dl: Up to [commit/4460329](https://github.com/ytdl-org/youtube-dl/commit/44603290e5002153f3ebad6230cc73aef42cc2cd) (except tmz, gedi)
* [Readthedocs](https://yt-dlp.readthedocs.io) support by [shirt](https://github.com/shirt-dev)
* [youtube] Show if video was a live stream in info (`was_live`)
* [Zee5] Add new extractor by [Ashish0804](https://github.com/Ashish0804) and [pukkandan](https://github.com/pukkandan)
* [jwplatform] Add support for `hyland.com`
* [tennistv] Fix extractor
* [hls] Support media initialization by [shirt](https://github.com/shirt-dev)
* [hls] Added option `--hls-split-discontinuity` to better support media discontinuity by [shirt](https://github.com/shirt-dev)
* [ffmpeg] Allow passing custom arguments before `-i` using `--ppa "ffmpeg_i1:ARGS"` syntax
* Fix `--windows-filenames` removing `/` from UNIX paths
* [hls] Show warning if pycryptodome is not found
* [docs] Improvements
    * Fix documentation of `Extractor Options`
    * Document `all` in format selection
    * Document `playable_in_embed` in output templates

### 2021.02.19
* Merge youtube-dl: Up to [commit/cf2dbec](https://github.com/ytdl-org/youtube-dl/commit/cf2dbec6301177a1fddf72862de05fa912d9869d) (except kakao)
* [viki] Fix extractor
* [niconico] Extract `channel` and `channel_id` by [kurumigi](https://github.com/kurumigi)
* [youtube] Multiple page support for hashtag URLs
* [youtube] Add more invidious instances
* [youtube] Fix comment extraction when comment text is empty
* Option `--windows-filenames` to force use of windows compatible filenames
* [ExtractAudio] Bugfix
* Don't raise `parser.error` when exiting for update
* [MoveFiles] Fix for when merger can't run
* Changed `--trim-file-name` to `--trim-filenames` to be similar to related options
* Format Sort improvements:
    * Prefer `vp9.2` more than other `vp9` codecs
    * Remove forced priority of `quality`
    * Remove unnecessary `field_preference` and misuse of `preference` from extractors
* Build improvements:
    * Fix hash output by [shirt](https://github.com/shirt-dev)
    * Lock python package versions for x86 and use `wheels` by [shirt](https://github.com/shirt-dev)
    * Exclude `vcruntime140.dll` from UPX by [jbruchon](https://github.com/jbruchon)
    * Set version number based on UTC time, not local time
    * Publish on PyPi only if token is set
* [docs] Better document `--prefer-free-formats` and add `--no-prefer-free-format`

### 2021.02.15
* Merge youtube-dl: Up to [2021.02.10](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.02.10) (except archive.org)
* [niconico] Improved extraction and support encrypted/SMILE movies by [kurumigi](https://github.com/kurumigi), [tsukumijima](https://github.com/tsukumijima), [bbepis](https://github.com/bbepis), [pukkandan](https://github.com/pukkandan)
* Fix HLS AES-128 with multiple keys in external downloaders by [shirt](https://github.com/shirt-dev)
* [youtube_live_chat] Fix by using POST API by [siikamiika](https://github.com/siikamiika)
* [rumble] Add support for video page
* Option `--allow-unplayable-formats` to allow downloading unplayable video formats
* [ExtractAudio] Don't re-encode when file is already in a common audio format
* [youtube] Fix search continuations
* [youtube] Fix for new accounts
* Improve build/updater: by [pukkandan](https://github.com/pukkandan) and [shirt](https://github.com/shirt-dev)
    * Fix SHA256 calculation in build and implement hash checking for updater
    * Exit immediately in windows once the update process starts
    * Fix updater for `x86.exe`
    * Updater looks for both `yt-dlp` and `youtube-dlc` in releases for future-proofing
    * Change optional dependency to `pycryptodome`
* Fix issue with unicode filenames in aria2c by [shirt](https://github.com/shirt-dev)
* Fix `allow_playlist_files` not being correctly passed through
* Fix for empty HTTP head requests by [shirt](https://github.com/shirt-dev)
* Fix `get_executable_path` in UNIX
* [sponskrub] Print ffmpeg output and errors to terminal
* `__real_download` should be false when ffmpeg unavailable and no download
* Show `exe`/`zip`/`source` and 32/64bit in verbose message

### 2021.02.09
* **aria2c support for DASH/HLS**: by [shirt](https://github.com/shirt-dev)
* **Implement Updater** (`-U`) by [shirt](https://github.com/shirt-dev)
* [youtube] Fix comment extraction
* [youtube_live_chat] Improve extraction
* [youtube] Fix for channel URLs sometimes not downloading all pages
* [aria2c] Changed default arguments to `--console-log-level=warn --summary-interval=0 --file-allocation=none -x16 -j16 -s16`
* Add fallback for thumbnails
* [embedthumbnail] Keep original thumbnail after conversion if write_thumbnail given
* [embedsubtitle] Keep original subtitle after conversion if write_subtitles given
* [pyinst.py] Move back to root dir
* [youtube] Simplified renderer parsing and bugfixes
* [movefiles] Fix compatibility with python2
* [remuxvideo] Fix validation of conditional remux
* [sponskrub] Don't raise error when the video does not exist
* [docs] Crypto is an optional dependency

### 2021.02.04
* Merge youtube-dl: Up to [2021.02.04.1](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.02.04.1)
* **Date/time formatting in output template:**
    * You can use [`strftime`](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) to format date/time fields. Example: `%(upload_date>%Y-%m-%d)s`
* **Multiple output templates:**
    * Separate output templates can be given for the different metadata files by using `-o TYPE:TEMPLATE`
    * The allowed types are: `subtitle|thumbnail|description|annotation|infojson|pl_description|pl_infojson`
* [youtube] More metadata extraction for channel/playlist URLs (channel, uploader, thumbnail, tags)
* New option `--no-write-playlist-metafiles` to prevent writing playlist metadata files
* [audius] Fix extractor
* [youtube_live_chat] Fix `parse_yt_initial_data` and add `fragment_retries`
* [postprocessor] Raise errors correctly
* [metadatafromtitle] Fix bug when extracting data from numeric fields
* Fix issue with overwriting files
* Fix "Default format spec" appearing in quiet mode
* [FormatSort] Allow user to prefer av01 over vp9 (The default is still vp9)
* [FormatSort] Fix bug where `quality` had more priority than `hasvid`
* [pyinst] Automatically detect python architecture and working directory
* Strip out internal fields such as `_filename` from infojson

### 2021.01.29
* **Features from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl)**: by [animelover1984](https://github.com/animelover1984) and [bbepis](https://github.com/bbepis)
    * Add `--get-comments`
    * [youtube] Extract comments
    * [billibilli] Added BiliBiliSearchIE, BilibiliChannelIE
    * [billibilli] Extract comments
    * [billibilli] Better video extraction
    * Write playlist data to infojson
    * [FFmpegMetadata] Embed infojson inside the video
    * [EmbedThumbnail] Try embedding in mp4 using ffprobe and `-disposition`
    * [EmbedThumbnail] Treat mka like mkv and mov like mp4
    * [EmbedThumbnail] Embed in ogg/opus
    * [VideoRemuxer] Conditionally remux video
    * [VideoRemuxer] Add `-movflags +faststart` when remuxing to mp4
    * [ffmpeg] Print entire stderr in verbose when there is error
    * [EmbedSubtitle] Warn when embedding ass in mp4
    * [anvato] Use NFLTokenGenerator if possible
* **Parse additional metadata**: New option `--parse-metadata` to extract additional metadata from existing fields
    * The extracted fields can be used in `--output`
    * Deprecated `--metadata-from-title`
* [Audius] Add extractor
* [youtube] Extract playlist description and write it to `.description` file
* Detect existing files even when using `recode`/`remux` (`extract-audio` is partially fixed)
* Fix wrong user config from v2021.01.24
* [youtube] Report error message from youtube as error instead of warning
* [FormatSort] Fix some fields not sorting from v2021.01.24
* [postprocessor] Deprecate `avconv`/`avprobe`. All current functionality is left untouched. But don't expect any new features to work with avconv
* [postprocessor] Fix `write_debug` to not throw error when there is no `_downloader`
* [movefiles] Don't give "cant find" warning when move is unnecessary
* Refactor `update-version`, `pyinst.py` and related files
* [ffmpeg] Document more formats that are supported for remux/recode

### 2021.01.24
* Merge youtube-dl: Up to [2021.01.24](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.16)
* Plugin support ([documentation](https://github.com/yt-dlp/yt-dlp#plugins))
* **Multiple paths**: New option `-P`/`--paths` to give different paths for different types of files
    * The syntax is `-P "type:path" -P "type:path"`
    * Valid types are: home, temp, description, annotation, subtitle, infojson, thumbnail
    * Additionally, configuration file is taken from home directory or current directory
* Allow passing different arguments to different external downloaders
* [mildom] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
* Warn when using old style `--external-downloader-args` and `--post-processor-args`
* Fix `--no-overwrite` when using `--write-link`
* [sponskrub] Output `unrecognized argument` error message correctly
* [cbs] Make failure to extract title non-fatal
* Fix typecasting when pre-checking archive
* Fix issue with setting title on UNIX
* Deprecate redundant aliases in `formatSort`. The aliases remain functional for backward compatibility, but will be left undocumented
* [tests] Fix test_post_hooks
* [tests] Split core and download tests

### 2021.01.20
* [TrovoLive] Add extractor (only VODs)
* [pokemon] Add `/#/player` URLs
* Improved parsing of multiple postprocessor-args, add `--ppa` as alias
* [EmbedThumbnail] Simplify embedding in mkv
* [sponskrub] Encode filenames correctly, better debug output and error message
* [readme] Cleanup options

### 2021.01.16
* Merge youtube-dl: Up to [2021.01.16](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.16)
* **Configuration files:**
    * Portable configuration file: `./yt-dlp.conf`
    * Allow the configuration files to be named `yt-dlp` instead of `youtube-dlc`. See [this](https://github.com/yt-dlp/yt-dlp#configuration) for details
* Add PyPI release

### 2021.01.14
* Added option `--break-on-reject`
* [roosterteeth.com] Fix for bonus episodes by [Zocker1999NET](https://github.com/Zocker1999NET)
* [tiktok] Fix for when share_info is empty
* [EmbedThumbnail] Fix bug due to incorrect function name
* [docs] Changed sponskrub links to point to [yt-dlp/SponSkrub](https://github.com/yt-dlp/SponSkrub) since I am now providing both linux and windows releases
* [docs] Change all links to correctly point to new fork URL
* [docs] Fix typos

### 2021.01.12
* [roosterteeth.com] Add subtitle support by [samiksome](https://github.com/samiksome)
* Added `--force-overwrites`, `--no-force-overwrites` by [alxnull](https://github.com/alxnull)
* Changed fork name to `yt-dlp`
* Fix typos by [FelixFrog](https://github.com/FelixFrog)
* [ci] Option to skip
* [changelog] Added unreleased changes in blackjack4494/yt-dlc

### 2021.01.10
* [archive.org] Fix extractor and add support for audio and playlists by [wporr](https://github.com/wporr)
* [Animelab] Added by [mariuszskon](https://github.com/mariuszskon)
* [youtube:search] Fix view_count by [ohnonot](https://github.com/ohnonot)
* [youtube] Show if video is embeddable in info (`playable_in_embed`)
* Update version badge automatically in README
* Enable `test_youtube_search_matching`
* Create `to_screen` and similar functions in postprocessor/common

### 2021.01.09
* [youtube] Fix bug in automatic caption extraction
* Add `post_hooks` to YoutubeDL by [alexmerkel](https://github.com/alexmerkel)
* Batch file enumeration improvements by [glenn-slayden](https://github.com/glenn-slayden)
* Stop immediately when reaching `--max-downloads` by [glenn-slayden](https://github.com/glenn-slayden)
* Fix incorrect ANSI sequence for restoring console-window title by [glenn-slayden](https://github.com/glenn-slayden)
* Kill child processes when yt-dlc is killed by [Unrud](https://github.com/Unrud)

### 2021.01.08
* Merge youtube-dl: Up to [2021.01.08](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.08) except stitcher ([1](https://github.com/ytdl-org/youtube-dl/commit/bb38a1215718cdf36d73ff0a7830a64cd9fa37cc), [2](https://github.com/ytdl-org/youtube-dl/commit/a563c97c5cddf55f8989ed7ea8314ef78e30107f))
* Moved changelog to separate file

### 2021.01.07-1
* [Akamai] Fix by [nixxo](https://github.com/nixxo)
* [Tiktok] Merge youtube-dl tiktok extractor by [GreyAlien502](https://github.com/GreyAlien502)
* [vlive] Add support for playlists by [kyuyeunk](https://github.com/kyuyeunk)
* [youtube_live_chat] Make sure playerOffsetMs is positive by [siikamiika](https://github.com/siikamiika)
* Ignore extra data streams in ffmpeg by [jbruchon](https://github.com/jbruchon)
* Allow passing different arguments to different postprocessors using `--postprocessor-args`
* Deprecated `--sponskrub-args`. The same can now be done using `--postprocessor-args "sponskrub:<args>"`
* [CI] Split tests into core-test and full-test

### 2021.01.07
* Removed priority of `av01` codec in `-S` since most devices don't support it yet
* Added `duration_string` to be used in `--output`
* Created First Release

### 2021.01.05-1
* **Changed defaults:**
    * Enabled `--ignore-errors`
    * Disabled `--video-multistreams` and `--audio-multistreams`
    * Changed default format selection to `bv*+ba/b` when `--audio-multistreams` is disabled
    * Changed default format sort order to `res,fps,codec,size,br,asr,proto,ext,has_audio,source,format_id`
    * Changed `webm` to be more preferable than `flv` in format sorting
    * Changed default output template to `%(title)s [%(id)s].%(ext)s`
    * Enabled `--list-formats-as-table`

### 2021.01.05
* **Format Sort:** Added `--format-sort` (`-S`), `--format-sort-force` (`--S-force`) - See [Sorting Formats](README.md#sorting-formats) for details
* **Format Selection:** See [Format Selection](README.md#format-selection) for details
    * New format selectors: `best*`, `worst*`, `bestvideo*`, `bestaudio*`, `worstvideo*`, `worstaudio*`
    * Changed video format sorting to show video only files and video+audio files together
    * Added `--video-multistreams`, `--no-video-multistreams`, `--audio-multistreams`, `--no-audio-multistreams`
    * Added `b`, `w`, `v`, `a` as aliases for `best`, `worst`, `video` and `audio` respectively
* **Shortcut Options:** Added `--write-link`, `--write-url-link`, `--write-webloc-link`, `--write-desktop-link` by [h-h-h-h](https://github.com/h-h-h-h) - See [Internet Shortcut Options](README.md#internet-shortcut-options) for details
* **Sponskrub integration:** Added `--sponskrub`, `--sponskrub-cut`, `--sponskrub-force`, `--sponskrub-location`, `--sponskrub-args` - See [SponSkrub Options](README.md#sponskrub-sponsorblock-options) for details
* Added `--force-download-archive` (`--force-write-archive`) by [h-h-h-h](https://github.com/h-h-h-h)
* Added `--list-formats-as-table`, `--list-formats-old`
* **Negative Options:** Makes it possible to negate most boolean options by adding a `no-` to the switch. Useful when you want to reverse an option that is defined in a config file
    * Added `--no-ignore-dynamic-mpd`, `--no-allow-dynamic-mpd`, `--allow-dynamic-mpd`, `--youtube-include-hls-manifest`, `--no-youtube-include-hls-manifest`, `--no-youtube-skip-hls-manifest`, `--no-download`, `--no-download-archive`, `--resize-buffer`, `--part`, `--mtime`, `--no-keep-fragments`, `--no-cookies`, `--no-write-annotations`, `--no-write-info-json`, `--no-write-description`, `--no-write-thumbnail`, `--youtube-include-dash-manifest`, `--post-overwrites`, `--no-keep-video`, `--no-embed-subs`, `--no-embed-thumbnail`, `--no-add-metadata`, `--no-include-ads`, `--no-write-sub`, `--no-write-auto-sub`, `--no-playlist-reverse`, `--no-restrict-filenames`, `--youtube-include-dash-manifest`, `--no-format-sort-force`, `--flat-videos`, `--no-list-formats-as-table`, `--no-sponskrub`, `--no-sponskrub-cut`, `--no-sponskrub-force`
    * Renamed: `--write-subs`, `--no-write-subs`, `--no-write-auto-subs`, `--write-auto-subs`. Note that these can still be used without the ending "s"
* Relaxed validation for format filters so that any arbitrary field can be used
* Fix for embedding thumbnail in mp3 by [pauldubois98](https://github.com/pauldubois98) ([ytdl-org/youtube-dl#21569](https://github.com/ytdl-org/youtube-dl/pull/21569))
* Make Twitch Video ID output from Playlist and VOD extractor same. This is only a temporary fix
* Merge youtube-dl: Up to [2021.01.03](https://github.com/ytdl-org/youtube-dl/commit/8e953dcbb10a1a42f4e12e4e132657cb0100a1f8) - See [blackjack4494/yt-dlc#280](https://github.com/blackjack4494/yt-dlc/pull/280) for details
* Extractors [tiktok](https://github.com/ytdl-org/youtube-dl/commit/fb626c05867deab04425bad0c0b16b55473841a2) and [hotstar](https://github.com/ytdl-org/youtube-dl/commit/bb38a1215718cdf36d73ff0a7830a64cd9fa37cc) have not been merged
* Cleaned up the fork for public use

**Note**: All uncredited changes above this point are authored by [pukkandan](https://github.com/pukkandan)

### Unreleased changes in [blackjack4494/yt-dlc](https://github.com/blackjack4494/yt-dlc)
* Updated to youtube-dl release 2020.11.26 by [pukkandan](https://github.com/pukkandan)
* Youtube improvements by [pukkandan](https://github.com/pukkandan)
    * Implemented all Youtube Feeds (ytfav, ytwatchlater, ytsubs, ythistory, ytrec) and SearchURL
    * Fix some improper Youtube URLs
    * Redirect channel home to /video
    * Print youtube's warning message
    * Handle Multiple pages for feeds better
* [youtube] Fix ytsearch not returning results sometimes due to promoted content by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Temporary fix for automatic captions - disable json3 by [blackjack4494](https://github.com/blackjack4494)
* Add --break-on-existing by [gergesh](https://github.com/gergesh)
* Pre-check video IDs in the archive before downloading by [pukkandan](https://github.com/pukkandan)
* [bitwave.tv] New extractor by [lorpus](https://github.com/lorpus)
* [Gedi] Add extractor by [nixxo](https://github.com/nixxo)
* [Rcs] Add new extractor by [nixxo](https://github.com/nixxo)
* [skyit] New skyitalia extractor by [nixxo](https://github.com/nixxo)
* [france.tv] Fix thumbnail URL by [renalid](https://github.com/renalid)
* [ina] Support mobile links by [B0pol](https://github.com/B0pol)
* [instagram] Fix thumbnail extractor by [nao20010128nao](https://github.com/nao20010128nao)
* [SouthparkDe] Support for English URLs by [xypwn](https://github.com/xypwn)
* [spreaker] Fix SpreakerShowIE test URL by [pukkandan](https://github.com/pukkandan)
* [Vlive] Fix playlist handling when downloading a channel by [kyuyeunk](https://github.com/kyuyeunk)
* [tmz] Fix extractor by [diegorodriguezv](https://github.com/diegorodriguezv)
* [ITV] BTCC URL update by [WolfganP](https://github.com/WolfganP)
* [generic] Detect embedded bitchute videos by [pukkandan](https://github.com/pukkandan)
* [generic] Extract embedded youtube and twitter videos by [diegorodriguezv](https://github.com/diegorodriguezv)
* [ffmpeg] Ensure all streams are copied by [pukkandan](https://github.com/pukkandan)
* [embedthumbnail] Fix for os.rename error by [pukkandan](https://github.com/pukkandan)
* make_win.bat: don't use UPX to pack vcruntime140.dll by [jbruchon](https://github.com/jbruchon)

### Changelog of [blackjack4494/yt-dlc](https://github.com/blackjack4494/yt-dlc) till release 2020.11.11-3

**Note**: This was constructed from the merge commit messages and may not be entirely accurate

* [bandcamp] fix failing test, remove subclass hack by [insaneracist](https://github.com/insaneracist)
* [bandcamp] restore album downloads by [insaneracist](https://github.com/insaneracist)
* [francetv] fix extractor by [Surkal](https://github.com/Surkal)
* [gdcvault] fix extractor by [blackjack4494](https://github.com/blackjack4494)
* [hotstar] Move to API v1 by [theincognito-inc](https://github.com/theincognito-inc)
* [hrfernsehen] add extractor by [blocktrron](https://github.com/blocktrron)
* [kakao] new apis by [blackjack4494](https://github.com/blackjack4494)
* [la7] fix missing protocol by [nixxo](https://github.com/nixxo)
* [mailru] removed escaped braces, use urljoin, added tests by [nixxo](https://github.com/nixxo)
* [MTV/Nick] universal mgid extractor + fix nick.de feed by [blackjack4494](https://github.com/blackjack4494)
* [mtv] Fix a missing match_id by [nixxo](https://github.com/nixxo)
* [Mtv] updated extractor logic & more by [blackjack4494](https://github.com/blackjack4494)
* [ndr] support Daserste ndr by [blackjack4494](https://github.com/blackjack4494)
* [Netzkino] Only use video id to find metadata by [TobiX](https://github.com/TobiX)
* [newgrounds] fix: video download by [insaneracist](https://github.com/insaneracist)
* [nitter] Add new extractor by [B0pol](https://github.com/B0pol)
* [soundcloud] Resolve audio/x-wav by [tfvlrue](https://github.com/tfvlrue)
* [soundcloud] sets pattern and tests by [blackjack4494](https://github.com/blackjack4494)
* [SouthparkDE/MTV] another mgid extraction (mtv_base) feed url updated by [blackjack4494](https://github.com/blackjack4494)
* [StoryFire] Add new extractor by [sgstair](https://github.com/sgstair)
* [twitch] by [geauxlo](https://github.com/geauxlo)
* [videa] Adapt to updates by [adrianheine](https://github.com/adrianheine)
* [Viki] subtitles, formats by [blackjack4494](https://github.com/blackjack4494)
* [vlive] fix extractor for revamped website by [exwm](https://github.com/exwm)
* [xtube] fix extractor by [insaneracist](https://github.com/insaneracist)
* [youtube] Convert subs when download is skipped by [blackjack4494](https://github.com/blackjack4494)
* [youtube] Fix age gate detection by [random-nick](https://github.com/random-nick)
* [youtube] fix yt-only playback when age restricted/gated - requires cookies by [blackjack4494](https://github.com/blackjack4494)
* [youtube] fix: extract artist metadata from ytInitialData by [insaneracist](https://github.com/insaneracist)
* [youtube] fix: extract mix playlist ids from ytInitialData by [insaneracist](https://github.com/insaneracist)
* [youtube] fix: mix playlist title by [insaneracist](https://github.com/insaneracist)
* [youtube] fix: Youtube Music playlists by [insaneracist](https://github.com/insaneracist)
* [Youtube] Fixed problem with new youtube player by [peet1993](https://github.com/peet1993)
* [zoom] Fix url parsing for url's containing /share/ and dots by [Romern](https://github.com/Romern)
* [zoom] new extractor by [insaneracist](https://github.com/insaneracist)
* abc by [adrianheine](https://github.com/adrianheine)
* Added Comcast_SSO fix by [merval](https://github.com/merval)
* Added DRM logic to brightcove by [merval](https://github.com/merval)
* Added regex for ABC.com site by [kucksdorfs](https://github.com/kucksdorfs)
* alura by [hugohaa](https://github.com/hugohaa)
* Arbitrary merges by [fstirlitz](https://github.com/fstirlitz)
* ard.py_add_playlist_support by [martin54](https://github.com/martin54)
* Bugfix/youtube/chapters fix extractor by [gschizas](https://github.com/gschizas)
* bugfix_youtube_like_extraction by [RedpointsBots](https://github.com/RedpointsBots)
* Create build workflow by [blackjack4494](https://github.com/blackjack4494)
* deezer by [LucBerge](https://github.com/LucBerge)
* Detect embedded bitchute videos by [pukkandan](https://github.com/pukkandan)
* Don't install tests by [l29ah](https://github.com/l29ah)
* Don't try to embed/convert json subtitles generated by [youtube](https://github.com/youtube) livechat by [pukkandan](https://github.com/pukkandan)
* Doodstream by [sxvghd](https://github.com/sxvghd)
* duboku by [lkho](https://github.com/lkho)
* elonet by [tpikonen](https://github.com/tpikonen)
* ext/remuxe-video by [Zocker1999NET](https://github.com/Zocker1999NET)
* fall-back to the old way to fetch subtitles, if needed by [RobinD42](https://github.com/RobinD42)
* feature_subscriber_count by [RedpointsBots](https://github.com/RedpointsBots)
* Fix external downloader when there is no http_header by [pukkandan](https://github.com/pukkandan)
* Fix issue triggered by [tubeup](https://github.com/tubeup) by [nsapa](https://github.com/nsapa)
* Fix YoutubePlaylistsIE by [ZenulAbidin](https://github.com/ZenulAbidin)
* fix-mitele' by [DjMoren](https://github.com/DjMoren)
* fix/google-drive-cookie-issue by [legraphista](https://github.com/legraphista)
* fix_tiktok by [mervel-mervel](https://github.com/mervel-mervel)
* Fixed problem with JS player URL by [peet1993](https://github.com/peet1993)
* fixYTSearch by [xarantolus](https://github.com/xarantolus)
* FliegendeWurst-3sat-zdf-merger-bugfix-feature
* gilou-bandcamp_update
* implement ThisVid extractor by [rigstot](https://github.com/rigstot)
* JensTimmerman-patch-1 by [JensTimmerman](https://github.com/JensTimmerman)
* Keep download archive in memory for better performance by [jbruchon](https://github.com/jbruchon)
* la7-fix by [iamleot](https://github.com/iamleot)
* magenta by [adrianheine](https://github.com/adrianheine)
* Merge 26564 from [adrianheine](https://github.com/adrianheine)
* Merge code from [ddland](https://github.com/ddland)
* Merge code from [nixxo](https://github.com/nixxo)
* Merge code from [ssaqua](https://github.com/ssaqua)
* Merge code from [zubearc](https://github.com/zubearc)
* mkvthumbnail by [MrDoritos](https://github.com/MrDoritos)
* myvideo_ge by [fonkap](https://github.com/fonkap)
* naver by [SeonjaeHyeon](https://github.com/SeonjaeHyeon)
* ondemandkorea by [julien-hadleyjack](https://github.com/julien-hadleyjack)
* rai-update by [iamleot](https://github.com/iamleot)
* RFC: youtube: Polymer UI and JSON endpoints for playlists by [wlritchi](https://github.com/wlritchi)
* rutv by [adrianheine](https://github.com/adrianheine)
* Sc extractor web auth by [blackjack4494](https://github.com/blackjack4494)
* Switch from binary search tree to Python sets by [jbruchon](https://github.com/jbruchon)
* tiktok by [skyme5](https://github.com/skyme5)
* tvnow by [TinyToweringTree](https://github.com/TinyToweringTree)
* twitch-fix by [lel-amri](https://github.com/lel-amri)
* Twitter shortener by [blackjack4494](https://github.com/blackjack4494)
* Update README.md by [JensTimmerman](https://github.com/JensTimmerman)
* Update to reflect website changes by [amigatomte](https://github.com/amigatomte)
* use webarchive to fix a dead link in README by [B0pol](https://github.com/B0pol)
* Viki the second by [blackjack4494](https://github.com/blackjack4494)
* wdr-subtitles by [mrtnmtth](https://github.com/mrtnmtth)
* Webpfix by [alexmerkel](https://github.com/alexmerkel)
* Youtube live chat by [siikamiika](https://github.com/siikamiika)

yt-dlp-2022.08.19/Collaborators.md

# Collaborators

This is a list of the collaborators of the project and their major contributions. See the [Changelog](Changelog.md) for more details.

You can also find lists of all [contributors of yt-dlp](CONTRIBUTORS) and [authors of youtube-dl](https://github.com/ytdl-org/youtube-dl/blob/master/AUTHORS)

## [pukkandan](https://github.com/pukkandan)

[![ko-fi](https://img.shields.io/badge/_-Ko--fi-red.svg?logo=kofi&labelColor=555555&style=for-the-badge)](https://ko-fi.com/pukkandan)

* Owner of the fork

## [shirt](https://github.com/shirt-dev)

[![ko-fi](https://img.shields.io/badge/_-Ko--fi-red.svg?logo=kofi&labelColor=555555&style=for-the-badge)](https://ko-fi.com/shirt)

* Multithreading (`-N`) and aria2c support for fragment downloads
* Support for media initialization and discontinuity in HLS
* The self-updater (`-U`)

## [coletdjnz](https://github.com/coletdjnz)

[![gh-sponsor](https://img.shields.io/badge/_-Sponsor-red.svg?logo=githubsponsors&labelColor=555555&style=for-the-badge)](https://github.com/sponsors/coletdjnz)

* YouTube improvements including: age-gate bypass, private playlists, multiple-clients (to avoid throttling) and a lot of under-the-hood improvements
* Added support for new websites YoutubeWebArchive, MainStreaming, PRX, nzherald, Mediaklikk, StarTV etc
* Improved/fixed support for Patreon, panopto, gfycat, itv, pbs, SouthParkDE etc

## [Ashish0804](https://github.com/Ashish0804) <sub><sup>[Inactive]</sup></sub>

[![ko-fi](https://img.shields.io/badge/_-Ko--fi-red.svg?logo=kofi&labelColor=555555&style=for-the-badge)](https://ko-fi.com/ashish0804)

* Added support for new websites BiliIntl, DiscoveryPlusIndia, OlympicsReplay, PlanetMarathi, ShemarooMe, Utreon, Zee5 etc
* Added playlist/series downloads for Hotstar, ParamountPlus, Rumble, SonyLIV, Trovo, TubiTv, Voot etc
* Improved/fixed support for HiDive, HotStar, Hungama, LBRY, LinkedInLearning, Mxplayer, SonyLiv, TV2, Vimeo, VLive etc

## [Lesmiscore](https://github.com/Lesmiscore) (nao20010128nao)

**Bitcoin**: bc1qfd02r007cutfdjwjmyy9w23rjvtls6ncve7r3s
**Monacoin**: mona1q3tf7dzvshrhfe3md379xtvt2n22duhglv5dskr

* Download live from start to end for YouTube
* Added support for new websites AbemaTV, mildom, PixivSketch, skeb, radiko, voicy, mirrativ, openrec, whowatch, damtomo, 17.live, mixch etc
* Improved/fixed support for fc2, YahooJapanNews, tver, iwara etc

yt-dlp-2022.08.19/LICENSE

This is free and unencumbered software released into the public domain.

Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means.

In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright interest in the software to the public domain. We make this dedication for the benefit of the public at large and to the detriment of our heirs and successors. We intend this dedication to be an overt act of relinquishment in perpetuity of all present and future rights to this software under copyright law.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <http://unlicense.org/>

yt-dlp-2022.08.19/MANIFEST.in

include AUTHORS
include Changelog.md
include LICENSE
include README.md
include completions/*/*
include supportedsites.md
include yt-dlp.1
include requirements.txt
recursive-include devscripts *
recursive-include test *

yt-dlp-2022.08.19/Makefile

all: lazy-extractors yt-dlp doc pypi-files
clean: clean-test clean-dist
clean-all: clean clean-cache
completions: completion-bash completion-fish completion-zsh
doc: README.md CONTRIBUTING.md issuetemplates supportedsites
ot: offlinetest
tar: yt-dlp.tar.gz

# Keep this list in sync with MANIFEST.in
# intended use: when building a source distribution,
# make pypi-files && python setup.py sdist
pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \
	completions yt-dlp.1 requirements.txt setup.cfg devscripts/* test/*

.PHONY: all clean install test tar pypi-files completions ot offlinetest codetest supportedsites

clean-test:
	rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \
	*.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl \
	*.3gp *.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.jpeg *.jpg *.m4a *.mpga *.m4v *.mhtml *.mkv *.mov \
	*.mp3 *.mp4 *.ogg *.opus *.png *.sbv *.srt *.swf *.swp *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp
clean-dist:
	rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \
	yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS .mailmap
clean-cache:
	find . \( \
		-type d -name .pytest_cache -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \
	\) -prune -exec rm -rf {} \;
completion-bash: completions/bash/yt-dlp
completion-fish: completions/fish/yt-dlp.fish
completion-zsh: completions/zsh/_yt-dlp
lazy-extractors: yt_dlp/extractor/lazy_extractors.py

PREFIX ?= /usr/local
DESTDIR ?= .
BINDIR ?= $(PREFIX)/bin
MANDIR ?= $(PREFIX)/man
SHAREDIR ?= $(PREFIX)/share
PYTHON ?= /usr/bin/env python3

# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi)

# set markdown input format to "markdown-smart" for pandoc version 2 and to "markdown" for pandoc prior to version 2
MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi)

install: lazy-extractors yt-dlp yt-dlp.1 completions
	mkdir -p $(DESTDIR)$(BINDIR)
	install -m755 yt-dlp $(DESTDIR)$(BINDIR)/yt-dlp
	mkdir -p $(DESTDIR)$(MANDIR)/man1
	install -m644 yt-dlp.1 $(DESTDIR)$(MANDIR)/man1/yt-dlp.1
	mkdir -p $(DESTDIR)$(SHAREDIR)/bash-completion/completions
	install -m644 completions/bash/yt-dlp $(DESTDIR)$(SHAREDIR)/bash-completion/completions/yt-dlp
	mkdir -p $(DESTDIR)$(SHAREDIR)/zsh/site-functions
	install -m644 completions/zsh/_yt-dlp $(DESTDIR)$(SHAREDIR)/zsh/site-functions/_yt-dlp
	mkdir -p $(DESTDIR)$(SHAREDIR)/fish/vendor_completions.d
	install -m644 completions/fish/yt-dlp.fish $(DESTDIR)$(SHAREDIR)/fish/vendor_completions.d/yt-dlp.fish

uninstall:
	rm -f $(DESTDIR)$(BINDIR)/yt-dlp
	rm -f $(DESTDIR)$(MANDIR)/man1/yt-dlp.1
	rm -f $(DESTDIR)$(SHAREDIR)/bash-completion/completions/yt-dlp
	rm -f $(DESTDIR)$(SHAREDIR)/zsh/site-functions/_yt-dlp
	rm -f $(DESTDIR)$(SHAREDIR)/fish/vendor_completions.d/yt-dlp.fish

codetest:
	flake8 .
test:
	$(PYTHON) -m pytest
	$(MAKE) codetest

offlinetest: codetest
	$(PYTHON) -m pytest -k "not download"

# XXX: This is hard to maintain
CODE_FOLDERS = yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor yt_dlp/compat \
	yt_dlp/extractor/anvato_token_generator
yt-dlp: yt_dlp/*.py yt_dlp/*/*.py
	mkdir -p zip
	for d in $(CODE_FOLDERS) ; do \
		mkdir -p zip/$$d ;\
		cp -pPR $$d/*.py zip/$$d/ ;\
	done
	touch -t 200001010101 zip/yt_dlp/*.py zip/yt_dlp/*/*.py zip/yt_dlp/*/*/*.py
	mv zip/yt_dlp/__main__.py zip/
	cd zip ; zip -q ../yt-dlp yt_dlp/*.py yt_dlp/*/*.py yt_dlp/*/*/*.py __main__.py
	rm -rf zip
	echo '#!$(PYTHON)' > yt-dlp
	cat yt-dlp.zip >> yt-dlp
	rm yt-dlp.zip
	chmod a+x yt-dlp

README.md: yt_dlp/*.py yt_dlp/*/*.py devscripts/make_readme.py
	COLUMNS=80 $(PYTHON) yt_dlp/__main__.py --ignore-config --help | $(PYTHON) devscripts/make_readme.py

CONTRIBUTING.md: README.md devscripts/make_contributing.py
	$(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md

issuetemplates: devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml .github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml .github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml yt_dlp/version.py
	$(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml .github/ISSUE_TEMPLATE/1_broken_site.yml
	$(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml .github/ISSUE_TEMPLATE/2_site_support_request.yml
	$(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml .github/ISSUE_TEMPLATE/3_site_feature_request.yml
	$(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml .github/ISSUE_TEMPLATE/4_bug_report.yml
	$(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml .github/ISSUE_TEMPLATE/5_feature_request.yml
	$(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/6_question.yml .github/ISSUE_TEMPLATE/6_question.yml

supportedsites:
	$(PYTHON) devscripts/make_supportedsites.py supportedsites.md

README.txt: README.md
	pandoc -f $(MARKDOWN) -t plain README.md -o README.txt

yt-dlp.1: README.md devscripts/prepare_manpage.py
	$(PYTHON) devscripts/prepare_manpage.py yt-dlp.1.temp.md
	pandoc -s -f $(MARKDOWN) -t man yt-dlp.1.temp.md -o yt-dlp.1
	rm -f yt-dlp.1.temp.md

completions/bash/yt-dlp: yt_dlp/*.py yt_dlp/*/*.py devscripts/bash-completion.in
	mkdir -p completions/bash
	$(PYTHON) devscripts/bash-completion.py

completions/zsh/_yt-dlp: yt_dlp/*.py yt_dlp/*/*.py devscripts/zsh-completion.in
	mkdir -p completions/zsh
	$(PYTHON) devscripts/zsh-completion.py

completions/fish/yt-dlp.fish: yt_dlp/*.py yt_dlp/*/*.py devscripts/fish-completion.in
	mkdir -p completions/fish
	$(PYTHON) devscripts/fish-completion.py

_EXTRACTOR_FILES = $(shell find yt_dlp/extractor -name '*.py' -and -not -name 'lazy_extractors.py')
yt_dlp/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
	$(PYTHON) devscripts/make_lazy_extractors.py $@

yt-dlp.tar.gz: all
	@tar -czf $(DESTDIR)/yt-dlp.tar.gz --transform "s|^|yt-dlp/|" --owner 0 --group 0 \
		--exclude '*.DS_Store' \
		--exclude '*.kate-swp' \
		--exclude '*.pyc' \
		--exclude '*.pyo' \
		--exclude '*~' \
		--exclude '__pycache__' \
		--exclude '.pytest_cache' \
		--exclude '.git' \
		-- \
		README.md supportedsites.md Changelog.md LICENSE \
		CONTRIBUTING.md Collaborators.md CONTRIBUTORS AUTHORS \
		Makefile MANIFEST.in yt-dlp.1 README.txt completions \
		setup.py setup.cfg yt-dlp yt_dlp requirements.txt \
		devscripts test

AUTHORS: .mailmap
	git shortlog -s -n | cut -f2 | sort > AUTHORS

.mailmap:
	git shortlog -s -e -n | awk '!(out[$$NF]++) { $$1="";sub(/^[ \t]+/,""); print}' > .mailmap
yt-dlp-2022.08.19/README.md

<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
<div align="center">

[![YT-DLP](https://raw.githubusercontent.com/yt-dlp/yt-dlp/master/.github/banner.svg)](#readme)

[![Release version](https://img.shields.io/github/v/release/yt-dlp/yt-dlp?color=brightgreen&label=Download&style=for-the-badge)](#release-files "Release")
[![PyPi](https://img.shields.io/badge/-PyPi-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp "PyPi")
[![Donate](https://img.shields.io/badge/_-Donate-red.svg?logo=githubsponsors&labelColor=555555&style=for-the-badge)](Collaborators.md#collaborators "Donate")
[![Matrix](https://img.shields.io/matrix/yt-dlp:matrix.org?color=brightgreen&labelColor=555555&label=&logo=element&style=for-the-badge)](https://matrix.to/#/#yt-dlp:matrix.org "Matrix")
[![Discord](https://img.shields.io/discord/807245652072857610?color=blue&labelColor=555555&label=&logo=discord&style=for-the-badge)](https://discord.gg/H5MNcFW63r "Discord")
[![Supported Sites](https://img.shields.io/badge/-Supported_Sites-brightgreen.svg?style=for-the-badge)](supportedsites.md "Supported Sites")
[![License: Unlicense](https://img.shields.io/badge/-Unlicense-blue.svg?style=for-the-badge)](LICENSE "License")
[![CI Status](https://img.shields.io/github/workflow/status/yt-dlp/yt-dlp/Core%20Tests/master?label=Tests&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/actions "CI Status")
[![Commits](https://img.shields.io/github/commit-activity/m/yt-dlp/yt-dlp?label=commits&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/commits "Commit History")
[![Last Commit](https://img.shields.io/github/last-commit/yt-dlp/yt-dlp/master?label=&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/commits "Commit History")

</div>
<!-- MANPAGE: END EXCLUDED SECTION -->

yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on the now inactive [youtube-dlc](https://github.com/blackjack4494/yt-dlc).
The main focus of this project is adding new features and patches while also keeping up to date with the original project

<!-- MANPAGE: MOVE "USAGE AND OPTIONS" SECTION HERE -->
<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
* [NEW FEATURES](#new-features)
    * [Differences in default behavior](#differences-in-default-behavior)
* [INSTALLATION](#installation)
    * [Update](#update)
    * [Release Files](#release-files)
    * [Dependencies](#dependencies)
    * [Compile](#compile)
* [USAGE AND OPTIONS](#usage-and-options)
    * [General Options](#general-options)
    * [Network Options](#network-options)
    * [Geo-restriction](#geo-restriction)
    * [Video Selection](#video-selection)
    * [Download Options](#download-options)
    * [Filesystem Options](#filesystem-options)
    * [Thumbnail Options](#thumbnail-options)
    * [Internet Shortcut Options](#internet-shortcut-options)
    * [Verbosity and Simulation Options](#verbosity-and-simulation-options)
    * [Workarounds](#workarounds)
    * [Video Format Options](#video-format-options)
    * [Subtitle Options](#subtitle-options)
    * [Authentication Options](#authentication-options)
    * [Post-processing Options](#post-processing-options)
    * [SponsorBlock Options](#sponsorblock-options)
    * [Extractor Options](#extractor-options)
* [CONFIGURATION](#configuration)
    * [Authentication with .netrc file](#authentication-with-netrc-file)
* [OUTPUT TEMPLATE](#output-template)
    * [Output template and Windows batch files](#output-template-and-windows-batch-files)
    * [Output template examples](#output-template-examples)
* [FORMAT SELECTION](#format-selection)
    * [Filtering Formats](#filtering-formats)
    * [Sorting Formats](#sorting-formats)
    * [Format Selection examples](#format-selection-examples)
* [MODIFYING METADATA](#modifying-metadata)
    * [Modifying metadata examples](#modifying-metadata-examples)
* [EXTRACTOR ARGUMENTS](#extractor-arguments)
* [PLUGINS](#plugins)
* [EMBEDDING YT-DLP](#embedding-yt-dlp)
    * [Embedding examples](#embedding-examples)
* [DEPRECATED OPTIONS](#deprecated-options)
* [CONTRIBUTING](CONTRIBUTING.md#contributing-to-yt-dlp)
    * [Opening an Issue](CONTRIBUTING.md#opening-an-issue)
    * [Developer Instructions](CONTRIBUTING.md#developer-instructions)
* [MORE](#more)
<!-- MANPAGE: END EXCLUDED SECTION -->


# NEW FEATURES

* Merged with **youtube-dl v2021.12.17+ [commit/b0a60ce](https://github.com/ytdl-org/youtube-dl/commit/b0a60ce2032172aeaaf27fe3866ab72768f10cb2)**<!--([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))--> and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in youtube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
* **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs will now be preferred instead of simply using a larger bitrate. Furthermore, you can now specify the sort order using `-S`.
This allows for much easier format selection than what is possible by simply using `--format` ([examples](#format-selection-examples); see also the sample commands after this list)
* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that the NicoNico livestreams are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.
* **YouTube improvements**:
    * Supports Clips, Stories (`ytstories:<channel UCID>`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefixes (`ytsearch:`, `ytsearchdate:`)**\***, Mixes, YouTube Music Albums/Channels ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723)), and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`)
    * Fix for [n-sig based throttling](https://github.com/ytdl-org/youtube-dl/issues/29326) **\***
    * Supports some (but not all) age-gated content without cookies
    * Download livestreams from the start using `--live-from-start` (*experimental*)
    * `255kbps` audio is extracted (if available) from YouTube Music when premium cookies are given
    * Redirect channel's home URL automatically to `/video` to preserve the old behaviour
* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE]`
* **Download time range**: Videos can be downloaded partially based on either timestamps or chapters using `--download-sections`
* **Split video by chapters**: Videos can be split into multiple files based on chapters using `--split-chapters`
* **Multi-threaded fragment downloads**: Download multiple fragments of m3u8/mpd videos in parallel. Use `--concurrent-fragments` (`-N`) option to set the number of threads used
* **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH (mpd) and HLS (m3u8) formats
* **New and fixed extractors**: Many new extractors have been added and a lot of existing ones have been fixed. See the [changelog](Changelog.md) or the [list of supported sites](supportedsites.md)
* **New MSOs**: Philo, Spectrum, SlingTV, Cablevision, RCN etc.
* **Subtitle extraction from manifests**: Subtitles can be extracted from streaming media manifests. See [commit/be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details
* **Multiple paths and output templates**: You can give different [output templates](#output-template) and download paths for different types of files. You can also set a temporary path where intermediary files are downloaded to using `--paths` (`-P`)
* **Portable Configuration**: Configuration files are automatically loaded from the home and root directories. See [CONFIGURATION](#configuration) for details
* **Output template improvements**: Output templates can now have date-time formatting, numeric offsets, object traversal etc. See [output template](#output-template) for details.
Even more advanced operations can also be done with the help of `--parse-metadata` and `--replace-in-metadata`
* **Other new options**: Many new options have been added such as `--alias`, `--print`, `--concat-playlist`, `--wait-for-video`, `--retry-sleep`, `--sleep-requests`, `--convert-thumbnails`, `--force-download-archive`, `--force-overwrites`, `--break-on-reject` etc
* **Improvements**: Regex and other operators in `--format`/`--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio, multiple `--config-locations`, `--exec` at different stages, etc
* **Plugins**: Extractors and PostProcessors can be loaded from an external file. See [plugins](#plugins) for details
* **Self-updater**: The releases can be updated using `yt-dlp -U`

See [changelog](Changelog.md) or [commits](https://github.com/yt-dlp/yt-dlp/commits) for the full list of changes

Features marked with a **\*** have been back-ported to youtube-dl

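To illustrate how a few of the features above fit together on the command line, here is a rough sketch; `URL` is a placeholder and the particular option values are arbitrary illustrations, not recommendations:

```
# Use 4 parallel fragment downloads and prefer the best quality at 720p or lower
yt-dlp -N 4 -S "res:720" URL

# Download only a section of the video (from 10:15 to the end),
# taking cookies from Firefox
yt-dlp --download-sections "*10:15-inf" --cookies-from-browser firefox URL

# Split the downloaded video into separate files along its chapters
yt-dlp --split-chapters -o "chapter:%(title)s/%(section_number)s - %(section_title)s.%(ext)s" URL
```

The full syntax for each of these options is described in the linked sections of this document.
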
You can [use `--parse-metadata`](#modifying-metadata) to modify this to your liking or use `--compat-options embed-metadata` to revert this
* `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior
* The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this
* Live chats (if available) are considered as subtitles. Use `--sub-langs all,-live_chat` to download all subtitles except live chat. You can also use `--compat-options no-live-chat` to prevent any live chat/danmaku from downloading
* YouTube channel URLs are automatically redirected to `/videos`. Append a `/featured` to the URL to download only the videos in the home page. If the channel does not have a videos tab, we try to download the equivalent `UU` playlist instead. For all other tabs, if the channel does not show the requested tab, an error will be raised. Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections
* Unavailable videos are also listed for YouTube playlists. Use `--compat-options no-youtube-unavailable-videos` to remove this
* If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this
* Thumbnail embedding in `mp4` is done with mutagen if possible. Use `--compat-options embed-thumbnail-atomicparsley` to force the use of AtomicParsley instead
* Some private fields such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this
* When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this
* `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi`
* yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior

For ease of use, a few more compat options are available:

* `--compat-options all`: Use all compat options (Do NOT use)
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams`
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect`
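For example, to approximate youtube-dl's file naming and format sorting in one go, something like the following should work (an illustrative command using the compat option values described above; the URL is a placeholder):

```
yt-dlp --compat-options filename,format-sort "https://example.com/video"
```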
# INSTALLATION

You can install yt-dlp using one of the following methods:

### Using the release binary

You can simply download the [correct binary file](#release-files) for your OS

<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
[![Windows](https://img.shields.io/badge/-Windows_x64-blue.svg?style=for-the-badge&logo=windows)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)
[![Linux](https://img.shields.io/badge/-Linux/BSD-red.svg?style=for-the-badge&logo=linux)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)
[![MacOS](https://img.shields.io/badge/-MacOS-lightblue.svg?style=for-the-badge&logo=apple)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)
[![Source Tarball](https://img.shields.io/badge/-Source_tar-green.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)
[![Other variants](https://img.shields.io/badge/-Other-grey.svg?style=for-the-badge)](#release-files)
[![All versions](https://img.shields.io/badge/-All_Versions-lightgrey.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases)
<!-- MANPAGE: END EXCLUDED SECTION -->

Note: The manpages, shell completion files etc. are available in the [source tarball](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)

<!-- TODO: Move to Wiki -->
On UNIX-like OSes (MacOS, Linux, BSD), you can also install it in one of the following ways:

```
sudo curl -L https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o /usr/local/bin/yt-dlp
sudo chmod a+rx /usr/local/bin/yt-dlp
```

```
sudo wget https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -O /usr/local/bin/yt-dlp
sudo chmod a+rx /usr/local/bin/yt-dlp
```

```
sudo aria2c https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp --dir /usr/local/bin -o yt-dlp
sudo chmod a+rx /usr/local/bin/yt-dlp
```

### With [PIP](https://pypi.org/project/pip)

You can install the [PyPI package](https://pypi.org/project/yt-dlp) with:
```
python3 -m pip install -U yt-dlp
```

You can install without any of the optional dependencies using:
```
python3 -m pip install --no-deps -U yt-dlp
```

If you want to be on the cutting edge, you can also install the master branch with:
```
python3 -m pip install --force-reinstall https://github.com/yt-dlp/yt-dlp/archive/master.tar.gz
```

On some systems, you may need to use `py` or `python` instead of `python3`

<!-- TODO: Add to Wiki, Remove Taps -->
### With [Homebrew](https://brew.sh)

macOS or Linux users who are using Homebrew can also install it by:

```
brew install yt-dlp/taps/yt-dlp
```

## UPDATE

You can use `yt-dlp -U` to update if you are [using the provided release](#using-the-release-binary)

If you [installed with pip](#with-pip), simply re-run the same command that was used to install the program

If you [installed using Homebrew](#with-homebrew), run `brew upgrade yt-dlp/taps/yt-dlp`
<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
## RELEASE FILES

#### Recommended

File|Description
:---|:---
[yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform-independent [zipimport](https://docs.python.org/3/library/zipimport.html) binary. Needs Python (recommended for **Linux/BSD**)
[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows (Win7 SP1+) standalone x64 binary (recommended for **Windows**)
[yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|Universal MacOS (10.15+) standalone executable (recommended for **MacOS**)

#### Alternatives

File|Description
:---|:---
[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Vista SP2+) standalone x86 (32-bit) binary
[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`<br/>([Not recommended](#standalone-py2exe-builds-windows))
[yt-dlp_linux](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux)|Linux standalone x64 binary
[yt-dlp_linux.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux.zip)|Unpackaged Linux executable (no auto-update)
[yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged Windows executable (no auto-update)
[yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS (10.15+) executable (no auto-update)
[yt-dlp_macos_legacy](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos_legacy)|MacOS (10.9+) standalone x64 executable

#### Misc

File|Description
:---|:---
[yt-dlp.tar.gz](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)|Source tarball. Also contains manpages, completions, etc
[SHA2-512SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-512SUMS)|GNU-style SHA512 sums
[SHA2-256SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-256SUMS)|GNU-style SHA256 sums
<!-- MANPAGE: END EXCLUDED SECTION -->
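If you download a release binary manually, you may want to verify it against the published checksums. Assuming GNU coreutils, a check along these lines should work from the directory containing both the binary and the sums file:

```
sha256sum --check --ignore-missing SHA2-256SUMS
```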
## DEPENDENCIES

Python versions 3.7+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly.

<!-- Python 3.5+ uses VC++14 and it is already embedded in the binary created
<!x-- https://www.microsoft.com/en-us/download/details.aspx?id=26999 --x>
On windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https://download.microsoft.com/download/1/6/5/165255E7-1014-4D0A-B094-B6A430A6BFFC/vcredist_x86.exe) is also necessary to run yt-dlp. You probably already have this, but if the executable throws an error due to missing `MSVCR100.dll` you need to install it manually.
-->

While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly recommended

### Strongly recommended

* [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging separate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. License [depends on the build](https://www.ffmpeg.org/legal.html)

    <!-- TODO: ffmpeg has merged this patch. Remove this note once there is new release -->
    **Note**: There are some regressions in newer ffmpeg versions that cause various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds

### Networking

* [**certifi**](https://github.com/certifi/python-certifi)\* - Provides Mozilla's root certificate bundle. Licensed under [MPLv2](https://github.com/certifi/python-certifi/blob/master/LICENSE)
* [**brotli**](https://github.com/google/brotli)\* or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT <sup>[1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) </sup>
* [**websockets**](https://github.com/aaugustin/websockets)\* - For downloading over websocket. Licensed under [BSD-3-Clause](https://github.com/aaugustin/websockets/blob/main/LICENSE)

### Metadata

* [**mutagen**](https://github.com/quodlibet/mutagen)\* - For `--embed-thumbnail` in certain formats. Licensed under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING)
* [**AtomicParsley**](https://github.com/wez/atomicparsley) - For `--embed-thumbnail` in `mp4`/`m4a` files when `mutagen`/`ffmpeg` cannot. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING)
* [**xattr**](https://github.com/xattr/xattr), [**pyxattr**](https://github.com/iustin/pyxattr) or [**setfattr**](http://savannah.nongnu.org/projects/attr) - For writing xattr metadata (`--xattr`) on **Linux**. Licensed under [MIT](https://github.com/xattr/xattr/blob/master/LICENSE.txt), [LGPL2.1](https://github.com/iustin/pyxattr/blob/master/COPYING) and [GPLv2+](http://git.savannah.nongnu.org/cgit/attr.git/tree/doc/COPYING) respectively

### Misc

* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome)\* - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD-2-Clause](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst)
* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD-3-Clause](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD)
* [**secretstorage**](https://github.com/mitya57/secretstorage) - For `--cookies-from-browser` to access the **Gnome** keyring while decrypting cookies of **Chromium**-based browsers on **Linux**. Licensed under [BSD-3-Clause](https://github.com/mitya57/secretstorage/blob/master/LICENSE)
* Any external downloader that you want to use with `--downloader`

#### Deprecated

* [**avconv** and **avprobe**](https://www.libav.org) - Now **deprecated** alternative to ffmpeg. License [depends on the build](https://libav.org/legal)
* [**sponskrub**](https://github.com/faissaloo/SponSkrub) - For using the now **deprecated** [sponskrub options](#sponskrub-options). Licensed under [GPLv3+](https://github.com/faissaloo/SponSkrub/blob/master/LICENCE.md)
* [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg can be used instead with `--downloader ffmpeg`. Licensed under [GPLv2+](http://rtmpdump.mplayerhq.hu)
* [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rtsp`/`mms` streams. ffmpeg can be used instead with `--downloader ffmpeg`. Licensed under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright)

To use or redistribute the dependencies, you must agree to their respective licensing terms.

The standalone release binaries are built with the Python interpreter and the packages marked with **\*** included.
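If you installed via pip instead and later want the optional Python dependencies (the packages marked with **\*** above), one way to pull them in is the following (a sketch; adjust the list to your needs):

```
python3 -m pip install -U certifi brotli websockets mutagen pycryptodomex
```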
If you do not have the necessary dependencies for a task you are attempting, yt-dlp will warn you. All the currently available dependencies are visible at the top of the `--verbose` output

## COMPILE

### Standalone PyInstaller Builds

To build the standalone executable, you must have Python and `pyinstaller` (plus any of yt-dlp's [optional dependencies](#dependencies) if needed). Once you have all the necessary dependencies installed, simply run `pyinst.py`. The executable will be built for the same architecture (x86/ARM, 32/64 bit) as the Python used.

    python3 -m pip install -U pyinstaller -r requirements.txt
    python3 devscripts/make_lazy_extractors.py
    python3 pyinst.py

On some systems, you may need to use `py` or `python` instead of `python3`.

Note that pyinstaller [does not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment.

**Important**: Running `pyinstaller` directly **without** using `pyinst.py` is **not** officially supported. This may or may not work correctly.

### Platform-independent Binary (UNIX)

You will need the build tools `python` (3.6+), `zip`, `make` (GNU), `pandoc`\* and `pytest`\*.

After installing these, simply run `make`.

You can also run `make yt-dlp` instead to compile only the binary without updating any of the additional files. (The build tools marked with **\*** are not needed for this)

### Standalone Py2Exe Builds (Windows)

While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi` and need VC++14** on the target computer to run.

If you wish to build it anyway, install Python and py2exe, and then simply run `setup.py py2exe`

    py -m pip install -U py2exe -r requirements.txt
    py devscripts/make_lazy_extractors.py
    py setup.py py2exe

### Related scripts

* **`devscripts/update-version.py [revision]`** - Update the version number based on current date
* **`devscripts/set-variant.py variant [-M update_message]`** - Set the build variant of the executable
* **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS=1` if you wish to forcefully disable lazy extractor loading.

You can also fork the project on github and run your fork's [build workflow](.github/workflows/build.yml) to automatically build a full release
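For example, a plausible sequence for building the platform-independent binary from a fresh checkout might be (a sketch using the scripts described above):

    python3 devscripts/update-version.py
    python3 devscripts/make_lazy_extractors.py
    make yt-dlp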
# USAGE AND OPTIONS

<!-- MANPAGE: BEGIN EXCLUDED SECTION -->

    yt-dlp [OPTIONS] [--] URL [URL...]

`Ctrl+F` is your friend :D
<!-- MANPAGE: END EXCLUDED SECTION -->

<!-- Auto generated -->
## General Options:
    -h, --help                      Print this help text and exit
    --version                       Print program version and exit
    -U, --update                    Update this program to the latest version
    --no-update                     Do not check for updates (default)
    -i, --ignore-errors             Ignore download and postprocessing errors. The download will be considered successful even if the postprocessing fails
    --no-abort-on-error             Continue with next video on download errors; e.g. to skip unavailable videos in a playlist (default)
    --abort-on-error                Abort downloading of further videos if an error occurs (Alias: --no-ignore-errors)
    --dump-user-agent               Display the current user-agent and exit
    --list-extractors               List all supported extractors and exit
    --extractor-descriptions        Output descriptions of all supported extractors and exit
    --force-generic-extractor       Force extraction to use the generic extractor
    --default-search PREFIX         Use this prefix for unqualified URLs. E.g. "gvsearch2:python" downloads two videos from google videos for the search term "python". Use the value "auto" to let yt-dlp guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching
    --ignore-config                 Don't load any more configuration files except those given by --config-locations. For backward compatibility, if this option is found inside the system configuration file, the user configuration is not loaded. (Alias: --no-config)
    --no-config-locations           Do not load any custom configuration files (default). When given inside a configuration file, ignore all previous --config-locations defined in the current file
    --config-locations PATH         Location of the main configuration file; either the path to the config or its containing directory ("-" for stdin). Can be used multiple times and inside other configuration files
    --flat-playlist                 Do not extract the videos of a playlist, only list them
    --no-flat-playlist              Extract the videos of a playlist
    --live-from-start               Download livestreams from the start. Currently only supported for YouTube (Experimental)
    --no-live-from-start            Download livestreams from the current time (default)
    --wait-for-video MIN[-MAX]      Wait for scheduled streams to become available. Pass the minimum number of seconds (or range) to wait between retries
    --no-wait-for-video             Do not wait for scheduled streams (default)
    --mark-watched                  Mark videos watched (even with --simulate)
    --no-mark-watched               Do not mark videos watched (default)
    --no-colors                     Do not emit color codes in output (Alias: --no-colours)
    --compat-options OPTS           Options that can help keep compatibility with youtube-dl or youtube-dlc configurations by reverting some of the changes made in yt-dlp. See "Differences in default behavior" for details
    --alias ALIASES OPTIONS         Create aliases for an option string. Unless an alias starts with a dash "-", it is prefixed with "--". Arguments are parsed according to the Python string formatting mini-language. E.g. --alias get-audio,-X "-S=aext:{0},abr -x --audio-format {0}" creates options "--get-audio" and "-X" that take an argument (ARG0) and expand to "-S=aext:ARG0,abr -x --audio-format ARG0". All defined aliases are listed in the --help output. Alias options can trigger more aliases; so be careful to avoid defining recursive options. As a safety measure, each alias may be triggered a maximum of 100 times. This option can be used multiple times

## Network Options:
    --proxy URL                     Use the specified HTTP/HTTPS/SOCKS proxy. To enable SOCKS proxy, specify a proper scheme, e.g. socks5://user:pass@127.0.0.1:1080/. Pass in an empty string (--proxy "") for direct connection
    --socket-timeout SECONDS        Time to wait before giving up, in seconds
    --source-address IP             Client-side IP address to bind to
    -4, --force-ipv4                Make all connections via IPv4
    -6, --force-ipv6                Make all connections via IPv6

## Geo-restriction:
    --geo-verification-proxy URL    Use this proxy to verify the IP address for some geo-restricted sites. The default proxy specified by --proxy (or none, if the option is not present) is used for the actual downloading
    --geo-bypass                    Bypass geographic restriction via faking X-Forwarded-For HTTP header (default)
    --no-geo-bypass                 Do not bypass geographic restriction via faking X-Forwarded-For HTTP header
    --geo-bypass-country CODE       Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code
    --geo-bypass-ip-block IP_BLOCK  Force bypass geographic restriction with explicitly provided IP block in CIDR notation

## Video Selection:
    -I, --playlist-items ITEM_SPEC  Comma separated playlist_index of the videos to download. You can specify a range using "[START]:[STOP][:STEP]". For backward compatibility, START-STOP is also supported. Use negative indices to count from the right and negative STEP to download in reverse order. E.g. "-I 1:3,7,-5::2" used on a playlist of size 15 will download the videos at index 1,2,3,7,11,13,15
    --min-filesize SIZE             Do not download any videos smaller than SIZE, e.g. 50k or 44.6M
    --max-filesize SIZE             Do not download any videos larger than SIZE, e.g. 50k or 44.6M
    --date DATE                     Download only videos uploaded on this date. The date can be "YYYYMMDD" or in the format [now|today|yesterday][-N[day|week|month|year]]. E.g. --date today-2weeks
    --datebefore DATE               Download only videos uploaded on or before this date. The date formats accepted are the same as for --date
    --dateafter DATE                Download only videos uploaded on or after this date. The date formats accepted are the same as for --date
    --match-filter FILTER           Generic video filter. Any "OUTPUT TEMPLATE" field can be compared with a number or a string using the operators defined in "Filtering Formats". You can also simply specify a field to match if the field is present, use "!field" to check if the field is not present, and "&" to check multiple conditions. Use a "\" to escape "&" or quotes if needed. If used multiple times, the filter matches if at least one of the conditions is met. E.g. --match-filter !is_live --match-filter "like_count>?100 & description~='(?i)\bcats \& dogs\b'" matches only videos that are not live OR those that have a like count of more than 100 (or the like field is not available) and also have a description that contains the phrase "cats & dogs" (caseless). Use "--match-filter -" to interactively ask whether to download each video
    --no-match-filter               Do not use generic video filter (default)
    --no-playlist                   Download only the video, if the URL refers to a video and a playlist
    --yes-playlist                  Download the playlist, if the URL refers to a video and a playlist
    --age-limit YEARS               Download only videos suitable for the given age
    --download-archive FILE         Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it
    --no-download-archive           Do not use archive file (default)
    --max-downloads NUMBER          Abort after downloading NUMBER files
    --break-on-existing             Stop the download process when encountering a file that is in the archive
    --break-on-reject               Stop the download process when encountering a file that has been filtered out
    --break-per-input               Make --break-on-existing, --break-on-reject and --max-downloads act only on the current input URL
    --no-break-per-input            --break-on-existing and similar options terminate the entire download queue
    --skip-playlist-after-errors N  Number of allowed failures until the rest of the playlist is skipped

## Download Options:
    -N, --concurrent-fragments N    Number of fragments of a dash/hlsnative video that should be downloaded concurrently (default is 1)
    -r, --limit-rate RATE           Maximum download rate in bytes per second, e.g. 50K or 4.2M
    --throttled-rate RATE           Minimum download rate in bytes per second below which throttling is assumed and the video data is re-extracted, e.g. 100K
    -R, --retries RETRIES           Number of retries (default is 10), or "infinite"
    --file-access-retries RETRIES   Number of times to retry on file access error (default is 3), or "infinite"
    --fragment-retries RETRIES      Number of retries for a fragment (default is 10), or "infinite" (DASH, hlsnative and ISM)
    --retry-sleep [TYPE:]EXPR       Time to sleep between retries in seconds (optionally) prefixed by the type of retry (http (default), fragment, file_access, extractor) to apply the sleep to. EXPR can be a number, linear=START[:END[:STEP=1]] or exp=START[:END[:BASE=2]]. This option can be used multiple times to set the sleep for the different retry types, e.g. --retry-sleep linear=1::2 --retry-sleep fragment:exp=1:20
    --skip-unavailable-fragments    Skip unavailable fragments for DASH, hlsnative and ISM downloads (default) (Alias: --no-abort-on-unavailable-fragment)
    --abort-on-unavailable-fragment  Abort download if a fragment is unavailable (Alias: --no-skip-unavailable-fragments)
    --keep-fragments                Keep downloaded fragments on disk after downloading is finished
    --no-keep-fragments             Delete downloaded fragments after downloading is finished (default)
    --buffer-size SIZE              Size of download buffer, e.g. 1024 or 16K (default is 1024)
    --resize-buffer                 The buffer size is automatically resized from an initial value of --buffer-size (default)
    --no-resize-buffer              Do not automatically adjust the buffer size
    --http-chunk-size SIZE          Size of a chunk for chunk-based HTTP downloading, e.g. 10485760 or 10M (default is disabled). May be useful for bypassing bandwidth throttling imposed by a webserver (experimental)
    --playlist-random               Download playlist videos in random order
    --lazy-playlist                 Process entries in the playlist as they are received. This disables n_entries, --playlist-random and --playlist-reverse
    --no-lazy-playlist              Process videos in the playlist only after the entire playlist is parsed (default)
    --xattr-set-filesize            Set file xattribute ytdl.filesize with expected file size
    --hls-use-mpegts                Use the mpegts container for HLS videos; allowing some players to play the video while downloading, and reducing the chance of file corruption if download is interrupted. This is enabled by default for live streams
    --no-hls-use-mpegts             Do not use the mpegts container for HLS videos. This is default when not downloading live streams
    --download-sections REGEX       Download only chapters whose title matches the given regular expression. Time ranges prefixed by a "*" can also be used in place of chapters to download the specified range. Needs ffmpeg. This option can be used multiple times to download multiple sections, e.g. --download-sections "*10:15-15:00" --download-sections "intro"
    --downloader [PROTO:]NAME       Name or path of the external downloader to use (optionally) prefixed by the protocols (http, ftp, m3u8, dash, rtsp, rtmp, mms) to use it for. Currently supports native, aria2c, avconv, axel, curl, ffmpeg, httpie, wget. You can use this option multiple times to set different downloaders for different protocols. E.g. --downloader aria2c --downloader "dash,m3u8:native" will use aria2c for http/ftp downloads, and the native downloader for dash/m3u8 downloads (Alias: --external-downloader)
    --downloader-args NAME:ARGS     Give these arguments to the external downloader. Specify the downloader name and the arguments separated by a colon ":". For ffmpeg, arguments can be passed to different positions using the same syntax as --postprocessor-args. You can use this option multiple times to give different arguments to different downloaders (Alias: --external-downloader-args)

## Filesystem Options:
    -a, --batch-file FILE           File containing URLs to download ("-" for stdin), one URL per line. Lines starting with "#", ";" or "]" are considered as comments and ignored
    --no-batch-file                 Do not read URLs from batch file (default)
    -P, --paths [TYPES:]PATH        The paths where the files should be downloaded. Specify the type of file and the path separated by a colon ":". All the same TYPES as --output are supported. Additionally, you can also provide "home" (default) and "temp" paths. All intermediary files are first downloaded to the temp path and then the final files are moved over to the home path after download is finished. This option is ignored if --output is an absolute path
    -o, --output [TYPES:]TEMPLATE   Output filename template; see "OUTPUT TEMPLATE" for details
    --output-na-placeholder TEXT    Placeholder for unavailable fields in "OUTPUT TEMPLATE" (default: "NA")
    --restrict-filenames            Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames
    --no-restrict-filenames         Allow Unicode characters, "&" and spaces in filenames (default)
    --windows-filenames             Force filenames to be Windows-compatible
    --no-windows-filenames          Make filenames Windows-compatible only if using Windows (default)
    --trim-filenames LENGTH         Limit the filename length (excluding extension) to the specified number of characters
    -w, --no-overwrites             Do not overwrite any files
    --force-overwrites              Overwrite all video and metadata files. This option includes --no-continue
    --no-force-overwrites           Do not overwrite the video, but overwrite related files (default)
    -c, --continue                  Resume partially downloaded files/fragments (default)
    --no-continue                   Do not resume partially downloaded fragments. If the file is not fragmented, restart download of the entire file
    --part                          Use .part files instead of writing directly into output file (default)
    --no-part                       Do not use .part files - write directly into output file
    --mtime                         Use the Last-modified header to set the file modification time (default)
    --no-mtime                      Do not use the Last-modified header to set the file modification time
    --write-description             Write video description to a .description file
    --no-write-description          Do not write video description (default)
    --write-info-json               Write video metadata to a .info.json file (this may contain personal information)
    --no-write-info-json            Do not write video metadata (default)
    --write-playlist-metafiles      Write playlist metadata in addition to the video metadata when using --write-info-json, --write-description etc. (default)
    --no-write-playlist-metafiles   Do not write playlist metadata when using --write-info-json, --write-description etc.
    --clean-info-json               Remove some private fields such as filenames from the infojson. Note that it could still contain some personal information (default)
    --no-clean-info-json            Write all fields to the infojson
    --write-comments                Retrieve video comments to be placed in the infojson. The comments are fetched even without this option if the extraction is known to be quick (Alias: --get-comments)
    --no-write-comments             Do not retrieve video comments unless the extraction is known to be quick (Alias: --no-get-comments)
    --load-info-json FILE           JSON file containing the video information (created with the "--write-info-json" option)
    --cookies FILE                  Netscape formatted file to read cookies from and dump cookie jar in
    --no-cookies                    Do not read/dump cookies from/to file (default)
    --cookies-from-browser BROWSER[+KEYRING][:PROFILE]  The name of the browser and (optionally) the name/path of the profile to load cookies from, separated by a ":". Currently supported browsers are: brave, chrome, chromium, edge, firefox, opera, safari, vivaldi. By default, the most recently accessed profile is used. The keyring used for decrypting Chromium cookies on Linux can be (optionally) specified after the browser name separated by a "+". Currently supported keyrings are: basictext, gnomekeyring, kwallet
    --no-cookies-from-browser       Do not load cookies from browser (default)
    --cache-dir DIR                 Location in the filesystem where yt-dlp can store some downloaded information (such as client ids and signatures) permanently. By default $XDG_CACHE_HOME/yt-dlp or ~/.cache/yt-dlp
    --no-cache-dir                  Disable filesystem caching
    --rm-cache-dir                  Delete all filesystem cache files

## Thumbnail Options:
    --write-thumbnail               Write thumbnail image to disk
    --no-write-thumbnail            Do not write thumbnail image to disk (default)
    --write-all-thumbnails          Write all thumbnail image formats to disk
    --list-thumbnails               List available thumbnails of each video. Simulate unless --no-simulate is used

## Internet Shortcut Options:
    --write-link                    Write an internet shortcut file, depending on the current platform (.url, .webloc or .desktop). The URL may be cached by the OS
    --write-url-link                Write a .url Windows internet shortcut. The OS caches the URL based on the file path
    --write-webloc-link             Write a .webloc macOS internet shortcut
    --write-desktop-link            Write a .desktop Linux internet shortcut

## Verbosity and Simulation Options:
    -q, --quiet                     Activate quiet mode. If used with --verbose, print the log to stderr
    --no-warnings                   Ignore warnings
    -s, --simulate                  Do not download the video and do not write anything to disk
    --no-simulate                   Download the video even if printing/listing options are used
    --ignore-no-formats-error       Ignore "No video formats" error. Useful for extracting metadata even if the videos are not actually available for download (experimental)
    --no-ignore-no-formats-error    Throw error when no downloadable video formats are found (default)
    --skip-download                 Do not download the video but write all related files (Alias: --no-download)
    -O, --print [WHEN:]TEMPLATE     Field name or output template to print to screen, optionally prefixed with when to print it, separated by a ":". Supported values of "WHEN" are the same as that of --use-postprocessor, and "video" (default). Implies --quiet. Implies --simulate unless --no-simulate or later stages of WHEN are used. This option can be used multiple times
    --print-to-file [WHEN:]TEMPLATE FILE  Append given template to the file. The values of WHEN and TEMPLATE are the same as that of --print. FILE uses the same syntax as the output template. This option can be used multiple times
    -j, --dump-json                 Quiet, but print JSON information for each video. Simulate unless --no-simulate is used. See "OUTPUT TEMPLATE" for a description of available keys
    -J, --dump-single-json          Quiet, but print JSON information for each url or infojson passed. Simulate unless --no-simulate is used. If the URL refers to a playlist, the whole playlist information is dumped in a single line
    --force-write-archive           Force download archive entries to be written as long as no errors occur, even if -s or another simulation option is used (Alias: --force-download-archive)
    --newline                       Output progress bar as new lines
    --no-progress                   Do not print progress bar
    --progress                      Show progress bar, even if in quiet mode
    --console-title                 Display progress in console titlebar
    --progress-template [TYPES:]TEMPLATE  Template for progress outputs, optionally prefixed with one of "download:" (default), "download-title:" (the console title), "postprocess:", or "postprocess-title:". The video's fields are accessible under the "info" key and the progress attributes are accessible under the "progress" key. E.g. --console-title --progress-template "download-title:%(info.id)s-%(progress.eta)s"
    -v, --verbose                   Print various debugging information
    --dump-pages                    Print downloaded pages encoded using base64 to debug problems (very verbose)
    --write-pages                   Write downloaded intermediary pages to files in the current directory to debug problems
    --print-traffic                 Display sent and read HTTP traffic

## Workarounds:
    --encoding ENCODING             Force the specified encoding (experimental)
    --legacy-server-connect         Explicitly allow HTTPS connection to servers that do not support RFC 5746 secure renegotiation
    --no-check-certificates         Suppress HTTPS certificate validation
    --prefer-insecure               Use an unencrypted connection to retrieve information about the video (Currently supported only for YouTube)
    --add-header FIELD:VALUE        Specify a custom HTTP header and its value, separated by a colon ":". You can use this option multiple times
    --bidi-workaround               Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH
    --sleep-requests SECONDS        Number of seconds to sleep between requests during data extraction
    --sleep-interval SECONDS        Number of seconds to sleep before each download. This is the minimum time to sleep when used along with --max-sleep-interval (Alias: --min-sleep-interval)
    --max-sleep-interval SECONDS    Maximum number of seconds to sleep. Can only be used along with --min-sleep-interval
    --sleep-subtitles SECONDS       Number of seconds to sleep before each subtitle download

## Video Format Options:
    -f, --format FORMAT             Video format code, see "FORMAT SELECTION" for more details
    -S, --format-sort SORTORDER     Sort the formats by the fields given, see "Sorting Formats" for more details
    --format-sort-force             Force user specified sort order to have precedence over all fields, see "Sorting Formats" for more details (Alias: --S-force)
    --no-format-sort-force          Some fields have precedence over the user specified sort order (default)
    --video-multistreams            Allow multiple video streams to be merged into a single file
    --no-video-multistreams         Only one video stream is downloaded for each output file (default)
    --audio-multistreams            Allow multiple audio streams to be merged into a single file
    --no-audio-multistreams         Only one audio stream is downloaded for each output file (default)
    --prefer-free-formats           Prefer video formats with free containers over non-free ones of the same quality. Use with "-S ext" to strictly prefer free containers irrespective of quality
    --no-prefer-free-formats        Don't give any special preference to free containers (default)
    --check-formats                 Make sure formats are selected only from those that are actually downloadable
    --check-all-formats             Check all formats for whether they are actually downloadable
    --no-check-formats              Do not check that the formats are actually downloadable
    -F, --list-formats              List available formats of each video. Simulate unless --no-simulate is used
    --merge-output-format FORMAT    Containers that may be used when merging formats, separated by "/", e.g. "mp4/mkv". Ignored if no merge is required. (currently supported: avi, flv, mkv, mov, mp4, webm)

## Subtitle Options:
    --write-subs                    Write subtitle file
    --no-write-subs                 Do not write subtitle file (default)
    --write-auto-subs               Write automatically generated subtitle file (Alias: --write-automatic-subs)
    --no-write-auto-subs            Do not write auto-generated subtitles (default) (Alias: --no-write-automatic-subs)
    --list-subs                     List available subtitles of each video. Simulate unless --no-simulate is used
    --sub-format FORMAT             Subtitle format; accepts formats preference, e.g. "srt" or "ass/srt/best"
    --sub-langs LANGS               Languages of the subtitles to download (can be regex) or "all" separated by commas, e.g. --sub-langs "en.*,ja". You can prefix the language code with a "-" to exclude it from the requested languages, e.g. --sub-langs all,-live_chat. Use --list-subs for a list of available language tags

## Authentication Options:
    -u, --username USERNAME         Login with this account ID
    -p, --password PASSWORD         Account password. If this option is left out, yt-dlp will ask interactively
    -2, --twofactor TWOFACTOR       Two-factor authentication code
    -n, --netrc                     Use .netrc authentication data
    --netrc-location PATH           Location of .netrc authentication data; either the path or its containing directory. Defaults to ~/.netrc
    --video-password PASSWORD       Video password (vimeo, youku)
    --ap-mso MSO                    Adobe Pass multiple-system operator (TV provider) identifier, use --ap-list-mso for a list of available MSOs
    --ap-username USERNAME          Multiple-system operator account login
    --ap-password PASSWORD          Multiple-system operator account password. If this option is left out, yt-dlp will ask interactively
    --ap-list-mso                   List all supported multiple-system operators
    --client-certificate CERTFILE   Path to client certificate file in PEM format. May include the private key
    --client-certificate-key KEYFILE  Path to private key file for client certificate
    --client-certificate-password PASSWORD  Password for client certificate private key, if encrypted. If not provided, and the key is encrypted, yt-dlp will ask interactively

## Post-Processing Options:
    -x, --extract-audio             Convert video files to audio-only files (requires ffmpeg and ffprobe)
    --audio-format FORMAT           Format to convert the audio to when -x is used. (currently supported: best (default), aac, alac, flac, m4a, mp3, opus, vorbis, wav). You can specify multiple rules using similar syntax as --remux-video
    --audio-quality QUALITY         Specify ffmpeg audio quality to use when converting the audio with -x. Insert a value between 0 (best) and 10 (worst) for VBR or a specific bitrate like 128K (default 5)
    --remux-video FORMAT            Remux the video into another container if necessary (currently supported: avi, flv, mkv, mov, mp4, webm, aac, aiff, alac, flac, m4a, mka, mp3, ogg, opus, vorbis, wav). If the target container does not support the video/audio codec, remuxing will fail. You can specify multiple rules; e.g. "aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 and anything else to mkv
    --recode-video FORMAT           Re-encode the video into another format if necessary. The syntax and supported formats are the same as --remux-video
    --postprocessor-args NAME:ARGS  Give these arguments to the postprocessors. Specify the postprocessor/executable name and the arguments separated by a colon ":" to give the argument to the specified postprocessor/executable. Supported PP are: Merger, ModifyChapters, SplitChapters, ExtractAudio, VideoRemuxer, VideoConvertor, Metadata, EmbedSubtitle, EmbedThumbnail, SubtitlesConvertor, ThumbnailsConvertor, FixupStretched, FixupM4a, FixupM3u8, FixupTimestamp and FixupDuration. The supported executables are: AtomicParsley, FFmpeg and FFprobe. You can also specify "PP+EXE:ARGS" to give the arguments to the specified executable only when being used by the specified postprocessor. Additionally, for ffmpeg/ffprobe, "_i"/"_o" can be appended to the prefix optionally followed by a number to pass the argument before the specified input/output file, e.g. --ppa "Merger+ffmpeg_i1:-v quiet". You can use this option multiple times to give different arguments to different postprocessors. (Alias: --ppa)
    -k, --keep-video                Keep the intermediate video file on disk after post-processing
    --no-keep-video                 Delete the intermediate video file after post-processing (default)
    --post-overwrites               Overwrite post-processed files (default)
    --no-post-overwrites            Do not overwrite post-processed files
    --embed-subs                    Embed subtitles in the video (only for mp4, webm and mkv videos)
    --no-embed-subs                 Do not embed subtitles (default)
    --embed-thumbnail               Embed thumbnail in the video as cover art
    --no-embed-thumbnail            Do not embed thumbnail (default)
    --embed-metadata                Embed metadata to the video file. Also embeds chapters/infojson if present unless --no-embed-chapters/--no-embed-info-json are used (Alias: --add-metadata)
    --no-embed-metadata             Do not add metadata to file (default) (Alias: --no-add-metadata)
    --embed-chapters                Add chapter markers to the video file (Alias: --add-chapters)
    --no-embed-chapters             Do not add chapter markers (default) (Alias: --no-add-chapters)
    --embed-info-json               Embed the infojson as an attachment to mkv/mka video files
    --no-embed-info-json            Do not embed the infojson as an attachment to the video file
    --parse-metadata FROM:TO        Parse additional metadata like title/artist from other fields; see "MODIFYING METADATA" for details
    --replace-in-metadata FIELDS REGEX REPLACE  Replace text in a metadata field using the given regex. This option can be used multiple times
    --xattrs                        Write metadata to the video file's xattrs (using dublin core and xdg standards)
    --concat-playlist POLICY        Concatenate videos in a playlist. One of "never", "always", or "multi_video" (default; only when the videos form a single show). All the video files must have the same codecs and number of streams to be concatenable. The "pl_video:" prefix can be used with "--paths" and "--output" to set the output filename for the concatenated files. See "OUTPUT TEMPLATE" for details
    --fixup POLICY                  Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the default; fix the file if we can, warn otherwise), force (try fixing even if the file already exists)
    --ffmpeg-location PATH          Location of the ffmpeg binary; either the path to the binary or its containing directory
    --exec [WHEN:]CMD               Execute a command, optionally prefixed with when to execute it (after_move if unspecified), separated by a ":". Supported values of "WHEN" are the same as that of --use-postprocessor. The same syntax as the output template can be used to pass any field as arguments to the command. After download, an additional field "filepath" that contains the final path of the downloaded file is also available, and if no fields are passed, %(filepath)q is appended to the end of the command. This option can be used multiple times
    --no-exec                       Remove any previously defined --exec
    --convert-subs FORMAT           Convert the subtitles to another format (currently supported: ass, lrc, srt, vtt) (Alias: --convert-subtitles)
    --convert-thumbnails FORMAT     Convert the thumbnails to another format (currently supported: jpg, png, webp). You can specify multiple rules using similar syntax as --remux-video
    --split-chapters                Split video into multiple files based on internal chapters. The "chapter:" prefix can be used with "--paths" and "--output" to set the output filename for the split files. See "OUTPUT TEMPLATE" for details
    --no-split-chapters             Do not split video based on chapters (default)
    --remove-chapters REGEX         Remove chapters whose title matches the given regular expression. The syntax is the same as --download-sections. This option can be used multiple times
    --no-remove-chapters            Do not remove any chapters from the file (default)
    --force-keyframes-at-cuts       Force keyframes at cuts when downloading/splitting/removing sections. This is slow due to needing a re-encode, but the resulting video may have fewer artifacts around the cuts
    --no-force-keyframes-at-cuts    Do not force keyframes around the chapters when cutting/splitting (default)
    --use-postprocessor NAME[:ARGS]  The (case sensitive) name of plugin postprocessors to be enabled, and (optionally) arguments to be passed to it, separated by a colon ":". ARGS are a semicolon ";" delimited list of NAME=VALUE. The "when" argument determines when the postprocessor is invoked. It can be one of "pre_process" (after video extraction), "after_filter" (after video passes filter), "before_dl" (before each video download), "post_process" (after each video download; default), "after_move" (after moving the video file to its final location), "after_video" (after downloading and processing all formats of a video), or "playlist" (at end of playlist). This option can be used multiple times to add different postprocessors
## SponsorBlock Options:
Make chapter entries for, or remove various segments (sponsor, introductions, etc.) from downloaded YouTube videos using the [SponsorBlock API](https://sponsor.ajay.app)

    --sponsorblock-mark CATS        SponsorBlock categories to create chapters for, separated by commas. Available categories are sponsor, intro, outro, selfpromo, preview, filler, interaction, music_offtopic, poi_highlight, all and default (=all). You can prefix the category with a "-" to exclude it. See [1] for description of the categories. E.g. --sponsorblock-mark all,-preview [1] https://wiki.sponsor.ajay.app/w/Segment_Categories
    --sponsorblock-remove CATS      SponsorBlock categories to be removed from the video file, separated by commas. If a category is present in both mark and remove, remove takes precedence. The syntax and available categories are the same as for --sponsorblock-mark except that "default" refers to "all,-filler" and poi_highlight is not available
    --sponsorblock-chapter-title TEMPLATE  An output template for the title of the SponsorBlock chapters created by --sponsorblock-mark. The only available fields are start_time, end_time, category, categories, name, category_names. Defaults to "[SponsorBlock]: %(category_names)l"
    --no-sponsorblock               Disable both --sponsorblock-mark and --sponsorblock-remove
    --sponsorblock-api URL          SponsorBlock API location, defaults to https://sponsor.ajay.app

## Extractor Options:
    --extractor-retries RETRIES     Number of retries for known extractor errors (default is 3), or "infinite"
    --allow-dynamic-mpd             Process dynamic DASH manifests (default) (Alias: --no-ignore-dynamic-mpd)
    --ignore-dynamic-mpd            Do not process dynamic DASH manifests (Alias: --no-allow-dynamic-mpd)
    --hls-split-discontinuity       Split HLS playlists to different formats at discontinuities such as ad breaks
    --no-hls-split-discontinuity    Do not split HLS playlists to different formats at discontinuities such as ad breaks (default)
    --extractor-args KEY:ARGS       Pass these arguments to the extractor. See "EXTRACTOR ARGUMENTS" for details. You can use this option multiple times to give arguments for different extractors
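Putting a few of these options together, a hypothetical invocation (the URL and template are placeholders) might look like:

```
yt-dlp -f "bv*[height<=1080]+ba/b" --embed-metadata --embed-thumbnail --sponsorblock-remove sponsor -o "%(uploader)s/%(title)s [%(id)s].%(ext)s" "https://example.com/video"
```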
# CONFIGURATION

You can configure yt-dlp by placing any supported command line option in a configuration file. The configuration is loaded from the following locations:

1. **Main Configuration**: The file given by `--config-location`
1. **Portable Configuration**: `yt-dlp.conf` in the same directory as the bundled binary. If you are running from source-code (`<root dir>/yt_dlp/__main__.py`), the root directory is used instead.
1. **Home Configuration**: `yt-dlp.conf` in the home path given by `-P`, or in the current directory if no such path is given
1. **User Configuration**:
    * `%XDG_CONFIG_HOME%/yt-dlp/config` (recommended on Linux/macOS)
    * `%XDG_CONFIG_HOME%/yt-dlp.conf`
    * `%APPDATA%/yt-dlp/config` (recommended on Windows)
    * `%APPDATA%/yt-dlp/config.txt`
    * `~/yt-dlp.conf`
    * `~/yt-dlp.conf.txt`

    `%XDG_CONFIG_HOME%` defaults to `~/.config` if undefined. On Windows, `%APPDATA%` generally points to `C:\Users\<user name>\AppData\Roaming` and `~` points to `%HOME%` if present, `%USERPROFILE%` (generally `C:\Users\<user name>`), or `%HOMEDRIVE%%HOMEPATH%`
1. **System Configuration**: `/etc/yt-dlp.conf`

E.g. with the following configuration file yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory:
```
# Lines starting with # are comments

# Always extract audio
-x

# Do not copy the mtime
--no-mtime

# Use this proxy
--proxy 127.0.0.1:3128

# Save all videos under YouTube directory in your home directory
-o ~/YouTube/%(title)s.%(ext)s
```

Note that options in a configuration file are just the same options aka switches used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`.

You can use `--ignore-config` if you want to disable all configuration files for a particular yt-dlp run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded.

### Config file encoding

The config files are decoded according to the UTF BOM if present, and in the encoding from system locale otherwise. If you want your file to be decoded differently, add `# coding: ENCODING` to the beginning of the file (e.g. `# coding: shift-jis`). There must be no characters before that, even spaces or BOM.

### Authentication with `.netrc` file

You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every yt-dlp execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in `--netrc-location` and restrict permissions to read/write by only you:
```
touch $HOME/.netrc
chmod a-rwx,u+rw $HOME/.netrc
```
After that you can add credentials for an extractor in the following format, where *extractor* is the name of the extractor in lowercase:
```
machine <extractor> login <username> password <password>
```
E.g.
```
machine youtube login myaccount@gmail.com password my_youtube_password
machine twitch login my_twitch_account_name password my_twitch_password
```
To activate authentication with the `.netrc` file you should pass `--netrc` to yt-dlp or place it in the [configuration file](#configuration).

The default location of the .netrc file is `$HOME` (`~`) in UNIX. On Windows, it is `%HOME%` if present, `%USERPROFILE%` (generally `C:\Users\<user name>`) or `%HOMEDRIVE%%HOMEPATH%`

# OUTPUT TEMPLATE

The `-o` option is used to indicate a template for the output file names while the `-P` option is used to specify the path each type of file should be saved to.

<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
**tl;dr:** [navigate me to examples](#output-template-examples).
<!-- MANPAGE: END EXCLUDED SECTION -->

The simplest usage of `-o` is not to set any template arguments when downloading a single file, like in `yt-dlp -o funny_video.flv "https://some/video"` (hard-coding the file extension like this is _not_ recommended and could break some post-processing).

It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [Python string formatting operations](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting), e.g. `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations.

The field names themselves (the part inside the parenthesis) can also have some special formatting:

1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. E.g. `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. `%()s` refers to the entire infodict. Note that not all of the fields that become available this way are listed below; use `-j` to see such fields
1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. E.g. `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d`
1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. E.g. `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s`
1. **Alternatives**: Alternate fields can be specified separated with a `,`. E.g. `%(release_date>%Y,upload_date>%Y|Unknown)s`
1. **Replacement**: A replacement value can be specified using a `&` separator. If the field is *not* empty, this replacement value will be used instead of the actual field content. This is done after alternate fields are considered; thus the replacement is used if *any* of the alternative fields is *not* empty.
1. **Default**: A literal default value can be specified for when the field is empty, using a `|` separator. This overrides `--output-na-placeholder`. E.g. `%(uploader|Unknown)s`
1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, yt-dlp additionally supports converting to `B` = **B**ytes, `j` = **j**son (flag `#` for pretty-printing), `h` = HTML escaping, `l` = a comma separated **l**ist (flag `#` for `\n` newline-separated), `q` = a string **q**uoted for the terminal (flag `#` to split a list into different arguments), `D` = add **D**ecimal suffixes (e.g. 10M) (flag `#` to use 1024 as factor), and `S` = **S**anitize as filename (flag `#` for restricted)
1. **Unicode normalization**: The format type `U` can be used for NFC [unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. E.g. `%(title)+.100U` is NFKC
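As an illustrative example combining a few of these features (the URL is a placeholder), the following names each file by its upload date and a title truncated to 70 characters:

```
yt-dlp -o "%(upload_date>%Y-%m-%d)s - %(title).70s [%(id)s].%(ext)s" "https://some/video"
```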
To summarize, the general syntax for a field is:

```
%(name[.keys][addition][>strf][,alternate][&replacement][|default])[flags][width][.precision][length]type
```

Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `link`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`, `pl_video`. E.g. `-o "%(title)s.%(ext)s" -o "thumbnail:%(title)s\%(title)s.%(ext)s"` will put the thumbnails in a folder with the same name as the video. If any of the templates is empty, that type of file will not be written. E.g. `--write-thumbnail -o "thumbnail:"` will write thumbnails only for playlists and not for videos.

The available fields are:

- `id` (string): Video identifier
- `title` (string): Video title
- `fulltitle` (string): Video title ignoring live timestamp and generic title
- `url` (string): Video URL
- `ext` (string): Video filename extension
- `alt_title` (string): A secondary title of the video
- `description` (string): The description of the video
- `display_id` (string): An alternative identifier for the video
- `uploader` (string): Full name of the video uploader
- `license` (string): License name the video is licensed under
- `creator` (string): The creator of the video
- `timestamp` (numeric): UNIX timestamp of the moment the video became available
- `upload_date` (string): Video upload date in UTC (YYYYMMDD)
- `release_timestamp` (numeric): UNIX timestamp of the moment the video was released
- `release_date` (string): The date (YYYYMMDD) when the video was released in UTC
- `modified_timestamp` (numeric): UNIX timestamp of the moment the video was last modified
- `modified_date` (string): The date (YYYYMMDD) when the video was last modified in UTC
- `uploader_id` (string): Nickname or id of the video uploader
- `channel` (string): Full name of the channel the video is uploaded on
- `channel_id` (string): Id of the channel
- `channel_follower_count` (numeric): Number of followers of the channel
- `location` (string): Physical location where the video was filmed
- `duration` (numeric): Length of the video in seconds
- `duration_string` (string): Length of the video (HH:mm:ss)
- `view_count` (numeric): How many users have watched the video on the platform
- `like_count` (numeric): Number of positive ratings of the video
- `dislike_count` (numeric): Number of negative ratings of the video
- `repost_count` (numeric): Number of reposts of the video
- `average_rating` (numeric): Average rating given by users, the scale used depends on the webpage
- `comment_count` (numeric): Number of comments on the video (For some extractors, comments are only downloaded at the end, and so this field cannot be used)
- `age_limit` (numeric): Age restriction for the video (years)
- `live_status` (string): One of "not_live", "is_live", "is_upcoming", "was_live", "post_live" (was live, but VOD is not yet processed)
- `is_live` (boolean): Whether this video is a live stream or a fixed-length video
- `was_live` (boolean): Whether this video was originally a live stream
- `playable_in_embed` (string): Whether this video is allowed to play in embedded players on other sites
- `availability` (string): Whether the video is "private", "premium_only", "subscriber_only", "needs_auth", "unlisted" or "public"
 - `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL
 - `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL
 - `format` (string): A human-readable description of the format
 - `format_id` (string): Format code specified by `--format`
 - `format_note` (string): Additional info about the format
 - `width` (numeric): Width of the video
 - `height` (numeric): Height of the video
 - `resolution` (string): Textual description of width and height
 - `tbr` (numeric): Average bitrate of audio and video in KBit/s
 - `abr` (numeric): Average audio bitrate in KBit/s
 - `acodec` (string): Name of the audio codec in use
 - `asr` (numeric): Audio sampling rate in Hertz
 - `vbr` (numeric): Average video bitrate in KBit/s
 - `fps` (numeric): Frame rate
 - `dynamic_range` (string): The dynamic range of the video
 - `audio_channels` (numeric): The number of audio channels
 - `stretched_ratio` (float): `width:height` of the video's pixels, if not square
 - `vcodec` (string): Name of the video codec in use
 - `container` (string): Name of the container format
 - `filesize` (numeric): The number of bytes, if known in advance
 - `filesize_approx` (numeric): An estimate for the number of bytes
 - `protocol` (string): The protocol that will be used for the actual download
 - `extractor` (string): Name of the extractor
 - `extractor_key` (string): Key name of the extractor
 - `epoch` (numeric): Unix epoch of when the information extraction was completed
 - `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start`
 - `video_autonumber` (numeric): Number that will be increased with each video
 - `n_entries` (numeric): Total number of extracted items in the playlist
 - `playlist_id` (string): Identifier of the playlist that contains the video
 - `playlist_title` (string): Name of the playlist that contains the video
 - `playlist` (string): `playlist_id` or `playlist_title`
 - `playlist_count` (numeric): Total number of items in the playlist. May not be known if the entire playlist is not extracted
 - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the final index
 - `playlist_autonumber` (numeric): Position of the video in the playlist download queue padded with leading zeros according to the total length of the playlist
 - `playlist_uploader` (string): Full name of the playlist uploader
 - `playlist_uploader_id` (string): Nickname or id of the playlist uploader
 - `webpage_url` (string): A URL to the video webpage which, if given to yt-dlp, should allow you to get the same result again
 - `webpage_url_basename` (string): The basename of the webpage URL
 - `webpage_url_domain` (string): The domain of the webpage URL
 - `original_url` (string): The URL given by the user (or the same as `webpage_url` for playlist entries)

Available for the video that belongs to some logical chapter or section:

 - `chapter` (string): Name or title of the chapter the video belongs to
 - `chapter_number` (numeric): Number of the chapter the video belongs to
 - `chapter_id` (string): Id of the chapter the video belongs to

Available for the video that is an episode of some series or programme:

 - `series` (string): Title of the series or programme the video episode belongs to
 - `season` (string): Title of the season the video episode belongs to
 - `season_number` (numeric): Number of the season the video episode belongs to
 - `season_id` (string): Id of the season the video episode belongs to
 - `episode` (string): Title of the video episode
 - `episode_number` (numeric): Number of the video episode within a season
 - `episode_id` (string): Id of the video episode

Available for the media that is a track or a part of a music album:

 - `track` (string): Title of the track
 - `track_number` (numeric): Number of the track within an album or a disc
 - `track_id` (string): Id of the track
 - `artist` (string): Artist(s) of the track
 - `genre` (string): Genre(s) of the track
 - `album` (string): Title of the album the track belongs to
 - `album_type` (string): Type of the album
 - `album_artist` (string): List of all artists who appeared on the album
 - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
 - `release_year` (numeric): Year (YYYY) when the album was released

Available only when using `--download-sections` and for the `chapter:` prefix when using `--split-chapters` for videos with internal chapters:

 - `section_title` (string): Title of the chapter
 - `section_number` (numeric): Number of the chapter within the file
 - `section_start` (numeric): Start time of the chapter in seconds
 - `section_end` (numeric): End time of the chapter in seconds

Available only when used in `--print`:

 - `urls` (string): The URLs of all requested formats, one in each line
 - `filename` (string): Name of the video file. Note that the actual filename may be different due to post-processing. Use `--exec echo` to get the name after all postprocessing is complete
 - `formats_table` (table): The video format table as printed by `--list-formats`
 - `thumbnails_table` (table): The thumbnail format table as printed by `--list-thumbnails`
 - `subtitles_table` (table): The subtitle format table as printed by `--list-subs`
 - `automatic_captions_table` (table): The automatic subtitle format table as printed by `--list-subs`

Available only in `--sponsorblock-chapter-title`:

 - `start_time` (numeric): Start time of the chapter in seconds
 - `end_time` (numeric): End time of the chapter in seconds
 - `categories` (list): The SponsorBlock categories the chapter belongs to
 - `category` (string): The smallest SponsorBlock category the chapter belongs to
 - `category_names` (list): Friendly names of the categories
 - `name` (string): Friendly name of the smallest category

Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. E.g. for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKc`, this will result in a `yt-dlp test video-BaW_jenozKc.mp4` file created in the current directory.

Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with the placeholder value provided with `--output-na-placeholder` (`NA` by default).

**Tip**: Look at the `-j` output to identify which fields are available for the particular URL

For numeric sequences you can use [numeric related formatting](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting); e.g. `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`.

Output templates can also contain arbitrary hierarchical paths, e.g. `-o "%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s"`, which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.

To use percent literals in an output template use `%%`. To output to stdout use `-o -`.

The current default template is `%(title)s [%(id)s].%(ext)s`.

In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or transmitting the filename through an 8-bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title.

<!-- MANPAGE: BEGIN EXCLUDED SECTION -->

#### Output template and Windows batch files

If you are using an output template inside a Windows batch file then you must escape plain percent characters (`%`) by doubling, so that `-o "%(title)s-%(id)s.%(ext)s"` should become `-o "%%(title)s-%%(id)s.%%(ext)s"`. However you should not touch `%`'s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`.
<!-- MANPAGE: END EXCLUDED SECTION -->

#### Output template examples

```bash
$ yt-dlp --get-filename -o "test video.%(ext)s" BaW_jenozKc
test video.webm    # Literal name with correct extension

$ yt-dlp --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
youtube-dl test video ''_ä↭𝕐.webm    # All kinds of weird characters

$ yt-dlp --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
youtube-dl_test_video_.webm    # Restricted file name

# Download YouTube playlist videos in separate directory indexed by video order in a playlist
$ yt-dlp -o "%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s" "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"

# Download YouTube playlist videos in separate directories according to their uploaded year
$ yt-dlp -o "%(upload_date>%Y)s/%(title)s.%(ext)s" "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"

# Prefix playlist index with " - " separator, but only if it is available
$ yt-dlp -o '%(playlist_index|)s%(playlist_index& - |)s%(title)s.%(ext)s' BaW_jenozKc "https://www.youtube.com/user/TheLinuxFoundation/playlists"

# Download all playlists of YouTube channel/user keeping each playlist in separate directory:
$ yt-dlp -o "%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s" "https://www.youtube.com/user/TheLinuxFoundation/playlists"

# Download Udemy course keeping each chapter in separate directory under MyVideos directory in your home
$ yt-dlp -u user -p password -P "~/MyVideos" -o "%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s" "https://www.udemy.com/java-tutorial"

# Download entire series season keeping each series and each season in separate directory under C:/MyVideos
$ yt-dlp -P "C:/MyVideos" -o "%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" "https://videomore.ru/kino_v_detalayah/5_sezon/367617"

# Download video as "C:\MyVideos\uploader\title.ext", subtitles as "C:\MyVideos\subs\uploader\title.ext"
# and put all temporary files in "C:\MyVideos\tmp"
$ yt-dlp -P "C:/MyVideos" -P "temp:tmp" -P "subtitle:subs" -o "%(uploader)s/%(title)s.%(ext)s" BaW_jenozKc --write-subs

# Download video as "C:\MyVideos\uploader\title.ext" and subtitles as "C:\MyVideos\uploader\subs\title.ext"
$ yt-dlp -P "C:/MyVideos" -o "%(uploader)s/%(title)s.%(ext)s" -o "subtitle:%(uploader)s/subs/%(title)s.%(ext)s" BaW_jenozKc --write-subs

# Stream the video being downloaded to stdout
$ yt-dlp -o - BaW_jenozKc
```

# FORMAT SELECTION

By default, yt-dlp tries to download the best available quality if you **don't** pass any options. This is generally equivalent to using `-f bestvideo*+bestaudio/best`. However, if multiple audio streams are enabled (`--audio-multistreams`), the default format changes to `-f bestvideo+bestaudio/best`. Similarly, if ffmpeg is unavailable, or if you use yt-dlp to stream to `stdout` (`-o -`), the default becomes `-f best/bestvideo+bestaudio`.

**Deprecation warning**: Latest versions of yt-dlp can stream multiple formats to the stdout simultaneously using ffmpeg. So, in future versions, the default for this will be set to `-f bv*+ba/b` similar to normal downloads. If you want to preserve the `-f b/bv+ba` setting, it is recommended to explicitly specify it in the configuration options.

The general syntax for format selection is `-f FORMAT` (or `--format FORMAT`) where `FORMAT` is a *selector expression*, i.e. an expression that describes the format or formats you would like to download.
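For instance, to guard against the planned change of defaults described above, you can pin the current default selector explicitly (a sketch; the URL is a placeholder):

```bash
# Equivalent to the current default: best video+audio, falling back to best combined
$ yt-dlp -f "bv*+ba/b" "https://some/video"
```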
<!-- MANPAGE: BEGIN EXCLUDED SECTION -->

**tl;dr:** [navigate me to examples](#format-selection-examples).

<!-- MANPAGE: END EXCLUDED SECTION -->

The simplest case is requesting a specific format; e.g. with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for a particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.

You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file.

You can use `-f -` to interactively provide the format selector *for each video*.

You can also use special names to select particular edge case formats:

 - `all`: Select **all formats** separately
 - `mergeall`: Select and **merge all formats** (Must be used with `--audio-multistreams`, `--video-multistreams` or both)
 - `b*`, `best*`: Select the best quality format that **contains either** a video or an audio or both (i.e. `vcodec!=none or acodec!=none`)
 - `b`, `best`: Select the best quality format that **contains both** video and audio. Equivalent to `best*[vcodec!=none][acodec!=none]`
 - `bv`, `bestvideo`: Select the best quality **video-only** format. Equivalent to `best*[acodec=none]`
 - `bv*`, `bestvideo*`: Select the best quality format that **contains video**. It may also contain audio. Equivalent to `best*[vcodec!=none]`
 - `ba`, `bestaudio`: Select the best quality **audio-only** format. Equivalent to `best*[vcodec=none]`
 - `ba*`, `bestaudio*`: Select the best quality format that **contains audio**. It may also contain video. Equivalent to `best*[acodec!=none]` ([Do not use!](https://github.com/yt-dlp/yt-dlp/issues/979#issuecomment-919629354))
 - `w*`, `worst*`: Select the worst quality format that contains either a video or an audio
 - `w`, `worst`: Select the worst quality format that contains both video and audio. Equivalent to `worst*[vcodec!=none][acodec!=none]`
 - `wv`, `worstvideo`: Select the worst quality video-only format. Equivalent to `worst*[acodec=none]`
 - `wv*`, `worstvideo*`: Select the worst quality format that contains video. It may also contain audio. Equivalent to `worst*[vcodec!=none]`
 - `wa`, `worstaudio`: Select the worst quality audio-only format. Equivalent to `worst*[vcodec=none]`
 - `wa*`, `worstaudio*`: Select the worst quality format that contains audio. It may also contain video. Equivalent to `worst*[acodec!=none]`

For example, to download the worst quality video-only format you can use `-f worstvideo`. It is, however, recommended not to use `worst` and related options. When your format selector is `worst`, the format which is worst in all respects is selected. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-S +size` or, more rigorously, `-S +size,+br,+res,+fps` instead of `-f worst`. See [Sorting Formats](#sorting-formats) for more details.

You can select the n'th best format of a type by using `best<type>.<n>`. For example, `best.2` will select the 2nd best combined format. Similarly, `bv*.3` will select the 3rd best format that contains a video stream.

If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes. Note that formats on the left hand side are preferred; e.g. `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download.
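For instance (the format codes here are illustrative and extractor specific):

```bash
# Try format 22 first, then 17, then 18
$ yt-dlp -f "22/17/18" "https://some/video"

# Select the 2nd best combined format
$ yt-dlp -f "best.2" "https://some/video"
```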
If you want to download several formats of the same video, use a comma as a separator, e.g. `-f 22,17,18` will download all three of these formats, of course if they are available. Or a more sophisticated example combined with the precedence feature: `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`.

You can merge the video and audio of multiple formats into a single file using `-f <format1>+<format2>+...` (requires ffmpeg installed); e.g. `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg.

**Deprecation warning**: Since the *below* described behavior is complex and counter-intuitive, this will be removed and multistreams will be enabled by default in the future. A new operator will instead be added to limit formats to single audio/video.

Unless `--video-multistreams` is used, all formats with a video stream except the first one are ignored. Similarly, unless `--audio-multistreams` is used, all formats with an audio stream except the first one are ignored. E.g. `-f bestvideo+best+bestaudio --video-multistreams --audio-multistreams` will download and merge all 3 given formats. The resulting file will have 2 video streams and 2 audio streams. But `-f bestvideo+best+bestaudio --no-video-multistreams` will download and merge only `bestvideo` and `bestaudio`. `best` is ignored since another format containing a video stream (`bestvideo`) has already been selected. The order of the formats is therefore important. `-f best+bestaudio --no-audio-multistreams` will download and merge both formats, while `-f bestaudio+best --no-audio-multistreams` will ignore `best` and download only `bestaudio`.

## Filtering Formats

You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`).

The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals):

 - `filesize`: The number of bytes, if known in advance
 - `width`: Width of the video, if known
 - `height`: Height of the video, if known
 - `tbr`: Average bitrate of audio and video in KBit/s
 - `abr`: Average audio bitrate in KBit/s
 - `vbr`: Average video bitrate in KBit/s
 - `asr`: Audio sampling rate in Hertz
 - `fps`: Frame rate

Filtering also works for the comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains), `~=` (matches regex) and the following string meta fields:

 - `ext`: File extension
 - `acodec`: Name of the audio codec in use
 - `vcodec`: Name of the video codec in use
 - `container`: Name of the container format
 - `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
 - `format_id`: A short description of the format
 - `language`: Language code

Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). The comparand of a string comparison needs to be quoted with either double or single quotes if it contains spaces or special characters other than `._-`.
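A couple of sketches combining these filters (the URL is a placeholder):

```bash
# Best mp4 format served over plain http/https
$ yt-dlp -f "best[ext=mp4][protocol^=http]" "https://some/video"

# Best format whose format_id does not contain "dash"
$ yt-dlp -f "best[format_id!*=dash]" "https://some/video"
```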
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by the particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering.

Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats.

Format selectors can also be grouped using parentheses; e.g. `-f "(mp4,webm)[height<480]"` will download the best pre-merged mp4 and webm formats with a height lower than 480.

## Sorting Formats

You can change the criteria for being considered the `best` by using `-S` (`--format-sort`). The general format for this is `--format-sort field1,field2...`.

The available fields are:

 - `hasvid`: Gives priority to formats that have a video stream
 - `hasaud`: Gives priority to formats that have an audio stream
 - `ie_pref`: The format preference
 - `lang`: The language preference
 - `quality`: The quality of the format
 - `source`: The preference of the source
 - `proto`: Protocol used for download (`https`/`ftps` > `http`/`ftp` > `m3u8_native`/`m3u8` > `http_dash_segments` > `websocket_frag` > `mms`/`rtsp` > `f4f`/`f4m`)
 - `vcodec`: Video Codec (`av01` > `vp9.2` > `vp9` > `h265` > `h264` > `vp8` > `h263` > `theora` > other)
 - `acodec`: Audio Codec (`flac`/`alac` > `wav`/`aiff` > `opus` > `vorbis` > `aac` > `mp4a` > `mp3` > `eac3` > `ac3` > `dts` > other)
 - `codec`: Equivalent to `vcodec,acodec`
 - `vext`: Video Extension (`mp4` > `webm` > `flv` > other). If `--prefer-free-formats` is used, `webm` is preferred.
 - `aext`: Audio Extension (`m4a` > `aac` > `mp3` > `ogg` > `opus` > `webm` > other). If `--prefer-free-formats` is used, the order changes to `opus` > `ogg` > `webm` > `m4a` > `mp3` > `aac`.
 - `ext`: Equivalent to `vext,aext`
 - `filesize`: Exact filesize, if known in advance
 - `fs_approx`: Approximate filesize calculated from the manifests
 - `size`: Exact filesize if available, otherwise approximate filesize
 - `height`: Height of video
 - `width`: Width of video
 - `res`: Video resolution, calculated as the smallest dimension.
 - `fps`: Framerate of video
 - `hdr`: The dynamic range of the video (`DV` > `HDR12` > `HDR10+` > `HDR10` > `HLG` > `SDR`)
 - `channels`: The number of audio channels
 - `tbr`: Total average bitrate in KBit/s
 - `vbr`: Average video bitrate in KBit/s
 - `abr`: Average audio bitrate in KBit/s
 - `br`: Equivalent to using `tbr,vbr,abr`
 - `asr`: Audio sample rate in Hz

**Deprecation warning**: Many of these fields have (currently undocumented) aliases that may be removed in a future version. It is recommended to use only the documented field names.

All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. E.g. `+res` prefers the format with the smallest resolution. Additionally, you can suffix a preferred value for the field, separated by a `:`. E.g. `res:720` prefers larger videos, but no larger than 720p, and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. E.g. `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and the audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`.
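For instance, combining a capped preferred value with the two-value `codec` form described above (a sketch; the URL is a placeholder):

```bash
# Prefer resolutions up to 720p, breaking ties by higher total bitrate
$ yt-dlp -S "res:720,br" "https://some/video"

# Prefer h264 video and m4a audio, as in the +codec:avc:m4a example above
$ yt-dlp -S "+codec:avc:m4a" "https://some/video"
```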
You can also make the sorting prefer the nearest values to those provided by using `~` as the delimiter. E.g. `filesize~1G` prefers the format with filesize closest to 1 GiB.

The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,hdr:12,vcodec:vp9.2,channels,acodec,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order.

Note that the default has `vcodec:vp9.2`; i.e. `av1` is not preferred. Similarly, the default for hdr is `hdr:12`; i.e. dolby vision is not preferred. These choices are made since DV and AV1 formats are not yet fully compatible with most devices. This may be changed in the future as more devices become capable of smoothly playing back these formats.

If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all respects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`.

**Tip**: You can use `-v -F` to see how the formats have been sorted (worst to best).

## Format Selection examples

```bash
# Download and merge the best video-only format and the best audio-only format,
# or download the best combined format if video-only format is not available
$ yt-dlp -f "bv+ba/b"

# Download best format that contains video,
# and if it doesn't already have an audio stream, merge it with best audio-only format
$ yt-dlp -f "bv*+ba/b"

# Same as above
$ yt-dlp

# Download the best video-only format and the best audio-only format without merging them
# For this case, an output template should be used since
# by default, bestvideo and bestaudio will have the same file name.
$ yt-dlp -f "bv,ba" -o "%(title)s.f%(format_id)s.%(ext)s"

# Download and merge the best format that has a video stream,
# and all audio-only formats into one file
$ yt-dlp -f "bv*+mergeall[vcodec=none]" --audio-multistreams

# Download and merge the best format that has a video stream,
# and the best 2 audio-only formats into one file
$ yt-dlp -f "bv*+ba+ba.2" --audio-multistreams


# The following examples show the old method (without -S) of format selection
# and how to use -S to achieve a similar but (generally) better result

# Download the worst video available (old method)
$ yt-dlp -f "wv*+wa/w"

# Download the best video available but with the smallest resolution
$ yt-dlp -S "+res"

# Download the smallest video available
$ yt-dlp -S "+size,+br"


# Download the best mp4 video available, or the best video if no mp4 available
$ yt-dlp -f "bv*[ext=mp4]+ba[ext=m4a]/b[ext=mp4] / bv*+ba/b"

# Download the best video with the best extension
# (For video, mp4 > webm > flv. For audio, m4a > aac > mp3 ...)
$ yt-dlp -S "ext" # Download the best video available but no better than 480p, # or the worst video if there is no video under 480p $ yt-dlp -f "bv*[height<=480]+ba/b[height<=480] / wv*+ba/w" # Download the best video available with the largest height but no better than 480p, # or the best video with the smallest resolution if there is no video under 480p $ yt-dlp -S "height:480" # Download the best video available with the largest resolution but no better than 480p, # or the best video with the smallest resolution if there is no video under 480p # Resolution is determined by using the smallest dimension. # So this works correctly for vertical videos as well $ yt-dlp -S "res:480" # Download the best video (that also has audio) but no bigger than 50 MB, # or the worst video (that also has audio) if there is no video under 50 MB $ yt-dlp -f "b[filesize<50M] / w" # Download largest video (that also has audio) but no bigger than 50 MB, # or the smallest video (that also has audio) if there is no video under 50 MB $ yt-dlp -f "b" -S "filesize:50M" # Download best video (that also has audio) that is closest in size to 50 MB $ yt-dlp -f "b" -S "filesize~50M" # Download best video available via direct link over HTTP/HTTPS protocol, # or the best video available via any protocol if there is no such video $ yt-dlp -f "(bv*+ba/b)[protocol^=http][protocol!*=dash] / (bv*+ba/b)" # Download best video available via the best protocol # (https/ftps > http/ftp > m3u8_native > m3u8 > http_dash_segments ...) $ yt-dlp -S "proto" # Download the best video with either h264 or h265 codec, # or the best video if there is no such video $ yt-dlp -f "(bv*[vcodec~='^((he|a)vc|h26[45])']+ba) / (bv*+ba/b)" # Download the best video with best codec no better than h264, # or the best video with worst codec if there is no such video $ yt-dlp -S "codec:h264" # Download the best video with worst codec no worse than h264, # or the best video with best codec if there is no such video $ yt-dlp -S "+codec:h264" # More complex examples # Download the best video no better than 720p preferring framerate greater than 30, # or the worst video (still preferring framerate greater than 30) if there is no such video $ yt-dlp -f "((bv*[fps>30]/bv*)[height<=720]/(wv*[fps>30]/wv*)) + ba / (b[fps>30]/b)[height<=720]/(w[fps>30]/w)" # Download the video with the largest resolution no better than 720p, # or the video with the smallest resolution available if there is no such video, # preferring larger framerate for formats with the same resolution $ yt-dlp -S "res:720,fps" # Download the video with smallest resolution no worse than 480p, # or the video with the largest resolution available if there is no such video, # preferring better codec and then larger total bitrate for the same resolution $ yt-dlp -S "+res:480,codec,br" ``` # MODIFYING METADATA The metadata obtained by the extractors can be modified by using `--parse-metadata` and `--replace-in-metadata` `--replace-in-metadata FIELDS REGEX REPLACE` is used to replace text in any metadata field using [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax). [Backreferences](https://docs.python.org/3/library/re.html?highlight=backreferences#re.sub) can be used in the replace string for advanced use. The general syntax of `--parse-metadata FROM:TO` is to give the name of a field or an [output template](#output-template) to extract data from, and the format to interpret it as, separated by a colon `:`. 
Either a [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields.

Note that any field created by this can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--add-metadata`.

This option also has a few special uses:

* You can download an additional URL based on the metadata of the currently downloaded video. To do this, set the field `additional_urls` to the URL that you want to download. E.g. `--parse-metadata "description:(?P<additional_urls>https?://www\.vimeo\.com/\d+)"` will download the first vimeo video found in the description

* You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to the `meta_description` field will be added to the `description` field in the file - you can use this to set a different "description" and "synopsis". To modify the metadata of individual streams, use the `meta<n>_` prefix (e.g. `meta1_language`). Any value set to the `meta_` field will overwrite all default values.

**Note**: Metadata modification happens before format selection, post-extraction and other post-processing operations. Some fields may be added or changed during these steps, overriding your changes.

For reference, these are the fields yt-dlp adds by default to the file metadata:

Metadata fields            | From
:--------------------------|:------------------------------------------------
`title`                    | `track` or `title`
`date`                     | `upload_date`
`description`, `synopsis`  | `description`
`purl`, `comment`          | `webpage_url`
`track`                    | `track_number`
`artist`                   | `artist`, `creator`, `uploader` or `uploader_id`
`genre`                    | `genre`
`album`                    | `album`
`album_artist`             | `album_artist`
`disc`                     | `disc_number`
`show`                     | `series`
`season_number`            | `season_number`
`episode_id`               | `episode` or `episode_id`
`episode_sort`             | `episode_number`
`language` of each stream  | the format's `language`

**Note**: The file format may not support some of these fields

## Modifying metadata examples

```bash
# Interpret the title as "Artist - Title"
$ yt-dlp --parse-metadata "title:%(artist)s - %(title)s"

# Regex example
$ yt-dlp --parse-metadata "description:Artist - (?P<artist>.+)"

# Set title as "Series name S01E05"
$ yt-dlp --parse-metadata "%(series)s S%(season_number)02dE%(episode_number)02d:%(title)s"

# Prioritize uploader as the "artist" field in video metadata
$ yt-dlp --parse-metadata "%(uploader|)s:%(meta_artist)s" --add-metadata

# Set "comment" field in video metadata using description instead of webpage_url,
# handling multiple lines correctly
$ yt-dlp --parse-metadata "description:(?s)(?P<meta_comment>.+)" --add-metadata

# Do not set any "synopsis" in the video metadata
$ yt-dlp --parse-metadata ":(?P<meta_synopsis>)"

# Remove "formats" field from the infojson by setting it to an empty string
$ yt-dlp --parse-metadata ":(?P<formats>)" -j

# Replace all spaces and "_" in title and uploader with a `-`
$ yt-dlp --replace-in-metadata "title,uploader" "[ _]" "-"
```

# EXTRACTOR ARGUMENTS

Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=android_embedded,web;include_live_dash" --extractor-args "funimation:version=uncut"`
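As another sketch, several of the youtube arguments documented below can be combined in a single `KEY:ARGS` string (the URL is a placeholder):

```bash
# Use only the web client, sort comments by top and cap the comments gathered
$ yt-dlp --extractor-args "youtube:player_client=web;comment_sort=top;max_comments=100,all,all" "https://some/video"
```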
The following extractors use this feature:

#### youtube
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
* `include_live_dash`: Include live dash formats even without `--live-from-start` (These formats don't download properly)
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`
    * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
* `innertube_key`: Innertube API key to use for all API requests

#### youtubetab (YouTube playlists, channels, feeds, etc.)
* `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)
* `approximate_date`: Extract approximate `upload_date` in flat-playlist. This may cause date-based filters to be slightly off

#### funimation
* `language`: Languages to extract, e.g. `funimation:language=english,japanese`
* `version`: The video version to extract - `uncut` or `simulcast`

#### crunchyroll
* `language`: Languages to extract, e.g. `crunchyroll:language=jaJp`
* `hardsub`: Which hard-sub versions to extract, e.g. `crunchyroll:hardsub=None,enUS`

#### crunchyrollbeta
* `format`: Which stream type(s) to extract (default: `adaptive_hls`). Potentially useful values include `adaptive_hls`, `adaptive_dash`, `vo_adaptive_hls`, `vo_adaptive_dash`, `download_hls`, `download_dash`, `multitrack_adaptive_hls_v2`
* `hardsub`: Preference order for which hardsub versions to extract (default: `None` = no hardsubs), e.g. `crunchyrollbeta:hardsub=en-US,None`
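For example, requesting a DASH stream with English hardsubs preferred (a sketch combining the two crunchyrollbeta values listed above):

```bash
$ yt-dlp --extractor-args "crunchyrollbeta:format=adaptive_dash;hardsub=en-US,None" "https://some/video"
```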
#### vikichannel
* `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers`

#### niconico
* `segment_duration`: Segment duration in milliseconds for HLS-DMC formats. Use it at your own risk since this feature **may result in your account termination.**

#### youtubewebarchive
* `check_all`: Try to check more at the cost of more requests. One or more of `thumbnails`, `captures`

#### gamejolt
* `comment_sort`: `hot` (default), `you` (cookies needed), `top`, `new` - choose comment sorting mode (on GameJolt's side)

#### hotstar
* `res`: resolution to ignore - one or more of `sd`, `hd`, `fhd`
* `vcodec`: vcodec to ignore - one or more of `h264`, `h265`, `dvh265`
* `dr`: dynamic range to ignore - one or more of `sdr`, `hdr10`, `dv`

#### tiktok
* `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`, e.g. `20.2.1`
* `manifest_app_version`: Numeric app version to call mobile APIs with, e.g. `221`

#### rokfinchannel
* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`

NOTE: These options may be changed/removed in the future without concern for backward compatibility

<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->

# PLUGINS

Plugins are loaded from `<root-dir>/ytdlp_plugins/<type>/__init__.py`; where `<root-dir>` is the directory of the binary (`<root-dir>/yt-dlp`), or the root directory of the module if you are running directly from source-code (`<root-dir>/yt_dlp/__main__.py`). Plugins are currently not supported for the `pip` version.

Plugins can be of `<type>`s `extractor` or `postprocessor`. Extractor plugins do not need to be enabled from the CLI and are automatically invoked when the input URL is suitable for it. Postprocessor plugins can be invoked using `--use-postprocessor NAME`.

See [ytdlp_plugins](ytdlp_plugins) for example plugins.

Note that **all** plugins are imported even if not invoked, and that **there are no checks** performed on plugin code. Use plugins at your own risk and only if you trust the code.

If you are a plugin author, add [ytdlp-plugins](https://github.com/topics/ytdlp-plugins) as a topic to your repository for discoverability.

# EMBEDDING YT-DLP

yt-dlp makes the best effort to be a good command-line program, and thus should be callable from any programming language.

Your program should avoid parsing the normal stdout since it may change in future versions. Instead, it should use options such as `-J`, `--print`, `--progress-template`, `--exec` etc. to create console output that you can reliably reproduce and parse.

From a Python program, you can embed yt-dlp in a more powerful fashion, like this:

```python
from yt_dlp import YoutubeDL

URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']
with YoutubeDL() as ydl:
    ydl.download(URLS)
```

Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L180).

**Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the [example below](#extracting-information)
## Embedding examples

#### Extracting information

```python
import json

import yt_dlp

URL = 'https://www.youtube.com/watch?v=BaW_jenozKc'

# ℹ️ See help(yt_dlp.YoutubeDL) for a list of available options and public functions
ydl_opts = {}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info(URL, download=False)

    # ℹ️ ydl.sanitize_info makes the info json-serializable
    print(json.dumps(ydl.sanitize_info(info)))
```

#### Download using an info-json

```python
import yt_dlp

INFO_FILE = 'path/to/video.info.json'

with yt_dlp.YoutubeDL() as ydl:
    error_code = ydl.download_with_info_file(INFO_FILE)

print('Some videos failed to download' if error_code
      else 'All videos successfully downloaded')
```

#### Extract audio

```python
import yt_dlp

URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']

ydl_opts = {
    'format': 'm4a/bestaudio/best',
    # ℹ️ See help(yt_dlp.postprocessor) for a list of available Postprocessors and their arguments
    'postprocessors': [{  # Extract audio using ffmpeg
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'm4a',
    }]
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    error_code = ydl.download(URLS)
```

#### Filter videos

```python
import yt_dlp

URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']

def longer_than_a_minute(info, *, incomplete):
    """Download only videos longer than a minute (or with unknown duration)"""
    duration = info.get('duration')
    if duration and duration < 60:
        return 'The video is too short'

ydl_opts = {
    'match_filter': longer_than_a_minute,
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    error_code = ydl.download(URLS)
```

#### Adding logger and progress hook

```python
import yt_dlp

URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']

class MyLogger:
    def debug(self, msg):
        # For compatibility with youtube-dl, both debug and info are passed into debug
        # You can distinguish them by the prefix '[debug] '
        if msg.startswith('[debug] '):
            pass
        else:
            self.info(msg)

    def info(self, msg):
        pass

    def warning(self, msg):
        pass

    def error(self, msg):
        print(msg)


# ℹ️ See "progress_hooks" in help(yt_dlp.YoutubeDL)
def my_hook(d):
    if d['status'] == 'finished':
        print('Done downloading, now post-processing ...')


ydl_opts = {
    'logger': MyLogger(),
    'progress_hooks': [my_hook],
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(URLS)
```

#### Add a custom PostProcessor

```python
import yt_dlp

URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']

# ℹ️ See help(yt_dlp.postprocessor.PostProcessor)
class MyCustomPP(yt_dlp.postprocessor.PostProcessor):
    def run(self, info):
        self.to_screen('Doing stuff')
        return [], info


with yt_dlp.YoutubeDL() as ydl:
    # ℹ️ "when" can take any value in yt_dlp.utils.POSTPROCESS_WHEN
    ydl.add_post_processor(MyCustomPP(), when='pre_process')
    ydl.download(URLS)
```

#### Use a custom format selector

```python
import yt_dlp

URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']

def format_selector(ctx):
    """ Select the best video and the best audio that won't result in an mkv.
    NOTE: This is just an example and does not handle all cases """

    # formats are already sorted worst to best
    formats = ctx.get('formats')[::-1]

    # acodec='none' means there is no audio
    best_video = next(f for f in formats
                      if f['vcodec'] != 'none' and f['acodec'] == 'none')

    # find compatible audio extension
    audio_ext = {'mp4': 'm4a', 'webm': 'webm'}[best_video['ext']]
    # vcodec='none' means there is no video
    best_audio = next(f for f in formats if (
        f['acodec'] != 'none' and f['vcodec'] == 'none' and f['ext'] == audio_ext))

    # These are the minimum required fields for a merged format
    yield {
        'format_id': f'{best_video["format_id"]}+{best_audio["format_id"]}',
        'ext': best_video['ext'],
        'requested_formats': [best_video, best_audio],
        # Must be + separated list of protocols
        'protocol': f'{best_video["protocol"]}+{best_audio["protocol"]}'
    }


ydl_opts = {
    'format': format_selector,
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(URLS)
```

<!-- MANPAGE: MOVE "NEW FEATURES" SECTION HERE -->

# DEPRECATED OPTIONS

These are all the deprecated options and the current alternative to achieve the same effect

#### Almost redundant options
While these options are almost the same as their new counterparts, there are some differences that prevent them from being redundant

    -j, --dump-json                  --print "%()j"
    -F, --list-formats               --print formats_table
    --list-thumbnails                --print thumbnails_table --print playlist:thumbnails_table
    --list-subs                      --print automatic_captions_table --print subtitles_table

#### Redundant options
While these options are redundant, they are still expected to be used due to their ease of use

    --get-description                --print description
    --get-duration                   --print duration_string
    --get-filename                   --print filename
    --get-format                     --print format
    --get-id                         --print id
    --get-thumbnail                  --print thumbnail
    -e, --get-title                  --print title
    -g, --get-url                    --print urls
    --match-title REGEX              --match-filter "title ~= (?i)REGEX"
    --reject-title REGEX             --match-filter "title !~= (?i)REGEX"
    --min-views COUNT                --match-filter "view_count >=? COUNT"
    --max-views COUNT                --match-filter "view_count <=? COUNT"
    --user-agent UA                  --add-header "User-Agent:UA"
    --referer URL                    --add-header "Referer:URL"
    --playlist-start NUMBER          -I NUMBER:
    --playlist-end NUMBER            -I :NUMBER
    --playlist-reverse               -I ::-1
    --no-playlist-reverse            Default

#### Not recommended
While these options still work, their use is not recommended since there are other alternatives to achieve the same

    --exec-before-download CMD       --exec "before_dl:CMD"
    --no-exec-before-download        --no-exec
    --all-formats                    -f all
    --all-subs                       --sub-langs all --write-subs
    --print-json                     -j --no-simulate
    --autonumber-size NUMBER         Use string formatting, e.g. %(autonumber)03d
    --autonumber-start NUMBER        Use internal field formatting like %(autonumber+NUMBER)s
    --id                             -o "%(id)s.%(ext)s"
    --metadata-from-title FORMAT     --parse-metadata "%(title)s:FORMAT"
    --hls-prefer-native              --downloader "m3u8:native"
    --hls-prefer-ffmpeg              --downloader "m3u8:ffmpeg"
    --list-formats-old               --compat-options list-formats (Alias: --no-list-formats-as-table)
    --list-formats-as-table          --compat-options -list-formats [Default] (Alias: --no-list-formats-old)
    --youtube-skip-dash-manifest     --extractor-args "youtube:skip=dash" (Alias: --no-youtube-include-dash-manifest)
    --youtube-skip-hls-manifest      --extractor-args "youtube:skip=hls" (Alias: --no-youtube-include-hls-manifest)
    --youtube-include-dash-manifest  Default (Alias: --no-youtube-skip-dash-manifest)
    --youtube-include-hls-manifest   Default (Alias: --no-youtube-skip-hls-manifest)

#### Developer options
These options are not intended to be used by the end-user

    --test                           Download only part of video for testing extractors
    --load-pages                     Load pages dumped by --write-pages
    --youtube-print-sig-code         For testing youtube signatures
    --allow-unplayable-formats       List unplayable formats also
    --no-allow-unplayable-formats    Default

#### Old aliases
These are aliases that are no longer documented for various reasons

    --avconv-location                --ffmpeg-location
    --clean-infojson                 --clean-info-json
    --cn-verification-proxy URL      --geo-verification-proxy URL
    --dump-headers                   --print-traffic
    --dump-intermediate-pages        --dump-pages
    --force-write-download-archive   --force-write-archive
    --load-info                      --load-info-json
    --no-clean-infojson              --no-clean-info-json
    --no-split-tracks                --no-split-chapters
    --no-write-srt                   --no-write-subs
    --prefer-unsecure                --prefer-insecure
    --rate-limit RATE                --limit-rate RATE
    --split-tracks                   --split-chapters
    --srt-lang LANGS                 --sub-langs LANGS
    --trim-file-names LENGTH         --trim-filenames LENGTH
    --write-srt                      --write-subs
    --yes-overwrites                 --force-overwrites

#### Sponskrub Options
Support for [SponSkrub](https://github.com/faissaloo/SponSkrub) has been deprecated in favor of the `--sponsorblock` options

    --sponskrub                      --sponsorblock-mark all
    --no-sponskrub                   --no-sponsorblock
    --sponskrub-cut                  --sponsorblock-remove all
    --no-sponskrub-cut               --sponsorblock-remove -all
    --sponskrub-force                Not applicable
    --no-sponskrub-force             Not applicable
    --sponskrub-location             Not applicable
    --sponskrub-args                 Not applicable

#### No longer supported
These options may no longer work as intended

    --prefer-avconv                  avconv is not officially supported by yt-dlp (Alias: --no-prefer-ffmpeg)
    --prefer-ffmpeg                  Default (Alias: --no-prefer-avconv)
    -C, --call-home                  Not implemented
    --no-call-home                   Default
    --include-ads                    No longer supported
    --no-include-ads                 Default
    --write-annotations              No supported site has annotations now
    --no-write-annotations           Default
    --compat-options seperate-video-versions  No longer needed

#### Removed
These options were deprecated since 2014 and have now been entirely removed

    -A, --auto-number                -o "%(autonumber)s-%(id)s.%(ext)s"
    -t, -l, --title, --literal       -o "%(title)s-%(id)s.%(ext)s"

# CONTRIBUTING
See [CONTRIBUTING.md](CONTRIBUTING.md#contributing-to-yt-dlp) for instructions on [Opening an Issue](CONTRIBUTING.md#opening-an-issue) and [Contributing code to the project](CONTRIBUTING.md#developer-instructions)

# MORE
For FAQ see the [youtube-dl README](https://github.com/ytdl-org/youtube-dl#faq)
yt-dlp-2022.08.19/devscripts/

yt-dlp-2022.08.19/devscripts/SizeOfImage.patch
[binary bsdiff patch; contents omitted]

yt-dlp-2022.08.19/devscripts/SizeOfImage_w.patch
[binary bsdiff patch; contents omitted]

yt-dlp-2022.08.19/devscripts/__init__.py
# Empty file needed to make devscripts.utils properly importable from outside
yt-dlp-2022.08.19/devscripts/bash-completion.in

__yt_dlp()
{
    local cur prev opts fileopts diropts keywords
    COMPREPLY=()
    cur="${COMP_WORDS[COMP_CWORD]}"
    prev="${COMP_WORDS[COMP_CWORD-1]}"
    opts="{{flags}}"
    keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
    fileopts="-a|--batch-file|--download-archive|--cookies|--load-info"
    diropts="--cache-dir"

    if [[ ${prev} =~ ${fileopts} ]]; then
        COMPREPLY=( $(compgen -f -- ${cur}) )
        return 0
    elif [[ ${prev} =~ ${diropts} ]]; then
        COMPREPLY=( $(compgen -d -- ${cur}) )
        return 0
    fi

    if [[ ${cur} =~ : ]]; then
        COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
        return 0
    elif [[ ${cur} == * ]] ; then
        COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
        return 0
    fi
}

complete -F __yt_dlp yt-dlp

yt-dlp-2022.08.19/devscripts/bash-completion.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import yt_dlp

BASH_COMPLETION_FILE = "completions/bash/yt-dlp"
BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in"


def build_completion(opt_parser):
    opts_flag = []
    for group in opt_parser.option_groups:
        for option in group.option_list:
            # for every long flag
            opts_flag.append(option.get_opt_string())
    with open(BASH_COMPLETION_TEMPLATE) as f:
        template = f.read()
    with open(BASH_COMPLETION_FILE, "w") as f:
        # just using the special char
        filled_template = template.replace("{{flags}}", " ".join(opts_flag))
        f.write(filled_template)


parser = yt_dlp.parseOpts(ignore_config_files=True)[0]
build_completion(parser)
yt-dlp-2022.08.19/devscripts/check-porn.py

#!/usr/bin/env python3
"""
This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
if we are not 'age_limit' tagging some porn site

A second approach implemented relies on a list of porn domains, to activate it
pass the list filename as the only argument
"""

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import urllib.parse
import urllib.request

from test.helper import gettestcases

if len(sys.argv) > 1:
    METHOD = 'LIST'
    # The file is opened in text mode, so the contents are already str and need no .decode()
    LIST = open(sys.argv[1]).read().strip()
else:
    METHOD = 'EURISTIC'

for test in gettestcases():
    if METHOD == 'EURISTIC':
        try:
            webpage = urllib.request.urlopen(test['url'], timeout=10).read()
        except Exception:
            print('\nFail: {}'.format(test['name']))
            continue

        webpage = webpage.decode('utf8', 'replace')
        RESULT = 'porn' in webpage.lower()

    elif METHOD == 'LIST':
        domain = urllib.parse.urlparse(test['url']).netloc
        if not domain:
            print('\nFail: {}'.format(test['name']))
            continue
        domain = '.'.join(domain.split('.')[-2:])

        RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)

    if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict']
                   or test['info_dict']['age_limit'] != 18):
        print('\nPotential missing age_limit check: {}'.format(test['name']))
    elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict']
                         and test['info_dict']['age_limit'] == 18):
        print('\nPotential false negative: {}'.format(test['name']))
    else:
        sys.stdout.write('.')
    sys.stdout.flush()

print()

yt-dlp-2022.08.19/devscripts/fish-completion.in

{{commands}}

complete --command yt-dlp --arguments ":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"

yt-dlp-2022.08.19/devscripts/fish-completion.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import optparse

import yt_dlp
from yt_dlp.utils import shell_quote

FISH_COMPLETION_FILE = 'completions/fish/yt-dlp.fish'
FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in'

EXTRA_ARGS = {
    'remux-video': ['--arguments', 'mp4 mkv', '--exclusive'],
    'recode-video': ['--arguments', 'mp4 flv ogg webm mkv', '--exclusive'],

    # Options that need a file parameter
    'download-archive': ['--require-parameter'],
    'cookies': ['--require-parameter'],
    'load-info': ['--require-parameter'],
    'batch-file': ['--require-parameter'],
}


def build_completion(opt_parser):
    commands = []
    for group in opt_parser.option_groups:
        for option in group.option_list:
            long_option = option.get_opt_string().strip('-')
            complete_cmd = ['complete', '--command', 'yt-dlp', '--long-option', long_option]
            if option._short_opts:
                complete_cmd += ['--short-option', option._short_opts[0].strip('-')]
            if option.help != optparse.SUPPRESS_HELP:
                complete_cmd += ['--description', option.help]
            complete_cmd.extend(EXTRA_ARGS.get(long_option, []))
            commands.append(shell_quote(complete_cmd))

    with open(FISH_COMPLETION_TEMPLATE) as f:
        template = f.read()
    filled_template = template.replace('{{commands}}', '\n'.join(commands))

    with open(FISH_COMPLETION_FILE, 'w') as f:
        f.write(filled_template)


parser = yt_dlp.parseOpts(ignore_config_files=True)[0]
build_completion(parser)

yt-dlp-2022.08.19/devscripts/generate_aes_testdata.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import codecs
import subprocess

from yt_dlp.aes import aes_encrypt, key_expansion
from yt_dlp.utils import intlist_to_bytes

secret_msg = b'Secret message goes here'


def hex_str(int_list):
    return codecs.encode(intlist_to_bytes(int_list), 'hex')


def openssl_encode(algo, key, iv):
    cmd = ['openssl', 'enc', '-e', '-' + algo, '-K', hex_str(key), '-iv', hex_str(iv)]
    prog = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    out, _ = prog.communicate(secret_msg)
    return out


iv = key = [0x20, 0x15] + 14 * [0]

r = openssl_encode('aes-128-cbc', key, iv)
print('aes_cbc_decrypt')
print(repr(r))

password = key
new_key = aes_encrypt(password, key_expansion(password))
r = openssl_encode('aes-128-ctr', new_key, iv)
print('aes_decrypt_text 16')
print(repr(r))

password = key + 16 * [0]
new_key = aes_encrypt(password, key_expansion(password)) * (32 // 16)
r = openssl_encode('aes-256-ctr', new_key, iv)
print('aes_decrypt_text 32')
print(repr(r))

yt-dlp-2022.08.19/devscripts/lazy_load_template.py

import importlib
import random
import re

from ..utils import (
    age_restricted,
    bug_reports_message,
    classproperty,
    write_string,
)

# These bloat the lazy_extractors, so allow them to passthrough silently
ALLOWED_CLASSMETHODS = {'get_testcases', 'extract_from_webpage'}


class LazyLoadMetaClass(type):
    def __getattr__(cls, name):
        if '_real_class' not in cls.__dict__ and name not in ALLOWED_CLASSMETHODS:
            write_string(
                'WARNING: Falling back to normal extractor since lazy extractor '
                f'{cls.__name__} does not have attribute {name}{bug_reports_message()}\n')
        return getattr(cls.real_class, name)


class LazyLoadExtractor(metaclass=LazyLoadMetaClass):
    @classproperty
    def real_class(cls):
        if '_real_class' not in cls.__dict__:
            cls._real_class = getattr(importlib.import_module(cls._module), cls.__name__)
        return cls._real_class

    def __new__(cls, *args, **kwargs):
        instance = cls.real_class.__new__(cls.real_class)
        instance.__init__(*args, **kwargs)
        return instance
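
A toy sketch of the lazy-import pattern used above: the stub class stores only a module path, and the real class is imported and cached on first use. The 'json'/'JSONDecoder' target is a stand-in, not part of yt-dlp.

import importlib

class LazyThing:
    _module = 'json'          # stand-in target module
    _name = 'JSONDecoder'     # stand-in target class

    @classmethod
    def real_class(cls):
        # import and cache the real class on first access
        if '_real_class' not in cls.__dict__:
            cls._real_class = getattr(importlib.import_module(cls._module), cls._name)
        return cls._real_class

print(LazyThing.real_class())
# <class 'json.decoder.JSONDecoder'>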

yt-dlp-2022.08.19/devscripts/logo.ico

[binary data: the project's Windows icon (embedded PNG and ICO bitmaps); not representable as text]

yt-dlp-2022.08.19/devscripts/make_contributing.py

#!/usr/bin/env python3
import optparse
import re


def main():
    return  # This is unused in yt-dlp
    parser = optparse.OptionParser(usage='%prog INFILE OUTFILE')
    options, args = parser.parse_args()
    if len(args) != 2:
        parser.error('Expected an input and an output filename')
    infile, outfile = args
    with open(infile, encoding='utf-8') as inf:
        readme = inf.read()
    bug_text = re.search(
        r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1)
    dev_text = re.search(
        r'(?s)(#\s*DEVELOPER INSTRUCTIONS.*?)#\s*EMBEDDING yt-dlp', readme).group(1)
    out = bug_text + dev_text
    with open(outfile, 'w', encoding='utf-8') as outf:
        outf.write(out)


if __name__ == '__main__':
    main()
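
A sketch of the '(?s)' section extraction used above, applied to a hypothetical README snippet; '(?s)' lets '.' span newlines, while '[^\n]*' skips the rest of the heading line:

import re

readme = '# BUGS (tracker)\nReport bugs at the tracker.\n# COPYRIGHT\nUnlicense'
print(re.search(r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1))
# Report bugs at the tracker.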

yt-dlp-2022.08.19/devscripts/make_issue_template.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import re

from devscripts.utils import (
    get_filename_args,
    read_file,
    read_version,
    write_file,
)

VERBOSE_TMPL = '''
  - type: checkboxes
    id: verbose
    attributes:
      label: Provide verbose output that clearly demonstrates the problem
      options:
        - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU <your command line>`)
          required: true
        - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below
          required: true
  - type: textarea
    id: log
    attributes:
      label: Complete Verbose Output
      description: |
        It should start like this:
      placeholder: |
        [debug] Command-line config: ['-vU', 'test:youtube']
        [debug] Portable config "yt-dlp.conf": ['-i']
        [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
        [debug] yt-dlp version %(version)s [9d339c4] (win32_exe)
        [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
        [debug] Checking exe version: ffmpeg -bsfs
        [debug] Checking exe version: ffprobe -bsfs
        [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
        [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
        [debug] Proxy map: {}
        [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
        Latest version: %(version)s, Current version: %(version)s
        yt-dlp is up to date (%(version)s)
        <more lines>
      render: shell
    validations:
      required: true
'''.strip()

NO_SKIP = '''
  - type: checkboxes
    attributes:
      label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE
      description: Fill all fields even if you think it is irrelevant for the issue
      options:
        - label: I understand that I will be **blocked** if I remove or skip any mandatory\\* field
          required: true
'''.strip()


def main():
    fields = {'version': read_version(), 'no_skip': NO_SKIP}
    fields['verbose'] = VERBOSE_TMPL % fields
    fields['verbose_optional'] = re.sub(r'(\n\s+validations:)?\n\s+required: true', '', fields['verbose'])

    infile, outfile = get_filename_args(has_infile=True)
    write_file(outfile, read_file(infile) % fields)


if __name__ == '__main__':
    main()
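
A minimal sketch of the '%(name)s' substitution make_issue_template.py performs above; the template line and field values are hypothetical:

fields = {'version': '2022.08.19'}
tmpl = "I've verified that I'm running yt-dlp version **%(version)s**"
print(tmpl % fields)
# I've verified that I'm running yt-dlp version **2022.08.19**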

yt-dlp-2022.08.19/devscripts/make_lazy_extractors.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from inspect import getsource

from devscripts.utils import get_filename_args, read_file, write_file

NO_ATTR = object()
STATIC_CLASS_PROPERTIES = ['IE_NAME', 'IE_DESC', 'SEARCH_KEY', '_VALID_URL', '_WORKING', '_NETRC_MACHINE', 'age_limit']
CLASS_METHODS = [
    'ie_key', 'working', 'description', 'suitable', '_match_valid_url', '_match_id', 'get_temp_id', 'is_suitable'
]
IE_TEMPLATE = '''
class {name}({bases}):
    _module = {module!r}
'''
MODULE_TEMPLATE = read_file('devscripts/lazy_load_template.py')


def main():
    lazy_extractors_filename = get_filename_args(default_outfile='yt_dlp/extractor/lazy_extractors.py')
    if os.path.exists(lazy_extractors_filename):
        os.remove(lazy_extractors_filename)

    _ALL_CLASSES = get_all_ies()  # Must be before import

    from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor

    DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR})
    module_src = '\n'.join((
        MODULE_TEMPLATE,
        '    _module = None',
        *extra_ie_code(DummyInfoExtractor),
        '\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n',
        *build_ies(_ALL_CLASSES, (InfoExtractor, SearchInfoExtractor), DummyInfoExtractor),
    ))

    write_file(lazy_extractors_filename, f'{module_src}\n')


def get_all_ies():
    PLUGINS_DIRNAME = 'ytdlp_plugins'
    BLOCKED_DIRNAME = f'{PLUGINS_DIRNAME}_blocked'
    if os.path.exists(PLUGINS_DIRNAME):
        os.rename(PLUGINS_DIRNAME, BLOCKED_DIRNAME)
    try:
        from yt_dlp.extractor.extractors import _ALL_CLASSES
    finally:
        if os.path.exists(BLOCKED_DIRNAME):
            os.rename(BLOCKED_DIRNAME, PLUGINS_DIRNAME)
    return _ALL_CLASSES


def extra_ie_code(ie, base=None):
    for var in STATIC_CLASS_PROPERTIES:
        val = getattr(ie, var)
        if val != (getattr(base, var) if base else NO_ATTR):
            yield f'    {var} = {val!r}'
    yield ''

    for name in CLASS_METHODS:
        f = getattr(ie, name)
        if not base or f.__func__ != getattr(base, name).__func__:
            yield getsource(f)


def build_ies(ies, bases, attr_base):
    names = []
    for ie in sort_ies(ies, bases):
        yield build_lazy_ie(ie, ie.__name__, attr_base)
        if ie in ies:
            names.append(ie.__name__)

    yield f'\n_ALL_CLASSES = [{", ".join(names)}]'


def sort_ies(ies, ignored_bases):
    """find the correct sorting and add the required base classes so that subclasses can be correctly created"""
    classes, returned_classes = ies[:-1], set()
    assert ies[-1].__name__ == 'GenericIE', 'Last IE must be GenericIE'
    while classes:
        for c in classes[:]:
            bases = set(c.__bases__) - {object, *ignored_bases}
            restart = False
            for b in sorted(bases, key=lambda x: x.__name__):
                if b not in classes and b not in returned_classes:
                    assert b.__name__ != 'GenericIE', 'Cannot inherit from GenericIE'
                    classes.insert(0, b)
                    restart = True
            if restart:
                break
            if bases <= returned_classes:
                yield c
                returned_classes.add(c)
                classes.remove(c)
                break
    yield ies[-1]


def build_lazy_ie(ie, name, attr_base):
    bases = ', '.join({
        'InfoExtractor': 'LazyLoadExtractor',
        'SearchInfoExtractor': 'LazyLoadSearchExtractor',
    }.get(base.__name__, base.__name__) for base in ie.__bases__)

    s = IE_TEMPLATE.format(name=name, module=ie.__module__, bases=bases)
    return s + '\n'.join(extra_ie_code(ie, attr_base))


if __name__ == '__main__':
    main()
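
Roughly the shape of one generated entry in lazy_extractors.py, per IE_TEMPLATE and extra_ie_code() above; the class name, module and attribute values are illustrative, and LazyLoadExtractor is stubbed so the snippet stands alone:

class LazyLoadExtractor:  # stub standing in for the base class from the template
    pass


class FooIE(LazyLoadExtractor):
    _module = 'yt_dlp.extractor.foo'
    _VALID_URL = r'https?://foo\.example/watch/(?P<id>[0-9]+)'
    IE_NAME = 'foo'


print(FooIE._module)
# yt_dlp.extractor.foo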

yt-dlp-2022.08.19/devscripts/make_readme.py

#!/usr/bin/env python3

"""
yt-dlp --help | make_readme.py

This must be run in a console of correct width
"""

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import functools
import re

from devscripts.utils import read_file, write_file

README_FILE = 'README.md'

OPTIONS_START = 'General Options:'
OPTIONS_END = 'CONFIGURATION'
EPILOG_START = 'See full documentation'
ALLOWED_OVERSHOOT = 2

DISABLE_PATCH = object()


def take_section(text, start=None, end=None, *, shift=0):
    return text[
        text.index(start) + shift if start else None:
        text.index(end) + shift if end else None
    ]


def apply_patch(text, patch):
    return text if patch[0] is DISABLE_PATCH else re.sub(*patch, text)


options = take_section(sys.stdin.read(), f'\n  {OPTIONS_START}', f'\n{EPILOG_START}', shift=1)

max_width = max(map(len, options.split('\n')))
switch_col_width = len(re.search(r'(?m)^\s{5,}', options).group())
delim = f'\n{" " * switch_col_width}'

PATCHES = (
    (  # Standardize update message
        r'(?m)^(    -U, --update\s+).+(\n \s.+)*$',
        r'\1Update this program to the latest version',
    ),
    (  # Headings
        r'(?m)^  (\w.+\n)(    (?=\w))?',
        r'## \1'
    ),
    (  # Do not split URLs
        rf'({delim[:-1]})? (?P<label>\[\S+\] )?(?P<url>https?({delim})?:({delim})?/({delim})?/(({delim})?\S+)+)\s',
        lambda mobj: ''.join((delim, mobj.group('label') or '', re.sub(r'\s+', '', mobj.group('url')), '\n'))
    ),
    (  # Do not split "words"
        rf'(?m)({delim}\S+)+$',
        lambda mobj: ''.join((delim, mobj.group(0).replace(delim, '')))
    ),
    (  # Allow overshooting last line
        rf'(?m)^(?P<prev>.+)${delim}(?P<current>.+)$(?!{delim})',
        lambda mobj: (mobj.group().replace(delim, ' ')
                      if len(mobj.group()) - len(delim) + 1 <= max_width + ALLOWED_OVERSHOOT
                      else mobj.group())
    ),
    (  # Avoid newline when a space is available b/w switch and description
        DISABLE_PATCH,  # This creates issues with prepare_manpage
        r'(?m)^(\s{4}-.{%d})(%s)' % (switch_col_width - 6, delim),
        r'\1 '
    ),
)

readme = read_file(README_FILE)

write_file(README_FILE, ''.join((
    take_section(readme, end=f'## {OPTIONS_START}'),
    functools.reduce(apply_patch, PATCHES, options),
    take_section(readme, f'# {OPTIONS_END}'),
)))

yt-dlp-2022.08.19/devscripts/make_supportedsites.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from devscripts.utils import get_filename_args, write_file
from yt_dlp.extractor import list_extractor_classes


def main():
    out = '\n'.join(ie.description() for ie in list_extractor_classes() if ie.IE_DESC is not False)
    write_file(get_filename_args(), f'# Supported sites\n{out}\n')


if __name__ == '__main__':
    main()

yt-dlp-2022.08.19/devscripts/prepare_manpage.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import os.path
import re

from devscripts.utils import (
    compose_functions,
    get_filename_args,
    read_file,
    write_file,
)

ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
README_FILE = os.path.join(ROOT_DIR, 'README.md')

PREFIX = r'''%yt-dlp(1)

# NAME

yt\-dlp \- A youtube-dl fork with additional features and patches

# SYNOPSIS

**yt-dlp** \[OPTIONS\] URL [URL...]

# DESCRIPTION

'''


def filter_excluded_sections(readme):
    EXCLUDED_SECTION_BEGIN_STRING = re.escape('<!-- MANPAGE: BEGIN EXCLUDED SECTION -->')
    EXCLUDED_SECTION_END_STRING = re.escape('<!-- MANPAGE: END EXCLUDED SECTION -->')
    return re.sub(
        rf'(?s){EXCLUDED_SECTION_BEGIN_STRING}.+?{EXCLUDED_SECTION_END_STRING}\n',
        '', readme)


def move_sections(readme):
    MOVE_TAG_TEMPLATE = '<!-- MANPAGE: MOVE "%s" SECTION HERE -->'
    sections = re.findall(r'(?m)^%s$' % (
        re.escape(MOVE_TAG_TEMPLATE).replace(r'\%', '%') % '(.+)'), readme)

    for section_name in sections:
        move_tag = MOVE_TAG_TEMPLATE % section_name
        if readme.count(move_tag) > 1:
            raise Exception(f'There is more than one occurrence of "{move_tag}". This is unexpected')

        sections = re.findall(rf'(?sm)(^# {re.escape(section_name)}.+?)(?=^# )', readme)
        if len(sections) < 1:
            raise Exception(f'The section {section_name} does not exist')
        elif len(sections) > 1:
            raise Exception(f'There are multiple occurrences of section {section_name}, this is unhandled')

        readme = readme.replace(sections[0], '', 1).replace(move_tag, sections[0], 1)
    return readme


def filter_options(readme):
    section = re.search(r'(?sm)^# USAGE AND OPTIONS\n.+?(?=^# )', readme).group(0)
    options = '# OPTIONS\n'
    for line in section.split('\n')[1:]:
        mobj = re.fullmatch(r'''(?x)
                \s{4}(?P<opt>-(?:,\s|[^\s])+)
                (?:\s(?P<meta>(?:[^\s]|\s(?!\s))+))?
                (\s{2,}(?P<desc>.+))?
            ''', line)
        if not mobj:
            options += f'{line.lstrip()}\n'
            continue
        option, metavar, description = mobj.group('opt', 'meta', 'desc')

        # Pandoc's definition_lists. See http://pandoc.org/README.html
        option = f'{option} *{metavar}*' if metavar else option
        description = f'{description}\n' if description else ''
        options += f'\n{option}\n: {description}'
        continue

    return readme.replace(section, options, 1)


TRANSFORM = compose_functions(filter_excluded_sections, move_sections, filter_options)


def main():
    write_file(get_filename_args(), PREFIX + TRANSFORM(read_file(README_FILE)))


if __name__ == '__main__':
    main()
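
A sketch of the option-line regex in filter_options() above, applied to one hypothetical README options line (4-space indent, 2+ spaces before the description):

import re

line = '    -h, --help                      Print this help text and exit'
mobj = re.fullmatch(r'''(?x)
        \s{4}(?P<opt>-(?:,\s|[^\s])+)
        (?:\s(?P<meta>(?:[^\s]|\s(?!\s))+))?
        (\s{2,}(?P<desc>.+))?
    ''', line)
print(mobj.group('opt', 'meta', 'desc'))
# ('-h, --help', None, 'Print this help text and exit')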

yt-dlp-2022.08.19/devscripts/run_tests.bat

@setlocal
@echo off
cd /d %~dp0..

if ["%~1"]==[""] (
    set "test_set="test""
) else if ["%~1"]==["core"] (
    set "test_set="-m not download""
) else if ["%~1"]==["download"] (
    set "test_set="-m "download""
) else (
    echo.Invalid test type "%~1". Use "core" ^| "download"
    exit /b 1
)

set PYTHONWARNINGS=error
pytest %test_set%

yt-dlp-2022.08.19/devscripts/run_tests.sh

#!/usr/bin/env sh

if [ -z $1 ]; then
    test_set='test'
elif [ $1 = 'core' ]; then
    test_set="-m not download"
elif [ $1 = 'download' ]; then
    test_set="-m download"
else
    echo 'Invalid test type "'$1'". Use "core" | "download"'
    exit 1
fi

python3 -bb -Werror -m pytest "$test_set"

yt-dlp-2022.08.19/devscripts/set-variant.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import argparse
import functools
import re

from devscripts.utils import compose_functions, read_file, write_file

VERSION_FILE = 'yt_dlp/version.py'


def parse_options():
    parser = argparse.ArgumentParser(description='Set the build variant of the package')
    parser.add_argument('variant', help='Name of the variant')
    parser.add_argument('-M', '--update-message', default=None, help='Message to show in -U')
    return parser.parse_args()


def property_setter(name, value):
    return functools.partial(re.sub, rf'(?m)^{name}\s*=\s*.+$', f'{name} = {value!r}')


opts = parse_options()
transform = compose_functions(
    property_setter('VARIANT', opts.variant),
    property_setter('UPDATE_HINT', opts.update_message)
)
write_file(VERSION_FILE, transform(read_file(VERSION_FILE)))
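
A sketch of property_setter() above rewriting one module-level assignment in a hypothetical version.py body:

import functools
import re

setter = functools.partial(re.sub, r'(?m)^VARIANT\s*=\s*.+$', 'VARIANT = %r' % 'pip')
print(setter('VARIANT = None\nUPDATE_HINT = None'))
# VARIANT = 'pip'
# UPDATE_HINT = None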

yt-dlp-2022.08.19/devscripts/update-formulae.py

#!/usr/bin/env python3

"""
Usage: python3 ./devscripts/update-formulae.py <path-to-formulae-rb> <version>

version can be either 0-aligned (yt-dlp version) or normalized (PyPI version)
"""

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import json
import re
import urllib.request

from devscripts.utils import read_file, write_file

filename, version = sys.argv[1:]

normalized_version = '.'.join(str(int(x)) for x in version.split('.'))

pypi_release = json.loads(urllib.request.urlopen(
    'https://pypi.org/pypi/yt-dlp/%s/json' % normalized_version
).read().decode())

tarball_file = next(x for x in pypi_release['urls'] if x['filename'].endswith('.tar.gz'))
sha256sum = tarball_file['digests']['sha256']
url = tarball_file['url']

formulae_text = read_file(filename)
formulae_text = re.sub(r'sha256 "[0-9a-f]*?"', 'sha256 "%s"' % sha256sum, formulae_text, count=1)
formulae_text = re.sub(r'url "[^"]*?"', 'url "%s"' % url, formulae_text, count=1)
write_file(filename, formulae_text)

yt-dlp-2022.08.19/devscripts/update-version.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import contextlib
import subprocess
from datetime import datetime

from devscripts.utils import read_version, write_file


def get_new_version(revision):
    version = datetime.utcnow().strftime('%Y.%m.%d')

    if revision:
        assert revision.isdigit(), 'Revision must be a number'
    else:
        old_version = read_version().split('.')
        if version.split('.') == old_version[:3]:
            revision = str(int((old_version + [0])[3]) + 1)

    return f'{version}.{revision}' if revision else version


def get_git_head():
    with contextlib.suppress(Exception):
        sp = subprocess.Popen(['git', 'rev-parse', '--short', 'HEAD'], stdout=subprocess.PIPE)
        return sp.communicate()[0].decode().strip() or None


VERSION = get_new_version((sys.argv + [''])[1])
GIT_HEAD = get_git_head()

VERSION_FILE = f'''\
# Autogenerated by devscripts/update-version.py

__version__ = {VERSION!r}

RELEASE_GIT_HEAD = {GIT_HEAD!r}

VARIANT = None

UPDATE_HINT = None
'''

write_file('yt_dlp/version.py', VERSION_FILE)
print(f'::set-output name=ytdlp_version::{VERSION}')
print(f'\nVersion = {VERSION}, Git HEAD = {GIT_HEAD}')
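
A sketch of the same-day revision bump in get_new_version() above, assuming the stored version already matches today's date:

old_version = '2022.08.19'.split('.')
version = '2022.08.19'
revision = ''
if version.split('.') == old_version[:3]:
    revision = str(int((old_version + [0])[3]) + 1)
print(f'{version}.{revision}' if revision else version)
# 2022.08.19.1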

yt-dlp-2022.08.19/devscripts/utils.py

import argparse
import functools


def read_file(fname):
    with open(fname, encoding='utf-8') as f:
        return f.read()


def write_file(fname, content):
    with open(fname, 'w', encoding='utf-8') as f:
        return f.write(content)


# Get the version without importing the package
def read_version(fname='yt_dlp/version.py'):
    exec(compile(read_file(fname), fname, 'exec'))
    return locals()['__version__']


def get_filename_args(has_infile=False, default_outfile=None):
    parser = argparse.ArgumentParser()
    if has_infile:
        parser.add_argument('infile', help='Input file')
    kwargs = {'nargs': '?', 'default': default_outfile} if default_outfile else {}
    parser.add_argument('outfile', **kwargs, help='Output file')

    opts = parser.parse_args()
    if has_infile:
        return opts.infile, opts.outfile
    return opts.outfile


def compose_functions(*functions):
    return lambda x: functools.reduce(lambda y, f: f(y), functions, x)

yt-dlp-2022.08.19/devscripts/zsh-completion.in

#compdef yt-dlp

__yt_dlp() {
    local curcontext="$curcontext" fileopts diropts cur prev
    typeset -A opt_args
    fileopts="{{fileopts}}"
    diropts="{{diropts}}"
    cur=$words[CURRENT]
    case $cur in
        :)
            _arguments '*: :(::ytfavorites ::ytrecommended ::ytsubscriptions ::ytwatchlater ::ythistory)'
        ;;
        *)
            prev=$words[CURRENT-1]
            if [[ ${prev} =~ ${fileopts} ]]; then
                _path_files
            elif [[ ${prev} =~ ${diropts} ]]; then
                _path_files -/
            elif [[ ${prev} == "--remux-video" ]]; then
                _arguments '*: :(mp4 mkv)'
            elif [[ ${prev} == "--recode-video" ]]; then
                _arguments '*: :(mp4 flv ogg webm mkv)'
            else
                _arguments '*: :({{flags}})'
            fi
        ;;
    esac
}

__yt_dlp
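
A minimal demonstration of compose_functions() from devscripts/utils.py above: the functions are applied left to right over the seed value.

import functools

def compose_functions(*functions):
    return lambda x: functools.reduce(lambda y, f: f(y), functions, x)

transform = compose_functions(str.strip, str.upper)
print(transform('  yt-dlp  '))
# YT-DLP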
# ===== yt-dlp-2022.08.19/devscripts/zsh-completion.py =====
#!/usr/bin/env python3

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import yt_dlp

ZSH_COMPLETION_FILE = "completions/zsh/_yt-dlp"
ZSH_COMPLETION_TEMPLATE = "devscripts/zsh-completion.in"


def build_completion(opt_parser):
    opts = [opt for group in opt_parser.option_groups
            for opt in group.option_list]
    opts_file = [opt for opt in opts if opt.metavar == "FILE"]
    opts_dir = [opt for opt in opts if opt.metavar == "DIR"]

    fileopts = []
    for opt in opts_file:
        if opt._short_opts:
            fileopts.extend(opt._short_opts)
        if opt._long_opts:
            fileopts.extend(opt._long_opts)

    diropts = []
    for opt in opts_dir:
        if opt._short_opts:
            diropts.extend(opt._short_opts)
        if opt._long_opts:
            diropts.extend(opt._long_opts)

    flags = [opt.get_opt_string() for opt in opts]

    with open(ZSH_COMPLETION_TEMPLATE) as f:
        template = f.read()

    template = template.replace("{{fileopts}}", "|".join(fileopts))
    template = template.replace("{{diropts}}", "|".join(diropts))
    template = template.replace("{{flags}}", " ".join(flags))

    with open(ZSH_COMPLETION_FILE, "w") as f:
        f.write(template)


parser = yt_dlp.parseOpts(ignore_config_files=True)[0]
build_completion(parser)
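The generator simply string-replaces the three `{{...}}` placeholders in zsh-completion.in; the pipe-joined `fileopts`/`diropts` strings become the alternation that the template's `=~` regex matches against. A reduced illustration with made-up option lists:

```python
# Illustrative data only -- the real lists come from yt_dlp.parseOpts()
template = 'fileopts="{{fileopts}}"\nflags=({{flags}})'
fileopts = ['-a', '--batch-file', '--download-archive']
flags = ['--version', '--update', '-i']

rendered = (template
            .replace('{{fileopts}}', '|'.join(fileopts))
            .replace('{{flags}}', ' '.join(flags)))
print(rendered)
# fileopts="-a|--batch-file|--download-archive"
# flags=(--version --update -i)
```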
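Windows version resources (the `filevers`/`prodvers` fields above) require exactly four integer components, which is why `version_to_list` zero-pads short versions. For example:

```python
def version_to_list(version):
    version_list = version.split('.')
    return list(map(int, version_list)) + [0] * (4 - len(version_list))


assert version_to_list('2022.08.19') == [2022, 8, 19, 0]    # padded to four fields
assert version_to_list('2022.08.19.1') == [2022, 8, 19, 1]  # already four fields
```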
# ===== yt-dlp-2022.08.19/requirements.txt =====
mutagen
pycryptodomex
websockets
brotli; platform_python_implementation=='CPython'
brotlicffi; platform_python_implementation!='CPython'
certifi

# ===== yt-dlp-2022.08.19/setup.cfg =====
[wheel]
universal = true


[flake8]
exclude = build,venv,.tox,.git,.pytest_cache
ignore = E402,E501,E731,E741,W503
max_line_length = 120
per_file_ignores =
    devscripts/lazy_load_template.py: F401


[tool:pytest]
addopts = -ra -v --strict-markers
markers =
    download


[tox:tox]
skipsdist = true
envlist = py{36,37,38,39,310},pypy{36,37,38,39}
skip_missing_interpreters = true


[testenv]  # tox
deps = pytest
commands = pytest {posargs:"-m not download"}
passenv = HOME  # For test_compat_expanduser
setenv =
    # PYTHONWARNINGS = error  # Catches PIP's warnings too


[isort]
py_version = 37
multi_line_output = VERTICAL_HANGING_INDENT
line_length = 80
reverse_relative = true
ensure_newline_before_comments = true
include_trailing_comma = true
known_first_party =
    test

# ===== yt-dlp-2022.08.19/setup.py =====
#!/usr/bin/env python3
import os.path
import subprocess
import sys
import warnings

try:
    from setuptools import Command, find_packages, setup
    setuptools_available = True
except ImportError:
    from distutils.core import Command, setup
    setuptools_available = False

from devscripts.utils import read_file, read_version

VERSION = read_version()

DESCRIPTION = 'A youtube-dl fork with additional features and patches'

LONG_DESCRIPTION = '\n\n'.join((
    'Official repository: <https://github.com/yt-dlp/yt-dlp>',
    '**PS**: Some links in this document will not work since this is a copy of the README.md from Github',
    read_file('README.md')))

REQUIREMENTS = read_file('requirements.txt').splitlines()


def packages():
    if setuptools_available:
        return find_packages(exclude=('youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts'))

    return [
        'yt_dlp', 'yt_dlp.extractor', 'yt_dlp.downloader', 'yt_dlp.postprocessor', 'yt_dlp.compat',
        'yt_dlp.extractor.anvato_token_generator',
    ]


def py2exe_params():
    import py2exe  # noqa: F401

    warnings.warn(
        'py2exe builds do not support pycryptodomex and need VC++14 to run. '
        'The recommended way is to use "pyinst.py" to build using pyinstaller')

    return {
        'console': [{
            'script': './yt_dlp/__main__.py',
            'dest_base': 'yt-dlp',
            'version': VERSION,
            'description': DESCRIPTION,
            'comments': LONG_DESCRIPTION.split('\n')[0],
            'product_name': 'yt-dlp',
            'product_version': VERSION,
            'icon_resources': [(1, 'devscripts/logo.ico')],
        }],
        'options': {
            'py2exe': {
                'bundle_files': 0,
                'compressed': 1,
                'optimize': 2,
                'dist_dir': './dist',
                'excludes': ['Crypto', 'Cryptodome'],  # py2exe cannot import Crypto
                'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
                # Modules that are only imported dynamically must be added here
                'includes': ['yt_dlp.compat._legacy'],
            }
        },
        'zipfile': None
    }


def build_params():
    files_spec = [
        ('share/bash-completion/completions', ['completions/bash/yt-dlp']),
        ('share/zsh/site-functions', ['completions/zsh/_yt-dlp']),
        ('share/fish/vendor_completions.d', ['completions/fish/yt-dlp.fish']),
        ('share/doc/yt_dlp', ['README.txt']),
        ('share/man/man1', ['yt-dlp.1'])
    ]
    data_files = []
    for dirname, files in files_spec:
        resfiles = []
        for fn in files:
            if not os.path.exists(fn):
                warnings.warn(f'Skipping file {fn} since it is not present. Try running "make pypi-files" first')
            else:
                resfiles.append(fn)
        data_files.append((dirname, resfiles))

    params = {'data_files': data_files}

    if setuptools_available:
        params['entry_points'] = {'console_scripts': ['yt-dlp = yt_dlp:main']}
    else:
        params['scripts'] = ['yt-dlp']
    return params


class build_lazy_extractors(Command):
    description = 'Build the extractor lazy loading module'
    user_options = []

    def initialize_options(self):
        pass

    def finalize_options(self):
        pass

    def run(self):
        if self.dry_run:
            print('Skipping build of lazy extractors in dry run mode')
            return
        subprocess.run([sys.executable, 'devscripts/make_lazy_extractors.py'])


params = py2exe_params() if sys.argv[1:2] == ['py2exe'] else build_params()
setup(
    name='yt-dlp',
    version=VERSION,
    maintainer='pukkandan',
    maintainer_email='pukkandan.ytdlp@gmail.com',
    description=DESCRIPTION,
    long_description=LONG_DESCRIPTION,
    long_description_content_type='text/markdown',
    url='https://github.com/yt-dlp/yt-dlp',
    packages=packages(),
    install_requires=REQUIREMENTS,
    python_requires='>=3.7',
    project_urls={
        'Documentation': 'https://github.com/yt-dlp/yt-dlp#readme',
        'Source': 'https://github.com/yt-dlp/yt-dlp',
        'Tracker': 'https://github.com/yt-dlp/yt-dlp/issues',
        'Funding': 'https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators',
    },
    classifiers=[
        'Topic :: Multimedia :: Video',
        'Development Status :: 5 - Production/Stable',
        'Environment :: Console',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Programming Language :: Python :: 3.11',
        'Programming Language :: Python :: Implementation',
        'Programming Language :: Python :: Implementation :: CPython',
        'Programming Language :: Python :: Implementation :: PyPy',
        'License :: Public Domain',
        'Operating System :: OS Independent',
    ],
    cmdclass={'build_lazy_extractors': build_lazy_extractors},
    **params
)

# ===== yt-dlp-2022.08.19/supportedsites.md =====
# Supported sites
 - **0000studio:archive**
 - **0000studio:clip**
 - **17live**
 - **17live:clip**
 - **1tv**: Первый канал
 - **20.detik.com**
 - **20min**
 - **23video**
 - **247sports**
 - **24video**
 - **3qsdn**: 3Q SDN
 - **3sat**
 - **4tube**
 - **56.com**
 - **6play**
 - **7plus**
 - **8tracks**
 - **91porn**
 - **9c9media**
 - **9gag**: 9GAG
 - **9now.com.au**
 - **abc.net.au**
 - **abc.net.au:iview**
 - **abc.net.au:iview:showseries**
 - **abcnews**
 - **abcnews:video**
 - **abcotvs**: ABC Owned Television Stations
 - **abcotvs:clips**
 - **AbemaTV**: [<abbr
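The `platform_python_implementation` conditions in requirements.txt are PEP 508 environment markers, evaluated by pip at install time so CPython pulls `brotli` while PyPy pulls `brotlicffi`. They can be checked by hand with the third-party `packaging` library (a sketch; assumes `packaging` is installed):

```python
from packaging.markers import Marker

marker = Marker("platform_python_implementation == 'CPython'")
print(marker.evaluate())  # True on CPython, False on PyPy
print(marker.evaluate({'platform_python_implementation': 'PyPy'}))  # False
```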
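Registering `build_lazy_extractors` through `cmdclass` makes it an ordinary setup.py subcommand, invoked as `python setup.py build_lazy_extractors`. The same pattern in miniature (a hypothetical no-op command, not part of the repo):

```python
from setuptools import Command, setup


class hello(Command):
    description = 'Example no-op command'
    user_options = []

    def initialize_options(self):
        pass

    def finalize_options(self):
        pass

    def run(self):
        print('hello from a custom command')


# setup(name='demo', version='0.0.0', cmdclass={'hello': hello})
# -> `python setup.py hello` would then print the message
```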
title="netrc machine"><em>abematv</em></abbr>] - **AbemaTVTitle** - **AcademicEarth:Course** - **acast** - **acast:channel** - **AcFunBangumi** - **AcFunVideo** - **ADN**: [<abbr title="netrc machine"><em>animedigitalnetwork</em></abbr>] Anime Digital Network - **AdobeConnect** - **adobetv** - **adobetv:channel** - **adobetv:embed** - **adobetv:show** - **adobetv:video** - **AdultSwim** - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault - **aenetworks:collection** - **aenetworks:show** - **afreecatv**: [<abbr title="netrc machine"><em>afreecatv</em></abbr>] afreecatv.com - **afreecatv:live**: [<abbr title="netrc machine"><em>afreecatv</em></abbr>] afreecatv.com - **afreecatv:user** - **AirMozilla** - **AliExpressLive** - **AlJazeera** - **Allocine** - **AlphaPorno** - **Alsace20TV** - **Alsace20TVEmbed** - **Alura**: [<abbr title="netrc machine"><em>alura</em></abbr>] - **AluraCourse**: [<abbr title="netrc machine"><em>aluracourse</em></abbr>] - **Amara** - **AmazonStore** - **AMCNetworks** - **AmericasTestKitchen** - **AmericasTestKitchenSeason** - **AmHistoryChannel** - **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **Angel** - **AnimalPlanet** - **AnimeOnDemand**: [<abbr title="netrc machine"><em>animeondemand</em></abbr>] - **ant1newsgr:article**: ant1news.gr articles - **ant1newsgr:embed**: ant1news.gr embedded videos - **ant1newsgr:watch**: ant1news.gr videos - **Anvato** - **aol.com**: Yahoo screen and movies - **APA** - **Aparat** - **AppleConnect** - **AppleDaily**: 臺灣蘋果日報 - **ApplePodcasts** - **appletrailers** - **appletrailers:section** - **archive.org**: archive.org video and audio - **ArcPublishing** - **ARD** - **ARD:mediathek** - **ARDBetaMediathek** - **Arkena** - **arte.sky.it** - **ArteTV** - **ArteTVCategory** - **ArteTVEmbed** - **ArteTVPlaylist** - **AsianCrush** - **AsianCrushPlaylist** - **AtresPlayer**: [<abbr title="netrc machine"><em>atresplayer</em></abbr>] - **AtScaleConfEvent** - **ATTTechChannel** - **ATVAt** - **AudiMedia** - **AudioBoom** - **Audiodraft:custom** - **Audiodraft:generic** - **audiomack** - **audiomack:album** - **Audius**: Audius.co - **audius:artist**: Audius.co profile/artist pages - **audius:playlist**: Audius.co playlists - **audius:track**: Audius track ID or API link. 
Prepend with "audius:" - **AWAAN** - **awaan:live** - **awaan:season** - **awaan:video** - **AZMedien**: AZ Medien videos - **BaiduVideo**: 百度视频 - **BanBye** - **BanByeChannel** - **bandaichannel** - **Bandcamp** - **Bandcamp:album** - **Bandcamp:user** - **Bandcamp:weekly** - **bangumi.bilibili.com**: BiliBili番剧 - **BannedVideo** - **bbc**: [<abbr title="netrc machine"><em>bbc</em></abbr>] BBC - **bbc.co.uk**: [<abbr title="netrc machine"><em>bbc</em></abbr>] BBC iPlayer - **bbc.co.uk:article**: BBC articles - **bbc.co.uk:iplayer:episodes** - **bbc.co.uk:iplayer:group** - **bbc.co.uk:playlist** - **BBVTV**: [<abbr title="netrc machine"><em>bbvtv</em></abbr>] - **BBVTVLive**: [<abbr title="netrc machine"><em>bbvtv</em></abbr>] - **BBVTVRecordings**: [<abbr title="netrc machine"><em>bbvtv</em></abbr>] - **Beatport** - **Beeg** - **BehindKink** - **Bellator** - **BellMedia** - **Bet** - **bfi:player** - **bfmtv** - **bfmtv:article** - **bfmtv:live** - **BibelTV** - **Bigflix** - **Bigo** - **Bild**: Bild.de - **BiliBili** - **Bilibili category extractor** - **BilibiliAudio** - **BilibiliAudioAlbum** - **BilibiliChannel** - **BiliBiliPlayer** - **BiliBiliSearch**: Bilibili video search; "bilisearch:" prefix - **BiliIntl**: [<abbr title="netrc machine"><em>biliintl</em></abbr>] - **BiliIntlSeries**: [<abbr title="netrc machine"><em>biliintl</em></abbr>] - **BiliLive** - **BioBioChileTV** - **Biography** - **BIQLE** - **BitChute** - **BitChuteChannel** - **bitwave:replay** - **bitwave:stream** - **BlackboardCollaborate** - **BleacherReport** - **BleacherReportCMS** - **blogger.com** - **Bloomberg** - **BokeCC** - **BongaCams** - **BostonGlobe** - **Box** - **Bpb**: Bundeszentrale für politische Bildung - **BR**: Bayerischer Rundfunk - **BravoTV** - **Break** - **BreitBart** - **brightcove:legacy** - **brightcove:new** - **BRMediathek**: Bayerischer Rundfunk Mediathek - **bt:article**: Bergens Tidende Articles - **bt:vestlendingen**: Bergens Tidende - Vestlendingen - **BusinessInsider** - **BuzzFeed** - **BYUtv** - **CableAV** - **Callin** - **Caltrans** - **CAM4** - **Camdemy** - **CamdemyFolder** - **CamModels** - **CamtasiaEmbed** - **CamWithHer** - **CanalAlpha** - **canalc2.tv** - **Canalplus**: mycanal.fr and piwiplus.fr - **Canvas** - **CanvasEen**: canvas.be and een.be - **CarambaTV** - **CarambaTVPage** - **CartoonNetwork** - **cbc.ca** - **cbc.ca:player** - **CBS** - **CBSInteractive** - **CBSLocal** - **CBSLocalArticle** - **cbsnews**: CBS News - **cbsnews:embed** - **cbsnews:livevideo**: CBS News Live Videos - **cbssports** - **cbssports:embed** - **CCMA** - **CCTV**: 央视网 - **CDA** - **Cellebrite** - **CeskaTelevize** - **CGTN** - **channel9**: Channel 9 - **CharlieRose** - **Chaturbate** - **Chilloutzone** - **Chingari** - **ChingariUser** - **chirbit** - **chirbit:profile** - **cielotv.it** - **Cinchcast** - **Cinemax** - **CiscoLiveSearch** - **CiscoLiveSession** - **ciscowebex**: Cisco Webex - **CJSW** - **cliphunter** - **Clippit** - **ClipRs** - **Clipsyndicate** - **ClipYouEmbed** - **CloserToTruth** - **CloudflareStream** - **Cloudy** - **Clubic** - **Clyp** - **cmt.com** - **CNBC** - **CNBCVideo** - **CNN** - **CNNArticle** - **CNNBlogs** - **ComedyCentral** - **ComedyCentralTV** - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED - **CONtv** - **CookingChannel** - 
**Corus** - **Coub** - **CozyTV** - **cp24** - **cpac** - **cpac:playlist** - **Cracked** - **Crackle** - **Craftsy** - **CrooksAndLiars** - **CrowdBunker** - **CrowdBunkerChannel** - **crunchyroll**: [<abbr title="netrc machine"><em>crunchyroll</em></abbr>] - **crunchyroll:beta**: [<abbr title="netrc machine"><em>crunchyroll</em></abbr>] - **crunchyroll:playlist**: [<abbr title="netrc machine"><em>crunchyroll</em></abbr>] - **crunchyroll:playlist:beta**: [<abbr title="netrc machine"><em>crunchyroll</em></abbr>] - **CSpan**: C-SPAN - **CSpanCongress** - **CtsNews**: 華視新聞 - **CTV** - **CTVNews** - **cu.ntv.co.jp**: Nippon Television Network - **CultureUnplugged** - **curiositystream**: [<abbr title="netrc machine"><em>curiositystream</em></abbr>] - **curiositystream:collections**: [<abbr title="netrc machine"><em>curiositystream</em></abbr>] - **curiositystream:series**: [<abbr title="netrc machine"><em>curiositystream</em></abbr>] - **CWTV** - **Cybrary**: [<abbr title="netrc machine"><em>cybrary</em></abbr>] - **CybraryCourse**: [<abbr title="netrc machine"><em>cybrary</em></abbr>] - **Daftsex** - **DagelijkseKost**: dagelijksekost.een.be - **DailyMail** - **dailymotion**: [<abbr title="netrc machine"><em>dailymotion</em></abbr>] - **dailymotion:playlist**: [<abbr title="netrc machine"><em>dailymotion</em></abbr>] - **dailymotion:user**: [<abbr title="netrc machine"><em>dailymotion</em></abbr>] - **DailyWire** - **DailyWirePodcast** - **damtomo:record** - **damtomo:video** - **daum.net** - **daum.net:clip** - **daum.net:playlist** - **daum.net:user** - **daystar:clip** - **DBTV** - **DctpTv** - **DeezerAlbum** - **DeezerPlaylist** - **defense.gouv.fr** - **democracynow** - **DestinationAmerica** - **DHM**: Filmarchiv - Deutsches Historisches Museum - **Digg** - **DigitalConcertHall**: [<abbr title="netrc machine"><em>digitalconcerthall</em></abbr>] DigitalConcertHall extractor - **DigitallySpeaking** - **Digiteka** - **Discovery** - **DiscoveryLife** - **DiscoveryNetworksDe** - **DiscoveryPlus** - **DiscoveryPlusIndia** - **DiscoveryPlusIndiaShow** - **DiscoveryPlusItaly** - **DiscoveryPlusItalyShow** - **Disney** - **DIYNetwork** - **dlive:stream** - **dlive:vod** - **DoodStream** - **Dotsub** - **Douyin** - **DouyuShow** - **DouyuTV**: 斗鱼 - **DPlay** - **DRBonanza** - **Drooble** - **Dropbox** - **Dropout**: [<abbr title="netrc machine"><em>dropout</em></abbr>] - **DropoutSeason** - **DrTuber** - **drtv** - **drtv:live** - **DTube** - **duboku**: www.duboku.io - **duboku:list**: www.duboku.io entire series - **Dumpert** - **dvtv**: http://video.aktualne.cz/ - **dw** - **dw:article** - **EaglePlatform** - **EbaumsWorld** - **EchoMsk** - **egghead:course**: egghead.io course - **egghead:lesson**: egghead.io lesson - **ehftv** - **eHow** - **EinsUndEinsTV**: [<abbr title="netrc machine"><em>1und1tv</em></abbr>] - **EinsUndEinsTVLive**: [<abbr title="netrc machine"><em>1und1tv</em></abbr>] - **EinsUndEinsTVRecordings**: [<abbr title="netrc machine"><em>1und1tv</em></abbr>] - **Einthusan** - **eitb.tv** - **EllenTube** - **EllenTubePlaylist** - **EllenTubeVideo** - **Elonet** - **ElPais**: El País - **Embedly** - **EMPFlix** - **Engadget** - **Epicon** - **EpiconSeries** - **Eporner** - **EroProfile**: [<abbr title="netrc machine"><em>eroprofile</em></abbr>] - **EroProfile:album** - **ertflix**: ERTFLIX videos - **ertflix:codename**: ERTFLIX videos by codename - **ertwebtv:embed**: ert.gr webtv embedded videos - **Escapist** - **ESPN** - **ESPNArticle** - **ESPNCricInfo** - **EsriVideo** - 
**Europa** - **EuropeanTour** - **EUScreen** - **EWETV**: [<abbr title="netrc machine"><em>ewetv</em></abbr>] - **EWETVLive**: [<abbr title="netrc machine"><em>ewetv</em></abbr>] - **EWETVRecordings**: [<abbr title="netrc machine"><em>ewetv</em></abbr>] - **ExpoTV** - **Expressen** - **ExtremeTube** - **EyedoTV** - **facebook**: [<abbr title="netrc machine"><em>facebook</em></abbr>] - **facebook:reel** - **FacebookPluginsVideo** - **fancode:live**: [<abbr title="netrc machine"><em>fancode</em></abbr>] - **fancode:vod**: [<abbr title="netrc machine"><em>fancode</em></abbr>] - **faz.net** - **fc2**: [<abbr title="netrc machine"><em>fc2</em></abbr>] - **fc2:embed** - **fc2:live** - **Fczenit** - **Fifa** - **Filmmodu** - **filmon** - **filmon:channel** - **Filmweb** - **FiveThirtyEight** - **FiveTV** - **Flickr** - **Folketinget**: Folketinget (ft.dk; Danish parliament) - **FoodNetwork** - **FootyRoom** - **Formula1** - **FOX** - **FOX9** - **FOX9News** - **Foxgay** - **foxnews**: Fox News and Fox Business Video - **foxnews:article** - **FoxSports** - **fptplay**: fptplay.vn - **FranceCulture** - **FranceInter** - **FranceTV** - **francetvinfo.fr** - **FranceTVSite** - **Freesound** - **freespeech.org** - **freetv:series** - **FreeTvMovies** - **FrontendMasters**: [<abbr title="netrc machine"><em>frontendmasters</em></abbr>] - **FrontendMastersCourse**: [<abbr title="netrc machine"><em>frontendmasters</em></abbr>] - **FrontendMastersLesson**: [<abbr title="netrc machine"><em>frontendmasters</em></abbr>] - **FujiTVFODPlus7** - **Funimation**: [<abbr title="netrc machine"><em>funimation</em></abbr>] - **funimation:page**: [<abbr title="netrc machine"><em>funimation</em></abbr>] - **funimation:show**: [<abbr title="netrc machine"><em>funimation</em></abbr>] - **Funk** - **Fusion** - **Fux** - **FuyinTV** - **Gab** - **GabTV** - **Gaia**: [<abbr title="netrc machine"><em>gaia</em></abbr>] - **GameInformer** - **GameJolt** - **GameJoltCommunity** - **GameJoltGame** - **GameJoltGameSoundtrack** - **GameJoltSearch** - **GameJoltUser** - **GameSpot** - **GameStar** - **Gaskrank** - **Gazeta** - **GDCVault**: [<abbr title="netrc machine"><em>gdcvault</em></abbr>] - **GediDigital** - **gem.cbc.ca**: [<abbr title="netrc machine"><em>cbcgem</em></abbr>] - **gem.cbc.ca:live** - **gem.cbc.ca:playlist** - **Gettr** - **GettrStreaming** - **Gfycat** - **GiantBomb** - **Giga** - **GlattvisionTV**: [<abbr title="netrc machine"><em>glattvisiontv</em></abbr>] - **GlattvisionTVLive**: [<abbr title="netrc machine"><em>glattvisiontv</em></abbr>] - **GlattvisionTVRecordings**: [<abbr title="netrc machine"><em>glattvisiontv</em></abbr>] - **Glide**: Glide mobile video messages (glide.me) - **Globo**: [<abbr title="netrc machine"><em>globo</em></abbr>] - **GloboArticle** - **glomex**: Glomex videos - **glomex:embed**: Glomex embedded videos - **Go** - **GoDiscovery** - **GodTube** - **Gofile** - **Golem** - **goodgame:stream** - **google:podcasts** - **google:podcasts:feed** - **GoogleDrive** - **GoogleDrive:Folder** - **GoPro** - **Goshgay** - **GoToStage** - **GPUTechConf** - **Gronkh** - **gronkh:feed** - **gronkh:vods** - **Groupon** - **Harpodeon** - **hbo** - **HearThisAt** - **Heise** - **HellPorno** - **Helsinki**: helsinki.fi - **HentaiStigma** - **hetklokhuis** - **hgtv.com:show** - **HGTVDe** - **HGTVUsa** - **HiDive**: [<abbr title="netrc machine"><em>hidive</em></abbr>] - **HistoricFilms** - **history:player** - **history:topic**: History.com Topic - **hitbox** - **hitbox:live** - **HitRecord** - 
**hketv**: 香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau - **Holodex** - **HotNewHipHop** - **hotstar** - **hotstar:playlist** - **hotstar:series** - **Howcast** - **HowStuffWorks** - **hrfernsehen** - **HRTi**: [<abbr title="netrc machine"><em>hrti</em></abbr>] - **HRTiPlaylist**: [<abbr title="netrc machine"><em>hrti</em></abbr>] - **HSEProduct** - **HSEShow** - **html5** - **Huajiao**: 花椒直播 - **HuffPost**: Huffington Post - **Hungama** - **HungamaAlbumPlaylist** - **HungamaSong** - **huya:live**: huya.com - **Hypem** - **Hytale** - **Icareus** - **ign.com** - **IGNArticle** - **IGNVideo** - **IHeartRadio** - **iheartradio:podcast** - **imdb**: Internet Movie Database trailers - **imdb:list**: Internet Movie Database lists - **Imgur** - **imgur:album** - **imgur:gallery** - **Ina** - **Inc** - **IndavideoEmbed** - **InfoQ** - **Instagram**: [<abbr title="netrc machine"><em>instagram</em></abbr>] - **instagram:story**: [<abbr title="netrc machine"><em>instagram</em></abbr>] - **instagram:tag**: [<abbr title="netrc machine"><em>instagram</em></abbr>] Instagram hashtag search URLs - **instagram:user**: [<abbr title="netrc machine"><em>instagram</em></abbr>] Instagram user profile - **InstagramIOS**: IOS instagram:// URL - **Internazionale** - **InternetVideoArchive** - **InvestigationDiscovery** - **IPrima**: [<abbr title="netrc machine"><em>iprima</em></abbr>] - **IPrimaCNN** - **iq.com**: International version of iQiyi - **iq.com:album** - **iqiyi**: [<abbr title="netrc machine"><em>iqiyi</em></abbr>] 爱奇艺 - **ITProTV** - **ITProTVCourse** - **ITTF** - **ITV** - **ITVBTCC** - **ivi**: ivi.ru - **ivi:compilation**: ivi.ru compilations - **ivideon**: Ivideon TV - **Iwara** - **iwara:playlist** - **iwara:user** - **Ixigua** - **Izlesene** - **Jable** - **JablePlaylist** - **Jamendo** - **JamendoAlbum** - **JeuxVideo** - **Joj** - **Jove** - **JWPlatform** - **Kakao** - **Kaltura** - **Karaoketv** - **KarriereVideos** - **Katsomo** - **KeezMovies** - **KelbyOne** - **Ketnet** - **khanacademy** - **khanacademy:unit** - **Kicker** - **KickStarter** - **KinjaEmbed** - **KinoPoisk** - **KompasVideo** - **KonserthusetPlay** - **Koo** - **KrasView**: Красвью - **KTH** - **Ku6** - **KUSI** - **kuwo:album**: 酷我音乐 - 专辑 - **kuwo:category**: 酷我音乐 - 分类 - **kuwo:chart**: 酷我音乐 - 排行榜 - **kuwo:mv**: 酷我音乐 - MV - **kuwo:singer**: 酷我音乐 - 歌手 - **kuwo:song**: 酷我音乐 - **la7.it** - **la7.it:pod:episode** - **la7.it:podcast** - **laola1tv** - **laola1tv:embed** - **LastFM** - **LastFMPlaylist** - **LastFMUser** - **lbry** - **lbry:channel** - **LCI** - **Lcp** - **LcpPlay** - **Le**: 乐视网 - **Lecture2Go** - **Lecturio**: [<abbr title="netrc machine"><em>lecturio</em></abbr>] - **LecturioCourse**: [<abbr title="netrc machine"><em>lecturio</em></abbr>] - **LecturioDeCourse**: [<abbr title="netrc machine"><em>lecturio</em></abbr>] - **LEGO** - **Lemonde** - **Lenta** - **LePlaylist** - **LetvCloud**: 乐视云 - **Libsyn** - **life**: Life.ru - **life:embed** - **likee** - **likee:user** - **limelight** - **limelight:channel** - **limelight:channel_list** - **LineLive** - **LineLiveChannel** - **LinkedIn**: [<abbr title="netrc machine"><em>linkedin</em></abbr>] - **linkedin:learning**: [<abbr title="netrc machine"><em>linkedin</em></abbr>] - **linkedin:learning:course**: [<abbr title="netrc machine"><em>linkedin</em></abbr>] - **LinuxAcademy**: [<abbr title="netrc machine"><em>linuxacademy</em></abbr>] - **Liputan6** - **LiTV** - **LiveJournal** - **livestream** - **livestream:original** - 
**Livestreamfails** - **Lnk** - **LnkGo** - **loc**: Library of Congress - **LocalNews8** - **LoveHomePorn** - **LRTStream** - **LRTVOD** - **lynda**: [<abbr title="netrc machine"><em>lynda</em></abbr>] lynda.com videos - **lynda:course**: [<abbr title="netrc machine"><em>lynda</em></abbr>] lynda.com online courses - **m6** - **MagentaMusik360** - **mailru**: Видео@Mail.Ru - **mailru:music**: Музыка@Mail.Ru - **mailru:music:search**: Музыка@Mail.Ru - **MainStreaming**: MainStreaming Player - **MallTV** - **mangomolo:live** - **mangomolo:video** - **MangoTV**: 芒果TV - **ManotoTV**: Manoto TV (Episode) - **ManotoTVLive**: Manoto TV (Live) - **ManotoTVShow**: Manoto TV (Show) - **ManyVids** - **MaoriTV** - **Markiza** - **MarkizaPage** - **massengeschmack.tv** - **Masters** - **MatchTV** - **MDR**: MDR.DE and KiKA - **MedalTV** - **media.ccc.de** - **media.ccc.de:lists** - **Mediaite** - **MediaKlikk** - **Medialaan** - **Mediaset** - **MediasetShow** - **Mediasite** - **MediasiteCatalog** - **MediasiteNamedCatalog** - **Medici** - **megaphone.fm**: megaphone.fm embedded players - **megatvcom**: megatv.com videos - **megatvcom:embed**: megatv.com embedded videos - **Meipai**: 美拍 - **MelonVOD** - **META** - **metacafe** - **Metacritic** - **mewatch** - **Mgoon** - **MiaoPai** - **microsoftstream**: Microsoft Stream - **mildom**: Record ongoing live by specific user in Mildom - **mildom:clip**: Clip in Mildom - **mildom:user:vod**: Download all VODs from specific user in Mildom - **mildom:vod**: VOD in Mildom - **minds** - **minds:channel** - **minds:group** - **MinistryGrid** - **Minoto** - **miomio.tv** - **mirrativ** - **mirrativ:user** - **MirrorCoUK** - **MiTele**: mitele.es - **mixch** - **mixch:archive** - **mixcloud** - **mixcloud:playlist** - **mixcloud:user** - **MLB** - **MLBTV**: [<abbr title="netrc machine"><em>mlb</em></abbr>] - **MLBVideo** - **MLSSoccer** - **Mnet** - **MNetTV**: [<abbr title="netrc machine"><em>mnettv</em></abbr>] - **MNetTVLive**: [<abbr title="netrc machine"><em>mnettv</em></abbr>] - **MNetTVRecordings**: [<abbr title="netrc machine"><em>mnettv</em></abbr>] - **MochaVideo** - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net - **Mofosex** - **MofosexEmbed** - **Mojvideo** - **Morningstar**: morningstar.com - **Motherless** - **MotherlessGroup** - **Motorsport**: motorsport.com - **MotorTrend** - **MovieClips** - **MovieFap** - **Moviepilot** - **MoviewPlay** - **Moviezine** - **MovingImage** - **MSN** - **mtg**: MTG services - **mtv** - **mtv.de** - **mtv.it** - **mtv.it:programma** - **mtv:video** - **mtvjapan** - **mtvservices:embedded** - **MTVUutisetArticle** - **MuenchenTV**: münchen.tv - **Murrtube** - **MurrtubeUser**: Murrtube user profile - **MuseScore** - **MusicdexAlbum** - **MusicdexArtist** - **MusicdexPlaylist** - **MusicdexSong** - **mva**: Microsoft Virtual Academy videos - **mva:course**: Microsoft Virtual Academy courses - **Mwave** - **MwaveMeetGreet** - **Mxplayer** - **MxplayerShow** - **MyChannels** - **MySpace** - **MySpace:album** - **MySpass** - **Myvi** - **MyVideoGe** - **MyVidster** - **MyviEmbed** - **n-tv.de** - **N1Info:article** - **N1InfoAsset** - **Nate** - **NateProgram** - **natgeo:video** - **NationalGeographicTV** - **Naver** - **Naver:live** - **navernow** - **NBA** - **nba:watch** - **nba:watch:collection** - **NBAChannel** - **NBAEmbed** - **NBAWatchEmbed** - **NBC** - **NBCNews** - **nbcolympics** - **nbcolympics:stream** - **NBCSports** - **NBCSportsStream** - **NBCSportsVPlayer** 
- **ndr**: NDR.de - Norddeutscher Rundfunk - **ndr:embed** - **ndr:embed:base** - **NDTV** - **Nebula**: [<abbr title="netrc machine"><em>watchnebula</em></abbr>] - **nebula:channel**: [<abbr title="netrc machine"><em>watchnebula</em></abbr>] - **nebula:subscriptions**: [<abbr title="netrc machine"><em>watchnebula</em></abbr>] - **NerdCubedFeed** - **netease:album**: 网易云音乐 - 专辑 - **netease:djradio**: 网易云音乐 - 电台 - **netease:mv**: 网易云音乐 - MV - **netease:playlist**: 网易云音乐 - 歌单 - **netease:program**: 网易云音乐 - 电台节目 - **netease:singer**: 网易云音乐 - 歌手 - **netease:song**: 网易云音乐 - **NetPlusTV**: [<abbr title="netrc machine"><em>netplus</em></abbr>] - **NetPlusTVLive**: [<abbr title="netrc machine"><em>netplus</em></abbr>] - **NetPlusTVRecordings**: [<abbr title="netrc machine"><em>netplus</em></abbr>] - **Netverse** - **NetversePlaylist** - **Netzkino** - **Newgrounds** - **Newgrounds:playlist** - **Newgrounds:user** - **Newstube** - **Newsy** - **NextMedia**: 蘋果日報 - **NextMediaActionNews**: 蘋果日報 - 動新聞 - **NextTV**: 壹電視 - **Nexx** - **NexxEmbed** - **NFB** - **NFHSNetwork** - **nfl.com**: (**Currently broken**) - **nfl.com:article**: (**Currently broken**) - **NhkForSchoolBangumi** - **NhkForSchoolProgramList** - **NhkForSchoolSubject**: Portal page for each school subjects, like Japanese (kokugo, 国語) or math (sansuu/suugaku or 算数・数学) - **NhkVod** - **NhkVodProgram** - **nhl.com** - **nick.com** - **nick.de** - **nickelodeon:br** - **nickelodeonru** - **nicknight** - **niconico**: [<abbr title="netrc machine"><em>niconico</em></abbr>] ニコニコ動画 - **niconico:history**: NicoNico user history. Requires cookies. - **niconico:playlist** - **niconico:series** - **niconico:tag**: NicoNico video tag URLs - **NiconicoUser** - **nicovideo:search**: Nico video search; "nicosearch:" prefix - **nicovideo:search:date**: Nico video search, newest first; "nicosearchdate:" prefix - **nicovideo:search_url**: Nico video search URLs - **Nintendo** - **Nitter** - **njoy**: N-JOY - **njoy:embed** - **NJPWWorld**: [<abbr title="netrc machine"><em>njpwworld</em></abbr>] 新日本プロレスワールド - **NobelPrize** - **NonkTube** - **NoodleMagazine** - **Noovo** - **Normalboots** - **NosVideo** - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz - **NovaEmbed** - **NovaPlay** - **nowness** - **nowness:playlist** - **nowness:series** - **Noz** - **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **npo.nl:live** - **npo.nl:radio** - **npo.nl:radio:fragment** - **Npr** - **NRK** - **NRKPlaylist** - **NRKRadioPodkast** - **NRKSkole**: NRK Skole - **NRKTV**: NRK TV and NRK Radio - **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte - **NRKTVEpisode** - **NRKTVEpisodes** - **NRKTVSeason** - **NRKTVSeries** - **NRLTV** - **ntv.ru** - **Nuvid** - **NYTimes** - **NYTimesArticle** - **NYTimesCooking** - **nzherald** - **NZZ** - **ocw.mit.edu** - **OdaTV** - **Odnoklassniki** - **OktoberfestTV** - **OlympicsReplay** - **on24**: ON24 - **OnDemandKorea** - **OneFootball** - **onet.pl** - **onet.tv** - **onet.tv:channel** - **OnetMVP** - **OnionStudios** - **Ooyala** - **OoyalaExternal** - **Opencast** - **OpencastPlaylist** - **openrec** - **openrec:capture** - **openrec:movie** - **OraTV** - **orf:fm4:story**: fm4.orf.at stories - **orf:iptv**: iptv.ORF.at - **orf:radio** - **orf:tvthek**: ORF TVthek - **OsnatelTV**: [<abbr title="netrc machine"><em>osnateltv</em></abbr>] - **OsnatelTVLive**: [<abbr title="netrc machine"><em>osnateltv</em></abbr>] - **OsnatelTVRecordings**: [<abbr title="netrc 
machine"><em>osnateltv</em></abbr>] - **OutsideTV** - **PacktPub**: [<abbr title="netrc machine"><em>packtpub</em></abbr>] - **PacktPubCourse** - **PalcoMP3:artist** - **PalcoMP3:song** - **PalcoMP3:video** - **pandora.tv**: 판도라TV - **Panopto** - **PanoptoList** - **PanoptoPlaylist** - **ParamountNetwork** - **ParamountPlus** - **ParamountPlusSeries** - **Parler**: Posts on parler.com - **parliamentlive.tv**: UK parliament videos - **Parlview** - **Patreon** - **PatreonCampaign** - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS 
(KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC) - **PearVideo** - **PeekVids** - **peer.tv** - **PeerTube** - **PeerTube:Playlist** - **peloton**: [<abbr title="netrc machine"><em>peloton</em></abbr>] - **peloton:live**: Peloton Live - **People** - **PerformGroup** - **periscope**: Periscope - **periscope:user**: Periscope user videos - **PhilharmonieDeParis**: Philharmonie de Paris - **phoenix.de** - **Photobucket** - **Piapro**: [<abbr title="netrc machine"><em>piapro</em></abbr>] - **Picarto** - **PicartoVod** - **Piksel** - **Pinkbike** - **Pinterest** - **PinterestCollection** - **pixiv:sketch** - **pixiv:sketch:user** - **Pladform** - **PlanetMarathi** - **Platzi**: [<abbr title="netrc machine"><em>platzi</em></abbr>] - **PlatziCourse**: [<abbr title="netrc machine"><em>platzi</em></abbr>] - **play.fm** - **player.sky.it** - **PlayPlusTV**: [<abbr title="netrc machine"><em>playplustv</em></abbr>] - **PlayStuff** - **PlaysTV** - **PlaySuisse** - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz - **Playvid** - **PlayVids** - **Playwire** - **pluralsight**: [<abbr title="netrc machine"><em>pluralsight</em></abbr>] - **pluralsight:course** - **PlutoTV** - **Podchaser** - **podomatic** - **Pokemon** - **PokemonWatch** - **PokerGo**: [<abbr title="netrc machine"><em>pokergo</em></abbr>] - **PokerGoCollection**: [<abbr title="netrc machine"><em>pokergo</em></abbr>] - **PolsatGo** - **PolskieRadio** - **polskieradio:kierowcow** - **polskieradio:player** - **polskieradio:podcast** - **polskieradio:podcast:list** - **PolskieRadioCategory** - **Popcorntimes** - **PopcornTV** - **PornCom** - **PornerBros** - **Pornez** - **PornFlip** - **PornHd** - **PornHub**: [<abbr title="netrc machine"><em>pornhub</em></abbr>] PornHub and Thumbzilla - **PornHubPagedVideoList**: [<abbr title="netrc machine"><em>pornhub</em></abbr>] - **PornHubPlaylist**: [<abbr title="netrc machine"><em>pornhub</em></abbr>] - **PornHubUser**: [<abbr title="netrc machine"><em>pornhub</em></abbr>] - **PornHubUserVideosUpload**: [<abbr title="netrc machine"><em>pornhub</em></abbr>] - **Pornotube** - **PornoVoisines** - **PornoXO** - **PornTube** - **PremiershipRugby** - **PressTV** - **ProjectVeritas** - **prosiebensat1**: ProSiebenSat.1 Digital - **PRXAccount** - **PRXSeries** - **prxseries:search**: PRX Series Search; "prxseries:" prefix - **prxstories:search**: PRX Stories Search; "prxstories:" prefix - **PRXStory** - **puhutv** - **puhutv:serie** - **Puls4** - **Pyvideo** - **qqmusic**: QQ音乐 - **qqmusic:album**: QQ音乐 - 专辑 - **qqmusic:playlist**: QQ音乐 - 歌单 - **qqmusic:singer**: QQ音乐 - 歌手 - **qqmusic:toplist**: QQ音乐 - 排行榜 - **QuantumTV**: [<abbr title="netrc machine"><em>quantumtv</em></abbr>] - **QuantumTVLive**: [<abbr title="netrc machine"><em>quantumtv</em></abbr>] - **QuantumTVRecordings**: [<abbr title="netrc machine"><em>quantumtv</em></abbr>] - **Qub** - **R7** - **R7Article** - **Radiko** - **RadikoRadio** - **radio.de** - **radiobremen** - **radiocanada** - **radiocanada:audiovideo** - **radiofrance** - **RadioJavan** - **radiokapital** - **radiokapital:show** - **RadioZetPodcast** - **radlive** - **radlive:channel** - **radlive:season** - **Rai** - **RaiNews** - **RaiPlay** - **RaiPlayLive** - **RaiPlayPlaylist** - **RaiPlaySound** - **RaiPlaySoundLive** - **RaiPlaySoundPlaylist** - **RaiSudtirol** - **RayWenderlich** - **RayWenderlichCourse** - **RBMARadio** - **RCS** - **RCSEmbeds** - **RCSVarious** - **RCTIPlus** - 
**RCTIPlusSeries** - **RCTIPlusTV** - **RDS**: RDS.ca - **RedBull** - **RedBullEmbed** - **RedBullTV** - **RedBullTVRrnContent** - **Reddit** - **RedGifs** - **RedGifsSearch**: Redgifs search - **RedGifsUser**: Redgifs user - **RedTube** - **RegioTV** - **RENTV** - **RENTVArticle** - **Restudy** - **Reuters** - **ReverbNation** - **RICE** - **RMCDecouverte** - **RockstarGames** - **Rokfin**: [<abbr title="netrc machine"><em>rokfin</em></abbr>] - **rokfin:channel**: Rokfin Channels - **rokfin:search**: Rokfin Search; "rkfnsearch:" prefix - **rokfin:stack**: Rokfin Stacks - **RoosterTeeth**: [<abbr title="netrc machine"><em>roosterteeth</em></abbr>] - **RoosterTeethSeries**: [<abbr title="netrc machine"><em>roosterteeth</em></abbr>] - **RottenTomatoes** - **Rozhlas** - **RTBF**: [<abbr title="netrc machine"><em>rtbf</em></abbr>] - **RTDocumentry** - **RTDocumentryPlaylist** - **rte**: Raidió Teilifís Éireann TV - **rte:radio**: Raidió Teilifís Éireann radio - **rtl.lu:article** - **rtl.lu:tele-vod** - **rtl.nl**: rtl.nl and rtlxl.nl - **rtl2** - **rtl2:you** - **rtl2:you:series** - **RTLLuLive** - **RTLLuRadio** - **RTNews** - **RTP** - **RTRFM** - **RTS**: RTS.ch - **rtve.es:alacarta**: RTVE a la carta - **rtve.es:audio**: RTVE audio - **rtve.es:infantil**: RTVE infantil - **rtve.es:live**: RTVE.es live streams - **rtve.es:television** - **RTVNH** - **RTVS** - **rtvslo.si** - **RUHD** - **Rule34Video** - **RumbleChannel** - **RumbleEmbed** - **Ruptly** - **rutube**: Rutube videos - **rutube:channel**: Rutube channel - **rutube:embed**: Rutube embedded videos - **rutube:movie**: Rutube movies - **rutube:person**: Rutube person videos - **rutube:playlist**: Rutube playlists - **rutube:tags**: Rutube tags - **RUTV**: RUTV.RU - **Ruutu** - **Ruv** - **ruv.is:spila** - **safari**: [<abbr title="netrc machine"><em>safari</em></abbr>] safaribooksonline.com online video - **safari:api**: [<abbr title="netrc machine"><em>safari</em></abbr>] - **safari:course**: [<abbr title="netrc machine"><em>safari</em></abbr>] safaribooksonline.com online courses - **Saitosan** - **SAKTV**: [<abbr title="netrc machine"><em>saktv</em></abbr>] - **SAKTVLive**: [<abbr title="netrc machine"><em>saktv</em></abbr>] - **SAKTVRecordings**: [<abbr title="netrc machine"><em>saktv</em></abbr>] - **SaltTV**: [<abbr title="netrc machine"><em>salttv</em></abbr>] - **SaltTVLive**: [<abbr title="netrc machine"><em>salttv</em></abbr>] - **SaltTVRecordings**: [<abbr title="netrc machine"><em>salttv</em></abbr>] - **SampleFocus** - **Sapo**: SAPO Vídeos - **savefrom.net** - **SBS**: sbs.com.au - **schooltv** - **ScienceChannel** - **screen.yahoo:search**: Yahoo screen search; "yvsearch:" prefix - **Screencast** - **ScreencastOMatic** - **ScrippsNetworks** - **scrippsnetworks:watch** - **Scrolller** - **SCTE**: [<abbr title="netrc machine"><em>scte</em></abbr>] - **SCTECourse**: [<abbr title="netrc machine"><em>scte</em></abbr>] - **Seeker** - **SenateGov** - **SenateISVP** - **SendtoNews** - **Servus** - **Sexu** - **SeznamZpravy** - **SeznamZpravyArticle** - **Shahid**: [<abbr title="netrc machine"><em>shahid</em></abbr>] - **ShahidShow** - **Shared**: shared.sx - **ShareVideosEmbed** - **ShemarooMe** - **ShowRoomLive** - **simplecast** - **simplecast:episode** - **simplecast:podcast** - **Sina** - **Skeb** - **sky.it** - **sky:news** - **sky:news:story** - **sky:sports** - **sky:sports:news** - **skyacademy.it** - **SkylineWebcams** - **skynewsarabia:article** - **skynewsarabia:video** - **SkyNewsAU** - **Slideshare** - 
**SlidesLive** - **Slutload** - **Snotr** - **Sohu** - **SonyLIV**: [<abbr title="netrc machine"><em>sonyliv</em></abbr>] - **SonyLIVSeries** - **soundcloud**: [<abbr title="netrc machine"><em>soundcloud</em></abbr>] - **soundcloud:playlist**: [<abbr title="netrc machine"><em>soundcloud</em></abbr>] - **soundcloud:related**: [<abbr title="netrc machine"><em>soundcloud</em></abbr>] - **soundcloud:search**: [<abbr title="netrc machine"><em>soundcloud</em></abbr>] Soundcloud search; "scsearch:" prefix - **soundcloud:set**: [<abbr title="netrc machine"><em>soundcloud</em></abbr>] - **soundcloud:trackstation**: [<abbr title="netrc machine"><em>soundcloud</em></abbr>] - **soundcloud:user**: [<abbr title="netrc machine"><em>soundcloud</em></abbr>] - **SoundcloudEmbed** - **soundgasm** - **soundgasm:profile** - **southpark.cc.com** - **southpark.cc.com:español** - **southpark.de** - **southpark.lat** - **southpark.nl** - **southparkstudios.dk** - **SovietsCloset** - **SovietsClosetPlaylist** - **SpankBang** - **SpankBangPlaylist** - **Spankwire** - **Spiegel** - **Sport5** - **SportBox** - **SportDeutschland** - **spotify**: Spotify episodes - **spotify:show**: Spotify shows - **Spreaker** - **SpreakerPage** - **SpreakerShow** - **SpreakerShowPage** - **SpringboardPlatform** - **Sprout** - **sr:mediathek**: Saarländischer Rundfunk - **SRGSSR** - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites - **stanfordoc**: Stanford Open ClassRoom - **StarTrek** - **startv** - **Steam** - **SteamCommunityBroadcast** - **Stitcher** - **StitcherShow** - **StoryFire** - **StoryFireSeries** - **StoryFireUser** - **Streamable** - **Streamanity** - **streamcloud.eu** - **StreamCZ** - **StreamFF** - **StreetVoice** - **StretchInternet** - **Stripchat** - **stv:player** - **Substack** - **SunPorno** - **sverigesradio:episode** - **sverigesradio:publication** - **SVT** - **SVTPage** - **SVTPlay**: SVT Play and Öppet arkiv - **SVTSeries** - **SWRMediathek** - **Syfy** - **SYVDK** - **SztvHu** - **t-online.de** - **Tagesschau** - **Tass** - **TBS** - **TDSLifeway** - **Teachable**: [<abbr title="netrc machine"><em>teachable</em></abbr>] - **TeachableCourse**: [<abbr title="netrc machine"><em>teachable</em></abbr>] - **teachertube**: teachertube.com videos - **teachertube:user:collection**: teachertube.com user and collection videos - **TeachingChannel** - **Teamcoco** - **TeamTreeHouse**: [<abbr title="netrc machine"><em>teamtreehouse</em></abbr>] - **TechTalks** - **techtv.mit.edu** - **TedEmbed** - **TedPlaylist** - **TedSeries** - **TedTalk** - **Tele13** - **Tele5** - **TeleBruxelles** - **Telecinco**: telecinco.es, cuatro.com and mediaset.es - **Telegraaf** - **telegram:embed** - **TeleMB** - **Telemundo** - **TeleQuebec** - **TeleQuebecEmission** - **TeleQuebecLive** - **TeleQuebecSquat** - **TeleQuebecVideo** - **TeleTask** - **Telewebion** - **Tempo** - **TennisTV**: [<abbr title="netrc machine"><em>tennistv</em></abbr>] - **TenPlay**: [<abbr title="netrc machine"><em>10play</em></abbr>] - **TF1** - **TFO** - **TheHoleTv** - **TheIntercept** - **ThePlatform** - **ThePlatformFeed** - **TheStar** - **TheSun** - **ThetaStream** - **ThetaVideo** - **TheWeatherChannel** - **ThisAmericanLife** - **ThisAV** - **ThisOldHouse** - **ThreeSpeak** - **ThreeSpeakUser** - **TikTok** - **tiktok:effect** - **tiktok:sound** - **tiktok:tag** - **tiktok:user** - **tinypic**: tinypic.com videos - **TLC** - **TMZ** - **TNAFlix** - **TNAFlixNetworkEmbed** - **toggle** - **toggo** - **Tokentube** - 
 - **Tokentube:channel**
 - **ToonGoggles**
 - **tou.tv**: [<abbr title="netrc machine"><em>toutv</em></abbr>]
 - **Toypics**: Toypics video
 - **ToypicsUser**: Toypics user profile
 - **TrailerAddict**: (**Currently broken**)
 - **TravelChannel**
 - **Trilulilu**
 - **Trovo**
 - **TrovoChannelClip**: All Clips of a trovo.live channel; "trovoclip:" prefix
 - **TrovoChannelVod**: All VODs of a trovo.live channel; "trovovod:" prefix
 - **TrovoVod**
 - **TrueID**
 - **TruNews**
 - **Truth**
 - **TruTV**
 - **Tube8**
 - **TubeTuGraz**: [<abbr title="netrc machine"><em>tubetugraz</em></abbr>] tube.tugraz.at
 - **TubeTuGrazSeries**: [<abbr title="netrc machine"><em>tubetugraz</em></abbr>]
 - **TubiTv**: [<abbr title="netrc machine"><em>tubitv</em></abbr>]
 - **TubiTvShow**
 - **Tumblr**: [<abbr title="netrc machine"><em>tumblr</em></abbr>]
 - **tunein:clip**
 - **tunein:program**
 - **tunein:station**
 - **tunein:topic**
 - **TunePk**
 - **Turbo**
 - **tv.dfb.de**
 - **TV2**
 - **TV2Article**
 - **TV2DK**
 - **TV2DKBornholmPlay**
 - **tv2play.hu**
 - **tv2playseries.hu**
 - **TV4**: tv4.se and tv4play.se
 - **TV5MondePlus**: TV5MONDE+
 - **tv5unis**
 - **tv5unis:video**
 - **tv8.it**
 - **TVA**
 - **TVANouvelles**
 - **TVANouvellesArticle**
 - **TVC**
 - **TVCArticle**
 - **TVer**
 - **tvigle**: Интернет-телевидение Tvigle.ru
 - **TVIPlayer**
 - **tvland.com**
 - **TVN24**
 - **TVNet**
 - **TVNoe**
 - **TVNow**
 - **TVNowAnnual**
 - **TVNowFilm**
 - **TVNowNew**
 - **TVNowSeason**
 - **TVNowShow**
 - **tvopengr:embed**: tvopen.gr embedded videos
 - **tvopengr:watch**: tvopen.gr (and ethnos.gr) videos
 - **tvp**: Telewizja Polska
 - **tvp:embed**: Telewizja Polska
 - **tvp:series**
 - **tvp:stream**
 - **TVPlayer**
 - **TVPlayHome**
 - **Tweakers**
 - **TwitCasting**
 - **TwitCastingLive**
 - **TwitCastingUser**
 - **twitch:clips**: [<abbr title="netrc machine"><em>twitch</em></abbr>]
 - **twitch:stream**: [<abbr title="netrc machine"><em>twitch</em></abbr>]
 - **twitch:vod**: [<abbr title="netrc machine"><em>twitch</em></abbr>]
 - **TwitchCollection**: [<abbr title="netrc machine"><em>twitch</em></abbr>]
 - **TwitchVideos**: [<abbr title="netrc machine"><em>twitch</em></abbr>]
 - **TwitchVideosClips**: [<abbr title="netrc machine"><em>twitch</em></abbr>]
 - **TwitchVideosCollections**: [<abbr title="netrc machine"><em>twitch</em></abbr>]
 - **twitter**
 - **twitter:amplify**
 - **twitter:broadcast**
 - **twitter:card**
 - **twitter:shortener**
 - **udemy**: [<abbr title="netrc machine"><em>udemy</em></abbr>]
 - **udemy:course**: [<abbr title="netrc machine"><em>udemy</em></abbr>]
 - **UDNEmbed**: 聯合影音
 - **UFCArabia**: [<abbr title="netrc machine"><em>ufcarabia</em></abbr>]
 - **UFCTV**: [<abbr title="netrc machine"><em>ufctv</em></abbr>]
 - **ukcolumn**
 - **UKTVPlay**
 - **umg:de**: Universal Music Deutschland
 - **Unistra**
 - **Unity**
 - **uol.com.br**
 - **uplynk**
 - **uplynk:preplay**
 - **Urort**: NRK P3 Urørt
 - **URPlay**
 - **USANetwork**
 - **USAToday**
 - **ustream**
 - **ustream:channel**
 - **ustudio**
 - **ustudio:embed**
 - **Utreon**
 - **Varzesh3**
 - **Vbox7**
 - **VeeHD**
 - **Veo**
 - **Veoh**
 - **Vesti**: Вести.Ru
 - **Vevo**
 - **VevoPlaylist**
 - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
 - **vh1.com**
 - **vhx:embed**: [<abbr title="netrc machine"><em>vimeo</em></abbr>]
 - **Viafree**
 - **vice**
 - **vice:article**
 - **vice:show**
 - **Vidbit**
 - **Viddler**
 - **Videa**
 - **video.arnes.si**: Arnes Video
 - **video.google:search**: Google Video search; "gvsearch:" prefix
 - **video.sky.it**
 - **video.sky.it:live**
 - **VideoDetective**
 - **videofy.me**
 - **videomore**
 - **videomore:season**
 - **videomore:video**
 - **VideoPress**
 - **Vidio**: [<abbr title="netrc machine"><em>vidio</em></abbr>]
 - **VidioLive**: [<abbr title="netrc machine"><em>vidio</em></abbr>]
 - **VidioPremier**: [<abbr title="netrc machine"><em>vidio</em></abbr>]
 - **VidLii**
 - **vier**: [<abbr title="netrc machine"><em>vier</em></abbr>] vier.be and vijf.be
 - **vier:videos**
 - **viewlift**
 - **viewlift:embed**
 - **Viidea**
 - **viki**: [<abbr title="netrc machine"><em>viki</em></abbr>]
 - **viki:channel**: [<abbr title="netrc machine"><em>viki</em></abbr>]
 - **vimeo**: [<abbr title="netrc machine"><em>vimeo</em></abbr>]
 - **vimeo:album**: [<abbr title="netrc machine"><em>vimeo</em></abbr>]
 - **vimeo:channel**: [<abbr title="netrc machine"><em>vimeo</em></abbr>]
 - **vimeo:group**: [<abbr title="netrc machine"><em>vimeo</em></abbr>]
 - **vimeo:likes**: [<abbr title="netrc machine"><em>vimeo</em></abbr>] Vimeo user likes
 - **vimeo:ondemand**: [<abbr title="netrc machine"><em>vimeo</em></abbr>]
 - **vimeo:review**: [<abbr title="netrc machine"><em>vimeo</em></abbr>] Review pages on vimeo
 - **vimeo:user**: [<abbr title="netrc machine"><em>vimeo</em></abbr>]
 - **vimeo:watchlater**: [<abbr title="netrc machine"><em>vimeo</em></abbr>] Vimeo watch later list, ":vimeowatchlater" keyword (requires authentication)
 - **Vimm:recording**
 - **Vimm:stream**
 - **ViMP**
 - **ViMP:Playlist**
 - **Vimple**: Vimple - one-click video hosting
 - **Vine**
 - **vine:user**
 - **Viqeo**
 - **Viu**
 - **viu:ott**: [<abbr title="netrc machine"><em>viu</em></abbr>]
 - **viu:playlist**
 - **Vivo**: vivo.sx
 - **vk**: [<abbr title="netrc machine"><em>vk</em></abbr>] VK
 - **vk:uservideos**: [<abbr title="netrc machine"><em>vk</em></abbr>] VK - User's Videos
 - **vk:wallpost**: [<abbr title="netrc machine"><em>vk</em></abbr>]
 - **vlive**: [<abbr title="netrc machine"><em>vlive</em></abbr>]
 - **vlive:channel**: [<abbr title="netrc machine"><em>vlive</em></abbr>]
 - **vlive:post**: [<abbr title="netrc machine"><em>vlive</em></abbr>]
 - **vm.tiktok**
 - **Vodlocker**
 - **VODPl**
 - **VODPlatform**
 - **VoiceRepublic**
 - **voicy**
 - **voicy:channel**
 - **Voot**
 - **VootSeries**
 - **VoxMedia**
 - **VoxMediaVolume**
 - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
 - **Vrak**
 - **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza
 - **VrtNU**: [<abbr title="netrc machine"><em>vrtnu</em></abbr>] VrtNU.be
 - **vrv**: [<abbr title="netrc machine"><em>vrv</em></abbr>]
 - **vrv:series**
 - **VShare**
 - **VTM**
 - **VTXTV**: [<abbr title="netrc machine"><em>vtxtv</em></abbr>]
 - **VTXTVLive**: [<abbr title="netrc machine"><em>vtxtv</em></abbr>]
 - **VTXTVRecordings**: [<abbr title="netrc machine"><em>vtxtv</em></abbr>]
 - **VuClip**
 - **Vupload**
 - **VVVVID**
 - **VVVVIDShow**
 - **VyboryMos**
 - **Vzaar**
 - **Wakanim**
 - **Walla**
 - **WalyTV**: [<abbr title="netrc machine"><em>walytv</em></abbr>]
 - **WalyTVLive**: [<abbr title="netrc machine"><em>walytv</em></abbr>]
 - **WalyTVRecordings**: [<abbr title="netrc machine"><em>walytv</em></abbr>]
 - **wasdtv:clip**
 - **wasdtv:record**
 - **wasdtv:stream**
 - **washingtonpost**
 - **washingtonpost:article**
 - **wat.tv**
 - **WatchBox**
 - **WatchESPN**
 - **WatchIndianPorn**: Watch Indian Porn
 - **WDR**
 - **wdr:mobile**: (**Currently broken**)
 - **WDRElefant**
 - **WDRPage**
 - **web.archive:youtube**: web.archive.org saved youtube videos, "ytarchive:" prefix
 - **Webcaster**
 - **WebcasterFeed**
 - **WebOfStories**
 - **WebOfStoriesPlaylist**
 - **Weibo**
 - **WeiboMobile**
 - **WeiqiTV**: WQTV
 - **wetv:episode**
 - **WeTvSeries**
 - **whowatch**
 - **wikimedia.org**
 - **Willow**
 - **WimTV**
 - **Wistia**
 - **WistiaPlaylist**
 - **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
 - **WorldStarHipHop**
 - **wppilot**
 - **wppilot:channels**
 - **WSJ**: Wall Street Journal
 - **WSJArticle**
 - **WWE**
 - **XBef**
 - **XboxClips**
 - **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, WolfStream, XVideoSharing
 - **XHamster**
 - **XHamsterEmbed**
 - **XHamsterUser**
 - **xiami:album**: 虾米音乐 - 专辑
 - **xiami:artist**: 虾米音乐 - 歌手
 - **xiami:collection**: 虾米音乐 - 精选集
 - **xiami:song**: 虾米音乐
 - **ximalaya**: 喜马拉雅FM
 - **ximalaya:album**: 喜马拉雅FM 专辑
 - **xinpianchang**: xinpianchang.com
 - **XMinus**
 - **XNXX**
 - **Xstream**
 - **XTube**
 - **XTubeUser**: XTube user profile
 - **Xuite**: 隨意窩Xuite影音
 - **XVideos**
 - **XXXYMovies**
 - **Yahoo**: Yahoo screen and movies
 - **yahoo:gyao**
 - **yahoo:gyao:player**
 - **yahoo:japannews**: Yahoo! Japan News
 - **YandexDisk**
 - **yandexmusic:album**: Яндекс.Музыка - Альбом
 - **yandexmusic:artist:albums**: Яндекс.Музыка - Артист - Альбомы
 - **yandexmusic:artist:tracks**: Яндекс.Музыка - Артист - Треки
 - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
 - **yandexmusic:track**: Яндекс.Музыка - Трек
 - **YandexVideo**
 - **YandexVideoPreview**
 - **YapFiles**
 - **YesJapan**
 - **yinyuetai:video**: 音悦Tai
 - **Ynet**
 - **YouJizz**
 - **youku**: 优酷
 - **youku:show**
 - **YouNowChannel**
 - **YouNowLive**
 - **YouNowMoment**
 - **YouPorn**
 - **YourPorn**
 - **YourUpload**
 - **youtube**: YouTube
 - **youtube:clip**
 - **youtube:favorites**: YouTube liked videos; ":ytfav" keyword (requires cookies)
 - **youtube:history**: Youtube watch history; ":ythis" keyword (requires cookies)
 - **youtube:music:search_url**: YouTube music search URLs with selectable sections, e.g. #songs
 - **youtube:notif**: YouTube notifications; ":ytnotif" keyword (requires cookies)
 - **youtube:playlist**: YouTube playlists
 - **youtube:recommended**: YouTube recommended videos; ":ytrec" keyword
 - **youtube:search**: YouTube search; "ytsearch:" prefix
 - **youtube:search:date**: YouTube search, newest videos first; "ytsearchdate:" prefix
 - **youtube:search_url**: YouTube search URLs with sorting and filter support
 - **youtube:stories**: YouTube channel stories; "ytstories:" prefix
 - **youtube:subscriptions**: YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)
 - **youtube:tab**: YouTube Tabs
 - **youtube:user**: YouTube user videos; "ytuser:" prefix
 - **youtube:watchlater**: Youtube watch later list; ":ytwatchlater" keyword (requires cookies)
 - **YoutubeLivestreamEmbed**: YouTube livestream embeds
 - **YoutubeYtBe**: youtu.be
 - **Zapiks**
 - **Zattoo**: [<abbr title="netrc machine"><em>zattoo</em></abbr>]
 - **ZattooLive**: [<abbr title="netrc machine"><em>zattoo</em></abbr>]
 - **ZattooMovies**: [<abbr title="netrc machine"><em>zattoo</em></abbr>]
 - **ZattooRecordings**: [<abbr title="netrc machine"><em>zattoo</em></abbr>]
 - **ZDF**
 - **ZDFChannel**
 - **Zee5**: [<abbr title="netrc machine"><em>zee5</em></abbr>]
 - **zee5:series**
 - **ZenYandex**
 - **ZenYandexChannel**
 - **Zhihu**
 - **zingmp3**: zingmp3.vn
 - **zingmp3:album**
 - **zingmp3:chart-home**
 - **zingmp3:chart-music-video**
 - **zingmp3:user**
 - **zingmp3:week-chart**
 - **zoom**
 - **Zype**
 - **generic**: Generic downloader that works on some sites
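The search and feed entries above (e.g. "ytsearch:", "gvsearch:") are pseudo-URL prefixes rather than web pages; they can be passed wherever a regular URL is accepted. As an illustration (not part of the original list), a minimal sketch using the Python API, with an arbitrary query string:

    from yt_dlp import YoutubeDL

    with YoutubeDL({'quiet': True}) as ydl:
        # 'ytsearch3:' resolves to the first three YouTube search results for the query
        playlist = ydl.extract_info('ytsearch3:creative commons documentary', download=False)
        for entry in playlist['entries']:
            print(entry['id'], entry.get('title'))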
yt-dlp-2022.08.19/test/
yt-dlp-2022.08.19/test/__init__.py
yt-dlp-2022.08.19/test/helper.py

import errno
import hashlib
import json
import os.path
import re
import ssl
import sys
import types

import yt_dlp.extractor
from yt_dlp import YoutubeDL
from yt_dlp.compat import compat_os_name
from yt_dlp.utils import preferredencoding, write_string

if 'pytest' in sys.modules:
    import pytest
    is_download_test = pytest.mark.download
else:
    def is_download_test(testClass):
        return testClass


def get_params(override=None):
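    """Load test/parameters.json, overlay local_parameters.json if present, then apply `override`."""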
    PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'parameters.json')
    LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'local_parameters.json')
    with open(PARAMETERS_FILE, encoding='utf-8') as pf:
        parameters = json.load(pf)
    if os.path.exists(LOCAL_PARAMETERS_FILE):
        with open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf:
            parameters.update(json.load(pf))
    if override:
        parameters.update(override)
    return parameters


def try_rm(filename):
    """ Remove a file if it exists """
    try:
        os.remove(filename)
    except OSError as ose:
        if ose.errno != errno.ENOENT:
            raise


def report_warning(message, *args, **kwargs):
    '''
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored
    '''
    if sys.stderr.isatty() and compat_os_name != 'nt':
        _msg_header = '\033[0;33mWARNING:\033[0m'
    else:
        _msg_header = 'WARNING:'
    output = f'{_msg_header} {message}\n'
    if 'b' in getattr(sys.stderr, 'mode', ''):
        output = output.encode(preferredencoding())
    sys.stderr.write(output)


class FakeYDL(YoutubeDL):
    def __init__(self, override=None):
        # Different instances of the downloader can't share the same dictionary:
        # some tests set the "sublang" parameter, which would break the md5 checks.
        params = get_params(override=override)
        super().__init__(params, auto_init=False)
        self.result = []

    def to_screen(self, s, *args, **kwargs):
        print(s)

    def trouble(self, s, *args, **kwargs):
        raise Exception(s)

    def download(self, x):
        self.result.append(x)

    def expect_warning(self, regex):
        # Silence an expected warning matching a regex
        old_report_warning = self.report_warning

        def report_warning(self, message, *args, **kwargs):
            if re.match(regex, message):
                return
            old_report_warning(message, *args, **kwargs)
        self.report_warning = types.MethodType(report_warning, self)


def gettestcases(include_onlymatching=False):
    for ie in yt_dlp.extractor.gen_extractors():
        yield from ie.get_testcases(include_onlymatching)


def getwebpagetestcases():
    for ie in yt_dlp.extractor.gen_extractors():
        for tc in ie.get_webpage_testcases():
            tc.setdefault('add_ie', []).append('Generic')
            yield tc


md5 = lambda s: hashlib.md5(s.encode()).hexdigest()


def expect_value(self, got, expected, field):
    if isinstance(expected, str) and expected.startswith('re:'):
        match_str = expected[len('re:'):]
        match_rex = re.compile(match_str)
        self.assertTrue(
            isinstance(got, str),
            f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
        self.assertTrue(
            match_rex.match(got),
            f'field {field} (value: {got!r}) should match {match_str!r}')
    elif isinstance(expected, str) and expected.startswith('startswith:'):
        start_str = expected[len('startswith:'):]
        self.assertTrue(
            isinstance(got, str),
            f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
        self.assertTrue(
            got.startswith(start_str),
            f'field {field} (value: {got!r}) should start with {start_str!r}')
    elif isinstance(expected, str) and expected.startswith('contains:'):
        contains_str = expected[len('contains:'):]
        self.assertTrue(
            isinstance(got, str),
            f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
        self.assertTrue(
            contains_str in got,
            f'field {field} (value: {got!r}) should contain {contains_str!r}')
    elif isinstance(expected, type):
        self.assertTrue(
            isinstance(got, expected),
            f'Expected type {expected!r} for field {field}, but got value {got!r} of type {type(got)!r}')
    elif isinstance(expected, dict) and isinstance(got, dict):
        expect_dict(self, got, expected)
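    # Lists are checked element-wise below: equal lengths, matching per-item types, then a recursive expect_value on each pair.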
    elif isinstance(expected, list) and isinstance(got, list):
        self.assertEqual(
            len(expected), len(got),
            'Expect a list of length %d, but got a list of length %d for field %s' % (
                len(expected), len(got), field))
        for index, (item_got, item_expected) in enumerate(zip(got, expected)):
            type_got = type(item_got)
            type_expected = type(item_expected)
            self.assertEqual(
                type_expected, type_got,
                'Type mismatch for list item at index %d for field %s, expected %r, got %r' % (
                    index, field, type_expected, type_got))
            expect_value(self, item_got, item_expected, field)
    else:
        if isinstance(expected, str) and expected.startswith('md5:'):
            self.assertTrue(
                isinstance(got, str),
                f'Expected field {field} to be a unicode object, but got value {got!r} of type {type(got)!r}')
            got = 'md5:' + md5(got)
        elif isinstance(expected, str) and re.match(r'^(?:min|max)?count:\d+', expected):
            self.assertTrue(
                isinstance(got, (list, dict)),
                f'Expected field {field} to be a list or a dict, but it is of type {type(got).__name__}')
            op, _, expected_num = expected.partition(':')
            expected_num = int(expected_num)
            if op == 'mincount':
                assert_func = assertGreaterEqual
                msg_tmpl = 'Expected %d items in field %s, but only got %d'
            elif op == 'maxcount':
                assert_func = assertLessEqual
                msg_tmpl = 'Expected maximum %d items in field %s, but got %d'
            elif op == 'count':
                assert_func = assertEqual
                msg_tmpl = 'Expected exactly %d items in field %s, but got %d'
            else:
                assert False
            assert_func(
                self, len(got), expected_num,
                msg_tmpl % (expected_num, field, len(got)))
            return
        self.assertEqual(
            expected, got,
            f'Invalid value for field {field}, expected {expected!r}, got {got!r}')


def expect_dict(self, got_dict, expected_dict):
    for info_field, expected in expected_dict.items():
        got = got_dict.get(info_field)
        expect_value(self, got, expected, info_field)


def sanitize_got_info_dict(got_dict):
    IGNORED_FIELDS = (
        *YoutubeDL._format_fields,

        # Lists
        'formats', 'thumbnails', 'subtitles', 'automatic_captions', 'comments', 'entries',

        # Auto-generated
        'autonumber', 'playlist', 'format_index', 'video_ext', 'audio_ext', 'duration_string',
        'epoch', 'fulltitle', 'extractor', 'extractor_key', 'filepath', 'infojson_filename',
        'original_url', 'n_entries',

        # Only live_status needs to be checked
        'is_live', 'was_live',
    )

    IGNORED_PREFIXES = ('', 'playlist', 'requested', 'webpage')

    def sanitize(key, value):
        if isinstance(value, str) and len(value) > 100 and key != 'thumbnail':
            return f'md5:{md5(value)}'
        elif isinstance(value, list) and len(value) > 10:
            return f'count:{len(value)}'
        elif key.endswith('_count') and isinstance(value, int):
            return int
        return value

    test_info_dict = {
        key: sanitize(key, value) for key, value in got_dict.items()
        if value is not None and key not in IGNORED_FIELDS and not any(
            key.startswith(f'{prefix}_') for prefix in IGNORED_PREFIXES)
    }

    # display_id may be generated from id
    if test_info_dict.get('display_id') == test_info_dict.get('id'):
        test_info_dict.pop('display_id')

    return test_info_dict


def expect_info_dict(self, got_dict, expected_dict):
    expect_dict(self, got_dict, expected_dict)
    # Check for the presence of mandatory fields
    if got_dict.get('_type') not in ('playlist', 'multi_video'):
        mandatory_fields = ['id', 'title']
        if expected_dict.get('ext'):
            mandatory_fields.extend(('url', 'ext'))
        for key in mandatory_fields:
            self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
    # Check for mandatory fields that are automatically set by YoutubeDL
    for key in ['webpage_url', 'extractor', 'extractor_key']:
        self.assertTrue(got_dict.get(key), 'Missing field: %s' % key)
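    # Compare the sanitized info_dict with the test definition and print any keys the definition lacks as ready-to-paste 'info_dict' lines before failing.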
    test_info_dict = sanitize_got_info_dict(got_dict)

    missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
    if missing_keys:
        def _repr(v):
            if isinstance(v, str):
                return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')
            elif isinstance(v, type):
                return v.__name__
            else:
                return repr(v)
        info_dict_str = ''
        if len(missing_keys) != len(expected_dict):
            info_dict_str += ''.join(
                f'    {_repr(k)}: {_repr(v)},\n'
                for k, v in test_info_dict.items() if k not in missing_keys)

            if info_dict_str:
                info_dict_str += '\n'
        info_dict_str += ''.join(
            f'    {_repr(k)}: {_repr(test_info_dict[k])},\n'
            for k in missing_keys)
        write_string(
            '\n\'info_dict\': {\n' + info_dict_str + '},\n', out=sys.stderr)
        self.assertFalse(
            missing_keys,
            'Missing keys in test definition: %s' % (
                ', '.join(sorted(missing_keys))))


def assertRegexpMatches(self, text, regexp, msg=None):
    if hasattr(self, 'assertRegexp'):
        return self.assertRegexp(text, regexp, msg)
    else:
        m = re.match(regexp, text)
        if not m:
            note = 'Regexp didn\'t match: %r not found' % (regexp)
            if len(text) < 1000:
                note += ' in %r' % text
            if msg is None:
                msg = note
            else:
                msg = note + ', ' + msg
            self.assertTrue(m, msg)


def assertGreaterEqual(self, got, expected, msg=None):
    if not (got >= expected):
        if msg is None:
            msg = f'{got!r} not greater than or equal to {expected!r}'
        self.assertTrue(got >= expected, msg)


def assertLessEqual(self, got, expected, msg=None):
    if not (got <= expected):
        if msg is None:
            msg = f'{got!r} not less than or equal to {expected!r}'
        self.assertTrue(got <= expected, msg)


def assertEqual(self, got, expected, msg=None):
    if not (got == expected):
        if msg is None:
            msg = f'{got!r} not equal to {expected!r}'
        self.assertTrue(got == expected, msg)


def expect_warnings(ydl, warnings_re):
    real_warning = ydl.report_warning

    def _report_warning(w, *args, **kwargs):
        if not any(re.search(w_re, w) for w_re in warnings_re):
            real_warning(w, *args, **kwargs)
    ydl.report_warning = _report_warning


def http_server_port(httpd):
    if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
        # In Jython SSLSocket is not a subclass of socket.socket
        sock = httpd.socket.sock
    else:
        sock = httpd.socket
    return sock.getsockname()[1]
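As a usage illustration (not part of the archive), a minimal sketch of how these helpers typically combine in an extractor test; the video URL and expected fields are placeholders, and the extract call requires network access:

    import unittest
    from test.helper import FakeYDL, expect_info_dict

    class TestExample(unittest.TestCase):
        def test_extract(self):
            ydl = FakeYDL({'skip_download': True})  # parameters.json values plus an override
            ie = ydl.get_info_extractor('Youtube')  # any registered ie_key() works here
            info = ie.extract('https://www.youtube.com/watch?v=BaW_jenozKc')
            expect_info_dict(self, info, {'id': 'BaW_jenozKc', 'ext': 'mp4', 'title': str})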
"quiet": false, "ratelimit": null, "rejecttitle": null, "retries": 10, "simulate": false, "subtitleslang": null, "subtitlesformat": "best", "test": true, "updatetime": true, "usenetrc": false, "username": null, "verbose": true, "writedescription": false, "writeinfojson": true, "writeannotations": false, "writelink": false, "writeurllink": false, "writewebloclink": false, "writedesktoplink": false, "writesubtitles": false, "allsubtitles": false, "listsubtitles": false, "fixup": "never" } ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/test/test_InfoExtractor.py��������������������������������������������������������0000664�0000000�0000000�00000247741�14277552437�0020546�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/env python3 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import http.server import threading from test.helper import FakeYDL, expect_dict, expect_value, http_server_port from yt_dlp.compat import compat_etree_fromstring from yt_dlp.extractor import YoutubeIE, get_info_extractor from yt_dlp.extractor.common import InfoExtractor from yt_dlp.utils import ( ExtractorError, RegexNotFoundError, encode_data_uri, strip_jsonp, ) TEAPOT_RESPONSE_STATUS = 418 TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>" class InfoExtractorTestRequestHandler(http.server.BaseHTTPRequestHandler): def log_message(self, format, *args): pass def do_GET(self): if self.path == '/teapot': self.send_response(TEAPOT_RESPONSE_STATUS) self.send_header('Content-Type', 'text/html; charset=utf-8') self.end_headers() self.wfile.write(TEAPOT_RESPONSE_BODY.encode()) else: assert False class DummyIE(InfoExtractor): pass class TestInfoExtractor(unittest.TestCase): def setUp(self): self.ie = DummyIE(FakeYDL()) def test_ie_key(self): self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE) def test_html_search_regex(self): html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>' search = lambda re, *args: self.ie._html_search_regex(re, html, *args) self.assertEqual(search(r'<p id="foo">(.+?)</p>', 'foo'), 'Watch this video') def test_opengraph(self): ie = self.ie html = ''' <meta name="og:title" content='Foo'/> <meta content="Some video's description " name="og:description"/> <meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&key2=val2'/> <meta content='application/x-shockwave-flash' property='og:video:type'> <meta content='Foo' property=og:foobar> <meta name="og:test1" content='foo > < bar'/> <meta name="og:test2" content="foo >//< bar"/> <meta property=og-test3 content='Ill-formatted opengraph'/> ''' self.assertEqual(ie._og_search_title(html), 'Foo') self.assertEqual(ie._og_search_description(html), 'Some video\'s description ') self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2') 
self.assertEqual(ie._og_search_video_url(html, default=None), None) self.assertEqual(ie._og_search_property('foobar', html), 'Foo') self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar') self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar') self.assertEqual(ie._og_search_property('test3', html), 'Ill-formatted opengraph') self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar') self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True) self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True) def test_html_search_meta(self): ie = self.ie html = ''' <meta name="a" content="1" /> <meta name='b' content='2'> <meta name="c" content='3'> <meta name=d content='4'> <meta property="e" content='5' > <meta content="6" name="f"> ''' self.assertEqual(ie._html_search_meta('a', html), '1') self.assertEqual(ie._html_search_meta('b', html), '2') self.assertEqual(ie._html_search_meta('c', html), '3') self.assertEqual(ie._html_search_meta('d', html), '4') self.assertEqual(ie._html_search_meta('e', html), '5') self.assertEqual(ie._html_search_meta('f', html), '6') self.assertEqual(ie._html_search_meta(('a', 'b', 'c'), html), '1') self.assertEqual(ie._html_search_meta(('c', 'b', 'a'), html), '3') self.assertEqual(ie._html_search_meta(('z', 'x', 'c'), html), '3') self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True) self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True) def test_search_json_ld_realworld(self): _TESTS = [ # https://github.com/ytdl-org/youtube-dl/issues/23306 ( r'''<script type="application/ld+json"> { "@context": "http://schema.org/", "@type": "VideoObject", "name": "1 On 1 With Kleio", "url": "https://www.eporner.com/hd-porn/xN49A1cT3eB/1-On-1-With-Kleio/", "duration": "PT0H12M23S", "thumbnailUrl": ["https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg", "https://imggen.eporner.com/780814/1920/1080/9.jpg"], "contentUrl": "https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4", "embedUrl": "https://www.eporner.com/embed/xN49A1cT3eB/1-On-1-With-Kleio/", "image": "https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg", "width": "1920", "height": "1080", "encodingFormat": "mp4", "bitrate": "6617kbps", "isFamilyFriendly": "False", "description": "Kleio Valentien", "uploadDate": "2015-12-05T21:24:35+01:00", "interactionStatistic": { "@type": "InteractionCounter", "interactionType": { "@type": "http://schema.org/WatchAction" }, "userInteractionCount": 1120958 }, "aggregateRating": { "@type": "AggregateRating", "ratingValue": "88", "ratingCount": "630", "bestRating": "100", "worstRating": "0" }, "actor": [{ "@type": "Person", "name": "Kleio Valentien", "url": "https://www.eporner.com/pornstar/kleio-valentien/" }]} </script>''', { 'title': '1 On 1 With Kleio', 'description': 'Kleio Valentien', 'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4', 'timestamp': 1449347075, 'duration': 743.0, 'view_count': 1120958, 'width': 1920, 'height': 1080, }, {}, ), ( r'''<script type="application/ld+json"> { "@context": "https://schema.org", "@graph": [ { "@type": "NewsArticle", "mainEntityOfPage": { "@type": "WebPage", "@id": "https://www.ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn" }, "headline": "Συμμορία ανηλίκων – δικηγόρος θυμάτων: ήθελαν να τους αποτελειώσουν", "name": "Συμμορία 
ανηλίκων – δικηγόρος θυμάτων: ήθελαν να τους αποτελειώσουν", "description": "Τα παιδιά δέχθηκαν την επίθεση επειδή αρνήθηκαν να γίνουν μέλη της συμμορίας, ανέφερε ο Γ. Ζαχαρόπουλος.", "image": { "@type": "ImageObject", "url": "https://ant1media.azureedge.net/imgHandler/1100/a635c968-be71-447c-bf9c-80d843ece21e.jpg", "width": 1100, "height": 756 }, "datePublished": "2021-11-10T08:50:00+03:00", "dateModified": "2021-11-10T08:52:53+03:00", "author": { "@type": "Person", "@id": "https://www.ant1news.gr/", "name": "Ant1news", "image": "https://www.ant1news.gr/images/logo-e5d7e4b3e714c88e8d2eca96130142f6.png", "url": "https://www.ant1news.gr/" }, "publisher": { "@type": "Organization", "@id": "https://www.ant1news.gr#publisher", "name": "Ant1news", "url": "https://www.ant1news.gr", "logo": { "@type": "ImageObject", "url": "https://www.ant1news.gr/images/logo-e5d7e4b3e714c88e8d2eca96130142f6.png", "width": 400, "height": 400 }, "sameAs": [ "https://www.facebook.com/Ant1news.gr", "https://twitter.com/antennanews", "https://www.youtube.com/channel/UC0smvAbfczoN75dP0Hw4Pzw", "https://www.instagram.com/ant1news/" ] }, "keywords": "μαχαίρωμα,συμμορία ανηλίκων,ΕΙΔΗΣΕΙΣ,ΕΙΔΗΣΕΙΣ ΣΗΜΕΡΑ,ΝΕΑ,Κοινωνία - Ant1news", "articleSection": "Κοινωνία" } ] } </script>''', { 'timestamp': 1636523400, 'title': 'md5:91fe569e952e4d146485740ae927662b', }, {'expected_type': 'NewsArticle'}, ), ( r'''<script type="application/ld+json"> {"url":"/vrtnu/a-z/het-journaal/2021/het-journaal-het-journaal-19u-20211231/", "name":"Het journaal 19u", "description":"Het journaal 19u van vrijdag 31 december 2021.", "potentialAction":{"url":"https://vrtnu.page.link/pfVy6ihgCAJKgHqe8","@type":"ShareAction"}, "mainEntityOfPage":{"@id":"1640092242445","@type":"WebPage"}, "publication":[{ "startDate":"2021-12-31T19:00:00.000+01:00", "endDate":"2022-01-30T23:55:00.000+01:00", "publishedBy":{"name":"een","@type":"Organization"}, "publishedOn":{"url":"https://www.vrt.be/vrtnu/","name":"VRT NU","@type":"BroadcastService"}, "@id":"pbs-pub-3a7ec233-da95-4c1e-9b2b-cf5fdfebcbe8", "@type":"BroadcastEvent" }], "video":{ "name":"Het journaal - Aflevering 365 (Seizoen 2021)", "description":"Het journaal 19u van vrijdag 31 december 2021. 
Bekijk aflevering 365 van seizoen 2021 met VRT NU via de site of app.", "thumbnailUrl":"//images.vrt.be/width1280/2021/12/31/80d5ed00-6a64-11ec-b07d-02b7b76bf47f.jpg", "expires":"2022-01-30T23:55:00.000+01:00", "hasPart":[ {"name":"Explosie Turnhout","startOffset":70,"@type":"Clip"}, {"name":"Jaarwisseling","startOffset":440,"@type":"Clip"}, {"name":"Natuurbranden Colorado","startOffset":1179,"@type":"Clip"}, {"name":"Klimaatverandering","startOffset":1263,"@type":"Clip"}, {"name":"Zacht weer","startOffset":1367,"@type":"Clip"}, {"name":"Financiële balans","startOffset":1383,"@type":"Clip"}, {"name":"Club Brugge","startOffset":1484,"@type":"Clip"}, {"name":"Mentale gezondheid bij topsporters","startOffset":1575,"@type":"Clip"}, {"name":"Olympische Winterspelen","startOffset":1728,"@type":"Clip"}, {"name":"Sober oudjaar in Nederland","startOffset":1873,"@type":"Clip"} ], "duration":"PT34M39.23S", "uploadDate":"2021-12-31T19:00:00.000+01:00", "@id":"vid-9457d0c6-b8ac-4aba-b5e1-15aa3a3295b5", "@type":"VideoObject" }, "genre":["Nieuws en actua"], "episodeNumber":365, "partOfSeries":{"name":"Het journaal","@id":"222831405527","@type":"TVSeries"}, "partOfSeason":{"name":"Seizoen 2021","@id":"961809365527","@type":"TVSeason"}, "@context":"https://schema.org","@id":"961685295527","@type":"TVEpisode"}</script> ''', { 'chapters': [ {"title": "Explosie Turnhout", "start_time": 70, "end_time": 440}, {"title": "Jaarwisseling", "start_time": 440, "end_time": 1179}, {"title": "Natuurbranden Colorado", "start_time": 1179, "end_time": 1263}, {"title": "Klimaatverandering", "start_time": 1263, "end_time": 1367}, {"title": "Zacht weer", "start_time": 1367, "end_time": 1383}, {"title": "Financiële balans", "start_time": 1383, "end_time": 1484}, {"title": "Club Brugge", "start_time": 1484, "end_time": 1575}, {"title": "Mentale gezondheid bij topsporters", "start_time": 1575, "end_time": 1728}, {"title": "Olympische Winterspelen", "start_time": 1728, "end_time": 1873}, {"title": "Sober oudjaar in Nederland", "start_time": 1873, "end_time": 2079.23} ], 'title': 'Het journaal - Aflevering 365 (Seizoen 2021)' }, {} ), ( # test multiple thumbnails in a list r''' <script type="application/ld+json"> {"@context":"https://schema.org", "@type":"VideoObject", "thumbnailUrl":["https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"]} </script>''', { 'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}], }, {}, ), ( # test single thumbnail r''' <script type="application/ld+json"> {"@context":"https://schema.org", "@type":"VideoObject", "thumbnailUrl":"https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"} </script>''', { 'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}], }, {}, ) ] for html, expected_dict, search_json_ld_kwargs in _TESTS: expect_dict( self, self.ie._search_json_ld(html, None, **search_json_ld_kwargs), expected_dict ) def test_download_json(self): uri = encode_data_uri(b'{"foo": "blah"}', 'application/json') self.assertEqual(self.ie._download_json(uri, None), {'foo': 'blah'}) uri = encode_data_uri(b'callback({"foo": "blah"})', 'application/javascript') self.assertEqual(self.ie._download_json(uri, None, transform_source=strip_jsonp), {'foo': 'blah'}) uri = encode_data_uri(b'{"foo": invalid}', 'application/json') self.assertRaises(ExtractorError, self.ie._download_json, uri, None) self.assertEqual(self.ie._download_json(uri, 
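            # fatal=False downgrades the JSON parse failure to a None return instead of raising ExtractorError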
None, fatal=False), None) def test_parse_html5_media_entries(self): # inline video tag expect_dict( self, self.ie._parse_html5_media_entries( 'https://127.0.0.1/video.html', r'<html><video src="/vid.mp4" /></html>', None)[0], { 'formats': [{ 'url': 'https://127.0.0.1/vid.mp4', }], }) # from https://www.r18.com/ # with kpbs in label expect_dict( self, self.ie._parse_html5_media_entries( 'https://www.r18.com/', r''' <video id="samplevideo_amateur" class="js-samplevideo video-js vjs-default-skin vjs-big-play-centered" controls preload="auto" width="400" height="225" poster="//pics.r18.com/digital/amateur/mgmr105/mgmr105jp.jpg"> <source id="video_source" src="https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_sm_w.mp4" type="video/mp4" res="240" label="300kbps"> <source id="video_source" src="https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dm_w.mp4" type="video/mp4" res="480" label="1000kbps"> <source id="video_source" src="https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dmb_w.mp4" type="video/mp4" res="740" label="1500kbps"> <p>Your browser does not support the video tag.</p> </video> ''', None)[0], { 'formats': [{ 'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_sm_w.mp4', 'ext': 'mp4', 'format_id': '300kbps', 'height': 240, 'tbr': 300, }, { 'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dm_w.mp4', 'ext': 'mp4', 'format_id': '1000kbps', 'height': 480, 'tbr': 1000, }, { 'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dmb_w.mp4', 'ext': 'mp4', 'format_id': '1500kbps', 'height': 740, 'tbr': 1500, }], 'thumbnail': '//pics.r18.com/digital/amateur/mgmr105/mgmr105jp.jpg' }) # from https://www.csfd.cz/ # with width and height expect_dict( self, self.ie._parse_html5_media_entries( 'https://www.csfd.cz/', r''' <video width="770" height="328" preload="none" controls poster="https://img.csfd.cz/files/images/film/video/preview/163/344/163344118_748d20.png?h360" > <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327358_eac647.mp4" type="video/mp4" width="640" height="360"> <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327360_3d2646.mp4" type="video/mp4" width="1280" height="720"> <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327356_91f258.mp4" type="video/mp4" width="1920" height="1080"> <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327359_962b4a.webm" type="video/webm" width="640" height="360"> <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327361_6feee0.webm" type="video/webm" width="1280" height="720"> <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327357_8ab472.webm" type="video/webm" width="1920" height="1080"> <track src="https://video.csfd.cz/files/subtitles/163/344/163344115_4c388b.srt" type="text/x-srt" kind="subtitles" srclang="cs" label="cs"> </video> ''', None)[0], { 'formats': [{ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327358_eac647.mp4', 'ext': 'mp4', 'width': 640, 'height': 360, }, { 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327360_3d2646.mp4', 'ext': 'mp4', 'width': 1280, 'height': 720, }, { 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327356_91f258.mp4', 'ext': 'mp4', 'width': 1920, 'height': 1080, }, { 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327359_962b4a.webm', 'ext': 'webm', 'width': 640, 'height': 360, }, { 'url': 
'https://video.csfd.cz/files/videos/157/750/157750813/163327361_6feee0.webm', 'ext': 'webm', 'width': 1280, 'height': 720, }, { 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327357_8ab472.webm', 'ext': 'webm', 'width': 1920, 'height': 1080, }], 'subtitles': { 'cs': [{'url': 'https://video.csfd.cz/files/subtitles/163/344/163344115_4c388b.srt'}] }, 'thumbnail': 'https://img.csfd.cz/files/images/film/video/preview/163/344/163344118_748d20.png?h360' }) # from https://tamasha.com/v/Kkdjw # with height in label expect_dict( self, self.ie._parse_html5_media_entries( 'https://tamasha.com/v/Kkdjw', r''' <video crossorigin="anonymous"> <source src="https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4" type="video/mp4" label="AUTO" res="0"/> <source src="https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4" type="video/mp4" label="240p" res="240"/> <source src="https://s-v2.tamasha.com/statics/videos_file/20/00/Kkdjw_200041c66f657fc967db464d156eafbc1ed9fe6f_n_144.mp4" type="video/mp4" label="144p" res="144"/> </video> ''', None)[0], { 'formats': [{ 'url': 'https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4', }, { 'url': 'https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4', 'ext': 'mp4', 'format_id': '240p', 'height': 240, }, { 'url': 'https://s-v2.tamasha.com/statics/videos_file/20/00/Kkdjw_200041c66f657fc967db464d156eafbc1ed9fe6f_n_144.mp4', 'ext': 'mp4', 'format_id': '144p', 'height': 144, }] }) # from https://www.directvnow.com # with data-src expect_dict( self, self.ie._parse_html5_media_entries( 'https://www.directvnow.com', r''' <video id="vid1" class="header--video-masked active" muted playsinline> <source data-src="https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4" type="video/mp4" /> </video> ''', None)[0], { 'formats': [{ 'ext': 'mp4', 'url': 'https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4', }] }) # from https://www.directvnow.com # with data-src expect_dict( self, self.ie._parse_html5_media_entries( 'https://www.directvnow.com', r''' <video id="vid1" class="header--video-masked active" muted playsinline> <source data-src="https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4" type="video/mp4" /> </video> ''', None)[0], { 'formats': [{ 'url': 'https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4', 'ext': 'mp4', }] }) # from https://www.klarna.com/uk/ # with data-video-src expect_dict( self, self.ie._parse_html5_media_entries( 'https://www.directvnow.com', r''' <video loop autoplay muted class="responsive-video block-kl__video video-on-medium"> <source src="" data-video-desktop data-video-src="https://www.klarna.com/uk/wp-content/uploads/sites/11/2019/01/KL062_Smooth3_0_DogWalking_5s_920x080_.mp4" type="video/mp4" /> </video> ''', None)[0], { 'formats': [{ 'url': 'https://www.klarna.com/uk/wp-content/uploads/sites/11/2019/01/KL062_Smooth3_0_DogWalking_5s_920x080_.mp4', 'ext': 'mp4', }], }) # from https://0000.studio/ # with type attribute but without extension in URL expect_dict( self, self.ie._parse_html5_media_entries( 'https://0000.studio', r''' <video 
src="https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92" controls="controls" type="video/mp4" preload="metadata" autoplay="autoplay" playsinline class="object-contain"> </video> ''', None)[0], { 'formats': [{ 'url': 'https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92', 'ext': 'mp4', }], }) def test_extract_jwplayer_data_realworld(self): # from http://www.suffolk.edu/sjc/ expect_dict( self, self.ie._extract_jwplayer_data(r''' <script type='text/javascript'> jwplayer('my-video').setup({ file: 'rtmp://192.138.214.154/live/sjclive', fallback: 'true', width: '95%', aspectratio: '16:9', primary: 'flash', mediaid:'XEgvuql4' }); </script> ''', None, require_title=False), { 'id': 'XEgvuql4', 'formats': [{ 'url': 'rtmp://192.138.214.154/live/sjclive', 'ext': 'flv' }] }) # from https://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary/ expect_dict( self, self.ie._extract_jwplayer_data(r''' <script type="text/javascript"> jwplayer("mediaplayer").setup({ 'videoid': "7564", 'width': "100%", 'aspectratio': "16:9", 'stretching': "exactfit", 'autostart': 'false', 'flashplayer': "https://t04.vipstreamservice.com/jwplayer/v5.10/player.swf", 'file': "https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv", 'image': "https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg", 'filefallback': "https://cdn.pornoxo.com/key=9ZPsTR5EvPLQrBaak2MUGA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/m_4b2157147afe5efa93ce1978e0265289c193874e02597.mp4", 'logo.hide': true, 'skin': "https://t04.vipstreamservice.com/jwplayer/skin/modieus-blk.zip", 'plugins': "https://t04.vipstreamservice.com/jwplayer/dock/dockableskinnableplugin.swf", 'dockableskinnableplugin.piclink': "/index.php?key=ajax-videothumbsn&vid=7564&data=2009-12--14--4b2157147afe5efa93ce1978e0265289c193874e02597.flv--17370", 'controlbar': 'bottom', 'modes': [ {type: 'flash', src: 'https://t04.vipstreamservice.com/jwplayer/v5.10/player.swf'} ], 'provider': 'http' }); //noinspection JSAnnotator invideo.setup({ adsUrl: "/banner-iframe/?zoneId=32", adsUrl2: "", autostart: false }); </script> ''', 'dummy', require_title=False), { 'thumbnail': 'https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg', 'formats': [{ 'url': 'https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv', 'ext': 'flv' }] }) # from http://www.indiedb.com/games/king-machine/videos expect_dict( self, self.ie._extract_jwplayer_data(r''' <script> jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/\/www.indiedb.com\/","displaytitle":false,"autostart":false,"repeat":false,"title":"king machine trailer 1","sharing":{"link":"http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1","code":"<iframe width=\"560\" height=\"315\" src=\"http:\/\/www.indiedb.com\/media\/iframe\/1522983\" frameborder=\"0\" allowfullscreen><\/iframe><br><a href=\"http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1\">king machine 
trailer 1 - Indie DB<\/a>"},"related":{"file":"http:\/\/rss.indiedb.com\/media\/recommended\/1522983\/feed\/rss.xml","dimensions":"160x120","onclick":"link"},"sources":[{"file":"http:\/\/cdn.dbolical.com\/cache\/videos\/games\/1\/50\/49678\/encode_mp4\/king-machine-trailer.mp4","label":"360p SD","default":"true"},{"file":"http:\/\/cdn.dbolical.com\/cache\/videos\/games\/1\/50\/49678\/encode720p_mp4\/king-machine-trailer.mp4","label":"720p HD"}],"image":"http:\/\/media.indiedb.com\/cache\/images\/games\/1\/50\/49678\/thumb_620x2000\/king-machine-trailer.mp4.jpg","advertising":{"client":"vast","tag":"http:\/\/ads.intergi.com\/adrawdata\/3.0\/5205\/4251742\/0\/1013\/ADTECH;cors=yes;width=560;height=315;referring_url=http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1;content_url=http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1;media_id=1522983;title=king+machine+trailer+1;device=__DEVICE__;model=__MODEL__;os=Windows+OS;osversion=__OSVERSION__;ua=__UA__;ip=109.171.17.81;uniqueid=1522983;tags=__TAGS__;number=58cac25928151;time=1489683033"},"width":620,"height":349}).once("play", function(event) { videoAnalytics("play"); }).once("complete", function(event) { videoAnalytics("completed"); }); </script> ''', 'dummy'), { 'title': 'king machine trailer 1', 'thumbnail': 'http://media.indiedb.com/cache/images/games/1/50/49678/thumb_620x2000/king-machine-trailer.mp4.jpg', 'formats': [{ 'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode_mp4/king-machine-trailer.mp4', 'height': 360, 'ext': 'mp4' }, { 'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode720p_mp4/king-machine-trailer.mp4', 'height': 720, 'ext': 'mp4' }] }) def test_parse_m3u8_formats(self): _TEST_CASES = [ ( # https://github.com/ytdl-org/youtube-dl/issues/11995 # http://teamcoco.com/video/clueless-gamer-super-bowl-for-honor 'img_bipbop_adv_example_fmp4', 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', [{ 'format_id': 'aud1-English', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a1/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'language': 'en', 'ext': 'mp4', 'protocol': 'm3u8_native', 'audio_ext': 'mp4', }, { 'format_id': 'aud2-English', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a2/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'language': 'en', 'ext': 'mp4', 'protocol': 'm3u8_native', 'audio_ext': 'mp4', }, { 'format_id': 'aud3-English', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a3/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'language': 'en', 'ext': 'mp4', 'protocol': 'm3u8_native', 'audio_ext': 'mp4', }, { 'format_id': '530', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v2/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 480, 'height': 270, 'vcodec': 'avc1.640015', }, { 'format_id': '561', 'url': 
'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v2/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 480, 'height': 270, 'vcodec': 'avc1.640015', }, { 'format_id': '753', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v2/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 480, 'height': 270, 'vcodec': 'avc1.640015', }, { 'format_id': '895', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v3/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 640, 'height': 360, 'vcodec': 'avc1.64001e', }, { 'format_id': '926', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v3/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 640, 'height': 360, 'vcodec': 'avc1.64001e', }, { 'format_id': '1118', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v3/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 640, 'height': 360, 'vcodec': 'avc1.64001e', }, { 'format_id': '1265', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v4/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 768, 'height': 432, 'vcodec': 'avc1.64001e', }, { 'format_id': '1295', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v4/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 768, 'height': 432, 'vcodec': 'avc1.64001e', }, { 'format_id': '1487', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v4/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 768, 'height': 432, 'vcodec': 'avc1.64001e', }, { 'format_id': '2168', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v5/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 960, 'height': 540, 'vcodec': 'avc1.640020', }, { 'format_id': '2198', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v5/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 960, 'height': 540, 'vcodec': 'avc1.640020', 
}, { 'format_id': '2390', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v5/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 960, 'height': 540, 'vcodec': 'avc1.640020', }, { 'format_id': '3168', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v6/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 1280, 'height': 720, 'vcodec': 'avc1.640020', }, { 'format_id': '3199', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v6/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 1280, 'height': 720, 'vcodec': 'avc1.640020', }, { 'format_id': '3391', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v6/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 1280, 'height': 720, 'vcodec': 'avc1.640020', }, { 'format_id': '4670', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v7/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 1920, 'height': 1080, 'vcodec': 'avc1.64002a', }, { 'format_id': '4701', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v7/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 1920, 'height': 1080, 'vcodec': 'avc1.64002a', }, { 'format_id': '4893', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v7/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 1920, 'height': 1080, 'vcodec': 'avc1.64002a', }, { 'format_id': '6170', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v8/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 1920, 'height': 1080, 'vcodec': 'avc1.64002a', }, { 'format_id': '6200', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v8/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 1920, 'height': 1080, 'vcodec': 'avc1.64002a', }, { 'format_id': '6392', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v8/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 
'width': 1920, 'height': 1080, 'vcodec': 'avc1.64002a', }, { 'format_id': '7968', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v9/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 1920, 'height': 1080, 'vcodec': 'avc1.64002a', }, { 'format_id': '7998', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v9/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 1920, 'height': 1080, 'vcodec': 'avc1.64002a', }, { 'format_id': '8190', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v9/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 1920, 'height': 1080, 'vcodec': 'avc1.64002a', }], {} ), ( 'bipbop_16x9', 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8', [{ 'format_id': 'bipbop_audio-BipBop Audio 2', 'format_index': None, 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/alternate_audio_aac/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8', 'language': 'eng', 'ext': 'mp4', 'protocol': 'm3u8_native', 'preference': None, 'quality': None, 'vcodec': 'none', 'audio_ext': 'mp4', 'video_ext': 'none', }, { 'format_id': '41', 'format_index': None, 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear0/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8', 'tbr': 41.457, 'ext': 'mp4', 'fps': None, 'protocol': 'm3u8_native', 'preference': None, 'quality': None, 'vcodec': 'none', 'acodec': 'mp4a.40.2', 'audio_ext': 'mp4', 'video_ext': 'none', 'abr': 41.457, }, { 'format_id': '263', 'format_index': None, 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear1/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8', 'tbr': 263.851, 'ext': 'mp4', 'fps': None, 'protocol': 'm3u8_native', 'preference': None, 'quality': None, 'width': 416, 'height': 234, 'vcodec': 'avc1.4d400d', 'acodec': 'mp4a.40.2', 'video_ext': 'mp4', 'audio_ext': 'none', 'vbr': 263.851, 'abr': 0, }, { 'format_id': '577', 'format_index': None, 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear2/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8', 'tbr': 577.61, 'ext': 'mp4', 'fps': None, 'protocol': 'm3u8_native', 'preference': None, 'quality': None, 'width': 640, 'height': 360, 'vcodec': 'avc1.4d401e', 'acodec': 'mp4a.40.2', 'video_ext': 'mp4', 'audio_ext': 'none', 'vbr': 577.61, 'abr': 0, }, { 'format_id': '915', 'format_index': None, 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear3/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8', 'tbr': 915.905, 'ext': 'mp4', 'fps': None, 
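                # tbr values in these expectations are EXT-X-STREAM-INF BANDWIDTH figures (bits/s) scaled to kbit/s; audio-only renditions carry vcodec 'none'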
'protocol': 'm3u8_native', 'preference': None, 'quality': None, 'width': 960, 'height': 540, 'vcodec': 'avc1.4d401f', 'acodec': 'mp4a.40.2', 'video_ext': 'mp4', 'audio_ext': 'none', 'vbr': 915.905, 'abr': 0, }, { 'format_id': '1030', 'format_index': None, 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear4/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8', 'tbr': 1030.138, 'ext': 'mp4', 'fps': None, 'protocol': 'm3u8_native', 'preference': None, 'quality': None, 'width': 1280, 'height': 720, 'vcodec': 'avc1.4d401f', 'acodec': 'mp4a.40.2', 'video_ext': 'mp4', 'audio_ext': 'none', 'vbr': 1030.138, 'abr': 0, }, { 'format_id': '1924', 'format_index': None, 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear5/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8', 'tbr': 1924.009, 'ext': 'mp4', 'fps': None, 'protocol': 'm3u8_native', 'preference': None, 'quality': None, 'width': 1920, 'height': 1080, 'vcodec': 'avc1.4d401f', 'acodec': 'mp4a.40.2', 'video_ext': 'mp4', 'audio_ext': 'none', 'vbr': 1924.009, 'abr': 0, }], { 'en': [{ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/eng/prog_index.m3u8', 'ext': 'vtt', 'protocol': 'm3u8_native' }, { 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/eng_forced/prog_index.m3u8', 'ext': 'vtt', 'protocol': 'm3u8_native' }], 'fr': [{ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/fra/prog_index.m3u8', 'ext': 'vtt', 'protocol': 'm3u8_native' }, { 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/fra_forced/prog_index.m3u8', 'ext': 'vtt', 'protocol': 'm3u8_native' }], 'es': [{ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/spa/prog_index.m3u8', 'ext': 'vtt', 'protocol': 'm3u8_native' }, { 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/spa_forced/prog_index.m3u8', 'ext': 'vtt', 'protocol': 'm3u8_native' }], 'ja': [{ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/jpn/prog_index.m3u8', 'ext': 'vtt', 'protocol': 'm3u8_native' }, { 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/jpn_forced/prog_index.m3u8', 'ext': 'vtt', 'protocol': 'm3u8_native' }], } ), ] for m3u8_file, m3u8_url, expected_formats, expected_subs in _TEST_CASES: with open('./test/testdata/m3u8/%s.m3u8' % m3u8_file, encoding='utf-8') as f: formats, subs = self.ie._parse_m3u8_formats_and_subtitles( f.read(), m3u8_url, ext='mp4') self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) expect_value(self, subs, expected_subs, None) def test_parse_mpd_formats(self): _TEST_CASES = [ ( # https://github.com/ytdl-org/youtube-dl/issues/13919 # Also tests duplicate representation ids, see # https://github.com/ytdl-org/youtube-dl/issues/15111 'float_duration', 'http://unknown/manifest.mpd', # mpd_url None, # mpd_base_url [{ 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'm4a', 'format_id': '318597', 'format_note': 'DASH audio', 'protocol': 'http_dash_segments', 'acodec': 'mp4a.40.2', 'vcodec': 'none', 'tbr': 61.587, }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': '318597', 
'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'none', 'vcodec': 'avc1.42001f', 'tbr': 318.597, 'width': 340, 'height': 192, }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': '638590', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'none', 'vcodec': 'avc1.42001f', 'tbr': 638.59, 'width': 512, 'height': 288, }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': '1022565', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'none', 'vcodec': 'avc1.4d001f', 'tbr': 1022.565, 'width': 688, 'height': 384, }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': '2046506', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'none', 'vcodec': 'avc1.4d001f', 'tbr': 2046.506, 'width': 1024, 'height': 576, }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': '3998017', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'none', 'vcodec': 'avc1.640029', 'tbr': 3998.017, 'width': 1280, 'height': 720, }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': '5997485', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'none', 'vcodec': 'avc1.640032', 'tbr': 5997.485, 'width': 1920, 'height': 1080, }], {}, ), ( # https://github.com/ytdl-org/youtube-dl/pull/14844 'urls_only', 'http://unknown/manifest.mpd', # mpd_url None, # mpd_base_url [{ 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': 'h264_aac_144p_m4s', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'mp4a.40.2', 'vcodec': 'avc3.42c01e', 'tbr': 200, 'width': 256, 'height': 144, }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': 'h264_aac_240p_m4s', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'mp4a.40.2', 'vcodec': 'avc3.42c01e', 'tbr': 400, 'width': 424, 'height': 240, }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': 'h264_aac_360p_m4s', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'mp4a.40.2', 'vcodec': 'avc3.42c01e', 'tbr': 800, 'width': 640, 'height': 360, }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': 'h264_aac_480p_m4s', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'mp4a.40.2', 'vcodec': 'avc3.42c01e', 'tbr': 1200, 'width': 856, 'height': 480, }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': 'h264_aac_576p_m4s', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'mp4a.40.2', 'vcodec': 'avc3.42c01e', 'tbr': 1600, 'width': 1024, 'height': 576, }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': 'h264_aac_720p_m4s', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'mp4a.40.2', 'vcodec': 'avc3.42c01e', 'tbr': 2400, 'width': 1280, 'height': 720, }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': 'h264_aac_1080p_m4s', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'mp4a.40.2', 'vcodec': 'avc3.42c01e', 'tbr': 4400, 'width': 1920, 'height': 1080, }], {}, ), ( # https://github.com/ytdl-org/youtube-dl/issues/20346 # Media considered unfragmented even though it contains # Initialization tag 'unfragmented', 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', # mpd_url 'https://v.redd.it/hw1x7rcg7zl21', # mpd_base_url [{ 'url': 
'https://v.redd.it/hw1x7rcg7zl21/audio', 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', 'ext': 'm4a', 'format_id': 'AUDIO-1', 'format_note': 'DASH audio', 'container': 'm4a_dash', 'acodec': 'mp4a.40.2', 'vcodec': 'none', 'tbr': 129.87, 'asr': 48000, }, { 'url': 'https://v.redd.it/hw1x7rcg7zl21/DASH_240', 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', 'ext': 'mp4', 'format_id': 'VIDEO-2', 'format_note': 'DASH video', 'container': 'mp4_dash', 'acodec': 'none', 'vcodec': 'avc1.4d401e', 'tbr': 608.0, 'width': 240, 'height': 240, 'fps': 30, }, { 'url': 'https://v.redd.it/hw1x7rcg7zl21/DASH_360', 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', 'ext': 'mp4', 'format_id': 'VIDEO-1', 'format_note': 'DASH video', 'container': 'mp4_dash', 'acodec': 'none', 'vcodec': 'avc1.4d401e', 'tbr': 804.261, 'width': 360, 'height': 360, 'fps': 30, }], {}, ), ( 'subtitles', 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/', [{ 'format_id': 'audio=128001', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'ext': 'm4a', 'tbr': 128.001, 'asr': 48000, 'format_note': 'DASH audio', 'container': 'm4a_dash', 'vcodec': 'none', 'acodec': 'mp4a.40.2', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', 'protocol': 'http_dash_segments', 'audio_ext': 'm4a', 'video_ext': 'none', 'abr': 128.001, }, { 'format_id': 'video=100000', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'ext': 'mp4', 'width': 336, 'height': 144, 'tbr': 100, 'format_note': 'DASH video', 'container': 'mp4_dash', 'vcodec': 'avc1.4D401F', 'acodec': 'none', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', 'protocol': 'http_dash_segments', 'video_ext': 'mp4', 'audio_ext': 'none', 'vbr': 100, }, { 'format_id': 'video=326000', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'ext': 'mp4', 'width': 562, 'height': 240, 'tbr': 326, 'format_note': 'DASH video', 'container': 'mp4_dash', 'vcodec': 'avc1.4D401F', 'acodec': 'none', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', 'protocol': 'http_dash_segments', 'video_ext': 'mp4', 'audio_ext': 'none', 'vbr': 326, }, { 'format_id': 'video=698000', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'ext': 'mp4', 'width': 844, 'height': 360, 'tbr': 698, 'format_note': 'DASH video', 'container': 'mp4_dash', 'vcodec': 'avc1.4D401F', 
'acodec': 'none', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', 'protocol': 'http_dash_segments', 'video_ext': 'mp4', 'audio_ext': 'none', 'vbr': 698, }, { 'format_id': 'video=1493000', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'ext': 'mp4', 'width': 1126, 'height': 480, 'tbr': 1493, 'format_note': 'DASH video', 'container': 'mp4_dash', 'vcodec': 'avc1.4D401F', 'acodec': 'none', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', 'protocol': 'http_dash_segments', 'video_ext': 'mp4', 'audio_ext': 'none', 'vbr': 1493, }, { 'format_id': 'video=4482000', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'ext': 'mp4', 'width': 1688, 'height': 720, 'tbr': 4482, 'format_note': 'DASH video', 'container': 'mp4_dash', 'vcodec': 'avc1.4D401F', 'acodec': 'none', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', 'protocol': 'http_dash_segments', 'video_ext': 'mp4', 'audio_ext': 'none', 'vbr': 4482, }], { 'en': [ { 'ext': 'mp4', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', 'protocol': 'http_dash_segments', } ] }, ) ] for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES: with open('./test/testdata/mpd/%s.mpd' % mpd_file, encoding='utf-8') as f: formats, subtitles = self.ie._parse_mpd_formats_and_subtitles( compat_etree_fromstring(f.read().encode()), mpd_base_url=mpd_base_url, mpd_url=mpd_url) self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) expect_value(self, subtitles, expected_subtitles, None) def test_parse_ism_formats(self): _TEST_CASES = [ ( 'sintel', 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', [{ 'format_id': 'audio-128', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'ext': 'isma', 'tbr': 128, 'asr': 48000, 'vcodec': 'none', 'acodec': 'AACL', 'protocol': 'ism', '_download_params': { 'stream_type': 'audio', 'duration': 8880746666, 'timescale': 10000000, 'width': 0, 'height': 0, 'fourcc': 'AACL', 'codec_private_data': '1190', 'sampling_rate': 48000, 'channels': 2, 'bits_per_sample': 16, 
'nal_unit_length_field': 4 }, 'audio_ext': 'isma', 'video_ext': 'none', 'abr': 128, }, { 'format_id': 'video-100', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'ext': 'ismv', 'width': 336, 'height': 144, 'tbr': 100, 'vcodec': 'AVC1', 'acodec': 'none', 'protocol': 'ism', '_download_params': { 'stream_type': 'video', 'duration': 8880746666, 'timescale': 10000000, 'width': 336, 'height': 144, 'fourcc': 'AVC1', 'codec_private_data': '00000001674D401FDA0544EFFC2D002CBC40000003004000000C03C60CA80000000168EF32C8', 'channels': 2, 'bits_per_sample': 16, 'nal_unit_length_field': 4 }, 'video_ext': 'ismv', 'audio_ext': 'none', 'vbr': 100, }, { 'format_id': 'video-326', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'ext': 'ismv', 'width': 562, 'height': 240, 'tbr': 326, 'vcodec': 'AVC1', 'acodec': 'none', 'protocol': 'ism', '_download_params': { 'stream_type': 'video', 'duration': 8880746666, 'timescale': 10000000, 'width': 562, 'height': 240, 'fourcc': 'AVC1', 'codec_private_data': '00000001674D401FDA0241FE23FFC3BC83BA44000003000400000300C03C60CA800000000168EF32C8', 'channels': 2, 'bits_per_sample': 16, 'nal_unit_length_field': 4 }, 'video_ext': 'ismv', 'audio_ext': 'none', 'vbr': 326, }, { 'format_id': 'video-698', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'ext': 'ismv', 'width': 844, 'height': 360, 'tbr': 698, 'vcodec': 'AVC1', 'acodec': 'none', 'protocol': 'ism', '_download_params': { 'stream_type': 'video', 'duration': 8880746666, 'timescale': 10000000, 'width': 844, 'height': 360, 'fourcc': 'AVC1', 'codec_private_data': '00000001674D401FDA0350BFB97FF06AF06AD1000003000100000300300F1832A00000000168EF32C8', 'channels': 2, 'bits_per_sample': 16, 'nal_unit_length_field': 4 }, 'video_ext': 'ismv', 'audio_ext': 'none', 'vbr': 698, }, { 'format_id': 'video-1493', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'ext': 'ismv', 'width': 1126, 'height': 480, 'tbr': 1493, 'vcodec': 'AVC1', 'acodec': 'none', 'protocol': 'ism', '_download_params': { 'stream_type': 'video', 'duration': 8880746666, 'timescale': 10000000, 'width': 1126, 'height': 480, 'fourcc': 'AVC1', 'codec_private_data': '00000001674D401FDA011C3DE6FFF0D890D871000003000100000300300F1832A00000000168EF32C8', 'channels': 2, 'bits_per_sample': 16, 'nal_unit_length_field': 4 }, 'video_ext': 'ismv', 'audio_ext': 'none', 'vbr': 1493, }, { 'format_id': 'video-4482', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'ext': 'ismv', 'width': 1688, 
'height': 720, 'tbr': 4482, 'vcodec': 'AVC1', 'acodec': 'none', 'protocol': 'ism', '_download_params': { 'stream_type': 'video', 'duration': 8880746666, 'timescale': 10000000, 'width': 1688, 'height': 720, 'fourcc': 'AVC1', 'codec_private_data': '00000001674D401FDA01A816F97FFC1ABC1AB440000003004000000C03C60CA80000000168EF32C8', 'channels': 2, 'bits_per_sample': 16, 'nal_unit_length_field': 4 }, 'video_ext': 'ismv', 'audio_ext': 'none', 'vbr': 4482, }], { 'eng': [ { 'ext': 'ismt', 'protocol': 'ism', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', '_download_params': { 'stream_type': 'text', 'duration': 8880746666, 'timescale': 10000000, 'fourcc': 'TTML', 'codec_private_data': '' } } ] }, ), ] for ism_file, ism_url, expected_formats, expected_subtitles in _TEST_CASES: with open('./test/testdata/ism/%s.Manifest' % ism_file, encoding='utf-8') as f: formats, subtitles = self.ie._parse_ism_formats_and_subtitles( compat_etree_fromstring(f.read().encode()), ism_url=ism_url) self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) expect_value(self, subtitles, expected_subtitles, None) def test_parse_f4m_formats(self): _TEST_CASES = [ ( # https://github.com/ytdl-org/youtube-dl/issues/14660 'custom_base_url', 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m', [{ 'manifest_url': 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m', 'ext': 'flv', 'format_id': '2148', 'protocol': 'f4m', 'tbr': 2148, 'width': 1280, 'height': 720, }] ), ] for f4m_file, f4m_url, expected_formats in _TEST_CASES: with open('./test/testdata/f4m/%s.f4m' % f4m_file, encoding='utf-8') as f: formats = self.ie._parse_f4m_formats( compat_etree_fromstring(f.read().encode()), f4m_url, None) self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) def test_parse_xspf(self): _TEST_CASES = [ ( 'foo_xspf', 'https://example.org/src/foo_xspf.xspf', [{ 'id': 'foo_xspf', 'title': 'Pandemonium', 'description': 'Visit http://bigbrother404.bandcamp.com', 'duration': 202.416, 'formats': [{ 'manifest_url': 'https://example.org/src/foo_xspf.xspf', 'url': 'https://example.org/src/cd1/track%201.mp3', }], }, { 'id': 'foo_xspf', 'title': 'Final Cartridge (Nichico Twelve Remix)', 'description': 'Visit http://bigbrother404.bandcamp.com', 'duration': 255.857, 'formats': [{ 'manifest_url': 'https://example.org/src/foo_xspf.xspf', 'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3', }], }, { 'id': 'foo_xspf', 'title': 'Rebuilding Nightingale', 'description': 'Visit http://bigbrother404.bandcamp.com', 'duration': 287.915, 'formats': [{ 'manifest_url': 'https://example.org/src/foo_xspf.xspf', 'url': 'https://example.org/src/track3.mp3', }, { 'manifest_url': 'https://example.org/src/foo_xspf.xspf', 'url': 'https://example.com/track3.mp3', }] }] ), ] for xspf_file, xspf_url, expected_entries in _TEST_CASES: with open('./test/testdata/xspf/%s.xspf' % xspf_file, encoding='utf-8') as f: entries = self.ie._parse_xspf( compat_etree_fromstring(f.read().encode()), xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url) expect_value(self, entries, expected_entries, None) for i in range(len(entries)): expect_dict(self, entries[i], expected_entries[i]) def 
test_response_with_expected_status_returns_content(self):
        # Checks for mitigations against the effects of
        # <https://bugs.python.org/issue15002> that affect Python 3.4.1+, which
        # manifest as `_download_webpage`, `_download_xml`, `_download_json`,
        # or the underlying `_download_webpage_handle` returning no content
        # when a response matches `expected_status`.
        httpd = http.server.HTTPServer(
            ('127.0.0.1', 0), InfoExtractorTestRequestHandler)
        port = http_server_port(httpd)
        server_thread = threading.Thread(target=httpd.serve_forever)
        server_thread.daemon = True
        server_thread.start()

        (content, urlh) = self.ie._download_webpage_handle(
            'http://127.0.0.1:%d/teapot' % port, None,
            expected_status=TEAPOT_RESPONSE_STATUS)
        self.assertEqual(content, TEAPOT_RESPONSE_BODY)


if __name__ == '__main__':
    unittest.main()


yt-dlp-2022.08.19/test/test_YoutubeDL.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import copy
import json
import urllib.error

from test.helper import FakeYDL, assertRegexpMatches
from yt_dlp import YoutubeDL
from yt_dlp.compat import compat_os_name
from yt_dlp.extractor import YoutubeIE
from yt_dlp.extractor.common import InfoExtractor
from yt_dlp.postprocessor.common import PostProcessor
from yt_dlp.utils import (
    ExtractorError,
    LazyList,
    OnDemandPagedList,
    int_or_none,
    match_filter_func,
)

TEST_URL = 'http://localhost/sample.mp4'


class YDL(FakeYDL):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.downloaded_info_dicts = []
        self.msgs = []

    def process_info(self, info_dict):
        self.downloaded_info_dicts.append(info_dict.copy())

    def to_screen(self, msg, *args, **kwargs):
        self.msgs.append(msg)

    def dl(self, *args, **kwargs):
        assert False, 'Downloader must not be invoked for test_YoutubeDL'


def _make_result(formats, **kwargs):
    res = {
        'formats': formats,
        'id': 'testid',
        'title': 'testttitle',
        'extractor': 'testex',
        'extractor_key': 'TestEx',
        'webpage_url': 'http://example.com/watch?v=shenanigans',
    }
    res.update(**kwargs)
    return res


class TestFormatSelection(unittest.TestCase):
    def test_prefer_free_formats(self):
        # Same resolution => download webm
        ydl = YDL()
        ydl.params['prefer_free_formats'] = True
        formats = [
            {'ext': 'webm', 'height': 460, 'url': TEST_URL},
            {'ext': 'mp4', 'height': 460, 'url': TEST_URL},
        ]
        info_dict = _make_result(formats)
        yie = YoutubeIE(ydl)
        yie._sort_formats(info_dict['formats'])
        ydl.process_ie_result(info_dict)
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['ext'], 'webm')

        # Different resolution => download best quality (mp4)
        ydl = YDL()
        ydl.params['prefer_free_formats'] = True
        formats = [
            {'ext': 'webm', 'height': 720, 'url': TEST_URL},
            {'ext': 'mp4', 'height': 1080, 'url': TEST_URL},
        ]
        info_dict['formats'] = formats
        yie = YoutubeIE(ydl)
        yie._sort_formats(info_dict['formats'])
        ydl.process_ie_result(info_dict)
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['ext'], 'mp4')

        # No
prefer_free_formats => prefer mp4 and webm ydl = YDL() ydl.params['prefer_free_formats'] = False formats = [ {'ext': 'webm', 'height': 720, 'url': TEST_URL}, {'ext': 'mp4', 'height': 720, 'url': TEST_URL}, {'ext': 'flv', 'height': 720, 'url': TEST_URL}, ] info_dict['formats'] = formats yie = YoutubeIE(ydl) yie._sort_formats(info_dict['formats']) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['ext'], 'mp4') ydl = YDL() ydl.params['prefer_free_formats'] = False formats = [ {'ext': 'flv', 'height': 720, 'url': TEST_URL}, {'ext': 'webm', 'height': 720, 'url': TEST_URL}, ] info_dict['formats'] = formats yie = YoutubeIE(ydl) yie._sort_formats(info_dict['formats']) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['ext'], 'webm') def test_format_selection(self): formats = [ {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL}, {'format_id': 'example-with-dashes', 'ext': 'webm', 'preference': 1, 'url': TEST_URL}, {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL}, {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL}, {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL}, ] info_dict = _make_result(formats) def test(inp, *expected, multi=False): ydl = YDL({ 'format': inp, 'allow_multiple_video_streams': multi, 'allow_multiple_audio_streams': multi, }) ydl.process_ie_result(info_dict.copy()) downloaded = map(lambda x: x['format_id'], ydl.downloaded_info_dicts) self.assertEqual(list(downloaded), list(expected)) test('20/47', '47') test('20/71/worst', '35') test(None, '2') test('webm/mp4', '47') test('3gp/40/mp4', '35') test('example-with-dashes', 'example-with-dashes') test('all', '2', '47', '45', 'example-with-dashes', '35') test('mergeall', '2+47+45+example-with-dashes+35', multi=True) def test_format_selection_audio(self): formats = [ {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL}, {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL}, {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': TEST_URL}, {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': TEST_URL}, ] info_dict = _make_result(formats) ydl = YDL({'format': 'bestaudio'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'audio-high') ydl = YDL({'format': 'worstaudio'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'audio-low') formats = [ {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL}, {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': TEST_URL}, ] info_dict = _make_result(formats) ydl = YDL({'format': 'bestaudio/worstaudio/best'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'vid-high') def test_format_selection_audio_exts(self): formats = [ {'format_id': 'mp3-64', 'ext': 'mp3', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'}, {'format_id': 'ogg-64', 'ext': 'ogg', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'}, {'format_id': 'aac-64', 'ext': 'aac', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'}, {'format_id': 'mp3-32', 'ext': 'mp3', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'}, {'format_id': 'aac-32', 'ext': 'aac', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'}, ] info_dict = 
_make_result(formats) ydl = YDL({'format': 'best'}) ie = YoutubeIE(ydl) ie._sort_formats(info_dict['formats']) ydl.process_ie_result(copy.deepcopy(info_dict)) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'aac-64') ydl = YDL({'format': 'mp3'}) ie = YoutubeIE(ydl) ie._sort_formats(info_dict['formats']) ydl.process_ie_result(copy.deepcopy(info_dict)) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'mp3-64') ydl = YDL({'prefer_free_formats': True}) ie = YoutubeIE(ydl) ie._sort_formats(info_dict['formats']) ydl.process_ie_result(copy.deepcopy(info_dict)) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'ogg-64') def test_format_selection_video(self): formats = [ {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': TEST_URL}, {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': TEST_URL}, {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': TEST_URL}, ] info_dict = _make_result(formats) ydl = YDL({'format': 'bestvideo'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'dash-video-high') ydl = YDL({'format': 'worstvideo'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'dash-video-low') ydl = YDL({'format': 'bestvideo[format_id^=dash][format_id$=low]'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'dash-video-low') formats = [ {'format_id': 'vid-vcodec-dot', 'ext': 'mp4', 'preference': 1, 'vcodec': 'avc1.123456', 'acodec': 'none', 'url': TEST_URL}, ] info_dict = _make_result(formats) ydl = YDL({'format': 'bestvideo[vcodec=avc1.123456]'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot') def test_format_selection_string_ops(self): formats = [ {'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL}, {'format_id': 'zxc-cxz', 'ext': 'webm', 'url': TEST_URL}, ] info_dict = _make_result(formats) # equals (=) ydl = YDL({'format': '[format_id=abc-cba]'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'abc-cba') # does not equal (!=) ydl = YDL({'format': '[format_id!=abc-cba]'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'zxc-cxz') ydl = YDL({'format': '[format_id!=abc-cba][format_id!=zxc-cxz]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) # starts with (^=) ydl = YDL({'format': '[format_id^=abc]'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'abc-cba') # does not start with (!^=) ydl = YDL({'format': '[format_id!^=abc]'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'zxc-cxz') ydl = YDL({'format': '[format_id!^=abc][format_id!^=zxc]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) # ends with ($=) ydl = YDL({'format': '[format_id$=cba]'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'abc-cba') # does not end with (!$=) ydl = YDL({'format': 
'[format_id!$=cba]'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'zxc-cxz') ydl = YDL({'format': '[format_id!$=cba][format_id!$=cxz]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) # contains (*=) ydl = YDL({'format': '[format_id*=bc-cb]'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'abc-cba') # does not contain (!*=) ydl = YDL({'format': '[format_id!*=bc-cb]'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'zxc-cxz') ydl = YDL({'format': '[format_id!*=abc][format_id!*=zxc]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) ydl = YDL({'format': '[format_id!*=-]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) def test_youtube_format_selection(self): # FIXME: Rewrite in accordance with the new format sorting options return order = [ '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13', # Apple HTTP Live Streaming '96', '95', '94', '93', '92', '132', '151', # 3D '85', '84', '102', '83', '101', '82', '100', # Dash video '137', '248', '136', '247', '135', '246', '245', '244', '134', '243', '133', '242', '160', # Dash audio '141', '172', '140', '171', '139', ] def format_info(f_id): info = YoutubeIE._formats[f_id].copy() # XXX: In real cases InfoExtractor._parse_mpd_formats() fills up 'acodec' # and 'vcodec', while in tests such information is incomplete since # commit a6c2c24479e5f4827ceb06f64d855329c0a6f593 # test_YoutubeDL.test_youtube_format_selection is broken without # this fix if 'acodec' in info and 'vcodec' not in info: info['vcodec'] = 'none' elif 'vcodec' in info and 'acodec' not in info: info['acodec'] = 'none' info['format_id'] = f_id info['url'] = 'url:' + f_id return info formats_order = [format_info(f_id) for f_id in order] info_dict = _make_result(list(formats_order), extractor='youtube') ydl = YDL({'format': 'bestvideo+bestaudio'}) yie = YoutubeIE(ydl) yie._sort_formats(info_dict['formats']) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], '248+172') self.assertEqual(downloaded['ext'], 'mp4') info_dict = _make_result(list(formats_order), extractor='youtube') ydl = YDL({'format': 'bestvideo[height>=999999]+bestaudio/best'}) yie = YoutubeIE(ydl) yie._sort_formats(info_dict['formats']) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], '38') info_dict = _make_result(list(formats_order), extractor='youtube') ydl = YDL({'format': 'bestvideo/best,bestaudio'}) yie = YoutubeIE(ydl) yie._sort_formats(info_dict['formats']) ydl.process_ie_result(info_dict) downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] self.assertEqual(downloaded_ids, ['137', '141']) info_dict = _make_result(list(formats_order), extractor='youtube') ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])+bestaudio'}) yie = YoutubeIE(ydl) yie._sort_formats(info_dict['formats']) ydl.process_ie_result(info_dict) downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] self.assertEqual(downloaded_ids, ['137+141', '248+141']) info_dict = _make_result(list(formats_order), extractor='youtube') ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])[height<=720]+bestaudio'}) yie = 
YoutubeIE(ydl) yie._sort_formats(info_dict['formats']) ydl.process_ie_result(info_dict) downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] self.assertEqual(downloaded_ids, ['136+141', '247+141']) info_dict = _make_result(list(formats_order), extractor='youtube') ydl = YDL({'format': '(bestvideo[ext=none]/bestvideo[ext=webm])+bestaudio'}) yie = YoutubeIE(ydl) yie._sort_formats(info_dict['formats']) ydl.process_ie_result(info_dict) downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] self.assertEqual(downloaded_ids, ['248+141']) for f1, f2 in zip(formats_order, formats_order[1:]): info_dict = _make_result([f1, f2], extractor='youtube') ydl = YDL({'format': 'best/bestvideo'}) yie = YoutubeIE(ydl) yie._sort_formats(info_dict['formats']) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], f1['format_id']) info_dict = _make_result([f2, f1], extractor='youtube') ydl = YDL({'format': 'best/bestvideo'}) yie = YoutubeIE(ydl) yie._sort_formats(info_dict['formats']) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], f1['format_id']) def test_audio_only_extractor_format_selection(self): # For extractors with incomplete formats (all formats are audio-only or # video-only) best and worst should fallback to corresponding best/worst # video-only or audio-only formats (as per # https://github.com/ytdl-org/youtube-dl/pull/5556) formats = [ {'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL}, {'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL}, ] info_dict = _make_result(formats) ydl = YDL({'format': 'best'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'high') ydl = YDL({'format': 'worst'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'low') def test_format_not_available(self): formats = [ {'format_id': 'regular', 'ext': 'mp4', 'height': 360, 'url': TEST_URL}, {'format_id': 'video', 'ext': 'mp4', 'height': 720, 'acodec': 'none', 'url': TEST_URL}, ] info_dict = _make_result(formats) # This must fail since complete video-audio format does not match filter # and extractor does not provide incomplete only formats (i.e. only # video-only or audio-only). 
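        # Illustrative note (editor's sketch, not part of the original suite):
        # 'best[height>360]' can only match the 720p entry, but that entry is
        # video-only ('acodec': 'none'). Because the extractor also provides a
        # complete (video+audio) format, the format list is treated as
        # "complete" and yt-dlp will not fall back to video-only matches, e.g.:
        #
        #   ydl = YDL({'format': 'best[height>360]'})
        #   ydl.process_ie_result(info_dict.copy())  # raises ExtractorError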
ydl = YDL({'format': 'best[height>360]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) def test_format_selection_issue_10083(self): # See https://github.com/ytdl-org/youtube-dl/issues/10083 formats = [ {'format_id': 'regular', 'height': 360, 'url': TEST_URL}, {'format_id': 'video', 'height': 720, 'acodec': 'none', 'url': TEST_URL}, {'format_id': 'audio', 'vcodec': 'none', 'url': TEST_URL}, ] info_dict = _make_result(formats) ydl = YDL({'format': 'best[height>360]/bestvideo[height>360]+bestaudio'}) ydl.process_ie_result(info_dict.copy()) self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'video+audio') def test_invalid_format_specs(self): def assert_syntax_error(format_spec): self.assertRaises(SyntaxError, YDL, {'format': format_spec}) assert_syntax_error('bestvideo,,best') assert_syntax_error('+bestaudio') assert_syntax_error('bestvideo+') assert_syntax_error('/') assert_syntax_error('[720<height]') def test_format_filtering(self): formats = [ {'format_id': 'A', 'filesize': 500, 'width': 1000}, {'format_id': 'B', 'filesize': 1000, 'width': 500}, {'format_id': 'C', 'filesize': 1000, 'width': 400}, {'format_id': 'D', 'filesize': 2000, 'width': 600}, {'format_id': 'E', 'filesize': 3000}, {'format_id': 'F'}, {'format_id': 'G', 'filesize': 1000000}, ] for f in formats: f['url'] = 'http://_/' f['ext'] = 'unknown' info_dict = _make_result(formats) ydl = YDL({'format': 'best[filesize<3000]'}) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'D') ydl = YDL({'format': 'best[filesize<=3000]'}) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'E') ydl = YDL({'format': 'best[filesize <= ? 3000]'}) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'F') ydl = YDL({'format': 'best [filesize = 1000] [width>450]'}) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'B') ydl = YDL({'format': 'best [filesize = 1000] [width!=450]'}) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'C') ydl = YDL({'format': '[filesize>?1]'}) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'G') ydl = YDL({'format': '[filesize<1M]'}) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'E') ydl = YDL({'format': '[filesize<1MiB]'}) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'G') ydl = YDL({'format': 'all[width>=400][width<=600]'}) ydl.process_ie_result(info_dict) downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] self.assertEqual(downloaded_ids, ['D', 'C', 'B']) ydl = YDL({'format': 'best[height<40]'}) try: ydl.process_ie_result(info_dict) except ExtractorError: pass self.assertEqual(ydl.downloaded_info_dicts, []) def test_default_format_spec(self): ydl = YDL({'simulate': True}) self.assertEqual(ydl._default_format_spec({}), 'bestvideo*+bestaudio/best') ydl = YDL({}) self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio') ydl = YDL({'simulate': True}) self.assertEqual(ydl._default_format_spec({'is_live': True}), 'bestvideo*+bestaudio/best') ydl = YDL({'outtmpl': '-'}) 
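        # (Editor's note) With 'outtmpl' set to '-' the download goes to
        # stdout, where separate video and audio streams cannot be merged, so
        # the default spec below is expected to avoid 'bestvideo*+bestaudio'.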
self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio') ydl = YDL({}) self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo*+bestaudio/best') self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio') class TestYoutubeDL(unittest.TestCase): def test_subtitles(self): def s_formats(lang, autocaption=False): return [{ 'ext': ext, 'url': f'http://localhost/video.{lang}.{ext}', '_auto': autocaption, } for ext in ['vtt', 'srt', 'ass']] subtitles = {l: s_formats(l) for l in ['en', 'fr', 'es']} auto_captions = {l: s_formats(l, True) for l in ['it', 'pt', 'es']} info_dict = { 'id': 'test', 'title': 'Test', 'url': 'http://localhost/video.mp4', 'subtitles': subtitles, 'automatic_captions': auto_captions, 'extractor': 'TEST', 'webpage_url': 'http://example.com/watch?v=shenanigans', } def get_info(params={}): params.setdefault('simulate', True) ydl = YDL(params) ydl.report_warning = lambda *args, **kargs: None return ydl.process_video_result(info_dict, download=False) result = get_info() self.assertFalse(result.get('requested_subtitles')) self.assertEqual(result['subtitles'], subtitles) self.assertEqual(result['automatic_captions'], auto_captions) result = get_info({'writesubtitles': True}) subs = result['requested_subtitles'] self.assertTrue(subs) self.assertEqual(set(subs.keys()), {'en'}) self.assertTrue(subs['en'].get('data') is None) self.assertEqual(subs['en']['ext'], 'ass') result = get_info({'writesubtitles': True, 'subtitlesformat': 'foo/srt'}) subs = result['requested_subtitles'] self.assertEqual(subs['en']['ext'], 'srt') result = get_info({'writesubtitles': True, 'subtitleslangs': ['es', 'fr', 'it']}) subs = result['requested_subtitles'] self.assertTrue(subs) self.assertEqual(set(subs.keys()), {'es', 'fr'}) result = get_info({'writesubtitles': True, 'subtitleslangs': ['all', '-en']}) subs = result['requested_subtitles'] self.assertTrue(subs) self.assertEqual(set(subs.keys()), {'es', 'fr'}) result = get_info({'writesubtitles': True, 'subtitleslangs': ['en', 'fr', '-en']}) subs = result['requested_subtitles'] self.assertTrue(subs) self.assertEqual(set(subs.keys()), {'fr'}) result = get_info({'writesubtitles': True, 'subtitleslangs': ['-en', 'en']}) subs = result['requested_subtitles'] self.assertTrue(subs) self.assertEqual(set(subs.keys()), {'en'}) result = get_info({'writesubtitles': True, 'subtitleslangs': ['e.+']}) subs = result['requested_subtitles'] self.assertTrue(subs) self.assertEqual(set(subs.keys()), {'es', 'en'}) result = get_info({'writesubtitles': True, 'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']}) subs = result['requested_subtitles'] self.assertTrue(subs) self.assertEqual(set(subs.keys()), {'es', 'pt'}) self.assertFalse(subs['es']['_auto']) self.assertTrue(subs['pt']['_auto']) result = get_info({'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']}) subs = result['requested_subtitles'] self.assertTrue(subs) self.assertEqual(set(subs.keys()), {'es', 'pt'}) self.assertTrue(subs['es']['_auto']) self.assertTrue(subs['pt']['_auto']) def test_add_extra_info(self): test_dict = { 'extractor': 'Foo', } extra_info = { 'extractor': 'Bar', 'playlist': 'funny videos', } YDL.add_extra_info(test_dict, extra_info) self.assertEqual(test_dict['extractor'], 'Foo') self.assertEqual(test_dict['playlist'], 'funny videos') outtmpl_info = { 'id': '1234', 'ext': 'mp4', 'width': None, 'height': 1080, 'filesize': 1024, 'title1': '$PATH', 'title2': '%PATH%', 'title3': 'foo/bar\\test', 'title4': 'foo "bar" test', 
'title5': 'áéí 𝐀', 'timestamp': 1618488000, 'duration': 100000, 'playlist_index': 1, 'playlist_autonumber': 2, '__last_playlist_index': 100, 'n_entries': 10, 'formats': [{'id': 'id 1'}, {'id': 'id 2'}, {'id': 'id 3'}] } def test_prepare_outtmpl_and_filename(self): def test(tmpl, expected, *, info=None, **params): params['outtmpl'] = tmpl ydl = YoutubeDL(params) ydl._num_downloads = 1 self.assertEqual(ydl.validate_outtmpl(tmpl), None) out = ydl.evaluate_outtmpl(tmpl, info or self.outtmpl_info) fname = ydl.prepare_filename(info or self.outtmpl_info) if not isinstance(expected, (list, tuple)): expected = (expected, expected) for (name, got), expect in zip((('outtmpl', out), ('filename', fname)), expected): if callable(expect): self.assertTrue(expect(got), f'Wrong {name} from {tmpl}') else: self.assertEqual(got, expect, f'Wrong {name} from {tmpl}') # Side-effects original_infodict = dict(self.outtmpl_info) test('foo.bar', 'foo.bar') original_infodict['epoch'] = self.outtmpl_info.get('epoch') self.assertTrue(isinstance(original_infodict['epoch'], int)) test('%(epoch)d', int_or_none) self.assertEqual(original_infodict, self.outtmpl_info) # Auto-generated fields test('%(id)s.%(ext)s', '1234.mp4') test('%(duration_string)s', ('27:46:40', '27-46-40')) test('%(resolution)s', '1080p') test('%(playlist_index)s', '001') test('%(playlist_autonumber)s', '02') test('%(autonumber)s', '00001') test('%(autonumber+2)03d', '005', autonumber_start=3) test('%(autonumber)s', '001', autonumber_size=3) # Escaping % test('%', '%') test('%%', '%') test('%%%%', '%%') test('%s', '%s') test('%%%s', '%%s') test('%d', '%d') test('%abc%', '%abc%') test('%%(width)06d.%(ext)s', '%(width)06d.mp4') test('%%%(height)s', '%1080') test('%(width)06d.%(ext)s', 'NA.mp4') test('%(width)06d.%%(ext)s', 'NA.%(ext)s') test('%%(width)06d.%(ext)s', '%(width)06d.mp4') # ID sanitization test('%(id)s', '_abcd', info={'id': '_abcd'}) test('%(some_id)s', '_abcd', info={'some_id': '_abcd'}) test('%(formats.0.id)s', '_abcd', info={'formats': [{'id': '_abcd'}]}) test('%(id)s', '-abcd', info={'id': '-abcd'}) test('%(id)s', '.abcd', info={'id': '.abcd'}) test('%(id)s', 'ab__cd', info={'id': 'ab__cd'}) test('%(id)s', ('ab:cd', 'ab:cd'), info={'id': 'ab:cd'}) test('%(id.0)s', '-', info={'id': '--'}) # Invalid templates self.assertTrue(isinstance(YoutubeDL.validate_outtmpl('%(title)'), ValueError)) test('%(invalid@tmpl|def)s', 'none', outtmpl_na_placeholder='none') test('%(..)s', 'NA') # Entire info_dict def expect_same_infodict(out): got_dict = json.loads(out) for info_field, expected in self.outtmpl_info.items(): self.assertEqual(got_dict.get(info_field), expected, info_field) return True test('%()j', (expect_same_infodict, str)) # NA placeholder NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(x|def)s-%(id)s.%(ext)s' test(NA_TEST_OUTTMPL, 'NA-NA-def-1234.mp4') test(NA_TEST_OUTTMPL, 'none-none-def-1234.mp4', outtmpl_na_placeholder='none') test(NA_TEST_OUTTMPL, '--def-1234.mp4', outtmpl_na_placeholder='') test('%(non_existent.0)s', 'NA') # String formatting FMT_TEST_OUTTMPL = '%%(height)%s.%%(ext)s' test(FMT_TEST_OUTTMPL % 's', '1080.mp4') test(FMT_TEST_OUTTMPL % 'd', '1080.mp4') test(FMT_TEST_OUTTMPL % '6d', ' 1080.mp4') test(FMT_TEST_OUTTMPL % '-6d', '1080 .mp4') test(FMT_TEST_OUTTMPL % '06d', '001080.mp4') test(FMT_TEST_OUTTMPL % ' 06d', ' 01080.mp4') test(FMT_TEST_OUTTMPL % ' 06d', ' 01080.mp4') test(FMT_TEST_OUTTMPL % '0 6d', ' 01080.mp4') test(FMT_TEST_OUTTMPL % '0 6d', ' 01080.mp4') test(FMT_TEST_OUTTMPL % ' 0 6d', ' 01080.mp4') # Type casting 
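        # (Editor's note) The conversion character re-casts the field before
        # formatting: %(id)d coerces the string id '1234' to an integer, and
        # %(height)c takes the first character of str(1080). A minimal
        # illustration outside the test() helper, using the same public API
        # exercised above:
        #
        #   ydl = YoutubeDL({'outtmpl': '%(id)d.%(ext)s'})
        #   ydl.evaluate_outtmpl('%(id)d', {'id': '1234'})  # -> '1234'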
test('%(id)d', '1234') test('%(height)c', '1') test('%(ext)c', 'm') test('%(id)d %(id)r', "1234 '1234'") test('%(id)r %(height)r', "'1234' 1080") test('%(ext)s-%(ext|def)d', 'mp4-def') test('%(width|0)04d', '0000') test('a%(width|)d', 'a', outtmpl_na_placeholder='none') FORMATS = self.outtmpl_info['formats'] sanitize = lambda x: x.replace(':', ':').replace('"', """).replace('\n', ' ') # Custom type casting test('%(formats.:.id)l', 'id 1, id 2, id 3') test('%(formats.:.id)#l', ('id 1\nid 2\nid 3', 'id 1 id 2 id 3')) test('%(ext)l', 'mp4') test('%(formats.:.id) 18l', ' id 1, id 2, id 3') test('%(formats)j', (json.dumps(FORMATS), sanitize(json.dumps(FORMATS)))) test('%(formats)#j', (json.dumps(FORMATS, indent=4), sanitize(json.dumps(FORMATS, indent=4)))) test('%(title5).3B', 'á') test('%(title5)U', 'áéí 𝐀') test('%(title5)#U', 'a\u0301e\u0301i\u0301 𝐀') test('%(title5)+U', 'áéí A') test('%(title5)+#U', 'a\u0301e\u0301i\u0301 A') test('%(height)D', '1k') test('%(filesize)#D', '1Ki') test('%(height)5.2D', ' 1.08k') test('%(title4)#S', 'foo_bar_test') test('%(title4).10S', ('foo "bar" ', 'foo "bar"' + ('#' if compat_os_name == 'nt' else ' '))) if compat_os_name == 'nt': test('%(title4)q', ('"foo \\"bar\\" test"', ""foo ⧹"bar⧹" test"")) test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', '"id 1" "id 2" "id 3"')) test('%(formats.0.id)#q', ('"id 1"', '"id 1"')) else: test('%(title4)q', ('\'foo "bar" test\'', '\'foo "bar" test\'')) test('%(formats.:.id)#q', "'id 1' 'id 2' 'id 3'") test('%(formats.0.id)#q', "'id 1'") # Internal formatting test('%(timestamp-1000>%H-%M-%S)s', '11-43-20') test('%(title|%)s %(title|%%)s', '% %%') test('%(id+1-height+3)05d', '00158') test('%(width+100)05d', 'NA') test('%(formats.0) 15s', ('% 15s' % FORMATS[0], '% 15s' % sanitize(str(FORMATS[0])))) test('%(formats.0)r', (repr(FORMATS[0]), sanitize(repr(FORMATS[0])))) test('%(height.0)03d', '001') test('%(-height.0)04d', '-001') test('%(formats.-1.id)s', FORMATS[-1]['id']) test('%(formats.0.id.-1)d', FORMATS[0]['id'][-1]) test('%(formats.3)s', 'NA') test('%(formats.:2:-1)r', repr(FORMATS[:2:-1])) test('%(formats.0.id.-1+id)f', '1235.000000') test('%(formats.0.id.-1+formats.1.id.-1)d', '3') # Alternates test('%(title,id)s', '1234') test('%(width-100,height+20|def)d', '1100') test('%(width-100,height+width|def)s', 'def') test('%(timestamp-x>%H\\,%M\\,%S,timestamp>%H\\,%M\\,%S)s', '12,00,00') # Replacement test('%(id&foo)s.bar', 'foo.bar') test('%(title&foo)s.bar', 'NA.bar') test('%(title&foo|baz)s.bar', 'baz.bar') test('%(x,id&foo|baz)s.bar', 'foo.bar') test('%(x,title&foo|baz)s.bar', 'baz.bar') # Laziness def gen(): yield from range(5) raise self.assertTrue(False, 'LazyList should not be evaluated till here') test('%(key.4)s', '4', info={'key': LazyList(gen())}) # Empty filename test('%(foo|)s-%(bar|)s.%(ext)s', '-.mp4') # test('%(foo|)s.%(ext)s', ('.mp4', '_.mp4')) # fixme # test('%(foo|)s', ('', '_')) # fixme # Environment variable expansion for prepare_filename os.environ['__yt_dlp_var'] = 'expanded' envvar = '%__yt_dlp_var%' if compat_os_name == 'nt' else '$__yt_dlp_var' test(envvar, (envvar, 'expanded')) if compat_os_name == 'nt': test('%s%', ('%s%', '%s%')) os.environ['s'] = 'expanded' test('%s%', ('%s%', 'expanded')) # %s% should be expanded before escaping %s os.environ['(test)s'] = 'expanded' test('%(test)s%', ('NA%', 'expanded')) # Environment should take priority over template # Path expansion and escaping test('Hello %(title1)s', 'Hello $PATH') test('Hello %(title2)s', 'Hello %PATH%') test('%(title3)s', 
('foo/bar\\test', 'foo⧸bar⧹test')) test('folder/%(title3)s', ('folder/foo/bar\\test', 'folder%sfoo⧸bar⧹test' % os.path.sep)) def test_format_note(self): ydl = YoutubeDL() self.assertEqual(ydl._format_note({}), '') assertRegexpMatches(self, ydl._format_note({ 'vbr': 10, }), r'^\s*10k$') assertRegexpMatches(self, ydl._format_note({ 'fps': 30, }), r'^30fps$') def test_postprocessors(self): filename = 'post-processor-testfile.mp4' audiofile = filename + '.mp3' class SimplePP(PostProcessor): def run(self, info): with open(audiofile, 'wt') as f: f.write('EXAMPLE') return [info['filepath']], info def run_pp(params, PP): with open(filename, 'wt') as f: f.write('EXAMPLE') ydl = YoutubeDL(params) ydl.add_post_processor(PP()) ydl.post_process(filename, {'filepath': filename}) run_pp({'keepvideo': True}, SimplePP) self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename) self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile) os.unlink(filename) os.unlink(audiofile) run_pp({'keepvideo': False}, SimplePP) self.assertFalse(os.path.exists(filename), '%s exists' % filename) self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile) os.unlink(audiofile) class ModifierPP(PostProcessor): def run(self, info): with open(info['filepath'], 'wt') as f: f.write('MODIFIED') return [], info run_pp({'keepvideo': False}, ModifierPP) self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename) os.unlink(filename) def test_match_filter(self): first = { 'id': '1', 'url': TEST_URL, 'title': 'one', 'extractor': 'TEST', 'duration': 30, 'filesize': 10 * 1024, 'playlist_id': '42', 'uploader': "變態妍字幕版 太妍 тест", 'creator': "тест ' 123 ' тест--", 'webpage_url': 'http://example.com/watch?v=shenanigans', } second = { 'id': '2', 'url': TEST_URL, 'title': 'two', 'extractor': 'TEST', 'duration': 10, 'description': 'foo', 'filesize': 5 * 1024, 'playlist_id': '43', 'uploader': "тест 123", 'webpage_url': 'http://example.com/watch?v=SHENANIGANS', } videos = [first, second] def get_videos(filter_=None): ydl = YDL({'match_filter': filter_, 'simulate': True}) for v in videos: ydl.process_ie_result(v, download=True) return [v['id'] for v in ydl.downloaded_info_dicts] res = get_videos() self.assertEqual(res, ['1', '2']) def f(v, incomplete): if v['id'] == '1': return None else: return 'Video id is not 1' res = get_videos(f) self.assertEqual(res, ['1']) f = match_filter_func('duration < 30') res = get_videos(f) self.assertEqual(res, ['2']) f = match_filter_func('description = foo') res = get_videos(f) self.assertEqual(res, ['2']) f = match_filter_func('description =? 
foo') res = get_videos(f) self.assertEqual(res, ['1', '2']) f = match_filter_func('filesize > 5KiB') res = get_videos(f) self.assertEqual(res, ['1']) f = match_filter_func('playlist_id = 42') res = get_videos(f) self.assertEqual(res, ['1']) f = match_filter_func('uploader = "變態妍字幕版 太妍 тест"') res = get_videos(f) self.assertEqual(res, ['1']) f = match_filter_func('uploader != "變態妍字幕版 太妍 тест"') res = get_videos(f) self.assertEqual(res, ['2']) f = match_filter_func('creator = "тест \' 123 \' тест--"') res = get_videos(f) self.assertEqual(res, ['1']) f = match_filter_func("creator = 'тест \\' 123 \\' тест--'") res = get_videos(f) self.assertEqual(res, ['1']) f = match_filter_func(r"creator = 'тест \' 123 \' тест--' & duration > 30") res = get_videos(f) self.assertEqual(res, []) def test_playlist_items_selection(self): INDICES, PAGE_SIZE = list(range(1, 11)), 3 def entry(i, evaluated): evaluated.append(i) return { 'id': str(i), 'title': str(i), 'url': TEST_URL, } def pagedlist_entries(evaluated): def page_func(n): start = PAGE_SIZE * n for i in INDICES[start: start + PAGE_SIZE]: yield entry(i, evaluated) return OnDemandPagedList(page_func, PAGE_SIZE) def page_num(i): return (i + PAGE_SIZE - 1) // PAGE_SIZE def generator_entries(evaluated): for i in INDICES: yield entry(i, evaluated) def list_entries(evaluated): return list(generator_entries(evaluated)) def lazylist_entries(evaluated): return LazyList(generator_entries(evaluated)) def get_downloaded_info_dicts(params, entries): ydl = YDL(params) ydl.process_ie_result({ '_type': 'playlist', 'id': 'test', 'extractor': 'test:playlist', 'extractor_key': 'test:playlist', 'webpage_url': 'http://example.com', 'entries': entries, }) return ydl.downloaded_info_dicts def test_selection(params, expected_ids, evaluate_all=False): expected_ids = list(expected_ids) if evaluate_all: generator_eval = pagedlist_eval = INDICES elif not expected_ids: generator_eval = pagedlist_eval = [] else: generator_eval = INDICES[0: max(expected_ids)] pagedlist_eval = INDICES[PAGE_SIZE * page_num(min(expected_ids)) - PAGE_SIZE: PAGE_SIZE * page_num(max(expected_ids))] for name, func, expected_eval in ( ('list', list_entries, INDICES), ('Generator', generator_entries, generator_eval), # ('LazyList', lazylist_entries, generator_eval), # Generator and LazyList follow the exact same code path ('PagedList', pagedlist_entries, pagedlist_eval), ): evaluated = [] entries = func(evaluated) results = [(v['playlist_autonumber'] - 1, (int(v['id']), v['playlist_index'])) for v in get_downloaded_info_dicts(params, entries)] self.assertEqual(results, list(enumerate(zip(expected_ids, expected_ids))), f'Entries of {name} for {params}') self.assertEqual(sorted(evaluated), expected_eval, f'Evaluation of {name} for {params}') test_selection({}, INDICES) test_selection({'playlistend': 20}, INDICES, True) test_selection({'playlistend': 2}, INDICES[:2]) test_selection({'playliststart': 11}, [], True) test_selection({'playliststart': 2}, INDICES[1:]) test_selection({'playlist_items': '2-4'}, INDICES[1:4]) test_selection({'playlist_items': '2,4'}, [2, 4]) test_selection({'playlist_items': '20'}, [], True) test_selection({'playlist_items': '0'}, []) # Tests for https://github.com/ytdl-org/youtube-dl/issues/10591 test_selection({'playlist_items': '2-4,3-4,3'}, [2, 3, 4]) test_selection({'playlist_items': '4,2'}, [4, 2]) # Tests for https://github.com/yt-dlp/yt-dlp/issues/720 # https://github.com/yt-dlp/yt-dlp/issues/302 test_selection({'playlistreverse': True}, INDICES[::-1]) 
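        # (Editor's note) With 'playlistreverse', the selection options such as
        # 'playliststart' and 'playlist_items' pick their entries first and
        # only the resulting order is reversed, as the surrounding cases
        # demonstrate (e.g. items '2,4' reversed yield [4, 2]).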
test_selection({'playliststart': 2, 'playlistreverse': True}, INDICES[:0:-1]) test_selection({'playlist_items': '2,4', 'playlistreverse': True}, [4, 2]) test_selection({'playlist_items': '4,2'}, [4, 2]) # Tests for --playlist-items start:end:step test_selection({'playlist_items': ':'}, INDICES, True) test_selection({'playlist_items': '::1'}, INDICES, True) test_selection({'playlist_items': '::-1'}, INDICES[::-1], True) test_selection({'playlist_items': ':6'}, INDICES[:6]) test_selection({'playlist_items': ':-6'}, INDICES[:-5], True) test_selection({'playlist_items': '-1:6:-2'}, INDICES[:4:-2], True) test_selection({'playlist_items': '9:-6:-2'}, INDICES[8:3:-2], True) test_selection({'playlist_items': '1:inf:2'}, INDICES[::2], True) test_selection({'playlist_items': '-2:inf'}, INDICES[-2:], True) test_selection({'playlist_items': ':inf:-1'}, [], True) test_selection({'playlist_items': '0-2:2'}, [2]) test_selection({'playlist_items': '1-:2'}, INDICES[::2], True) test_selection({'playlist_items': '0--2:2'}, INDICES[1:-1:2], True) test_selection({'playlist_items': '10::3'}, [10], True) test_selection({'playlist_items': '-1::3'}, [10], True) test_selection({'playlist_items': '11::3'}, [], True) test_selection({'playlist_items': '-15::2'}, INDICES[1::2], True) test_selection({'playlist_items': '-15::15'}, [], True) def test_urlopen_no_file_protocol(self): # see https://github.com/ytdl-org/youtube-dl/issues/8227 ydl = YDL() self.assertRaises(urllib.error.URLError, ydl.urlopen, 'file:///etc/passwd') def test_do_not_override_ie_key_in_url_transparent(self): ydl = YDL() class Foo1IE(InfoExtractor): _VALID_URL = r'foo1:' def _real_extract(self, url): return { '_type': 'url_transparent', 'url': 'foo2:', 'ie_key': 'Foo2', 'title': 'foo1 title', 'id': 'foo1_id', } class Foo2IE(InfoExtractor): _VALID_URL = r'foo2:' def _real_extract(self, url): return { '_type': 'url', 'url': 'foo3:', 'ie_key': 'Foo3', } class Foo3IE(InfoExtractor): _VALID_URL = r'foo3:' def _real_extract(self, url): return _make_result([{'url': TEST_URL}], title='foo3 title') ydl.add_info_extractor(Foo1IE(ydl)) ydl.add_info_extractor(Foo2IE(ydl)) ydl.add_info_extractor(Foo3IE(ydl)) ydl.extract_info('foo1:') downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['url'], TEST_URL) self.assertEqual(downloaded['title'], 'foo1 title') self.assertEqual(downloaded['id'], 'testid') self.assertEqual(downloaded['extractor'], 'testex') self.assertEqual(downloaded['extractor_key'], 'TestEx') # Test case for https://github.com/ytdl-org/youtube-dl/issues/27064 def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self): class _YDL(YDL): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def trouble(self, s, tb=None): pass ydl = _YDL({ 'format': 'extra', 'ignoreerrors': True, }) class VideoIE(InfoExtractor): _VALID_URL = r'video:(?P<id>\d+)' def _real_extract(self, url): video_id = self._match_id(url) formats = [{ 'format_id': 'default', 'url': 'url:', }] if video_id == '0': raise ExtractorError('foo') if video_id == '2': formats.append({ 'format_id': 'extra', 'url': TEST_URL, }) return { 'id': video_id, 'title': 'Video %s' % video_id, 'formats': formats, } class PlaylistIE(InfoExtractor): _VALID_URL = r'playlist:' def _entries(self): for n in range(3): video_id = str(n) yield { '_type': 'url_transparent', 'ie_key': VideoIE.ie_key(), 'id': video_id, 'url': 'video:%s' % video_id, 'title': 'Video Transparent %s' % video_id, } def _real_extract(self, url): return self.playlist_result(self._entries()) 
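        # (Editor's note) Each playlist entry above is 'url_transparent': the
        # outer entry's metadata (the 'Video Transparent %d' titles) is merged
        # over whatever VideoIE extracts, while extraction itself is delegated.
        # With 'ignoreerrors' set, the entry that raises (id 0) and the entry
        # lacking the requested 'extra' format (id 1) become None instead of
        # aborting the playlist, which the assertions below rely on.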
        ydl.add_info_extractor(VideoIE(ydl))
        ydl.add_info_extractor(PlaylistIE(ydl))
        info = ydl.extract_info('playlist:')
        entries = info['entries']
        self.assertEqual(len(entries), 3)
        self.assertTrue(entries[0] is None)
        self.assertTrue(entries[1] is None)
        self.assertEqual(len(ydl.downloaded_info_dicts), 1)
        downloaded = ydl.downloaded_info_dicts[0]
        entries[2].pop('requested_downloads', None)
        self.assertEqual(entries[2], downloaded)
        self.assertEqual(downloaded['url'], TEST_URL)
        self.assertEqual(downloaded['title'], 'Video Transparent 2')
        self.assertEqual(downloaded['id'], '2')
        self.assertEqual(downloaded['extractor'], 'Video')
        self.assertEqual(downloaded['extractor_key'], 'Video')


if __name__ == '__main__':
    unittest.main()


yt-dlp-2022.08.19/test/test_YoutubeDLCookieJar.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import re
import tempfile

from yt_dlp.utils import YoutubeDLCookieJar


class TestYoutubeDLCookieJar(unittest.TestCase):
    def test_keep_session_cookies(self):
        cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
        cookiejar.load(ignore_discard=True, ignore_expires=True)
        tf = tempfile.NamedTemporaryFile(delete=False)
        try:
            cookiejar.save(filename=tf.name, ignore_discard=True, ignore_expires=True)
            temp = tf.read().decode()
            self.assertTrue(re.search(
                r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpiresEmpty\s+YoutubeDLExpiresEmptyValue', temp))
            self.assertTrue(re.search(
                r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpires0\s+YoutubeDLExpires0Value', temp))
        finally:
            tf.close()
            os.remove(tf.name)

    def test_strip_httponly_prefix(self):
        cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
        cookiejar.load(ignore_discard=True, ignore_expires=True)

        def assert_cookie_has_value(key):
            self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE')

        assert_cookie_has_value('HTTPONLY_COOKIE')
        assert_cookie_has_value('JS_ACCESSIBLE_COOKIE')

    def test_malformed_cookies(self):
        cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/malformed_cookies.txt')
        cookiejar.load(ignore_discard=True, ignore_expires=True)
        # Cookies should be empty since all malformed cookie file entries
        # will be ignored
        self.assertFalse(cookiejar._cookies)


if __name__ == '__main__':
    unittest.main()
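# --- Illustrative sketch (not part of the test suite) ---------------------
# The Netscape cookie-file round-trip exercised by test_keep_session_cookies
# can be reproduced with the stdlib alone. YoutubeDLCookieJar builds on
# http.cookiejar.MozillaCookieJar, so this only demonstrates the underlying
# ignore_discard/ignore_expires semantics; the real class adds handling of
# #HttpOnly_ prefixes and malformed lines on top.
import http.cookiejar
import os
import tempfile

NETSCAPE_TEXT = (
    '# Netscape HTTP Cookie File\n'
    'www.example.com\tFALSE\t/\tTRUE\t0\tSESSION_COOKIE\tSESSION_VALUE\n'
)

fd, path = tempfile.mkstemp(suffix='.txt')
with os.fdopen(fd, 'w') as f:
    f.write(NETSCAPE_TEXT)

jar = http.cookiejar.MozillaCookieJar(path)
# An expiry of 0 marks a session cookie; without ignore_expires=True,
# load() would silently drop it - exactly what the test guards against
jar.load(ignore_discard=True, ignore_expires=True)
assert [c.name for c in jar] == ['SESSION_COOKIE']
jar.save(path, ignore_discard=True, ignore_expires=True)
os.remove(path)
# ---------------------------------------------------------------------------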
yt-dlp-2022.08.19/test/test_aes.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import base64

from yt_dlp.aes import (
    BLOCK_SIZE_BYTES,
    aes_cbc_decrypt,
    aes_cbc_decrypt_bytes,
    aes_cbc_encrypt,
    aes_ctr_decrypt,
    aes_ctr_encrypt,
    aes_decrypt,
    aes_decrypt_text,
    aes_ecb_decrypt,
    aes_ecb_encrypt,
    aes_encrypt,
    aes_gcm_decrypt_and_verify,
    aes_gcm_decrypt_and_verify_bytes,
    key_expansion,
    pad_block,
)
from yt_dlp.dependencies import Cryptodome_AES
from yt_dlp.utils import bytes_to_intlist, intlist_to_bytes

# the encrypted data can be generated with 'devscripts/generate_aes_testdata.py'


class TestAES(unittest.TestCase):
    def setUp(self):
        self.key = self.iv = [0x20, 0x15] + 14 * [0]
        self.secret_msg = b'Secret message goes here'

    def test_encrypt(self):
        msg = b'message'
        key = list(range(16))
        encrypted = aes_encrypt(bytes_to_intlist(msg), key)
        decrypted = intlist_to_bytes(aes_decrypt(encrypted, key))
        self.assertEqual(decrypted, msg)

    def test_cbc_decrypt(self):
        data = b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\x27\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd'
        decrypted = intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(data), self.key, self.iv))
        self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
        if Cryptodome_AES:
            decrypted = aes_cbc_decrypt_bytes(data, intlist_to_bytes(self.key), intlist_to_bytes(self.iv))
            self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)

    def test_cbc_encrypt(self):
        data = bytes_to_intlist(self.secret_msg)
        encrypted = intlist_to_bytes(aes_cbc_encrypt(data, self.key, self.iv))
        self.assertEqual(
            encrypted,
            b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd')

    def test_ctr_decrypt(self):
        data = bytes_to_intlist(b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08')
        decrypted = intlist_to_bytes(aes_ctr_decrypt(data, self.key, self.iv))
        self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)

    def test_ctr_encrypt(self):
        data = bytes_to_intlist(self.secret_msg)
        encrypted = intlist_to_bytes(aes_ctr_encrypt(data, self.key, self.iv))
        self.assertEqual(
            encrypted,
            b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08')

    def test_gcm_decrypt(self):
        data = b'\x159Y\xcf5eud\x90\x9c\x85&]\x14\x1d\x0f.\x08\xb4T\xe4/\x17\xbd'
        authentication_tag = b'\xe8&I\x80rI\x07\x9d}YWuU@:e'

        decrypted = intlist_to_bytes(aes_gcm_decrypt_and_verify(
            bytes_to_intlist(data), self.key, bytes_to_intlist(authentication_tag), self.iv[:12]))
        self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
        if Cryptodome_AES:
            decrypted = aes_gcm_decrypt_and_verify_bytes(
                data, intlist_to_bytes(self.key), authentication_tag, intlist_to_bytes(self.iv[:12]))
            self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)

    def test_decrypt_text(self):
        password = intlist_to_bytes(self.key).decode()
        encrypted = 
base64.b64encode( intlist_to_bytes(self.iv[:8]) + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae' ).decode() decrypted = (aes_decrypt_text(encrypted, password, 16)) self.assertEqual(decrypted, self.secret_msg) password = intlist_to_bytes(self.key).decode() encrypted = base64.b64encode( intlist_to_bytes(self.iv[:8]) + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83' ).decode() decrypted = (aes_decrypt_text(encrypted, password, 32)) self.assertEqual(decrypted, self.secret_msg) def test_ecb_encrypt(self): data = bytes_to_intlist(self.secret_msg) data += [0x08] * (BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES) encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key, self.iv)) self.assertEqual( encrypted, b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') def test_ecb_decrypt(self): data = bytes_to_intlist(b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') decrypted = intlist_to_bytes(aes_ecb_decrypt(data, self.key, self.iv)) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) def test_key_expansion(self): key = '4f6bdaa39e2f8cb07f5e722d9edef314' self.assertEqual(key_expansion(bytes_to_intlist(bytearray.fromhex(key))), [ 0x4F, 0x6B, 0xDA, 0xA3, 0x9E, 0x2F, 0x8C, 0xB0, 0x7F, 0x5E, 0x72, 0x2D, 0x9E, 0xDE, 0xF3, 0x14, 0x53, 0x66, 0x20, 0xA8, 0xCD, 0x49, 0xAC, 0x18, 0xB2, 0x17, 0xDE, 0x35, 0x2C, 0xC9, 0x2D, 0x21, 0x8C, 0xBE, 0xDD, 0xD9, 0x41, 0xF7, 0x71, 0xC1, 0xF3, 0xE0, 0xAF, 0xF4, 0xDF, 0x29, 0x82, 0xD5, 0x2D, 0xAD, 0xDE, 0x47, 0x6C, 0x5A, 0xAF, 0x86, 0x9F, 0xBA, 0x00, 0x72, 0x40, 0x93, 0x82, 0xA7, 0xF9, 0xBE, 0x82, 0x4E, 0x95, 0xE4, 0x2D, 0xC8, 0x0A, 0x5E, 0x2D, 0xBA, 0x4A, 0xCD, 0xAF, 0x1D, 0x54, 0xC7, 0x26, 0x98, 0xC1, 0x23, 0x0B, 0x50, 0xCB, 0x7D, 0x26, 0xEA, 0x81, 0xB0, 0x89, 0xF7, 0x93, 0x60, 0x4E, 0x94, 0x52, 0x43, 0x45, 0xC4, 0x99, 0x3E, 0x63, 0x2E, 0x18, 0x8E, 0xEA, 0xD9, 0xCA, 0xE7, 0x7B, 0x39, 0x98, 0xA4, 0x3E, 0xFD, 0x01, 0x9A, 0x5D, 0xD3, 0x19, 0x14, 0xB7, 0x0A, 0xB0, 0x4E, 0x1C, 0xED, 0x28, 0xEA, 0x22, 0x10, 0x29, 0x70, 0x7F, 0xC3, 0x30, 0x64, 0xC8, 0xC9, 0xE8, 0xA6, 0xC1, 0xE9, 0xC0, 0x4C, 0xE3, 0xF9, 0xE9, 0x3C, 0x9C, 0x3A, 0xD9, 0x58, 0x54, 0xF3, 0xB4, 0x86, 0xCC, 0xDC, 0x74, 0xCA, 0x2F, 0x25, 0x9D, 0xF6, 0xB3, 0x1F, 0x44, 0xAE, 0xE7, 0xEC]) def test_pad_block(self): block = [0x21, 0xA0, 0x43, 0xFF] self.assertEqual(pad_block(block, 'pkcs7'), block + [0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C]) self.assertEqual(pad_block(block, 'iso7816'), block + [0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]) self.assertEqual(pad_block(block, 'whitespace'), block + [0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20]) self.assertEqual(pad_block(block, 'zero'), block + [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]) block = list(range(16)) for mode in ('pkcs7', 'iso7816', 'whitespace', 'zero'): self.assertEqual(pad_block(block, mode), block, mode) if __name__ == '__main__': unittest.main() 
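# --- Illustrative sketch (not part of the test suite) ---------------------
# Why the assertions above strip b'\x08': the 24-byte secret message is
# padded PKCS#7-style to the 16-byte AES block size, and 16 - (24 % 16) = 8,
# so the pad is eight 0x08 bytes. The bytes_to_intlist/intlist_to_bytes
# helpers are, in effect, list(data) and bytes(seq).
BLOCK_SIZE_BYTES = 16
msg = b'Secret message goes here'                         # 24 bytes
pad_len = BLOCK_SIZE_BYTES - len(msg) % BLOCK_SIZE_BYTES  # == 8
padded = msg + bytes([pad_len]) * pad_len                 # PKCS#7: pad byte == pad length
assert len(padded) % BLOCK_SIZE_BYTES == 0
assert padded.rstrip(b'\x08') == msg                      # what rstrip(b'\x08') recovers
assert bytes(list(msg)) == msg                            # intlist round-trip
# ---------------------------------------------------------------------------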
����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/test/test_age_restriction.py������������������������������������������������������0000664�0000000�0000000�00000002534�14277552437�0021125�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/env python3 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import is_download_test, try_rm from yt_dlp import YoutubeDL def _download_restricted(url, filename, age): """ Returns true if the file has been downloaded """ params = { 'age_limit': age, 'skip_download': True, 'writeinfojson': True, 'outtmpl': '%(id)s.%(ext)s', } ydl = YoutubeDL(params) ydl.add_default_info_extractors() json_filename = os.path.splitext(filename)[0] + '.info.json' try_rm(json_filename) ydl.download([url]) res = os.path.exists(json_filename) try_rm(json_filename) return res @is_download_test class TestAgeRestriction(unittest.TestCase): def _assert_restricted(self, url, filename, age, old_age=None): self.assertTrue(_download_restricted(url, filename, old_age)) self.assertFalse(_download_restricted(url, filename, age)) def test_youtube(self): self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10) def test_youporn(self): self._assert_restricted( 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', '505835.mp4', 2, old_age=25) if __name__ == '__main__': unittest.main() ��������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/test/test_all_urls.py�������������������������������������������������������������0000664�0000000�0000000�00000013750�14277552437�0017563�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/env python3 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import collections from test.helper import gettestcases from yt_dlp.extractor import FacebookIE, YoutubeIE, gen_extractors class TestAllURLsMatching(unittest.TestCase): def setUp(self): self.ies = gen_extractors() def matching_ies(self, url): return [ie.IE_NAME for ie in self.ies if ie.suitable(url) and ie.IE_NAME != 'generic'] def assertMatch(self, url, ie_list): self.assertEqual(self.matching_ies(url), ie_list) def test_youtube_playlist_matching(self): assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist']) assertTab = lambda url: self.assertMatch(url, ['youtube:tab']) assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585 assertPlaylist('PL63F0C78739B09958') assertTab('https://www.youtube.com/AsapSCIENCE') 
assertTab('https://www.youtube.com/embedded') assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668 self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M')) # Top tracks assertTab('https://www.youtube.com/playlist?list=MCUS.20142101') def test_youtube_matching(self): self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M')) self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) # 668 self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube']) # self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) # /v/ is no longer valid self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube']) self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube']) def test_youtube_channel_matching(self): assertChannel = lambda url: self.assertMatch(url, ['youtube:tab']) assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM') assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec') assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') def test_youtube_user_matching(self): self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab']) def test_youtube_feeds(self): self.assertMatch('https://www.youtube.com/feed/library', ['youtube:tab']) self.assertMatch('https://www.youtube.com/feed/history', ['youtube:tab']) self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab']) self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab']) def test_youtube_search_matching(self): self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) def test_facebook_matching(self): self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268')) self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793')) def test_no_duplicates(self): ies = gen_extractors() for tc in gettestcases(include_onlymatching=True): url = tc['url'] for ie in ies: if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'): self.assertTrue(ie.suitable(url), f'{type(ie).__name__} should match URL {url!r}') else: self.assertFalse( ie.suitable(url), f'{type(ie).__name__} should not match URL {url!r} . 
That URL belongs to {tc["name"]}.')

    def test_keywords(self):
        self.assertMatch(':ytsubs', ['youtube:subscriptions'])
        self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
        self.assertMatch(':ythistory', ['youtube:history'])

    def test_vimeo_matching(self):
        self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel'])
        self.assertMatch('https://vimeo.com/channels/31259', ['vimeo:channel'])
        self.assertMatch('https://vimeo.com/channels/31259/53576664', ['vimeo'])
        self.assertMatch('https://vimeo.com/user7108434', ['vimeo:user'])
        self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user'])
        self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review'])

    # https://github.com/ytdl-org/youtube-dl/issues/1930
    def test_soundcloud_not_matching_sets(self):
        self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set'])

    def test_tumblr(self):
        self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr'])
        self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr'])

    def test_pbs(self):
        # https://github.com/ytdl-org/youtube-dl/issues/2350
        self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['pbs'])
        self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['pbs'])

    def test_no_duplicated_ie_names(self):
        name_accu = collections.defaultdict(list)
        for ie in self.ies:
            name_accu[ie.IE_NAME.lower()].append(type(ie).__name__)
        for (ie_name, ie_list) in name_accu.items():
            self.assertEqual(
                len(ie_list), 1,
                f'Multiple extractors with the same IE_NAME "{ie_name}" ({", ".join(ie_list)})')


if __name__ == '__main__':
    unittest.main()


yt-dlp-2022.08.19/test/test_cache.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import shutil

from test.helper import FakeYDL
from yt_dlp.cache import Cache


def _is_empty(d):
    return not bool(os.listdir(d))


def _mkdir(d):
    if not os.path.exists(d):
        os.mkdir(d)


class TestCache(unittest.TestCase):
    def setUp(self):
        TEST_DIR = os.path.dirname(os.path.abspath(__file__))
        TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata')
        _mkdir(TESTDATA_DIR)
        self.test_dir = os.path.join(TESTDATA_DIR, 'cache_test')
        self.tearDown()

    def tearDown(self):
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)

    def test_cache(self):
        ydl = FakeYDL({
            'cachedir': self.test_dir,
        })
        c = Cache(ydl)
        obj = {'x': 1, 'y': ['ä', '\\a', True]}
        self.assertEqual(c.load('test_cache', 'k.'), None)
        c.store('test_cache', 'k.', obj)
        self.assertEqual(c.load('test_cache', 'k2'), None)
        self.assertFalse(_is_empty(self.test_dir))
        self.assertEqual(c.load('test_cache', 'k.'), obj)
        self.assertEqual(c.load('test_cache', 'y'), None)
        self.assertEqual(c.load('test_cache2', 'k.'), None)
        c.remove()
        self.assertFalse(os.path.exists(self.test_dir))
        self.assertEqual(c.load('test_cache', 'k.'), None)


if __name__ == '__main__':
    unittest.main()
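# --- Illustrative sketch (not part of the test suite) ---------------------
# A toy, stdlib-only cache with the same store/load/remove contract that
# test_cache checks above: load() of a missing key returns None, store()
# creates files under the cache directory, remove() wipes it. yt-dlp's real
# Cache adds key sanitising, versioning and error handling; the on-disk
# layout used here is only an assumption for illustration.
import json
import os
import shutil


class ToyCache:
    def __init__(self, root):
        self.root = root

    def _fn(self, section, key):
        return os.path.join(self.root, section, f'{key}.json')

    def store(self, section, key, obj):
        fn = self._fn(section, key)
        os.makedirs(os.path.dirname(fn), exist_ok=True)
        with open(fn, 'w', encoding='utf-8') as f:
            json.dump(obj, f)

    def load(self, section, key):
        try:
            with open(self._fn(section, key), encoding='utf-8') as f:
                return json.load(f)
        except (FileNotFoundError, ValueError):
            return None  # missing or unreadable entries behave like a miss

    def remove(self):
        shutil.rmtree(self.root, ignore_errors=True)
# ---------------------------------------------------------------------------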
��������������������yt-dlp-2022.08.19/test/test_compat.py���������������������������������������������������������������0000664�0000000�0000000�00000010665�14277552437�0017233�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/env python3 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import struct import urllib.parse from yt_dlp import compat from yt_dlp.compat import ( compat_etree_fromstring, compat_expanduser, compat_urllib_parse_unquote, compat_urllib_parse_urlencode, ) class TestCompat(unittest.TestCase): def test_compat_passthrough(self): with self.assertWarns(DeprecationWarning): compat.compat_basestring with self.assertWarns(DeprecationWarning): compat.WINDOWS_VT_MODE # TODO: Test submodule # compat.asyncio.events # Must not raise error def test_compat_expanduser(self): old_home = os.environ.get('HOME') test_str = R'C:\Documents and Settings\тест\Application Data' try: os.environ['HOME'] = test_str self.assertEqual(compat_expanduser('~'), test_str) finally: os.environ['HOME'] = old_home or '' def test_compat_urllib_parse_unquote(self): self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def') self.assertEqual(compat_urllib_parse_unquote('%7e/abc+def'), '~/abc+def') self.assertEqual(compat_urllib_parse_unquote(''), '') self.assertEqual(compat_urllib_parse_unquote('%'), '%') self.assertEqual(compat_urllib_parse_unquote('%%'), '%%') self.assertEqual(compat_urllib_parse_unquote('%%%'), '%%%') self.assertEqual(compat_urllib_parse_unquote('%2F'), '/') self.assertEqual(compat_urllib_parse_unquote('%2f'), '/') self.assertEqual(compat_urllib_parse_unquote('%E6%B4%A5%E6%B3%A2'), '津波') self.assertEqual( compat_urllib_parse_unquote('''<meta property="og:description" content="%E2%96%81%E2%96%82%E2%96%83%E2%96%84%25%E2%96%85%E2%96%86%E2%96%87%E2%96%88" /> %<a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%B3%D9%88%D9%86%D8%A7%D9%85%D9%8A">%a'''), '''<meta property="og:description" content="▁▂▃▄%▅▆▇█" /> %<a href="https://ar.wikipedia.org/wiki/تسونامي">%a''') self.assertEqual( compat_urllib_parse_unquote('''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%86%B6%I%Break%25Things%'''), '''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%Things%''') def test_compat_urllib_parse_unquote_plus(self): self.assertEqual(urllib.parse.unquote_plus('abc%20def'), 'abc def') self.assertEqual(urllib.parse.unquote_plus('%7e/abc+def'), '~/abc def') def test_compat_urllib_parse_urlencode(self): self.assertEqual(compat_urllib_parse_urlencode({'abc': 'def'}), 'abc=def') self.assertEqual(compat_urllib_parse_urlencode({'abc': b'def'}), 'abc=def') self.assertEqual(compat_urllib_parse_urlencode({b'abc': 'def'}), 'abc=def') self.assertEqual(compat_urllib_parse_urlencode({b'abc': b'def'}), 'abc=def') self.assertEqual(compat_urllib_parse_urlencode([('abc', 'def')]), 'abc=def') self.assertEqual(compat_urllib_parse_urlencode([('abc', b'def')]), 'abc=def') self.assertEqual(compat_urllib_parse_urlencode([(b'abc', 'def')]), 'abc=def') self.assertEqual(compat_urllib_parse_urlencode([(b'abc', b'def')]), 'abc=def') def 
test_compat_etree_fromstring(self): xml = ''' <root foo="bar" spam="中文"> <normal>foo</normal> <chinese>中文</chinese> <foo><bar>spam</bar></foo> </root> ''' doc = compat_etree_fromstring(xml.encode()) self.assertTrue(isinstance(doc.attrib['foo'], str)) self.assertTrue(isinstance(doc.attrib['spam'], str)) self.assertTrue(isinstance(doc.find('normal').text, str)) self.assertTrue(isinstance(doc.find('chinese').text, str)) self.assertTrue(isinstance(doc.find('foo/bar').text, str)) def test_compat_etree_fromstring_doctype(self): xml = '''<?xml version="1.0"?> <!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd"> <smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>''' compat_etree_fromstring(xml) def test_struct_unpack(self): self.assertEqual(struct.unpack('!B', b'\x00'), (0,)) if __name__ == '__main__': unittest.main() ���������������������������������������������������������������������������yt-dlp-2022.08.19/test/test_cookies.py��������������������������������������������������������������0000664�0000000�0000000�00000015425�14277552437�0017403�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import unittest from datetime import datetime, timezone from yt_dlp import cookies from yt_dlp.cookies import ( LinuxChromeCookieDecryptor, MacChromeCookieDecryptor, WindowsChromeCookieDecryptor, _get_linux_desktop_environment, _LinuxDesktopEnvironment, parse_safari_cookies, pbkdf2_sha1, ) class Logger: def debug(self, message, *args, **kwargs): print(f'[verbose] {message}') def info(self, message, *args, **kwargs): print(message) def warning(self, message, *args, **kwargs): self.error(message) def error(self, message, *args, **kwargs): raise Exception(message) class MonkeyPatch: def __init__(self, module, temporary_values): self._module = module self._temporary_values = temporary_values self._backup_values = {} def __enter__(self): for name, temp_value in self._temporary_values.items(): self._backup_values[name] = getattr(self._module, name) setattr(self._module, name, temp_value) def __exit__(self, exc_type, exc_val, exc_tb): for name, backup_value in self._backup_values.items(): setattr(self._module, name, backup_value) class TestCookies(unittest.TestCase): def test_get_desktop_environment(self): """ based on https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util_unittest.cc """ test_cases = [ ({}, _LinuxDesktopEnvironment.OTHER), ({'DESKTOP_SESSION': 'gnome'}, _LinuxDesktopEnvironment.GNOME), ({'DESKTOP_SESSION': 'mate'}, _LinuxDesktopEnvironment.GNOME), ({'DESKTOP_SESSION': 'kde4'}, _LinuxDesktopEnvironment.KDE), ({'DESKTOP_SESSION': 'kde'}, _LinuxDesktopEnvironment.KDE), ({'DESKTOP_SESSION': 'xfce'}, _LinuxDesktopEnvironment.XFCE), ({'GNOME_DESKTOP_SESSION_ID': 1}, _LinuxDesktopEnvironment.GNOME), ({'KDE_FULL_SESSION': 1}, _LinuxDesktopEnvironment.KDE), ({'XDG_CURRENT_DESKTOP': 'X-Cinnamon'}, _LinuxDesktopEnvironment.CINNAMON), ({'XDG_CURRENT_DESKTOP': 'GNOME'}, _LinuxDesktopEnvironment.GNOME), ({'XDG_CURRENT_DESKTOP': 'GNOME:GNOME-Classic'}, _LinuxDesktopEnvironment.GNOME), ({'XDG_CURRENT_DESKTOP': 'GNOME : GNOME-Classic'}, _LinuxDesktopEnvironment.GNOME), ({'XDG_CURRENT_DESKTOP': 'Unity', 'DESKTOP_SESSION': 
'gnome-fallback'}, _LinuxDesktopEnvironment.GNOME), ({'XDG_CURRENT_DESKTOP': 'KDE', 'KDE_SESSION_VERSION': '5'}, _LinuxDesktopEnvironment.KDE), ({'XDG_CURRENT_DESKTOP': 'KDE'}, _LinuxDesktopEnvironment.KDE), ({'XDG_CURRENT_DESKTOP': 'Pantheon'}, _LinuxDesktopEnvironment.PANTHEON), ({'XDG_CURRENT_DESKTOP': 'Unity'}, _LinuxDesktopEnvironment.UNITY), ({'XDG_CURRENT_DESKTOP': 'Unity:Unity7'}, _LinuxDesktopEnvironment.UNITY), ({'XDG_CURRENT_DESKTOP': 'Unity:Unity8'}, _LinuxDesktopEnvironment.UNITY), ] for env, expected_desktop_environment in test_cases: self.assertEqual(_get_linux_desktop_environment(env), expected_desktop_environment) def test_chrome_cookie_decryptor_linux_derive_key(self): key = LinuxChromeCookieDecryptor.derive_key(b'abc') self.assertEqual(key, b'7\xa1\xec\xd4m\xfcA\xc7\xb19Z\xd0\x19\xdcM\x17') def test_chrome_cookie_decryptor_mac_derive_key(self): key = MacChromeCookieDecryptor.derive_key(b'abc') self.assertEqual(key, b'Y\xe2\xc0\xd0P\xf6\xf4\xe1l\xc1\x8cQ\xcb|\xcdY') def test_chrome_cookie_decryptor_linux_v10(self): with MonkeyPatch(cookies, {'_get_linux_keyring_password': lambda *args, **kwargs: b''}): encrypted_value = b'v10\xccW%\xcd\xe6\xe6\x9fM" \xa7\xb0\xca\xe4\x07\xd6' value = 'USD' decryptor = LinuxChromeCookieDecryptor('Chrome', Logger()) self.assertEqual(decryptor.decrypt(encrypted_value), value) def test_chrome_cookie_decryptor_linux_v11(self): with MonkeyPatch(cookies, {'_get_linux_keyring_password': lambda *args, **kwargs: b''}): encrypted_value = b'v11#\x81\x10>`w\x8f)\xc0\xb2\xc1\r\xf4\x1al\xdd\x93\xfd\xf8\xf8N\xf2\xa9\x83\xf1\xe9o\x0elVQd' value = 'tz=Europe.London' decryptor = LinuxChromeCookieDecryptor('Chrome', Logger()) self.assertEqual(decryptor.decrypt(encrypted_value), value) def test_chrome_cookie_decryptor_windows_v10(self): with MonkeyPatch(cookies, { '_get_windows_v10_key': lambda *args, **kwargs: b'Y\xef\xad\xad\xeerp\xf0Y\xe6\x9b\x12\xc2<z\x16]\n\xbb\xb8\xcb\xd7\x9bA\xc3\x14e\x99{\xd6\xf4&' }): encrypted_value = b'v10T\xb8\xf3\xb8\x01\xa7TtcV\xfc\x88\xb8\xb8\xef\x05\xb5\xfd\x18\xc90\x009\xab\xb1\x893\x85)\x87\xe1\xa9-\xa3\xad=' value = '32101439' decryptor = WindowsChromeCookieDecryptor('', Logger()) self.assertEqual(decryptor.decrypt(encrypted_value), value) def test_chrome_cookie_decryptor_mac_v10(self): with MonkeyPatch(cookies, {'_get_mac_keyring_password': lambda *args, **kwargs: b'6eIDUdtKAacvlHwBVwvg/Q=='}): encrypted_value = b'v10\xb3\xbe\xad\xa1[\x9fC\xa1\x98\xe0\x9a\x01\xd9\xcf\xbfc' value = '2021-06-01-22' decryptor = MacChromeCookieDecryptor('', Logger()) self.assertEqual(decryptor.decrypt(encrypted_value), value) def test_safari_cookie_parsing(self): cookies = \ b'cook\x00\x00\x00\x01\x00\x00\x00i\x00\x00\x01\x00\x01\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00Y' \ b'\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x008\x00\x00\x00B\x00\x00\x00F\x00\x00\x00H' \ b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x03\xa5>\xc3A\x00\x00\x80\xc3\x07:\xc3A' \ b'localhost\x00foo\x00/\x00test%20%3Bcookie\x00\x00\x00\x054\x07\x17 \x05\x00\x00\x00Kbplist00\xd1\x01' \ b'\x02_\x10\x18NSHTTPCookieAcceptPolicy\x10\x02\x08\x0b&\x00\x00\x00\x00\x00\x00\x01\x01\x00\x00\x00' \ b'\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00(' jar = parse_safari_cookies(cookies) self.assertEqual(len(jar), 1) cookie = list(jar)[0] self.assertEqual(cookie.domain, 'localhost') self.assertEqual(cookie.port, None) self.assertEqual(cookie.path, '/') self.assertEqual(cookie.name, 'foo') self.assertEqual(cookie.value, 
'test%20%3Bcookie') self.assertFalse(cookie.secure) expected_expiration = datetime(2021, 6, 18, 21, 39, 19, tzinfo=timezone.utc) self.assertEqual(cookie.expires, int(expected_expiration.timestamp())) def test_pbkdf2_sha1(self): key = pbkdf2_sha1(b'peanuts', b' ' * 16, 1, 16) self.assertEqual(key, b'g\xe1\x8e\x0fQ\x1c\x9b\xf3\xc9`!\xaa\x90\xd9\xd34') �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/test/test_download.py�������������������������������������������������������������0000775�0000000�0000000�00000026114�14277552437�0017556�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/env python3 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import collections import hashlib import http.client import json import socket import urllib.error from test.helper import ( assertGreaterEqual, expect_info_dict, expect_warnings, get_params, gettestcases, getwebpagetestcases, is_download_test, report_warning, try_rm, ) import yt_dlp.YoutubeDL # isort: split from yt_dlp.extractor import get_info_extractor from yt_dlp.utils import ( DownloadError, ExtractorError, UnavailableVideoError, format_bytes, join_nonempty, ) RETRIES = 3 class YoutubeDL(yt_dlp.YoutubeDL): def __init__(self, *args, **kwargs): self.to_stderr = self.to_screen self.processed_info_dicts = [] super().__init__(*args, **kwargs) def report_warning(self, message, *args, **kwargs): # Don't accept warnings during tests raise ExtractorError(message) def process_info(self, info_dict): self.processed_info_dicts.append(info_dict.copy()) return super().process_info(info_dict) def _file_md5(fn): with open(fn, 'rb') as f: return hashlib.md5(f.read()).hexdigest() normal_test_cases = gettestcases() webpage_test_cases = getwebpagetestcases() tests_counter = collections.defaultdict(collections.Counter) @is_download_test class TestDownload(unittest.TestCase): # Parallel testing in nosetests. 
See # http://nose.readthedocs.org/en/latest/doc_tests/test_multiprocess/multiprocess.html _multiprocess_shared_ = True maxDiff = None COMPLETED_TESTS = {} def __str__(self): """Identify each test with the `add_ie` attribute, if available.""" cls, add_ie = type(self), getattr(self, self._testMethodName).add_ie return f'{self._testMethodName} ({cls.__module__}.{cls.__name__}){f" [{add_ie}]" if add_ie else ""}:' # Dynamically generate tests def generator(test_case, tname): def test_template(self): if self.COMPLETED_TESTS.get(tname): return self.COMPLETED_TESTS[tname] = True ie = yt_dlp.extractor.get_info_extractor(test_case['name'])() other_ies = [get_info_extractor(ie_key)() for ie_key in test_case.get('add_ie', [])] is_playlist = any(k.startswith('playlist') for k in test_case) test_cases = test_case.get( 'playlist', [] if is_playlist else [test_case]) def print_skipping(reason): print('Skipping %s: %s' % (test_case['name'], reason)) self.skipTest(reason) if not ie.working(): print_skipping('IE marked as not _WORKING') for tc in test_cases: info_dict = tc.get('info_dict', {}) params = tc.get('params', {}) if not info_dict.get('id'): raise Exception(f'Test {tname} definition incorrect - "id" key is not present') elif not info_dict.get('ext'): if params.get('skip_download') and params.get('ignore_no_formats_error'): continue raise Exception(f'Test {tname} definition incorrect - "ext" key must be present to define the output file') if 'skip' in test_case: print_skipping(test_case['skip']) for other_ie in other_ies: if not other_ie.working(): print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key()) params = get_params(test_case.get('params', {})) params['outtmpl'] = tname + '_' + params['outtmpl'] if is_playlist and 'playlist' not in test_case: params.setdefault('extract_flat', 'in_playlist') params.setdefault('playlistend', test_case.get('playlist_mincount')) params.setdefault('skip_download', True) ydl = YoutubeDL(params, auto_init=False) ydl.add_default_info_extractors() finished_hook_called = set() def _hook(status): if status['status'] == 'finished': finished_hook_called.add(status['filename']) ydl.add_progress_hook(_hook) expect_warnings(ydl, test_case.get('expected_warnings', [])) def get_tc_filename(tc): return ydl.prepare_filename(dict(tc.get('info_dict', {}))) res_dict = None def try_rm_tcs_files(tcs=None): if tcs is None: tcs = test_cases for tc in tcs: tc_filename = get_tc_filename(tc) try_rm(tc_filename) try_rm(tc_filename + '.part') try_rm(os.path.splitext(tc_filename)[0] + '.info.json') try_rm_tcs_files() try: try_num = 1 while True: try: # We're not using .download here since that is just a shim # for outside error handling, and returns the exit code # instead of the result dict. res_dict = ydl.extract_info( test_case['url'], force_generic_extractor=params.get('force_generic_extractor', False)) except (DownloadError, ExtractorError) as err: # Check if the exception is not a network related one if (err.exc_info[0] not in (urllib.error.URLError, socket.timeout, UnavailableVideoError, http.client.BadStatusLine) or (err.exc_info[0] == urllib.error.HTTPError and err.exc_info[1].code == 503)): err.msg = f'{getattr(err, "msg", err)} ({tname})' raise if try_num == RETRIES: report_warning('%s failed due to network errors, skipping...' 
% tname) return print(f'Retrying: {try_num} failed tries\n\n##########\n\n') try_num += 1 else: break if is_playlist: self.assertTrue(res_dict['_type'] in ['playlist', 'multi_video']) self.assertTrue('entries' in res_dict) expect_info_dict(self, res_dict, test_case.get('info_dict', {})) if 'playlist_mincount' in test_case: assertGreaterEqual( self, len(res_dict['entries']), test_case['playlist_mincount'], 'Expected at least %d in playlist %s, but got only %d' % ( test_case['playlist_mincount'], test_case['url'], len(res_dict['entries']))) if 'playlist_count' in test_case: self.assertEqual( len(res_dict['entries']), test_case['playlist_count'], 'Expected %d entries in playlist %s, but got %d.' % ( test_case['playlist_count'], test_case['url'], len(res_dict['entries']), )) if 'playlist_duration_sum' in test_case: got_duration = sum(e['duration'] for e in res_dict['entries']) self.assertEqual( test_case['playlist_duration_sum'], got_duration) # Generalize both playlists and single videos to unified format for # simplicity if 'entries' not in res_dict: res_dict['entries'] = [res_dict] for tc_num, tc in enumerate(test_cases): tc_res_dict = res_dict['entries'][tc_num] # First, check test cases' data against extracted data alone expect_info_dict(self, tc_res_dict, tc.get('info_dict', {})) # Now, check downloaded file consistency tc_filename = get_tc_filename(tc) if not test_case.get('params', {}).get('skip_download', False): self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename) self.assertTrue(tc_filename in finished_hook_called) expected_minsize = tc.get('file_minsize', 10000) if expected_minsize is not None: if params.get('test'): expected_minsize = max(expected_minsize, 10000) got_fsize = os.path.getsize(tc_filename) assertGreaterEqual( self, got_fsize, expected_minsize, 'Expected %s to be at least %s, but it\'s only %s ' % (tc_filename, format_bytes(expected_minsize), format_bytes(got_fsize))) if 'md5' in tc: md5_for_file = _file_md5(tc_filename) self.assertEqual(tc['md5'], md5_for_file) # Finally, check test cases' data again but this time against # extracted data from info JSON file written during processing info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json' self.assertTrue( os.path.exists(info_json_fn), 'Missing info file %s' % info_json_fn) with open(info_json_fn, encoding='utf-8') as infof: info_dict = json.load(infof) expect_info_dict(self, info_dict, tc.get('info_dict', {})) finally: try_rm_tcs_files() if is_playlist and res_dict is not None and res_dict.get('entries'): # Remove all other files that may have been extracted if the # extractor returns full results even with extract_flat res_tcs = [{'info_dict': e} for e in res_dict['entries']] try_rm_tcs_files(res_tcs) return test_template # And add them to TestDownload def inject_tests(test_cases, label=''): for test_case in test_cases: name = test_case['name'] tname = join_nonempty('test', name, label, tests_counter[name][label], delim='_') tests_counter[name][label] += 1 test_method = generator(test_case, tname) test_method.__name__ = tname test_method.add_ie = ','.join(test_case.get('add_ie', [])) setattr(TestDownload, test_method.__name__, test_method) inject_tests(normal_test_cases) # TODO: disable redirection to the IE to ensure we are actually testing the webpage extraction inject_tests(webpage_test_cases, 'webpage') def batch_generator(name): def test_template(self): for label, num_tests in tests_counter[name].items(): for i in range(num_tests): test_name = join_nonempty('test', name, label, 
i, delim='_') try: getattr(self, test_name)() except unittest.SkipTest: print(f'Skipped {test_name}') return test_template for name in tests_counter: test_method = batch_generator(name) test_method.__name__ = f'test_{name}_all' test_method.add_ie = '' setattr(TestDownload, test_method.__name__, test_method) del test_method if __name__ == '__main__': unittest.main() ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/test/test_downloader_http.py������������������������������������������������������0000664�0000000�0000000�00000006477�14277552437�0021153�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/env python3 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import http.server import re import threading from test.helper import http_server_port, try_rm from yt_dlp import YoutubeDL from yt_dlp.downloader.http import HttpFD from yt_dlp.utils import encodeFilename TEST_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_SIZE = 10 * 1024 class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): def log_message(self, format, *args): pass def send_content_range(self, total=None): range_header = self.headers.get('Range') start = end = None if range_header: mobj = re.search(r'^bytes=(\d+)-(\d+)', range_header) if mobj: start = int(mobj.group(1)) end = int(mobj.group(2)) valid_range = start is not None and end is not None if valid_range: content_range = 'bytes %d-%d' % (start, end) if total: content_range += '/%d' % total self.send_header('Content-Range', content_range) return (end - start + 1) if valid_range else total def serve(self, range=True, content_length=True): self.send_response(200) self.send_header('Content-Type', 'video/mp4') size = TEST_SIZE if range: size = self.send_content_range(TEST_SIZE) if content_length: self.send_header('Content-Length', size) self.end_headers() self.wfile.write(b'#' * size) def do_GET(self): if self.path == '/regular': self.serve() elif self.path == '/no-content-length': self.serve(content_length=False) elif self.path == '/no-range': self.serve(range=False) elif self.path == '/no-range-no-content-length': self.serve(range=False, content_length=False) else: assert False class FakeLogger: def debug(self, msg): pass def warning(self, msg): pass def error(self, msg): pass class TestHttpFD(unittest.TestCase): def setUp(self): self.httpd = http.server.HTTPServer( ('127.0.0.1', 0), HTTPTestRequestHandler) self.port = http_server_port(self.httpd) self.server_thread = threading.Thread(target=self.httpd.serve_forever) self.server_thread.daemon = True self.server_thread.start() def download(self, params, ep): params['logger'] = FakeLogger() ydl = YoutubeDL(params) downloader = HttpFD(ydl, params) filename = 'testfile.mp4' try_rm(encodeFilename(filename)) 
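        # real_download() returns success as a bool; `ep` is passed as the
        # assertion message so a failure names the server variant
        # (range/content-length combination) that broke.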
        self.assertTrue(downloader.real_download(filename, {
            'url': 'http://127.0.0.1:%d/%s' % (self.port, ep),
        }), ep)
        self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE, ep)
        try_rm(encodeFilename(filename))

    def download_all(self, params):
        for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'):
            self.download(params, ep)

    def test_regular(self):
        self.download_all({})

    def test_chunked(self):
        self.download_all({
            'http_chunk_size': 1000,
        })


if __name__ == '__main__':
    unittest.main()


yt-dlp-2022.08.19/test/test_execution.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import contextlib
import subprocess

from yt_dlp.utils import encodeArgument

rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

try:
    _DEV_NULL = subprocess.DEVNULL
except AttributeError:
    _DEV_NULL = open(os.devnull, 'wb')


class TestExecution(unittest.TestCase):
    def test_import(self):
        subprocess.check_call([sys.executable, '-c', 'import yt_dlp'], cwd=rootDir)

    def test_module_exec(self):
        subprocess.check_call([sys.executable, '-m', 'yt_dlp', '--ignore-config', '--version'], cwd=rootDir, stdout=_DEV_NULL)

    def test_main_exec(self):
        subprocess.check_call([sys.executable, 'yt_dlp/__main__.py', '--ignore-config', '--version'], cwd=rootDir, stdout=_DEV_NULL)

    def test_cmdline_umlauts(self):
        p = subprocess.Popen(
            [sys.executable, 'yt_dlp/__main__.py', '--ignore-config', encodeArgument('ä'), '--version'],
            cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
        _, stderr = p.communicate()
        self.assertFalse(stderr)

    def test_lazy_extractors(self):
        try:
            subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'yt_dlp/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL)
            subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
        finally:
            with contextlib.suppress(OSError):
                os.remove('yt_dlp/extractor/lazy_extractors.py')


if __name__ == '__main__':
    unittest.main()
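# --- Illustrative sketch (not part of the test suite) ---------------------
# The send_content_range() helper in test_downloader_http.py above implements
# just enough of the HTTP Range handshake for the chunked-download test: the
# client sends `Range: bytes=start-end` and the server answers with a
# `Content-Range: bytes start-end/total` header plus the slice length.
# A stand-alone rendition of that parsing:
import re


def content_range_for(range_header, total):
    mobj = re.fullmatch(r'bytes=(\d+)-(\d+)', range_header or '')
    if not mobj:
        return None, total  # no (usable) Range header: serve the whole resource
    start, end = map(int, mobj.groups())
    return f'bytes {start}-{end}/{total}', end - start + 1


header, length = content_range_for('bytes=0-999', 10 * 1024)
assert header == 'bytes 0-999/10240' and length == 1000
# ---------------------------------------------------------------------------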
���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/test/test_http.py�����������������������������������������������������������������0000664�0000000�0000000�00000015720�14277552437�0016724�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/env python3 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import http.server import ssl import threading import urllib.request from test.helper import http_server_port from yt_dlp import YoutubeDL TEST_DIR = os.path.dirname(os.path.abspath(__file__)) class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): def log_message(self, format, *args): pass def do_GET(self): if self.path == '/video.html': self.send_response(200) self.send_header('Content-Type', 'text/html; charset=utf-8') self.end_headers() self.wfile.write(b'<html><video src="/vid.mp4" /></html>') elif self.path == '/vid.mp4': self.send_response(200) self.send_header('Content-Type', 'video/mp4') self.end_headers() self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]') elif self.path == '/%E4%B8%AD%E6%96%87.html': self.send_response(200) self.send_header('Content-Type', 'text/html; charset=utf-8') self.end_headers() self.wfile.write(b'<html><video src="/vid.mp4" /></html>') else: assert False class FakeLogger: def debug(self, msg): pass def warning(self, msg): pass def error(self, msg): pass class TestHTTP(unittest.TestCase): def setUp(self): self.httpd = http.server.HTTPServer( ('127.0.0.1', 0), HTTPTestRequestHandler) self.port = http_server_port(self.httpd) self.server_thread = threading.Thread(target=self.httpd.serve_forever) self.server_thread.daemon = True self.server_thread.start() class TestHTTPS(unittest.TestCase): def setUp(self): certfn = os.path.join(TEST_DIR, 'testcert.pem') self.httpd = http.server.HTTPServer( ('127.0.0.1', 0), HTTPTestRequestHandler) sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) sslctx.load_cert_chain(certfn, None) self.httpd.socket = sslctx.wrap_socket(self.httpd.socket, server_side=True) self.port = http_server_port(self.httpd) self.server_thread = threading.Thread(target=self.httpd.serve_forever) self.server_thread.daemon = True self.server_thread.start() def test_nocheckcertificate(self): ydl = YoutubeDL({'logger': FakeLogger()}) self.assertRaises( Exception, ydl.extract_info, 'https://127.0.0.1:%d/video.html' % self.port) ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True}) r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port) self.assertEqual(r['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port) class TestClientCert(unittest.TestCase): def setUp(self): certfn = os.path.join(TEST_DIR, 'testcert.pem') self.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate') cacertfn = os.path.join(self.certdir, 'ca.crt') self.httpd = http.server.HTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler) sslctx = 
ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) sslctx.verify_mode = ssl.CERT_REQUIRED sslctx.load_verify_locations(cafile=cacertfn) sslctx.load_cert_chain(certfn, None) self.httpd.socket = sslctx.wrap_socket(self.httpd.socket, server_side=True) self.port = http_server_port(self.httpd) self.server_thread = threading.Thread(target=self.httpd.serve_forever) self.server_thread.daemon = True self.server_thread.start() def _run_test(self, **params): ydl = YoutubeDL({ 'logger': FakeLogger(), # Disable client-side validation of unacceptable self-signed testcert.pem # The test is of a check on the server side, so unaffected 'nocheckcertificate': True, **params, }) r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port) self.assertEqual(r['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port) def test_certificate_combined_nopass(self): self._run_test(client_certificate=os.path.join(self.certdir, 'clientwithkey.crt')) def test_certificate_nocombined_nopass(self): self._run_test(client_certificate=os.path.join(self.certdir, 'client.crt'), client_certificate_key=os.path.join(self.certdir, 'client.key')) def test_certificate_combined_pass(self): self._run_test(client_certificate=os.path.join(self.certdir, 'clientwithencryptedkey.crt'), client_certificate_password='foobar') def test_certificate_nocombined_pass(self): self._run_test(client_certificate=os.path.join(self.certdir, 'client.crt'), client_certificate_key=os.path.join(self.certdir, 'clientencrypted.key'), client_certificate_password='foobar') def _build_proxy_handler(name): class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): proxy_name = name def log_message(self, format, *args): pass def do_GET(self): self.send_response(200) self.send_header('Content-Type', 'text/plain; charset=utf-8') self.end_headers() self.wfile.write(f'{self.proxy_name}: {self.path}'.encode()) return HTTPTestRequestHandler class TestProxy(unittest.TestCase): def setUp(self): self.proxy = http.server.HTTPServer( ('127.0.0.1', 0), _build_proxy_handler('normal')) self.port = http_server_port(self.proxy) self.proxy_thread = threading.Thread(target=self.proxy.serve_forever) self.proxy_thread.daemon = True self.proxy_thread.start() self.geo_proxy = http.server.HTTPServer( ('127.0.0.1', 0), _build_proxy_handler('geo')) self.geo_port = http_server_port(self.geo_proxy) self.geo_proxy_thread = threading.Thread(target=self.geo_proxy.serve_forever) self.geo_proxy_thread.daemon = True self.geo_proxy_thread.start() def test_proxy(self): geo_proxy = f'127.0.0.1:{self.geo_port}' ydl = YoutubeDL({ 'proxy': f'127.0.0.1:{self.port}', 'geo_verification_proxy': geo_proxy, }) url = 'http://foo.com/bar' response = ydl.urlopen(url).read().decode() self.assertEqual(response, f'normal: {url}') req = urllib.request.Request(url) req.add_header('Ytdl-request-proxy', geo_proxy) response = ydl.urlopen(req).read().decode() self.assertEqual(response, f'geo: {url}') def test_proxy_with_idn(self): ydl = YoutubeDL({ 'proxy': f'127.0.0.1:{self.port}', }) url = 'http://中文.tw/' response = ydl.urlopen(url).read().decode() # b'xn--fiq228c' is '中文'.encode('idna') self.assertEqual(response, 'normal: http://xn--fiq228c.tw/') if __name__ == '__main__': unittest.main() 
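# --- Illustrative sketch (not part of the test suite) ---------------------
# test_proxy_with_idn above works because hostnames are IDNA-encoded before
# the request reaches the proxy, so the proxy sees the xn-- form. A quick
# stdlib demonstration of the encoding the test comment refers to:
assert '中文'.encode('idna') == b'xn--fiq228c'
assert '中文.tw'.encode('idna') == b'xn--fiq228c.tw'
assert b'xn--fiq228c'.decode('idna') == '中文'
# ---------------------------------------------------------------------------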
������������������������������������������������yt-dlp-2022.08.19/test/test_iqiyi_sdk_interpreter.py������������������������������������������������0000664�0000000�0000000�00000001743�14277552437�0022355�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/env python3 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import FakeYDL, is_download_test from yt_dlp.extractor import IqiyiIE class WarningLogger: def __init__(self): self.messages = [] def warning(self, msg): self.messages.append(msg) def debug(self, msg): pass def error(self, msg): pass @is_download_test class TestIqiyiSDKInterpreter(unittest.TestCase): def test_iqiyi_sdk_interpreter(self): ''' Test the functionality of IqiyiSDKInterpreter by trying to log in If `sign` is incorrect, /validate call throws an HTTP 556 error ''' logger = WarningLogger() ie = IqiyiIE(FakeYDL({'logger': logger})) ie._perform_login('foo', 'bar') self.assertTrue('unable to log in:' in logger.messages[0]) if __name__ == '__main__': unittest.main() �����������������������������yt-dlp-2022.08.19/test/test_jsinterp.py�������������������������������������������������������������0000664�0000000�0000000�00000027217�14277552437�0017607�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/env python3 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import math import re from yt_dlp.jsinterp import JS_Undefined, JSInterpreter class TestJSInterpreter(unittest.TestCase): def test_basic(self): jsi = JSInterpreter('function x(){;}') self.assertEqual(jsi.call_function('x'), None) jsi = JSInterpreter('function x3(){return 42;}') self.assertEqual(jsi.call_function('x3'), 42) jsi = JSInterpreter('function x3(){42}') self.assertEqual(jsi.call_function('x3'), None) jsi = JSInterpreter('var x5 = function(){return 42;}') self.assertEqual(jsi.call_function('x5'), 42) def test_calc(self): jsi = JSInterpreter('function x4(a){return 2*a+1;}') self.assertEqual(jsi.call_function('x4', 3), 7) def test_empty_return(self): jsi = JSInterpreter('function f(){return; y()}') self.assertEqual(jsi.call_function('f'), None) def test_morespace(self): jsi = JSInterpreter('function x (a) { return 2 * a + 1 ; }') self.assertEqual(jsi.call_function('x', 3), 7) jsi = JSInterpreter('function f () { x = 2 ; return x; }') self.assertEqual(jsi.call_function('f'), 2) def test_strange_chars(self): jsi = JSInterpreter('function $_xY1 ($_axY1) { var $_axY2 = $_axY1 + 1; return $_axY2; }') self.assertEqual(jsi.call_function('$_xY1', 20), 21) def test_operators(self): jsi = JSInterpreter('function f(){return 1 << 5;}') self.assertEqual(jsi.call_function('f'), 32) jsi = JSInterpreter('function f(){return 2 ** 5}') self.assertEqual(jsi.call_function('f'), 32) jsi = JSInterpreter('function 
f(){return 19 & 21;}') self.assertEqual(jsi.call_function('f'), 17) jsi = JSInterpreter('function f(){return 11 >> 2;}') self.assertEqual(jsi.call_function('f'), 2) jsi = JSInterpreter('function f(){return []? 2+3: 4;}') self.assertEqual(jsi.call_function('f'), 5) jsi = JSInterpreter('function f(){return 1 == 2}') self.assertEqual(jsi.call_function('f'), False) jsi = JSInterpreter('function f(){return 0 && 1 || 2;}') self.assertEqual(jsi.call_function('f'), 2) jsi = JSInterpreter('function f(){return 0 ?? 42;}') self.assertEqual(jsi.call_function('f'), 0) def test_array_access(self): jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}') self.assertEqual(jsi.call_function('f'), [5, 2, 7]) def test_parens(self): jsi = JSInterpreter('function f(){return (1) + (2) * ((( (( (((((3)))))) )) ));}') self.assertEqual(jsi.call_function('f'), 7) jsi = JSInterpreter('function f(){return (1 + 2) * 3;}') self.assertEqual(jsi.call_function('f'), 9) def test_quotes(self): jsi = JSInterpreter(R'function f(){return "a\"\\("}') self.assertEqual(jsi.call_function('f'), R'a"\(') def test_assignments(self): jsi = JSInterpreter('function f(){var x = 20; x = 30 + 1; return x;}') self.assertEqual(jsi.call_function('f'), 31) jsi = JSInterpreter('function f(){var x = 20; x += 30 + 1; return x;}') self.assertEqual(jsi.call_function('f'), 51) jsi = JSInterpreter('function f(){var x = 20; x -= 30 + 1; return x;}') self.assertEqual(jsi.call_function('f'), -11) def test_comments(self): 'Skipping: Not yet fully implemented' return jsi = JSInterpreter(''' function x() { var x = /* 1 + */ 2; var y = /* 30 * 40 */ 50; return x + y; } ''') self.assertEqual(jsi.call_function('x'), 52) jsi = JSInterpreter(''' function f() { var x = "/*"; var y = 1 /* comment */ + 2; return y; } ''') self.assertEqual(jsi.call_function('f'), 3) def test_precedence(self): jsi = JSInterpreter(''' function x() { var a = [10, 20, 30, 40, 50]; var b = 6; a[0]=a[b%a.length]; return a; }''') self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50]) def test_builtins(self): jsi = JSInterpreter(''' function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; } ''') self.assertEqual(jsi.call_function('x'), 86000) jsi = JSInterpreter(''' function x(dt) { return new Date(dt) - 0; } ''') self.assertEqual(jsi.call_function('x', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000) def test_call(self): jsi = JSInterpreter(''' function x() { return 2; } function y(a) { return x() + (a?a:0); } function z() { return y(3); } ''') self.assertEqual(jsi.call_function('z'), 5) self.assertEqual(jsi.call_function('y'), 2) def test_for_loop(self): jsi = JSInterpreter(''' function x() { a=0; for (i=0; i-10; i++) {a++} return a } ''') self.assertEqual(jsi.call_function('x'), 10) def test_switch(self): jsi = JSInterpreter(''' function x(f) { switch(f){ case 1:f+=1; case 2:f+=2; case 3:f+=3;break; case 4:f+=4; default:f=0; } return f } ''') self.assertEqual(jsi.call_function('x', 1), 7) self.assertEqual(jsi.call_function('x', 3), 6) self.assertEqual(jsi.call_function('x', 5), 0) def test_switch_default(self): jsi = JSInterpreter(''' function x(f) { switch(f){ case 2: f+=2; default: f-=1; case 5: case 6: f+=6; case 0: break; case 1: f+=1; } return f } ''') self.assertEqual(jsi.call_function('x', 1), 2) self.assertEqual(jsi.call_function('x', 5), 11) self.assertEqual(jsi.call_function('x', 9), 14) def test_try(self): jsi = JSInterpreter(''' function x() { try{return 10} catch(e){return 5} } ''') 
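        # The try body returns 10 unconditionally, so the catch branch
        # (which would return 5) is never taken.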
self.assertEqual(jsi.call_function('x'), 10) def test_for_loop_continue(self): jsi = JSInterpreter(''' function x() { a=0; for (i=0; i-10; i++) { continue; a++ } return a } ''') self.assertEqual(jsi.call_function('x'), 0) def test_for_loop_break(self): jsi = JSInterpreter(''' function x() { a=0; for (i=0; i-10; i++) { break; a++ } return a } ''') self.assertEqual(jsi.call_function('x'), 0) def test_literal_list(self): jsi = JSInterpreter(''' function x() { return [1, 2, "asdf", [5, 6, 7]][3] } ''') self.assertEqual(jsi.call_function('x'), [5, 6, 7]) def test_comma(self): jsi = JSInterpreter(''' function x() { a=5; a -= 1, a+=3; return a } ''') self.assertEqual(jsi.call_function('x'), 7) jsi = JSInterpreter(''' function x() { a=5; return (a -= 1, a+=3, a); } ''') self.assertEqual(jsi.call_function('x'), 7) jsi = JSInterpreter(''' function x() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) } ''') self.assertEqual(jsi.call_function('x'), 5) def test_void(self): jsi = JSInterpreter(''' function x() { return void 42; } ''') self.assertEqual(jsi.call_function('x'), None) def test_return_function(self): jsi = JSInterpreter(''' function x() { return [1, function(){return 1}][1] } ''') self.assertEqual(jsi.call_function('x')([]), 1) def test_null(self): jsi = JSInterpreter(''' function x() { return null; } ''') self.assertEqual(jsi.call_function('x'), None) jsi = JSInterpreter(''' function x() { return [null > 0, null < 0, null == 0, null === 0]; } ''') self.assertEqual(jsi.call_function('x'), [False, False, False, False]) jsi = JSInterpreter(''' function x() { return [null >= 0, null <= 0]; } ''') self.assertEqual(jsi.call_function('x'), [True, True]) def test_undefined(self): jsi = JSInterpreter(''' function x() { return undefined === undefined; } ''') self.assertEqual(jsi.call_function('x'), True) jsi = JSInterpreter(''' function x() { return undefined; } ''') self.assertEqual(jsi.call_function('x'), JS_Undefined) jsi = JSInterpreter(''' function x() { let v; return v; } ''') self.assertEqual(jsi.call_function('x'), JS_Undefined) jsi = JSInterpreter(''' function x() { return [undefined === undefined, undefined == undefined, undefined < undefined, undefined > undefined]; } ''') self.assertEqual(jsi.call_function('x'), [True, True, False, False]) jsi = JSInterpreter(''' function x() { return [undefined === 0, undefined == 0, undefined < 0, undefined > 0]; } ''') self.assertEqual(jsi.call_function('x'), [False, False, False, False]) jsi = JSInterpreter(''' function x() { return [undefined >= 0, undefined <= 0]; } ''') self.assertEqual(jsi.call_function('x'), [False, False]) jsi = JSInterpreter(''' function x() { return [undefined > null, undefined < null, undefined == null, undefined === null]; } ''') self.assertEqual(jsi.call_function('x'), [False, False, True, False]) jsi = JSInterpreter(''' function x() { return [undefined === null, undefined == null, undefined < null, undefined > null]; } ''') self.assertEqual(jsi.call_function('x'), [False, True, False, False]) jsi = JSInterpreter(''' function x() { let v; return [42+v, v+42, v**42, 42**v, 0**v]; } ''') for y in jsi.call_function('x'): self.assertTrue(math.isnan(y)) jsi = JSInterpreter(''' function x() { let v; return v**0; } ''') self.assertEqual(jsi.call_function('x'), 1) jsi = JSInterpreter(''' function x() { let v; return [v>42, v<=42, v&&42, 42&&v]; } ''') self.assertEqual(jsi.call_function('x'), [False, False, JS_Undefined, JS_Undefined]) jsi = JSInterpreter('function x(){return undefined ?? 
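
A minimal usage sketch of the interpreter exercised by the suite above, assuming only the JSInterpreter API these tests already demonstrate (parse a function body, then call it with positional arguments).

# Sketch: standalone use of the API shown in the tests above
from yt_dlp.jsinterp import JSInterpreter

jsi = JSInterpreter('function add(a, b){return a + b;}')
assert jsi.call_function('add', 2, 3) == 5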
yt-dlp-2022.08.19/test/test_netrc.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from yt_dlp.extractor import gen_extractor_classes
from yt_dlp.extractor.common import InfoExtractor

NO_LOGIN = InfoExtractor._perform_login


class TestNetRc(unittest.TestCase):
    def test_netrc_present(self):
        for ie in gen_extractor_classes():
            if ie._perform_login is NO_LOGIN:
                continue
            self.assertTrue(
                ie._NETRC_MACHINE,
                'Extractor %s supports login, but is missing a _NETRC_MACHINE property' % ie.IE_NAME)


if __name__ == '__main__':
    unittest.main()
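
A sketch of the invariant the test above enforces, using a hypothetical extractor (the class and machine name below are invented for illustration): any extractor that overrides _perform_login must also set _NETRC_MACHINE so --netrc can locate its credentials.

# Hypothetical extractor, illustration only -- not part of yt-dlp
from yt_dlp.extractor.common import InfoExtractor


class ExampleIE(InfoExtractor):
    _NETRC_MACHINE = 'example'  # omitting this is exactly what the test flags

    def _perform_login(self, username, password):
        pass  # a real extractor would submit credentials here


assert ExampleIE._perform_login is not InfoExtractor._perform_login
assert ExampleIE._NETRC_MACHINE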
yt-dlp-2022.08.19/test/test_overwrites.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import subprocess

from test.helper import is_download_test, try_rm

root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
download_file = os.path.join(root_dir, 'test.webm')


@is_download_test
class TestOverwrites(unittest.TestCase):
    def setUp(self):
        # create an empty file
        open(download_file, 'a').close()

    def test_default_overwrites(self):
        outp = subprocess.Popen(
            [
                sys.executable, 'yt_dlp/__main__.py',
                '-o', 'test.webm',
                'https://www.youtube.com/watch?v=jNQXAC9IVRw'
            ], cwd=root_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        sout, serr = outp.communicate()
        self.assertTrue(b'has already been downloaded' in sout)
        # if the file has no content, it has not been redownloaded
        self.assertTrue(os.path.getsize(download_file) < 1)

    def test_yes_overwrites(self):
        outp = subprocess.Popen(
            [
                sys.executable, 'yt_dlp/__main__.py', '--yes-overwrites',
                '-o', 'test.webm',
                'https://www.youtube.com/watch?v=jNQXAC9IVRw'
            ], cwd=root_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        sout, serr = outp.communicate()
        self.assertTrue(b'has already been downloaded' not in sout)
        # if the file has no content, it has not been redownloaded
        self.assertTrue(os.path.getsize(download_file) > 1)

    def tearDown(self):
        try_rm(os.path.join(root_dir, 'test.webm'))


if __name__ == '__main__':
    unittest.main()
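
The two behaviours tested above, restated as a command-line sketch; the URL is the same test video the file uses, and the subprocess calls are left commented out because they need network access.

# Sketch: equivalent direct invocations of the module under test
import subprocess
import sys

url = 'https://www.youtube.com/watch?v=jNQXAC9IVRw'
keep = [sys.executable, '-m', 'yt_dlp', '-o', 'test.webm', url]
redo = [sys.executable, '-m', 'yt_dlp', '--yes-overwrites', '-o', 'test.webm', url]
# subprocess.run(keep)  # leaves an existing test.webm untouched
# subprocess.run(redo)  # re-downloads over it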
yt-dlp-2022.08.19/test/test_post_hooks.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from test.helper import get_params, is_download_test, try_rm
import yt_dlp.YoutubeDL  # isort: split
from yt_dlp.utils import DownloadError


class YoutubeDL(yt_dlp.YoutubeDL):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.to_stderr = self.to_screen


TEST_ID = 'gr51aVj-mLg'
EXPECTED_NAME = 'gr51aVj-mLg'


@is_download_test
class TestPostHooks(unittest.TestCase):
    def setUp(self):
        self.stored_name_1 = None
        self.stored_name_2 = None
        self.params = get_params({
            'skip_download': False,
            'writeinfojson': False,
            'quiet': True,
            'verbose': False,
            'cachedir': False,
        })
        self.files = []

    def test_post_hooks(self):
        self.params['post_hooks'] = [self.hook_one, self.hook_two]
        ydl = YoutubeDL(self.params)
        ydl.download([TEST_ID])
        self.assertEqual(self.stored_name_1, EXPECTED_NAME, 'Not the expected name from hook 1')
        self.assertEqual(self.stored_name_2, EXPECTED_NAME, 'Not the expected name from hook 2')

    def test_post_hook_exception(self):
        self.params['post_hooks'] = [self.hook_three]
        ydl = YoutubeDL(self.params)
        self.assertRaises(DownloadError, ydl.download, [TEST_ID])

    def hook_one(self, filename):
        self.stored_name_1, _ = os.path.splitext(os.path.basename(filename))
        self.files.append(filename)

    def hook_two(self, filename):
        self.stored_name_2, _ = os.path.splitext(os.path.basename(filename))
        self.files.append(filename)

    def hook_three(self, filename):
        self.files.append(filename)
        raise Exception('Test exception for \'%s\'' % filename)

    def tearDown(self):
        for f in self.files:
            try_rm(f)


if __name__ == '__main__':
    unittest.main()
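
A sketch of registering a post hook through the embedding API, following the pattern the test uses: each hook is called with the final filename after a download completes. The URL is a placeholder, and the download call is commented out since it needs network access.

# Sketch: post_hooks via the embedding API, as the test above exercises
import yt_dlp


def on_finished(filename):
    print('finished:', filename)


# with yt_dlp.YoutubeDL({'post_hooks': [on_finished]}) as ydl:
#     ydl.download(['https://www.youtube.com/watch?v=jNQXAC9IVRw'])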
yt-dlp-2022.08.19/test/test_postprocessors.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from yt_dlp import YoutubeDL
from yt_dlp.compat import compat_shlex_quote
from yt_dlp.postprocessor import (
    ExecPP,
    FFmpegThumbnailsConvertorPP,
    MetadataFromFieldPP,
    MetadataParserPP,
    ModifyChaptersPP,
)


class TestMetadataFromField(unittest.TestCase):

    def test_format_to_regex(self):
        self.assertEqual(
            MetadataParserPP.format_to_regex('%(title)s - %(artist)s'),
            r'(?P<title>.+)\ \-\ (?P<artist>.+)')
        self.assertEqual(MetadataParserPP.format_to_regex(r'(?P<x>.+)'), r'(?P<x>.+)')

    def test_field_to_template(self):
        self.assertEqual(MetadataParserPP.field_to_template('title'), '%(title)s')
        self.assertEqual(MetadataParserPP.field_to_template('1'), '1')
        self.assertEqual(MetadataParserPP.field_to_template('foo bar'), 'foo bar')
        self.assertEqual(MetadataParserPP.field_to_template(' literal'), ' literal')

    def test_metadatafromfield(self):
        self.assertEqual(
            MetadataFromFieldPP.to_action('%(title)s \\: %(artist)s:%(title)s : %(artist)s'),
            (MetadataParserPP.Actions.INTERPRET, '%(title)s : %(artist)s', '%(title)s : %(artist)s'))


class TestConvertThumbnail(unittest.TestCase):
    def test_escaping(self):
        pp = FFmpegThumbnailsConvertorPP()
        if not pp.available:
            print('Skipping: ffmpeg not found')
            return

        file = 'test/testdata/thumbnails/foo %d bar/foo_%d.{}'
        tests = (('webp', 'png'), ('png', 'jpg'))

        for inp, out in tests:
            out_file = file.format(out)
            if os.path.exists(out_file):
                os.remove(out_file)
            pp.convert_thumbnail(file.format(inp), out)
            assert os.path.exists(out_file)

        for _, out in tests:
            os.remove(file.format(out))


class TestExec(unittest.TestCase):
    def test_parse_cmd(self):
        pp = ExecPP(YoutubeDL(), '')
        info = {'filepath': 'file name'}
        cmd = 'echo %s' % compat_shlex_quote(info['filepath'])
        self.assertEqual(pp.parse_cmd('echo', info), cmd)
        self.assertEqual(pp.parse_cmd('echo {}', info), cmd)
        self.assertEqual(pp.parse_cmd('echo %(filepath)q', info), cmd)


class TestModifyChaptersPP(unittest.TestCase):
    def setUp(self):
        self._pp = ModifyChaptersPP(YoutubeDL())

    @staticmethod
    def _sponsor_chapter(start, end, cat, remove=False):
        c = {'start_time': start, 'end_time': end, '_categories': [(cat, start, end)]}
        if remove:
            c['remove'] = True
        return c

    @staticmethod
    def _chapter(start, end, title=None, remove=False):
        c = {'start_time': start, 'end_time': end}
        if title is not None:
            c['title'] = title
        if remove:
            c['remove'] = True
        return c

    def _chapters(self, ends, titles):
        self.assertEqual(len(ends), len(titles))
        start = 0
        chapters = []
        for e, t in zip(ends, titles):
            chapters.append(self._chapter(start, e, t))
            start = e
        return chapters

    def _remove_marked_arrange_sponsors_test_impl(
            self, chapters, expected_chapters, expected_removed):
        actual_chapters, actual_removed = (
            self._pp._remove_marked_arrange_sponsors(chapters))
        for c in actual_removed:
            c.pop('title', None)
            c.pop('_categories', None)
        actual_chapters = [{
            'start_time': c['start_time'],
            'end_time': c['end_time'],
            'title': c['title'],
        } for c in actual_chapters]
        self.assertSequenceEqual(expected_chapters, actual_chapters)
        self.assertSequenceEqual(expected_removed, actual_removed)

    def test_remove_marked_arrange_sponsors_CanGetThroughUnaltered(self):
        chapters = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, chapters, [])

    def test_remove_marked_arrange_sponsors_ChapterWithSponsors(self):
        chapters = self._chapters([70], ['c']) + [
            self._sponsor_chapter(10, 20, 'sponsor'),
            self._sponsor_chapter(30, 40, 'preview'),
            self._sponsor_chapter(50, 60, 'filler')]
        expected = self._chapters(
            [10, 20, 30, 40, 50, 60, 70],
            ['c', '[SponsorBlock]: Sponsor', 'c', '[SponsorBlock]: Preview/Recap',
             'c', '[SponsorBlock]: Filler Tangent', 'c'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])

    def test_remove_marked_arrange_sponsors_UniqueNamesForOverlappingSponsors(self):
        chapters = self._chapters([120], ['c']) + [
            self._sponsor_chapter(10, 45, 'sponsor'),
            self._sponsor_chapter(20, 40, 'selfpromo'),
            self._sponsor_chapter(50, 70, 'sponsor'),
            self._sponsor_chapter(60, 85, 'selfpromo'),
            self._sponsor_chapter(90, 120, 'selfpromo'),
            self._sponsor_chapter(100, 110, 'sponsor')]
        expected = self._chapters(
            [10, 20, 40, 45, 50, 60, 70, 85, 90, 100, 110, 120],
            ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion',
             '[SponsorBlock]: Sponsor', 'c', '[SponsorBlock]: Sponsor',
             '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', '[SponsorBlock]: Unpaid/Self Promotion',
             'c', '[SponsorBlock]: Unpaid/Self Promotion',
             '[SponsorBlock]: Unpaid/Self Promotion, Sponsor', '[SponsorBlock]: Unpaid/Self Promotion'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])

    def test_remove_marked_arrange_sponsors_ChapterWithCuts(self):
        cuts = [self._chapter(10, 20, remove=True),
                self._sponsor_chapter(30, 40, 'sponsor', remove=True),
                self._chapter(50, 60, remove=True)]
        chapters = self._chapters([70], ['c']) + cuts
        self._remove_marked_arrange_sponsors_test_impl(
            chapters, self._chapters([40], ['c']), cuts)

    def test_remove_marked_arrange_sponsors_ChapterWithSponsorsAndCuts(self):
        chapters = self._chapters([70], ['c']) + [
            self._sponsor_chapter(10, 20, 'sponsor'),
            self._sponsor_chapter(30, 40, 'selfpromo', remove=True),
            self._sponsor_chapter(50, 60, 'interaction')]
        expected = self._chapters([10, 20, 40, 50, 60],
                                  ['c', '[SponsorBlock]: Sponsor', 'c',
                                   '[SponsorBlock]: Interaction Reminder', 'c'])
        self._remove_marked_arrange_sponsors_test_impl(
            chapters, expected, [self._chapter(30, 40, remove=True)])

    def test_remove_marked_arrange_sponsors_ChapterWithSponsorCutInTheMiddle(self):
        cuts = [self._sponsor_chapter(20, 30, 'selfpromo', remove=True),
                self._chapter(40, 50, remove=True)]
        chapters = self._chapters([70], ['c']) + [self._sponsor_chapter(10, 60, 'sponsor')] + cuts
        expected = self._chapters(
            [10, 40, 50], ['c', '[SponsorBlock]: Sponsor', 'c'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)

    def test_remove_marked_arrange_sponsors_ChapterWithCutHidingSponsor(self):
        cuts = [self._sponsor_chapter(20, 50, 'selpromo', remove=True)]
        chapters = self._chapters([60], ['c']) + [
            self._sponsor_chapter(10, 20, 'intro'),
            self._sponsor_chapter(30, 40, 'sponsor'),
            self._sponsor_chapter(50, 60, 'outro'),
        ] + cuts
        expected = self._chapters(
            [10, 20, 30], ['c', '[SponsorBlock]: Intermission/Intro Animation',
                           '[SponsorBlock]: Endcards/Credits'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)

    def test_remove_marked_arrange_sponsors_ChapterWithAdjacentSponsors(self):
        chapters = self._chapters([70], ['c']) + [
            self._sponsor_chapter(10, 20, 'sponsor'),
            self._sponsor_chapter(20, 30, 'selfpromo'),
            self._sponsor_chapter(30, 40, 'interaction')]
        expected = self._chapters(
            [10, 20, 30, 40, 70],
            ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion',
             '[SponsorBlock]: Interaction Reminder', 'c'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])

    def test_remove_marked_arrange_sponsors_ChapterWithAdjacentCuts(self):
        chapters = self._chapters([70], ['c']) + [
            self._sponsor_chapter(10, 20, 'sponsor'),
            self._sponsor_chapter(20, 30, 'interaction', remove=True),
            self._chapter(30, 40, remove=True),
            self._sponsor_chapter(40, 50, 'selpromo', remove=True),
            self._sponsor_chapter(50, 60, 'interaction')]
        expected = self._chapters([10, 20, 30, 40],
                                  ['c', '[SponsorBlock]: Sponsor',
                                   '[SponsorBlock]: Interaction Reminder', 'c'])
        self._remove_marked_arrange_sponsors_test_impl(
            chapters, expected, [self._chapter(20, 50, remove=True)])

    def test_remove_marked_arrange_sponsors_ChapterWithOverlappingSponsors(self):
        chapters = self._chapters([70], ['c']) + [
            self._sponsor_chapter(10, 30, 'sponsor'),
            self._sponsor_chapter(20, 50, 'selfpromo'),
            self._sponsor_chapter(40, 60, 'interaction')]
        expected = self._chapters(
            [10, 20, 30, 40, 50, 60, 70],
            ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion',
             '[SponsorBlock]: Unpaid/Self Promotion',
             '[SponsorBlock]: Unpaid/Self Promotion, Interaction Reminder',
             '[SponsorBlock]: Interaction Reminder', 'c'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])

    def test_remove_marked_arrange_sponsors_ChapterWithOverlappingCuts(self):
        chapters = self._chapters([70], ['c']) + [
            self._sponsor_chapter(10, 30, 'sponsor', remove=True),
            self._sponsor_chapter(20, 50, 'selfpromo', remove=True),
            self._sponsor_chapter(40, 60, 'interaction', remove=True)]
        self._remove_marked_arrange_sponsors_test_impl(
            chapters, self._chapters([20], ['c']), [self._chapter(10, 60, remove=True)])

    def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingSponsors(self):
        chapters = self._chapters([170], ['c']) + [
            self._sponsor_chapter(0, 30, 'intro'),
            self._sponsor_chapter(20, 50, 'sponsor'),
            self._sponsor_chapter(40, 60, 'selfpromo'),
            self._sponsor_chapter(70, 90, 'sponsor'),
            self._sponsor_chapter(80, 100, 'sponsor'),
            self._sponsor_chapter(90, 110, 'sponsor'),
            self._sponsor_chapter(120, 140, 'selfpromo'),
            self._sponsor_chapter(130, 160, 'interaction'),
            self._sponsor_chapter(150, 170, 'outro')]
        expected = self._chapters(
            [20, 30, 40, 50, 60, 70, 110, 120, 130, 140, 150, 160, 170],
            ['[SponsorBlock]: Intermission/Intro Animation',
             '[SponsorBlock]: Intermission/Intro Animation, Sponsor', '[SponsorBlock]: Sponsor',
             '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', '[SponsorBlock]: Unpaid/Self Promotion',
             'c', '[SponsorBlock]: Sponsor', 'c', '[SponsorBlock]: Unpaid/Self Promotion',
             '[SponsorBlock]: Unpaid/Self Promotion, Interaction Reminder',
             '[SponsorBlock]: Interaction Reminder',
             '[SponsorBlock]: Interaction Reminder, Endcards/Credits', '[SponsorBlock]: Endcards/Credits'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])

    def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingCuts(self):
        chapters = self._chapters([170], ['c']) + [
            self._chapter(0, 30, remove=True),
            self._sponsor_chapter(20, 50, 'sponsor', remove=True),
            self._chapter(40, 60, remove=True),
            self._sponsor_chapter(70, 90, 'sponsor', remove=True),
            self._chapter(80, 100, remove=True),
            self._chapter(90, 110, remove=True),
            self._sponsor_chapter(120, 140, 'sponsor', remove=True),
            self._sponsor_chapter(130, 160, 'selfpromo', remove=True),
            self._chapter(150, 170, remove=True)]
        expected_cuts = [self._chapter(0, 60, remove=True),
                         self._chapter(70, 110, remove=True),
                         self._chapter(120, 170, remove=True)]
        self._remove_marked_arrange_sponsors_test_impl(
            chapters, self._chapters([20], ['c']), expected_cuts)

    def test_remove_marked_arrange_sponsors_OverlappingSponsorsDifferentTitlesAfterCut(self):
        chapters = self._chapters([60], ['c']) + [
            self._sponsor_chapter(10, 60, 'sponsor'),
            self._sponsor_chapter(10, 40, 'intro'),
            self._sponsor_chapter(30, 50, 'interaction'),
            self._sponsor_chapter(30, 50, 'selfpromo', remove=True),
            self._sponsor_chapter(40, 50, 'interaction'),
            self._sponsor_chapter(50, 60, 'outro')]
        expected = self._chapters(
            [10, 30, 40], ['c', '[SponsorBlock]: Sponsor, Intermission/Intro Animation',
                           '[SponsorBlock]: Sponsor, Endcards/Credits'])
        self._remove_marked_arrange_sponsors_test_impl(
            chapters, expected, [self._chapter(30, 50, remove=True)])

    def test_remove_marked_arrange_sponsors_SponsorsNoLongerOverlapAfterCut(self):
        chapters = self._chapters([70], ['c']) + [
            self._sponsor_chapter(10, 30, 'sponsor'),
            self._sponsor_chapter(20, 50, 'interaction'),
            self._sponsor_chapter(30, 50, 'selpromo', remove=True),
            self._sponsor_chapter(40, 60, 'sponsor'),
            self._sponsor_chapter(50, 60, 'interaction')]
        expected = self._chapters(
            [10, 20, 40, 50], ['c', '[SponsorBlock]: Sponsor',
                               '[SponsorBlock]: Sponsor, Interaction Reminder', 'c'])
        self._remove_marked_arrange_sponsors_test_impl(
            chapters, expected, [self._chapter(30, 50, remove=True)])

    def test_remove_marked_arrange_sponsors_SponsorsStillOverlapAfterCut(self):
        chapters = self._chapters([70], ['c']) + [
            self._sponsor_chapter(10, 60, 'sponsor'),
            self._sponsor_chapter(20, 60, 'interaction'),
            self._sponsor_chapter(30, 50, 'selfpromo', remove=True)]
        expected = self._chapters(
            [10, 20, 40, 50], ['c', '[SponsorBlock]: Sponsor',
                               '[SponsorBlock]: Sponsor, Interaction Reminder', 'c'])
        self._remove_marked_arrange_sponsors_test_impl(
            chapters, expected, [self._chapter(30, 50, remove=True)])

    def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingSponsorsAndCuts(self):
        chapters = self._chapters([200], ['c']) + [
            self._sponsor_chapter(10, 40, 'sponsor'),
            self._sponsor_chapter(10, 30, 'intro'),
            self._chapter(20, 30, remove=True),
            self._sponsor_chapter(30, 40, 'selfpromo'),
            self._sponsor_chapter(50, 70, 'sponsor'),
            self._sponsor_chapter(60, 80, 'interaction'),
            self._chapter(70, 80, remove=True),
            self._sponsor_chapter(70, 90, 'sponsor'),
            self._sponsor_chapter(80, 100, 'interaction'),
            self._sponsor_chapter(120, 170, 'selfpromo'),
            self._sponsor_chapter(130, 180, 'outro'),
            self._chapter(140, 150, remove=True),
            self._chapter(150, 160, remove=True)]
        expected = self._chapters(
            [10, 20, 30, 40, 50, 70, 80, 100, 110, 130, 140, 160],
            ['c', '[SponsorBlock]: Sponsor, Intermission/Intro Animation',
             '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', 'c', '[SponsorBlock]: Sponsor',
             '[SponsorBlock]: Sponsor, Interaction Reminder', '[SponsorBlock]: Interaction Reminder',
             'c', '[SponsorBlock]: Unpaid/Self Promotion',
             '[SponsorBlock]: Unpaid/Self Promotion, Endcards/Credits',
             '[SponsorBlock]: Endcards/Credits', 'c'])
        expected_cuts = [self._chapter(20, 30, remove=True),
                         self._chapter(70, 80, remove=True),
                         self._chapter(140, 160, remove=True)]
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, expected_cuts)

    def test_remove_marked_arrange_sponsors_SponsorOverlapsMultipleChapters(self):
        chapters = (self._chapters([20, 40, 60, 80, 100], ['c1', 'c2', 'c3', 'c4', 'c5'])
                    + [self._sponsor_chapter(10, 90, 'sponsor')])
        expected = self._chapters([10, 90, 100], ['c1', '[SponsorBlock]: Sponsor', 'c5'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])

    def test_remove_marked_arrange_sponsors_CutOverlapsMultipleChapters(self):
        cuts = [self._chapter(10, 90, remove=True)]
        chapters = self._chapters([20, 40, 60, 80, 100], ['c1', 'c2', 'c3', 'c4', 'c5']) + cuts
        expected = self._chapters([10, 20], ['c1', 'c5'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)

    def test_remove_marked_arrange_sponsors_SponsorsWithinSomeChaptersAndOverlappingOthers(self):
        chapters = (self._chapters([10, 40, 60, 80], ['c1', 'c2', 'c3', 'c4'])
                    + [self._sponsor_chapter(20, 30, 'sponsor'),
                       self._sponsor_chapter(50, 70, 'selfpromo')])
        expected = self._chapters([10, 20, 30, 40, 50, 70, 80],
                                  ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c2', 'c3',
                                   '[SponsorBlock]: Unpaid/Self Promotion', 'c4'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])

    def test_remove_marked_arrange_sponsors_CutsWithinSomeChaptersAndOverlappingOthers(self):
        cuts = [self._chapter(20, 30, remove=True), self._chapter(50, 70, remove=True)]
        chapters = self._chapters([10, 40, 60, 80], ['c1', 'c2', 'c3', 'c4']) + cuts
        expected = self._chapters([10, 30, 40, 50], ['c1', 'c2', 'c3', 'c4'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)

    def test_remove_marked_arrange_sponsors_ChaptersAfterLastSponsor(self):
        chapters = (self._chapters([20, 40, 50, 60], ['c1', 'c2', 'c3', 'c4'])
                    + [self._sponsor_chapter(10, 30, 'music_offtopic')])
        expected = self._chapters(
            [10, 30, 40, 50, 60],
            ['c1', '[SponsorBlock]: Non-Music Section', 'c2', 'c3', 'c4'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])

    def test_remove_marked_arrange_sponsors_ChaptersAfterLastCut(self):
        cuts = [self._chapter(10, 30, remove=True)]
        chapters = self._chapters([20, 40, 50, 60], ['c1', 'c2', 'c3', 'c4']) + cuts
        expected = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)

    def test_remove_marked_arrange_sponsors_SponsorStartsAtChapterStart(self):
        chapters = (self._chapters([10, 20, 40], ['c1', 'c2', 'c3'])
                    + [self._sponsor_chapter(20, 30, 'sponsor')])
        expected = self._chapters([10, 20, 30, 40], ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c3'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])

    def test_remove_marked_arrange_sponsors_CutStartsAtChapterStart(self):
        cuts = [self._chapter(20, 30, remove=True)]
        chapters = self._chapters([10, 20, 40], ['c1', 'c2', 'c3']) + cuts
        expected = self._chapters([10, 20, 30], ['c1', 'c2', 'c3'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)

    def test_remove_marked_arrange_sponsors_SponsorEndsAtChapterEnd(self):
        chapters = (self._chapters([10, 30, 40], ['c1', 'c2', 'c3'])
                    + [self._sponsor_chapter(20, 30, 'sponsor')])
        expected = self._chapters([10, 20, 30, 40], ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c3'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])

    def test_remove_marked_arrange_sponsors_CutEndsAtChapterEnd(self):
        cuts = [self._chapter(20, 30, remove=True)]
        chapters = self._chapters([10, 30, 40], ['c1', 'c2', 'c3']) + cuts
        expected = self._chapters([10, 20, 30], ['c1', 'c2', 'c3'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)

    def test_remove_marked_arrange_sponsors_SponsorCoincidesWithChapters(self):
        chapters = (self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4'])
                    + [self._sponsor_chapter(10, 30, 'sponsor')])
        expected = self._chapters([10, 30, 40], ['c1', '[SponsorBlock]: Sponsor', 'c4'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])

    def test_remove_marked_arrange_sponsors_CutCoincidesWithChapters(self):
        cuts = [self._chapter(10, 30, remove=True)]
        chapters = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + cuts
        expected = self._chapters([10, 20], ['c1', 'c4'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)

    def test_remove_marked_arrange_sponsors_SponsorsAtVideoBoundaries(self):
        chapters = (self._chapters([20, 40, 60], ['c1', 'c2', 'c3'])
                    + [self._sponsor_chapter(0, 10, 'intro'), self._sponsor_chapter(50, 60, 'outro')])
        expected = self._chapters(
            [10, 20, 40, 50, 60], ['[SponsorBlock]: Intermission/Intro Animation', 'c1', 'c2', 'c3',
                                   '[SponsorBlock]: Endcards/Credits'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])

    def test_remove_marked_arrange_sponsors_CutsAtVideoBoundaries(self):
        cuts = [self._chapter(0, 10, remove=True), self._chapter(50, 60, remove=True)]
        chapters = self._chapters([20, 40, 60], ['c1', 'c2', 'c3']) + cuts
        expected = self._chapters([10, 30, 40], ['c1', 'c2', 'c3'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)

    def test_remove_marked_arrange_sponsors_SponsorsOverlapChaptersAtVideoBoundaries(self):
        chapters = (self._chapters([10, 40, 50], ['c1', 'c2', 'c3'])
                    + [self._sponsor_chapter(0, 20, 'intro'), self._sponsor_chapter(30, 50, 'outro')])
        expected = self._chapters(
            [20, 30, 50], ['[SponsorBlock]: Intermission/Intro Animation', 'c2',
                           '[SponsorBlock]: Endcards/Credits'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])

    def test_remove_marked_arrange_sponsors_CutsOverlapChaptersAtVideoBoundaries(self):
        cuts = [self._chapter(0, 20, remove=True), self._chapter(30, 50, remove=True)]
        chapters = self._chapters([10, 40, 50], ['c1', 'c2', 'c3']) + cuts
        expected = self._chapters([10], ['c2'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)

    def test_remove_marked_arrange_sponsors_EverythingSponsored(self):
        chapters = (self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4'])
                    + [self._sponsor_chapter(0, 20, 'intro'), self._sponsor_chapter(20, 40, 'outro')])
        expected = self._chapters([20, 40], ['[SponsorBlock]: Intermission/Intro Animation',
                                             '[SponsorBlock]: Endcards/Credits'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])

    def test_remove_marked_arrange_sponsors_EverythingCut(self):
        cuts = [self._chapter(0, 20, remove=True), self._chapter(20, 40, remove=True)]
        chapters = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + cuts
        self._remove_marked_arrange_sponsors_test_impl(
            chapters, [], [self._chapter(0, 40, remove=True)])

    def test_remove_marked_arrange_sponsors_TinyChaptersInTheOriginalArePreserved(self):
        chapters = self._chapters([0.1, 0.2, 0.3, 0.4], ['c1', 'c2', 'c3', 'c4'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, chapters, [])

    def test_remove_marked_arrange_sponsors_TinySponsorsAreIgnored(self):
        chapters = [self._sponsor_chapter(0, 0.1, 'intro'), self._chapter(0.1, 0.2, 'c1'),
                    self._sponsor_chapter(0.2, 0.3, 'sponsor'), self._chapter(0.3, 0.4, 'c2'),
                    self._sponsor_chapter(0.4, 0.5, 'outro')]
        self._remove_marked_arrange_sponsors_test_impl(
            chapters, self._chapters([0.3, 0.5], ['c1', 'c2']), [])

    def test_remove_marked_arrange_sponsors_TinyChaptersResultingFromCutsAreIgnored(self):
        cuts = [self._chapter(1.5, 2.5, remove=True)]
        chapters = self._chapters([2, 3, 3.5], ['c1', 'c2', 'c3']) + cuts
        self._remove_marked_arrange_sponsors_test_impl(
            chapters, self._chapters([2, 2.5], ['c1', 'c3']), cuts)

    def test_remove_marked_arrange_sponsors_SingleTinyChapterIsPreserved(self):
        cuts = [self._chapter(0.5, 2, remove=True)]
        chapters = self._chapters([2], ['c']) + cuts
        self._remove_marked_arrange_sponsors_test_impl(
            chapters, self._chapters([0.5], ['c']), cuts)

    def test_remove_marked_arrange_sponsors_TinyChapterAtTheStartPrependedToTheNext(self):
        cuts = [self._chapter(0.5, 2, remove=True)]
        chapters = self._chapters([2, 4], ['c1', 'c2']) + cuts
        self._remove_marked_arrange_sponsors_test_impl(
            chapters, self._chapters([2.5], ['c2']), cuts)

    def test_remove_marked_arrange_sponsors_TinyChaptersResultingFromSponsorOverlapAreIgnored(self):
        chapters = self._chapters([1, 3, 4], ['c1', 'c2', 'c3']) + [
            self._sponsor_chapter(1.5, 2.5, 'sponsor')]
        self._remove_marked_arrange_sponsors_test_impl(
            chapters, self._chapters([1.5, 2.5, 4], ['c1', '[SponsorBlock]: Sponsor', 'c3']), [])

    def test_remove_marked_arrange_sponsors_TinySponsorsOverlapsAreIgnored(self):
        chapters = self._chapters([2, 3, 5], ['c1', 'c2', 'c3']) + [
            self._sponsor_chapter(1, 3, 'sponsor'),
            self._sponsor_chapter(2.5, 4, 'selfpromo')
        ]
        self._remove_marked_arrange_sponsors_test_impl(
            chapters, self._chapters([1, 3, 4, 5], [
                'c1', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion', 'c3']), [])

    def test_remove_marked_arrange_sponsors_TinySponsorsPrependedToTheNextSponsor(self):
        chapters = self._chapters([4], ['c']) + [
            self._sponsor_chapter(1.5, 2, 'sponsor'),
            self._sponsor_chapter(2, 4, 'selfpromo')
        ]
        self._remove_marked_arrange_sponsors_test_impl(
            chapters, self._chapters([1.5, 4], ['c', '[SponsorBlock]: Unpaid/Self Promotion']), [])

    def test_remove_marked_arrange_sponsors_SmallestSponsorInTheOverlapGetsNamed(self):
        self._pp._sponsorblock_chapter_title = '[SponsorBlock]: %(name)s'
        chapters = self._chapters([10], ['c']) + [
            self._sponsor_chapter(2, 8, 'sponsor'),
            self._sponsor_chapter(4, 6, 'selfpromo')
        ]
        self._remove_marked_arrange_sponsors_test_impl(
            chapters, self._chapters([2, 4, 6, 8, 10], [
                'c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion',
                '[SponsorBlock]: Sponsor', 'c'
            ]), [])

    def test_make_concat_opts_CommonCase(self):
        sponsor_chapters = [self._chapter(1, 2, 's1'), self._chapter(10, 20, 's2')]
        expected = '''ffconcat version 1.0
file 'file:test'
outpoint 1.000000
file 'file:test'
inpoint 2.000000
outpoint 10.000000
file 'file:test'
inpoint 20.000000
'''
        opts = self._pp._make_concat_opts(sponsor_chapters, 30)
        self.assertEqual(expected, ''.join(self._pp._concat_spec(['test'] * len(opts), opts)))

    def test_make_concat_opts_NoZeroDurationChunkAtVideoStart(self):
        sponsor_chapters = [self._chapter(0, 1, 's1'), self._chapter(10, 20, 's2')]
        expected = '''ffconcat version 1.0
file 'file:test'
inpoint 1.000000
outpoint 10.000000
file 'file:test'
inpoint 20.000000
'''
        opts = self._pp._make_concat_opts(sponsor_chapters, 30)
        self.assertEqual(expected, ''.join(self._pp._concat_spec(['test'] * len(opts), opts)))

    def test_make_concat_opts_NoZeroDurationChunkAtVideoEnd(self):
        sponsor_chapters = [self._chapter(1, 2, 's1'), self._chapter(10, 20, 's2')]
        expected = '''ffconcat version 1.0
file 'file:test'
outpoint 1.000000
file 'file:test'
inpoint 2.000000
outpoint 10.000000
'''
        opts = self._pp._make_concat_opts(sponsor_chapters, 20)
        self.assertEqual(expected, ''.join(self._pp._concat_spec(['test'] * len(opts), opts)))

    def test_quote_for_concat_RunsOfQuotes(self):
        self.assertEqual(
            r"'special '\'' '\'\''characters'\'\'\''galore'",
            self._pp._quote_for_ffmpeg("special ' ''characters'''galore"))

    def test_quote_for_concat_QuotesAtStart(self):
        self.assertEqual(
            r"\'\'\''special '\'' characters '\'' galore'",
            self._pp._quote_for_ffmpeg("'''special ' characters ' galore"))

    def test_quote_for_concat_QuotesAtEnd(self):
        self.assertEqual(
            r"'special '\'' characters '\'' galore'\'\'\'",
            self._pp._quote_for_ffmpeg("special ' characters ' galore'''"))
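
The chapter and cut dictionaries the ModifyChaptersPP tests above build by hand, reduced to a sketch: a plain chapter carries a title, a sponsor chapter carries a '_categories' list, and 'remove': True marks a span that _remove_marked_arrange_sponsors() excises.

# Sketch of the dict shapes used throughout the tests above
chapter = {'start_time': 0, 'end_time': 10, 'title': 'c1'}
sponsor = {'start_time': 10, 'end_time': 20,
           '_categories': [('sponsor', 10, 20)]}
cut = {'start_time': 20, 'end_time': 30, 'remove': True}
assert cut.get('remove') and 'title' not in cut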
yt-dlp-2022.08.19/test/test_socks.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import random
import subprocess
import urllib.request

from test.helper import FakeYDL, get_params, is_download_test


@is_download_test
class TestMultipleSocks(unittest.TestCase):
    @staticmethod
    def _check_params(attrs):
        params = get_params()
        for attr in attrs:
            if attr not in params:
                print('Missing %s. Skipping.' % attr)
                return
        return params

    def test_proxy_http(self):
        params = self._check_params(['primary_proxy', 'primary_server_ip'])
        if params is None:
            return
        ydl = FakeYDL({
            'proxy': params['primary_proxy']
        })
        self.assertEqual(
            ydl.urlopen('http://yt-dl.org/ip').read().decode(),
            params['primary_server_ip'])

    def test_proxy_https(self):
        params = self._check_params(['primary_proxy', 'primary_server_ip'])
        if params is None:
            return
        ydl = FakeYDL({
            'proxy': params['primary_proxy']
        })
        self.assertEqual(
            ydl.urlopen('https://yt-dl.org/ip').read().decode(),
            params['primary_server_ip'])

    def test_secondary_proxy_http(self):
        params = self._check_params(['secondary_proxy', 'secondary_server_ip'])
        if params is None:
            return
        ydl = FakeYDL()
        req = urllib.request.Request('http://yt-dl.org/ip')
        req.add_header('Ytdl-request-proxy', params['secondary_proxy'])
        self.assertEqual(
            ydl.urlopen(req).read().decode(),
            params['secondary_server_ip'])

    def test_secondary_proxy_https(self):
        params = self._check_params(['secondary_proxy', 'secondary_server_ip'])
        if params is None:
            return
        ydl = FakeYDL()
        req = urllib.request.Request('https://yt-dl.org/ip')
        req.add_header('Ytdl-request-proxy', params['secondary_proxy'])
        self.assertEqual(
            ydl.urlopen(req).read().decode(),
            params['secondary_server_ip'])


@is_download_test
class TestSocks(unittest.TestCase):
    _SKIP_SOCKS_TEST = True

    def setUp(self):
        if self._SKIP_SOCKS_TEST:
            return

        self.port = random.randint(20000, 30000)
        self.server_process = subprocess.Popen([
            'srelay', '-f', '-i', '127.0.0.1:%d' % self.port],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    def tearDown(self):
        if self._SKIP_SOCKS_TEST:
            return

        self.server_process.terminate()
        self.server_process.communicate()

    def _get_ip(self, protocol):
        if self._SKIP_SOCKS_TEST:
            return '127.0.0.1'

        ydl = FakeYDL({
            'proxy': '%s://127.0.0.1:%d' % (protocol, self.port),
        })
        return ydl.urlopen('http://yt-dl.org/ip').read().decode()

    def test_socks4(self):
        self.assertTrue(isinstance(self._get_ip('socks4'), str))

    def test_socks4a(self):
        self.assertTrue(isinstance(self._get_ip('socks4a'), str))

    def test_socks5(self):
        self.assertTrue(isinstance(self._get_ip('socks5'), str))


if __name__ == '__main__':
    unittest.main()
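
The proxy forms these tests exercise, collected into one sketch; the hosts and ports below are placeholders, not values from the test configuration.

# Sketch: global proxy via params, per-request proxy via header (as above)
http_proxy = {'proxy': 'http://127.0.0.1:3128'}          # placeholder host:port
socks5_proxy = {'proxy': 'socks5://127.0.0.1:1080'}      # placeholder host:port
# Per-request override, matching the secondary-proxy tests:
#   req.add_header('Ytdl-request-proxy', 'socks4a://127.0.0.1:1080')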
yt-dlp-2022.08.19/test/test_subtitles.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from test.helper import FakeYDL, is_download_test, md5
from yt_dlp.extractor import (
    NPOIE,
    NRKTVIE,
    PBSIE,
    CeskaTelevizeIE,
    ComedyCentralIE,
    DailymotionIE,
    DemocracynowIE,
    LyndaIE,
    RaiPlayIE,
    RTVEALaCartaIE,
    TedTalkIE,
    ThePlatformFeedIE,
    ThePlatformIE,
    VikiIE,
    VimeoIE,
    WallaIE,
    YoutubeIE,
)


@is_download_test
class BaseTestSubtitles(unittest.TestCase):
    url = None
    IE = None

    def setUp(self):
        self.DL = FakeYDL()
        self.ie = self.IE()
        self.DL.add_info_extractor(self.ie)
        if not self.IE.working():
            print('Skipping: %s marked as not _WORKING' % self.IE.ie_key())
            self.skipTest('IE marked as not _WORKING')

    def getInfoDict(self):
        info_dict = self.DL.extract_info(self.url, download=False)
        return info_dict

    def getSubtitles(self):
        info_dict = self.getInfoDict()
        subtitles = info_dict['requested_subtitles']
        if not subtitles:
            return subtitles
        for sub_info in subtitles.values():
            if sub_info.get('data') is None:
                uf = self.DL.urlopen(sub_info['url'])
                sub_info['data'] = uf.read().decode()
        return {l: sub_info['data'] for l, sub_info in subtitles.items()}


@is_download_test
class TestYoutubeSubtitles(BaseTestSubtitles):
    # Available subtitles for QRS8MkLhQmM:
    # Language formats
    # ru       vtt, ttml, srv3, srv2, srv1, json3
    # fr       vtt, ttml, srv3, srv2, srv1, json3
    # en       vtt, ttml, srv3, srv2, srv1, json3
    # nl       vtt, ttml, srv3, srv2, srv1, json3
    # de       vtt, ttml, srv3, srv2, srv1, json3
    # ko       vtt, ttml, srv3, srv2, srv1, json3
    # it       vtt, ttml, srv3, srv2, srv1, json3
    # zh-Hant  vtt, ttml, srv3, srv2, srv1, json3
    # hi       vtt, ttml, srv3, srv2, srv1, json3
    # pt-BR    vtt, ttml, srv3, srv2, srv1, json3
    # es-MX    vtt, ttml, srv3, srv2, srv1, json3
    # ja       vtt, ttml, srv3, srv2, srv1, json3
    # pl       vtt, ttml, srv3, srv2, srv1, json3
    url = 'QRS8MkLhQmM'
    IE = YoutubeIE

    def test_youtube_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles.keys()), 13)
        self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d')
        self.assertEqual(md5(subtitles['it']), '0e0b667ba68411d88fd1c5f4f4eab2f9')
        for lang in ['fr', 'de']:
            self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)

    def _test_subtitles_format(self, fmt, md5_hash, lang='en'):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = fmt
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles[lang]), md5_hash)

    def test_youtube_subtitles_ttml_format(self):
        self._test_subtitles_format('ttml', 'c97ddf1217390906fa9fbd34901f3da2')

    def test_youtube_subtitles_vtt_format(self):
        self._test_subtitles_format('vtt', 'ae1bd34126571a77aabd4d276b28044d')

    def test_youtube_subtitles_json3_format(self):
        self._test_subtitles_format('json3', '688dd1ce0981683867e7fe6fde2a224b')

    def _test_automatic_captions(self, url, lang):
        self.url = url
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslangs'] = [lang]
        subtitles = self.getSubtitles()
        self.assertTrue(subtitles[lang] is not None)

    def test_youtube_automatic_captions(self):
        # Available automatic captions for 8YoUxe5ncPo:
        # Language formats (all in vtt, ttml, srv3, srv2, srv1, json3)
        # gu, zh-Hans, zh-Hant, gd, ga, gl, lb, la, lo, tt, tr,
        # lv, lt, tk, th, tg, te, fil, haw, yi, ceb, yo, de, da,
        # el, eo, en, eu, et, es, ru, rw, ro, bn, be, bg, uk, jv,
        # bs, ja, or, xh, co, ca, cy, cs, ps, pt, pa, vi, pl, hy,
        # hr, ht, hu, hmn, hi, ha, mg, uz, ml, mn, mi, mk, ur,
        # mt, ms, mr, ug, ta, my, af, sw, is, am,
        # *it*, iw, sv, ar,
        # su, zu, az, id, ig, nl, no, ne, ny, fr, ku, fy, fa, fi,
        # ka, kk, sr, sq, ko, kn, km, st, sk, si, so, sn, sm, sl,
        # ky, sd
        # ...
        self._test_automatic_captions('8YoUxe5ncPo', 'it')

    @unittest.skip('Video unavailable')
    def test_youtube_translated_subtitles(self):
        # This video has a subtitles track, which can be translated (#4555)
        self._test_automatic_captions('Ky9eprVWzlI', 'it')

    def test_youtube_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        # Available automatic captions for 8YoUxe5ncPo:
        # ...
        # 8YoUxe5ncPo has no subtitles
        self.url = '8YoUxe5ncPo'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)


@is_download_test
class TestDailymotionSubtitles(BaseTestSubtitles):
    url = 'http://www.dailymotion.com/video/xczg00'
    IE = DailymotionIE

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertTrue(len(subtitles.keys()) >= 6)
        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
        for lang in ['es', 'fr', 'de']:
            self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)

    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)


@is_download_test
@unittest.skip('IE broken')
class TestTedSubtitles(BaseTestSubtitles):
    url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
    IE = TedTalkIE

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertTrue(len(subtitles.keys()) >= 28)
        self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
        self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
        for lang in ['es', 'fr', 'de']:
            self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)


@is_download_test
class TestVimeoSubtitles(BaseTestSubtitles):
    url = 'http://vimeo.com/76979871'
    IE = VimeoIE

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'de', 'en', 'es', 'fr'})
        self.assertEqual(md5(subtitles['en']), '386cbc9320b94e25cb364b97935e5dd1')
        self.assertEqual(md5(subtitles['fr']), 'c9b69eef35bc6641c0d4da8a04f9dfac')

    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://vimeo.com/68093876'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)


@is_download_test
@unittest.skip('IE broken')
class TestWallaSubtitles(BaseTestSubtitles):
    url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
    IE = WallaIE

    def test_allsubtitles(self):
        self.DL.expect_warning('Automatic Captions not supported by this server')
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'heb'})
        self.assertEqual(md5(subtitles['heb']), 'e758c5d7cb982f6bef14f377ec7a3920')

    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)


@is_download_test
@unittest.skip('IE broken')
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
    url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
    IE = CeskaTelevizeIE

    def test_allsubtitles(self):
        self.DL.expect_warning('Automatic Captions not supported by this server')
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'cs'})
        self.assertTrue(len(subtitles['cs']) > 20000)

    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertFalse(subtitles)


@is_download_test
@unittest.skip('IE broken')
class TestLyndaSubtitles(BaseTestSubtitles):
    url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
    IE = LyndaIE

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'en'})
        self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')


@is_download_test
@unittest.skip('IE broken')
class TestNPOSubtitles(BaseTestSubtitles):
    url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
    IE = NPOIE

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'nl'})
        self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')


@is_download_test
@unittest.skip('IE broken')
class TestMTVSubtitles(BaseTestSubtitles):
    url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'
    IE = ComedyCentralIE

    def getInfoDict(self):
        return super().getInfoDict()['entries'][0]

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'en'})
        self.assertEqual(md5(subtitles['en']), '78206b8d8a0cfa9da64dc026eea48961')


@is_download_test
class TestNRKSubtitles(BaseTestSubtitles):
    url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
    IE = NRKTVIE

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'nb-ttv'})
        self.assertEqual(md5(subtitles['nb-ttv']), '67e06ff02d0deaf975e68f6cb8f6a149')


@is_download_test
class TestRaiPlaySubtitles(BaseTestSubtitles):
    IE = RaiPlayIE

    def test_subtitles_key(self):
        self.url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'it'})
        self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a')

    def test_subtitles_array_key(self):
        self.url = 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'it'})
        self.assertEqual(md5(subtitles['it']), '4b3264186fbb103508abe5311cfcb9cd')


@is_download_test
@unittest.skip('IE broken - DRM only')
class TestVikiSubtitles(BaseTestSubtitles):
    url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
    IE = VikiIE

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'en'})
        self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a')


@is_download_test
class TestThePlatformSubtitles(BaseTestSubtitles):
    # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
    # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
    url = 'theplatform:JFUjUE1_ehvq'
    IE = ThePlatformIE

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'en'})
        self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')


@is_download_test
@unittest.skip('IE broken')
class TestThePlatformFeedSubtitles(BaseTestSubtitles):
    url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
    IE = ThePlatformFeedIE

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'en'})
        self.assertEqual(md5(subtitles['en']), '48649a22e82b2da21c9a67a395eedade')


@is_download_test
class TestRtveSubtitles(BaseTestSubtitles):
    url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
    IE = RTVEALaCartaIE

    def test_allsubtitles(self):
        print('Skipping, only available from Spain')
        return
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'es'})
        self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')


@is_download_test
class TestDemocracynowSubtitles(BaseTestSubtitles):
    url = 'http://www.democracynow.org/shows/2015/7/3'
    IE = DemocracynowIE

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'en'})
        self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045')

    def test_subtitles_in_page(self):
        self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'en'})
        self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045')


@is_download_test
class TestPBSSubtitles(BaseTestSubtitles):
    url = 'https://www.pbs.org/video/how-fantasy-reflects-our-world-picecq/'
    IE = PBSIE

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), {'en'})

    def test_subtitles_dfxp_format(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'dfxp'
        subtitles = self.getSubtitles()
        self.assertIn(md5(subtitles['en']), ['643b034254cdc3768ff1e750b6b5873b'])

    def test_subtitles_vtt_format(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'vtt'
        subtitles = self.getSubtitles()
        self.assertIn(
            md5(subtitles['en']),
            ['937a05711555b165d4c55a9667017045', 'f49ea998d6824d94959c8152a368ff73'])

    def test_subtitles_srt_format(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'srt'
        subtitles = self.getSubtitles()
        self.assertIn(md5(subtitles['en']), ['2082c21b43759d9bf172931b2f2ca371'])

    def test_subtitles_sami_format(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'sami'
        subtitles = self.getSubtitles()
        self.assertIn(md5(subtitles['en']), ['4256b16ac7da6a6780fafd04294e85cd'])


if __name__ == '__main__':
    unittest.main()
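
The three parameter combinations the subtitle tests above keep toggling, gathered into one sketch for reference; these are exactly the params dict keys the tests set on the downloader.

# Sketch of the param combinations exercised above
all_subs = {'writesubtitles': True, 'allsubtitles': True}
one_format = {'writesubtitles': True, 'subtitlesformat': 'vtt'}
auto_caps = {'writeautomaticsub': True, 'subtitleslangs': ['it']}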
yt-dlp-2022.08.19/test/test_update.py.disabled

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import json

from yt_dlp.update import rsa_verify


class TestUpdate(unittest.TestCase):
    def test_rsa_verify(self):
        UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
        with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'versions.json'), 'rb') as f:
            versions_info = f.read().decode()
        versions_info = json.loads(versions_info)
        signature = versions_info['signature']
        del versions_info['signature']
        self.assertTrue(rsa_verify(
            json.dumps(versions_info, sort_keys=True).encode(),
            signature, UPDATES_RSA_KEY))


if __name__ == '__main__':
    unittest.main()
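
A minimal sketch of the signing scheme implied by the test above: the 'signature' field is stripped and the remaining JSON, serialized with sort_keys=True, is the payload the signature must cover. The data below is fabricated for illustration; only the serialization pattern comes from the test.

# Sketch: how the signed payload is derived (placeholder values)
import json

info = {'latest': '2022.08.19', 'signature': 'placeholder'}
signature = info.pop('signature')
signed_payload = json.dumps(info, sort_keys=True).encode()
assert b'signature' not in signed_payload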
age_restricted, args_to_str, base_url, caesar, clean_html, clean_podcast_url, cli_bool_option, cli_option, cli_valueless_option, date_from_str, datetime_from_str, detect_exe_version, determine_ext, determine_file_encoding, dfxp2srt, dict_get, encode_base_n, encode_compat_str, encodeFilename, escape_rfc3986, escape_url, expand_path, extract_attributes, find_xpath_attr, fix_xml_ampersands, float_or_none, format_bytes, get_compatible_ext, get_element_by_attribute, get_element_by_class, get_element_html_by_attribute, get_element_html_by_class, get_element_text_and_html_by_tag, get_elements_by_attribute, get_elements_by_class, get_elements_html_by_attribute, get_elements_html_by_class, get_elements_text_and_html_by_attribute, int_or_none, intlist_to_bytes, iri_to_uri, is_html, js_to_json, limit_length, locked_file, lowercase_escape, match_str, merge_dicts, mimetype2ext, month_by_name, multipart_encode, ohdave_rsa_encrypt, orderedSet, parse_age_limit, parse_bitrate, parse_codecs, parse_count, parse_dfxp_time_expr, parse_duration, parse_filesize, parse_iso8601, parse_qs, parse_resolution, pkcs1pad, prepend_extension, read_batch_urls, remove_end, remove_quotes, remove_start, render_table, replace_extension, rot47, sanitize_filename, sanitize_path, sanitize_url, sanitized_Request, shell_quote, smuggle_url, str_to_int, strip_jsonp, strip_or_none, subtitles_filename, timeconvert, unescapeHTML, unified_strdate, unified_timestamp, unsmuggle_url, update_url_query, uppercase_escape, url_basename, url_or_none, urlencode_postdata, urljoin, urshift, version_tuple, xpath_attr, xpath_element, xpath_text, xpath_with_ns, ) class TestUtil(unittest.TestCase): def test_timeconvert(self): self.assertTrue(timeconvert('') is None) self.assertTrue(timeconvert('bougrg') is None) def test_sanitize_filename(self): self.assertEqual(sanitize_filename(''), '') self.assertEqual(sanitize_filename('abc'), 'abc') self.assertEqual(sanitize_filename('abc_d-e'), 'abc_d-e') self.assertEqual(sanitize_filename('123'), '123') self.assertEqual('abc⧸de', sanitize_filename('abc/de')) self.assertFalse('/' in sanitize_filename('abc/de///')) self.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de', is_id=False)) self.assertEqual('xxx', sanitize_filename('xxx/<>\\*|', is_id=False)) self.assertEqual('yes no', sanitize_filename('yes? 
no', is_id=False)) self.assertEqual('this - that', sanitize_filename('this: that', is_id=False)) self.assertEqual(sanitize_filename('AT&T'), 'AT&T') aumlaut = 'ä' self.assertEqual(sanitize_filename(aumlaut), aumlaut) tests = '\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430' self.assertEqual(sanitize_filename(tests), tests) self.assertEqual( sanitize_filename('New World record at 0:12:34'), 'New World record at 0_12_34') self.assertEqual(sanitize_filename('--gasdgf'), '--gasdgf') self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf') self.assertEqual(sanitize_filename('--gasdgf', is_id=False), '_-gasdgf') self.assertEqual(sanitize_filename('.gasdgf'), '.gasdgf') self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf') self.assertEqual(sanitize_filename('.gasdgf', is_id=False), 'gasdgf') forbidden = '"\0\\/' for fc in forbidden: for fbc in forbidden: self.assertTrue(fbc not in sanitize_filename(fc)) def test_sanitize_filename_restricted(self): self.assertEqual(sanitize_filename('abc', restricted=True), 'abc') self.assertEqual(sanitize_filename('abc_d-e', restricted=True), 'abc_d-e') self.assertEqual(sanitize_filename('123', restricted=True), '123') self.assertEqual('abc_de', sanitize_filename('abc/de', restricted=True)) self.assertFalse('/' in sanitize_filename('abc/de///', restricted=True)) self.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de', restricted=True)) self.assertEqual('xxx', sanitize_filename('xxx/<>\\*|', restricted=True)) self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True)) self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True)) tests = 'aäb\u4e2d\u56fd\u7684c' self.assertEqual(sanitize_filename(tests, restricted=True), 'aab_c') self.assertTrue(sanitize_filename('\xf6', restricted=True) != '') # No empty filename forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#' for fc in forbidden: for fbc in forbidden: self.assertTrue(fbc not in sanitize_filename(fc, restricted=True)) # Handle a common case more neatly self.assertEqual(sanitize_filename('\u5927\u58f0\u5e26 - Song', restricted=True), 'Song') self.assertEqual(sanitize_filename('\u603b\u7edf: Speech', restricted=True), 'Speech') # .. 
but make sure the file name is never empty self.assertTrue(sanitize_filename('-', restricted=True) != '') self.assertTrue(sanitize_filename(':', restricted=True) != '') self.assertEqual(sanitize_filename( 'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True), 'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYTHssaaaaaaaeceeeeiiiionooooooooeuuuuuythy') def test_sanitize_ids(self): self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw') self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw') self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI') def test_sanitize_path(self): if sys.platform != 'win32': return self.assertEqual(sanitize_path('abc'), 'abc') self.assertEqual(sanitize_path('abc/def'), 'abc\\def') self.assertEqual(sanitize_path('abc\\def'), 'abc\\def') self.assertEqual(sanitize_path('abc|def'), 'abc#def') self.assertEqual(sanitize_path('<>:"|?*'), '#######') self.assertEqual(sanitize_path('C:/abc/def'), 'C:\\abc\\def') self.assertEqual(sanitize_path('C?:/abc/def'), 'C##\\abc\\def') self.assertEqual(sanitize_path('\\\\?\\UNC\\ComputerName\\abc'), '\\\\?\\UNC\\ComputerName\\abc') self.assertEqual(sanitize_path('\\\\?\\UNC/ComputerName/abc'), '\\\\?\\UNC\\ComputerName\\abc') self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc') self.assertEqual(sanitize_path('\\\\?\\C:/abc'), '\\\\?\\C:\\abc') self.assertEqual(sanitize_path('\\\\?\\C:\\ab?c\\de:f'), '\\\\?\\C:\\ab#c\\de#f') self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc') self.assertEqual( sanitize_path('youtube/%(uploader)s/%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s'), 'youtube\\%(uploader)s\\%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s') self.assertEqual( sanitize_path('youtube/TheWreckingYard ./00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part'), 'youtube\\TheWreckingYard #\\00001-Not bad, Especially for Free! 
(1987 Yamaha 700)-20141116.mp4.part') self.assertEqual(sanitize_path('abc/def...'), 'abc\\def..#') self.assertEqual(sanitize_path('abc.../def'), 'abc..#\\def') self.assertEqual(sanitize_path('abc.../def...'), 'abc..#\\def..#') self.assertEqual(sanitize_path('../abc'), '..\\abc') self.assertEqual(sanitize_path('../../abc'), '..\\..\\abc') self.assertEqual(sanitize_path('./abc'), 'abc') self.assertEqual(sanitize_path('./../abc'), '..\\abc') def test_sanitize_url(self): self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar') self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar') self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar') self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar') self.assertEqual(sanitize_url('foo bar'), 'foo bar') def test_extract_basic_auth(self): auth_header = lambda url: sanitized_Request(url).get_header('Authorization') self.assertFalse(auth_header('http://foo.bar')) self.assertFalse(auth_header('http://:foo.bar')) self.assertEqual(auth_header('http://@foo.bar'), 'Basic Og==') self.assertEqual(auth_header('http://:pass@foo.bar'), 'Basic OnBhc3M=') self.assertEqual(auth_header('http://user:@foo.bar'), 'Basic dXNlcjo=') self.assertEqual(auth_header('http://user:pass@foo.bar'), 'Basic dXNlcjpwYXNz') def test_expand_path(self): def env(var): return f'%{var}%' if sys.platform == 'win32' else f'${var}' os.environ['yt_dlp_EXPATH_PATH'] = 'expanded' self.assertEqual(expand_path(env('yt_dlp_EXPATH_PATH')), 'expanded') old_home = os.environ.get('HOME') test_str = R'C:\Documents and Settings\тест\Application Data' try: os.environ['HOME'] = test_str self.assertEqual(expand_path(env('HOME')), os.getenv('HOME')) self.assertEqual(expand_path('~'), os.getenv('HOME')) self.assertEqual( expand_path('~/%s' % env('yt_dlp_EXPATH_PATH')), '%s/expanded' % os.getenv('HOME')) finally: os.environ['HOME'] = old_home or '' def test_prepend_extension(self): self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext') self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext') self.assertEqual(prepend_extension('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp') self.assertEqual(prepend_extension('abc', 'temp'), 'abc.temp') self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp') self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext') def test_replace_extension(self): self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp') self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp') self.assertEqual(replace_extension('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp') self.assertEqual(replace_extension('abc', 'temp'), 'abc.temp') self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp') self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp') def test_subtitles_filename(self): self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt') self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt') self.assertEqual(subtitles_filename('abc.unexpected_ext', 'en', 'vtt', 'ext'), 'abc.unexpected_ext.en.vtt') def test_remove_start(self): self.assertEqual(remove_start(None, 'A - '), None) self.assertEqual(remove_start('A - B', 'A - '), 'B') self.assertEqual(remove_start('B - A', 'A - '), 'B - A') def test_remove_end(self): self.assertEqual(remove_end(None, ' - B'), None) self.assertEqual(remove_end('A - B', ' - B'), 'A') self.assertEqual(remove_end('B - A', ' - B'), 'B - A') def 
test_remove_quotes(self):
        self.assertEqual(remove_quotes(None), None)
        self.assertEqual(remove_quotes('"'), '"')
        self.assertEqual(remove_quotes("'"), "'")
        self.assertEqual(remove_quotes(';'), ';')
        self.assertEqual(remove_quotes('";'), '";')
        self.assertEqual(remove_quotes('""'), '')
        self.assertEqual(remove_quotes('";"'), ';')

    def test_ordered_set(self):
        self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
        self.assertEqual(orderedSet([]), [])
        self.assertEqual(orderedSet([1]), [1])
        # keep the list ordered
        self.assertEqual(orderedSet([135, 1, 1, 1]), [135, 1])

    def test_unescape_html(self):
        self.assertEqual(unescapeHTML('%20;'), '%20;')
        self.assertEqual(unescapeHTML('&#x2F;'), '/')
        self.assertEqual(unescapeHTML('&#47;'), '/')
        self.assertEqual(unescapeHTML('&eacute;'), 'é')
        self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
        self.assertEqual(unescapeHTML('&a&quot;'), '&a"')
        # HTML5 entities
        self.assertEqual(unescapeHTML('&period;&apos;'), '.\'')

    def test_date_from_str(self):
        self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day'))
        self.assertEqual(date_from_str('now+7day'), date_from_str('now+1week'))
        self.assertEqual(date_from_str('now+14day'), date_from_str('now+2week'))
        self.assertEqual(date_from_str('20200229+365day'), date_from_str('20200229+1year'))
        self.assertEqual(date_from_str('20210131+28day'), date_from_str('20210131+1month'))

    def test_datetime_from_str(self):
        self.assertEqual(datetime_from_str('yesterday', precision='day'),
                         datetime_from_str('now-1day', precision='auto'))
        self.assertEqual(datetime_from_str('now+7day', precision='day'),
                         datetime_from_str('now+1week', precision='auto'))
        self.assertEqual(datetime_from_str('now+14day', precision='day'),
                         datetime_from_str('now+2week', precision='auto'))
        self.assertEqual(datetime_from_str('20200229+365day', precision='day'),
                         datetime_from_str('20200229+1year', precision='auto'))
        self.assertEqual(datetime_from_str('20210131+28day', precision='day'),
                         datetime_from_str('20210131+1month', precision='auto'))
        self.assertEqual(datetime_from_str('20210131+59day', precision='day'),
                         datetime_from_str('20210131+2month', precision='auto'))
        self.assertEqual(datetime_from_str('now+1day', precision='hour'),
                         datetime_from_str('now+24hours', precision='auto'))
        self.assertEqual(datetime_from_str('now+23hours', precision='hour'),
                         datetime_from_str('now+23hours', precision='auto'))

    def test_daterange(self):
        _20century = DateRange("19000101", "20000101")
        self.assertFalse("17890714" in _20century)
        _ac = DateRange("00010101")
        self.assertTrue("19690721" in _ac)
        _first_millennium = DateRange(end="10000101")
        self.assertTrue("07110427" in _first_millennium)

    def test_unified_dates(self):
        self.assertEqual(unified_strdate('December 21, 2010'), '20101221')
        self.assertEqual(unified_strdate('8/7/2009'), '20090708')
        self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
        self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
        self.assertEqual(unified_strdate('1968 12 10'), '19681210')
        self.assertEqual(unified_strdate('1968-12-10'), '19681210')
        self.assertEqual(unified_strdate('31-07-2022 20:00'), '20220731')
        self.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128')
        self.assertEqual(
            unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False),
            '20141126')
        self.assertEqual(
            unified_strdate('2/2/2015 6:47:40 PM', day_first=False),
            '20150202')
        self.assertEqual(unified_strdate('Feb 14th 2016 5:45PM'), '20160214')
        self.assertEqual(unified_strdate('25-09-2014'), '20140925')
        self.assertEqual(unified_strdate('27.02.2016 17:30'),
'20160227') self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None) self.assertEqual(unified_strdate('Feb 7, 2016 at 6:35 pm'), '20160207') self.assertEqual(unified_strdate('July 15th, 2013'), '20130715') self.assertEqual(unified_strdate('September 1st, 2013'), '20130901') self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902') self.assertEqual(unified_strdate('November 3rd, 2019'), '20191103') self.assertEqual(unified_strdate('October 23rd, 2005'), '20051023') def test_unified_timestamps(self): self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600) self.assertEqual(unified_timestamp('8/7/2009'), 1247011200) self.assertEqual(unified_timestamp('Dec 14, 2012'), 1355443200) self.assertEqual(unified_timestamp('2012/10/11 01:56:38 +0000'), 1349920598) self.assertEqual(unified_timestamp('1968 12 10'), -33436800) self.assertEqual(unified_timestamp('1968-12-10'), -33436800) self.assertEqual(unified_timestamp('28/01/2014 21:00:00 +0100'), 1390939200) self.assertEqual( unified_timestamp('11/26/2014 11:30:00 AM PST', day_first=False), 1417001400) self.assertEqual( unified_timestamp('2/2/2015 6:47:40 PM', day_first=False), 1422902860) self.assertEqual(unified_timestamp('Feb 14th 2016 5:45PM'), 1455471900) self.assertEqual(unified_timestamp('25-09-2014'), 1411603200) self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200) self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None) self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500) self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100) self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361) self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540) self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140) self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363) self.assertEqual(unified_timestamp('December 31 1969 20:00:01 EDT'), 1) self.assertEqual(unified_timestamp('Wednesday 31 December 1969 18:01:26 MDT'), 86) self.assertEqual(unified_timestamp('12/31/1969 20:01:18 EDT', False), 78) def test_determine_ext(self): self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None) self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None) self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None) self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8') self.assertEqual(determine_ext('foobar', None), None) def test_find_xpath_attr(self): testxml = '''<root> <node/> <node x="a"/> <node x="a" y="c" /> <node x="b" y="d" /> <node x="" /> </root>''' doc = compat_etree_fromstring(testxml) self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n'), None) self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None) self.assertEqual(find_xpath_attr(doc, './/node', 'n'), None) self.assertEqual(find_xpath_attr(doc, './/node', 'n', 'v'), None) self.assertEqual(find_xpath_attr(doc, './/node', 'x'), doc[1]) self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1]) self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'b'), doc[3]) self.assertEqual(find_xpath_attr(doc, './/node', 'y'), doc[2]) self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2]) self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'd'), doc[3]) self.assertEqual(find_xpath_attr(doc, './/node', 'x', ''), doc[4]) def 
test_xpath_with_ns(self): testxml = '''<root xmlns:media="http://example.com/"> <media:song> <media:author>The Author</media:author> <url>http://server.com/download.mp3</url> </media:song> </root>''' doc = compat_etree_fromstring(testxml) find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'})) self.assertTrue(find('media:song') is not None) self.assertEqual(find('media:song/media:author').text, 'The Author') self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3') def test_xpath_element(self): doc = xml.etree.ElementTree.Element('root') div = xml.etree.ElementTree.SubElement(doc, 'div') p = xml.etree.ElementTree.SubElement(div, 'p') p.text = 'Foo' self.assertEqual(xpath_element(doc, 'div/p'), p) self.assertEqual(xpath_element(doc, ['div/p']), p) self.assertEqual(xpath_element(doc, ['div/bar', 'div/p']), p) self.assertEqual(xpath_element(doc, 'div/bar', default='default'), 'default') self.assertEqual(xpath_element(doc, ['div/bar'], default='default'), 'default') self.assertTrue(xpath_element(doc, 'div/bar') is None) self.assertTrue(xpath_element(doc, ['div/bar']) is None) self.assertTrue(xpath_element(doc, ['div/bar'], 'div/baz') is None) self.assertRaises(ExtractorError, xpath_element, doc, 'div/bar', fatal=True) self.assertRaises(ExtractorError, xpath_element, doc, ['div/bar'], fatal=True) self.assertRaises(ExtractorError, xpath_element, doc, ['div/bar', 'div/baz'], fatal=True) def test_xpath_text(self): testxml = '''<root> <div> <p>Foo</p> </div> </root>''' doc = compat_etree_fromstring(testxml) self.assertEqual(xpath_text(doc, 'div/p'), 'Foo') self.assertEqual(xpath_text(doc, 'div/bar', default='default'), 'default') self.assertTrue(xpath_text(doc, 'div/bar') is None) self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True) def test_xpath_attr(self): testxml = '''<root> <div> <p x="a">Foo</p> </div> </root>''' doc = compat_etree_fromstring(testxml) self.assertEqual(xpath_attr(doc, 'div/p', 'x'), 'a') self.assertEqual(xpath_attr(doc, 'div/bar', 'x'), None) self.assertEqual(xpath_attr(doc, 'div/p', 'y'), None) self.assertEqual(xpath_attr(doc, 'div/bar', 'x', default='default'), 'default') self.assertEqual(xpath_attr(doc, 'div/p', 'y', default='default'), 'default') self.assertRaises(ExtractorError, xpath_attr, doc, 'div/bar', 'x', fatal=True) self.assertRaises(ExtractorError, xpath_attr, doc, 'div/p', 'y', fatal=True) def test_smuggle_url(self): data = {"ö": "ö", "abc": [3]} url = 'https://foo.bar/baz?x=y#a' smug_url = smuggle_url(url, data) unsmug_url, unsmug_data = unsmuggle_url(smug_url) self.assertEqual(url, unsmug_url) self.assertEqual(data, unsmug_data) res_url, res_data = unsmuggle_url(url) self.assertEqual(res_url, url) self.assertEqual(res_data, None) smug_url = smuggle_url(url, {'a': 'b'}) smug_smug_url = smuggle_url(smug_url, {'c': 'd'}) res_url, res_data = unsmuggle_url(smug_smug_url) self.assertEqual(res_url, url) self.assertEqual(res_data, {'a': 'b', 'c': 'd'}) def test_shell_quote(self): args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')] self.assertEqual( shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''') def test_float_or_none(self): self.assertEqual(float_or_none('42.42'), 42.42) self.assertEqual(float_or_none('42'), 42.0) self.assertEqual(float_or_none(''), None) self.assertEqual(float_or_none(None), None) self.assertEqual(float_or_none([]), None) self.assertEqual(float_or_none(set()), None) def test_int_or_none(self): 
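        # --- Illustrative aside (editor's sketch, not part of the upstream test suite) ---
        # Assumption: int_or_none() also accepts scale/invscale keyword
        # arguments for unit conversion, which is how extractor code commonly
        # calls it, e.g. turning a millisecond field into whole seconds:
        self.assertEqual(int_or_none('90000', scale=1000), 90)
        # --- end aside ---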
self.assertEqual(int_or_none('42'), 42) self.assertEqual(int_or_none(''), None) self.assertEqual(int_or_none(None), None) self.assertEqual(int_or_none([]), None) self.assertEqual(int_or_none(set()), None) def test_str_to_int(self): self.assertEqual(str_to_int('123,456'), 123456) self.assertEqual(str_to_int('123.456'), 123456) self.assertEqual(str_to_int(523), 523) self.assertEqual(str_to_int('noninteger'), None) self.assertEqual(str_to_int([]), None) def test_url_basename(self): self.assertEqual(url_basename('http://foo.de/'), '') self.assertEqual(url_basename('http://foo.de/bar/baz'), 'baz') self.assertEqual(url_basename('http://foo.de/bar/baz?x=y'), 'baz') self.assertEqual(url_basename('http://foo.de/bar/baz#x=y'), 'baz') self.assertEqual(url_basename('http://foo.de/bar/baz/'), 'baz') self.assertEqual( url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'), 'trailer.mp4') def test_base_url(self): self.assertEqual(base_url('http://foo.de/'), 'http://foo.de/') self.assertEqual(base_url('http://foo.de/bar'), 'http://foo.de/') self.assertEqual(base_url('http://foo.de/bar/'), 'http://foo.de/bar/') self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/') self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/') def test_urljoin(self): self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin(b'http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin('http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin(b'http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin('//foo.de/', '/a/b/c.txt'), '//foo.de/a/b/c.txt') self.assertEqual(urljoin('http://foo.de/', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin('http://foo.de', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin('http://foo.de', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin('http://foo.de/', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin('http://foo.de/', '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt') self.assertEqual(urljoin(None, 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin(None, '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt') self.assertEqual(urljoin('', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin(['foobar'], 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin('http://foo.de/', None), None) self.assertEqual(urljoin('http://foo.de/', ''), None) self.assertEqual(urljoin('http://foo.de/', ['foobar']), None) self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt') self.assertEqual(urljoin('http://foo.de/a/b/c.txt', 'rtmp://foo.de'), 'rtmp://foo.de') self.assertEqual(urljoin(None, 'rtmp://foo.de'), 'rtmp://foo.de') def test_url_or_none(self): self.assertEqual(url_or_none(None), None) self.assertEqual(url_or_none(''), None) self.assertEqual(url_or_none('foo'), None) self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de') self.assertEqual(url_or_none('https://foo.de'), 'https://foo.de') self.assertEqual(url_or_none('http$://foo.de'), None) self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de') self.assertEqual(url_or_none('//foo.de'), '//foo.de') self.assertEqual(url_or_none('s3://foo.de'), None) self.assertEqual(url_or_none('rtmpte://foo.de'), 'rtmpte://foo.de') self.assertEqual(url_or_none('mms://foo.de'), 'mms://foo.de') 
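        # --- Illustrative aside (editor's sketch, not part of the upstream test suite) ---
        # url_or_none() is handy for validating URLs scraped from JSON before
        # they are emitted in an info dict: anything without a recognised
        # scheme collapses to None instead of propagating garbage downstream.
        # The 'scraped' dict and its keys are hypothetical.
        scraped = {'thumbnail': 'https://foo.de', 'homepage': 'not a url'}
        self.assertEqual(url_or_none(scraped.get('thumbnail')), 'https://foo.de')
        self.assertEqual(url_or_none(scraped.get('homepage')), None)
        # --- end aside ---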
self.assertEqual(url_or_none('rtspu://foo.de'), 'rtspu://foo.de')
        self.assertEqual(url_or_none('ftps://foo.de'), 'ftps://foo.de')

    def test_parse_age_limit(self):
        self.assertEqual(parse_age_limit(None), None)
        self.assertEqual(parse_age_limit(False), None)
        self.assertEqual(parse_age_limit('invalid'), None)
        self.assertEqual(parse_age_limit(0), 0)
        self.assertEqual(parse_age_limit(18), 18)
        self.assertEqual(parse_age_limit(21), 21)
        self.assertEqual(parse_age_limit(22), None)
        self.assertEqual(parse_age_limit('18'), 18)
        self.assertEqual(parse_age_limit('18+'), 18)
        self.assertEqual(parse_age_limit('PG-13'), 13)
        self.assertEqual(parse_age_limit('TV-14'), 14)
        self.assertEqual(parse_age_limit('TV-MA'), 17)
        self.assertEqual(parse_age_limit('TV14'), 14)
        self.assertEqual(parse_age_limit('TV_G'), 0)

    def test_parse_duration(self):
        self.assertEqual(parse_duration(None), None)
        self.assertEqual(parse_duration(False), None)
        self.assertEqual(parse_duration('invalid'), None)
        self.assertEqual(parse_duration('1'), 1)
        self.assertEqual(parse_duration('1337:12'), 80232)
        self.assertEqual(parse_duration('9:12:43'), 33163)
        self.assertEqual(parse_duration('12:00'), 720)
        self.assertEqual(parse_duration('00:01:01'), 61)
        self.assertEqual(parse_duration('x:y'), None)
        self.assertEqual(parse_duration('3h11m53s'), 11513)
        self.assertEqual(parse_duration('3h 11m 53s'), 11513)
        self.assertEqual(parse_duration('3 hours 11 minutes 53 seconds'), 11513)
        self.assertEqual(parse_duration('3 hours 11 mins 53 secs'), 11513)
        self.assertEqual(parse_duration('3 hours, 11 minutes, 53 seconds'), 11513)
        self.assertEqual(parse_duration('3 hours, 11 mins, 53 secs'), 11513)
        self.assertEqual(parse_duration('62m45s'), 3765)
        self.assertEqual(parse_duration('6m59s'), 419)
        self.assertEqual(parse_duration('49s'), 49)
        self.assertEqual(parse_duration('0h0m0s'), 0)
        self.assertEqual(parse_duration('0m0s'), 0)
        self.assertEqual(parse_duration('0s'), 0)
        self.assertEqual(parse_duration('01:02:03.05'), 3723.05)
        self.assertEqual(parse_duration('T30M38S'), 1838)
        self.assertEqual(parse_duration('5 s'), 5)
        self.assertEqual(parse_duration('3 min'), 180)
        self.assertEqual(parse_duration('2.5 hours'), 9000)
        self.assertEqual(parse_duration('02:03:04'), 7384)
        self.assertEqual(parse_duration('01:02:03:04'), 93784)
        self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
        self.assertEqual(parse_duration('87 Min.'), 5220)
        self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
        self.assertEqual(parse_duration('PT00H03M30SZ'), 210)
        self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88)
        self.assertEqual(parse_duration('01:02:03:050'), 3723.05)
        self.assertEqual(parse_duration('103:050'), 103.05)

    def test_fix_xml_ampersands(self):
        self.assertEqual(
            fix_xml_ampersands('"&x=y&z=a'), '"&amp;x=y&amp;z=a')
        self.assertEqual(
            fix_xml_ampersands('"&x=y&wrong;&z=a'),
            '"&amp;x=y&wrong;&amp;z=a')
        self.assertEqual(
            fix_xml_ampersands('&amp;&apos;&gt;&lt;&quot;'),
            '&amp;&apos;&gt;&lt;&quot;')
        self.assertEqual(
            fix_xml_ampersands('&#1234;&#x1abC;'), '&#1234;&#x1abC;')
        self.assertEqual(fix_xml_ampersands('&#&#'), '&amp;#&amp;#')

    def test_paged_list(self):
        def testPL(size, pagesize, sliceargs, expected):
            def get_page(pagenum):
                firstid = pagenum * pagesize
                upto = min(size, pagenum * pagesize + pagesize)
                yield from range(firstid, upto)

            pl = OnDemandPagedList(get_page, pagesize)
            got = pl.getslice(*sliceargs)
            self.assertEqual(got, expected)

            iapl = InAdvancePagedList(get_page, size // pagesize + 1, pagesize)
            got = iapl.getslice(*sliceargs)
            self.assertEqual(got, expected)

        testPL(5, 2, (), [0, 1, 2, 3, 4])
        testPL(5, 2, (1,), [1, 2, 3, 4])
        testPL(5, 2, (2,), [2, 3, 4])
        testPL(5, 2,
(4,), [4]) testPL(5, 2, (0, 3), [0, 1, 2]) testPL(5, 2, (1, 4), [1, 2, 3]) testPL(5, 2, (2, 99), [2, 3, 4]) testPL(5, 2, (20, 99), []) def test_read_batch_urls(self): f = io.StringIO('''\xef\xbb\xbf foo bar\r baz # More after this line\r ; or after this bam''') self.assertEqual(read_batch_urls(f), ['foo', 'bar', 'baz', 'bam']) def test_urlencode_postdata(self): data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'}) self.assertTrue(isinstance(data, bytes)) def test_update_url_query(self): self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})), parse_qs('http://example.com/path?quality=HD&format=mp4')) self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})), parse_qs('http://example.com/path?system=LINUX&system=WINDOWS')) self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'fields': 'id,formats,subtitles'})), parse_qs('http://example.com/path?fields=id,formats,subtitles')) self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})), parse_qs('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails')) self.assertEqual(parse_qs(update_url_query( 'http://example.com/path?manifest=f4m', {'manifest': []})), parse_qs('http://example.com/path')) self.assertEqual(parse_qs(update_url_query( 'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})), parse_qs('http://example.com/path?system=LINUX')) self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'fields': b'id,formats,subtitles'})), parse_qs('http://example.com/path?fields=id,formats,subtitles')) self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'width': 1080, 'height': 720})), parse_qs('http://example.com/path?width=1080&height=720')) self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'bitrate': 5020.43})), parse_qs('http://example.com/path?bitrate=5020.43')) self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'test': '第二行тест'})), parse_qs('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82')) def test_multipart_encode(self): self.assertEqual( multipart_encode({b'field': b'value'}, boundary='AAAAAA')[0], b'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n') self.assertEqual( multipart_encode({'欄位'.encode(): '值'.encode()}, boundary='AAAAAA')[0], b'--AAAAAA\r\nContent-Disposition: form-data; name="\xe6\xac\x84\xe4\xbd\x8d"\r\n\r\n\xe5\x80\xbc\r\n--AAAAAA--\r\n') self.assertRaises( ValueError, multipart_encode, {b'field': b'value'}, boundary='value') def test_dict_get(self): FALSE_VALUES = { 'none': None, 'false': False, 'zero': 0, 'empty_string': '', 'empty_list': [], } d = FALSE_VALUES.copy() d['a'] = 42 self.assertEqual(dict_get(d, 'a'), 42) self.assertEqual(dict_get(d, 'b'), None) self.assertEqual(dict_get(d, 'b', 42), 42) self.assertEqual(dict_get(d, ('a', )), 42) self.assertEqual(dict_get(d, ('b', 'a', )), 42) self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42) self.assertEqual(dict_get(d, ('b', 'c', )), None) self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42) for key, false_value in FALSE_VALUES.items(): self.assertEqual(dict_get(d, ('b', 'c', key, )), None) self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value) def test_merge_dicts(self): self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2}) 
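        # --- Illustrative aside (editor's sketch, not part of the upstream test suite) ---
        # merge_dicts() keeps the first truthy value it sees for each key, so
        # callers pass metadata sources in priority order. The keys below are
        # hypothetical.
        info = merge_dicts({'title': 'From page'}, {'title': 'Fallback', 'id': 'x1'})
        self.assertEqual(info, {'title': 'From page', 'id': 'x1'})
        # --- end aside ---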
self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1}) self.assertEqual(merge_dicts({'a': 1}, {'a': None}), {'a': 1}) self.assertEqual(merge_dicts({'a': 1}, {'a': ''}), {'a': 1}) self.assertEqual(merge_dicts({'a': 1}, {}), {'a': 1}) self.assertEqual(merge_dicts({'a': None}, {'a': 1}), {'a': 1}) self.assertEqual(merge_dicts({'a': ''}, {'a': 1}), {'a': ''}) self.assertEqual(merge_dicts({'a': ''}, {'a': 'abc'}), {'a': 'abc'}) self.assertEqual(merge_dicts({'a': None}, {'a': ''}, {'a': 'abc'}), {'a': 'abc'}) def test_encode_compat_str(self): self.assertEqual(encode_compat_str(b'\xd1\x82\xd0\xb5\xd1\x81\xd1\x82', 'utf-8'), 'тест') self.assertEqual(encode_compat_str('тест', 'utf-8'), 'тест') def test_parse_iso8601(self): self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266) self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266) self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266) self.assertEqual(parse_iso8601('2014-03-23T22:04:26.1234Z'), 1395612266) self.assertEqual(parse_iso8601('2015-09-29T08:27:31.727'), 1443515251) self.assertEqual(parse_iso8601('2015-09-29T08-27-31.727'), None) def test_strip_jsonp(self): stripped = strip_jsonp('cb ([ {"id":"532cb",\n\n\n"x":\n3}\n]\n);') d = json.loads(stripped) self.assertEqual(d, [{"id": "532cb", "x": 3}]) stripped = strip_jsonp('parseMetadata({"STATUS":"OK"})\n\n\n//epc') d = json.loads(stripped) self.assertEqual(d, {'STATUS': 'OK'}) stripped = strip_jsonp('ps.embedHandler({"status": "success"});') d = json.loads(stripped) self.assertEqual(d, {'status': 'success'}) stripped = strip_jsonp('window.cb && window.cb({"status": "success"});') d = json.loads(stripped) self.assertEqual(d, {'status': 'success'}) stripped = strip_jsonp('window.cb && cb({"status": "success"});') d = json.loads(stripped) self.assertEqual(d, {'status': 'success'}) stripped = strip_jsonp('({"status": "success"});') d = json.loads(stripped) self.assertEqual(d, {'status': 'success'}) def test_strip_or_none(self): self.assertEqual(strip_or_none(' abc'), 'abc') self.assertEqual(strip_or_none('abc '), 'abc') self.assertEqual(strip_or_none(' abc '), 'abc') self.assertEqual(strip_or_none('\tabc\t'), 'abc') self.assertEqual(strip_or_none('\n\tabc\n\t'), 'abc') self.assertEqual(strip_or_none('abc'), 'abc') self.assertEqual(strip_or_none(''), '') self.assertEqual(strip_or_none(None), None) self.assertEqual(strip_or_none(42), None) self.assertEqual(strip_or_none([]), None) def test_uppercase_escape(self): self.assertEqual(uppercase_escape('aä'), 'aä') self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐') def test_lowercase_escape(self): self.assertEqual(lowercase_escape('aä'), 'aä') self.assertEqual(lowercase_escape('\\u0026'), '&') def test_limit_length(self): self.assertEqual(limit_length(None, 12), None) self.assertEqual(limit_length('foo', 12), 'foo') self.assertTrue( limit_length('foo bar baz asd', 12).startswith('foo bar')) self.assertTrue('...' 
in limit_length('foo bar baz asd', 12)) def test_mimetype2ext(self): self.assertEqual(mimetype2ext(None), None) self.assertEqual(mimetype2ext('video/x-flv'), 'flv') self.assertEqual(mimetype2ext('application/x-mpegURL'), 'm3u8') self.assertEqual(mimetype2ext('text/vtt'), 'vtt') self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt') self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html') self.assertEqual(mimetype2ext('audio/x-wav'), 'wav') self.assertEqual(mimetype2ext('audio/x-wav;codec=pcm'), 'wav') def test_month_by_name(self): self.assertEqual(month_by_name(None), None) self.assertEqual(month_by_name('December', 'en'), 12) self.assertEqual(month_by_name('décembre', 'fr'), 12) self.assertEqual(month_by_name('December'), 12) self.assertEqual(month_by_name('décembre'), None) self.assertEqual(month_by_name('Unknown', 'unknown'), None) def test_parse_codecs(self): self.assertEqual(parse_codecs(''), {}) self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), { 'vcodec': 'avc1.77.30', 'acodec': 'mp4a.40.2', 'dynamic_range': None, }) self.assertEqual(parse_codecs('mp4a.40.2'), { 'vcodec': 'none', 'acodec': 'mp4a.40.2', 'dynamic_range': None, }) self.assertEqual(parse_codecs('mp4a.40.5,avc1.42001e'), { 'vcodec': 'avc1.42001e', 'acodec': 'mp4a.40.5', 'dynamic_range': None, }) self.assertEqual(parse_codecs('avc3.640028'), { 'vcodec': 'avc3.640028', 'acodec': 'none', 'dynamic_range': None, }) self.assertEqual(parse_codecs(', h264,,newcodec,aac'), { 'vcodec': 'h264', 'acodec': 'aac', 'dynamic_range': None, }) self.assertEqual(parse_codecs('av01.0.05M.08'), { 'vcodec': 'av01.0.05M.08', 'acodec': 'none', 'dynamic_range': None, }) self.assertEqual(parse_codecs('vp9.2'), { 'vcodec': 'vp9.2', 'acodec': 'none', 'dynamic_range': 'HDR10', }) self.assertEqual(parse_codecs('av01.0.12M.10.0.110.09.16.09.0'), { 'vcodec': 'av01.0.12M.10.0.110.09.16.09.0', 'acodec': 'none', 'dynamic_range': 'HDR10', }) self.assertEqual(parse_codecs('dvhe'), { 'vcodec': 'dvhe', 'acodec': 'none', 'dynamic_range': 'DV', }) self.assertEqual(parse_codecs('theora, vorbis'), { 'vcodec': 'theora', 'acodec': 'vorbis', 'dynamic_range': None, }) self.assertEqual(parse_codecs('unknownvcodec, unknownacodec'), { 'vcodec': 'unknownvcodec', 'acodec': 'unknownacodec', }) self.assertEqual(parse_codecs('unknown'), {}) def test_escape_rfc3986(self): reserved = "!*'();:@&=+$,/?#[]" unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~' self.assertEqual(escape_rfc3986(reserved), reserved) self.assertEqual(escape_rfc3986(unreserved), unreserved) self.assertEqual(escape_rfc3986('тест'), '%D1%82%D0%B5%D1%81%D1%82') self.assertEqual(escape_rfc3986('%D1%82%D0%B5%D1%81%D1%82'), '%D1%82%D0%B5%D1%81%D1%82') self.assertEqual(escape_rfc3986('foo bar'), 'foo%20bar') self.assertEqual(escape_rfc3986('foo%20bar'), 'foo%20bar') def test_escape_url(self): self.assertEqual( escape_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'), 'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4' ) self.assertEqual( escape_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'), 'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290' ) self.assertEqual( escape_url('http://тест.рф/фрагмент'), 'http://xn--e1aybc.xn--p1ai/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82' ) self.assertEqual( 
escape_url('http://тест.рф/абв?абв=абв#абв'), 'http://xn--e1aybc.xn--p1ai/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2' ) self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0') def test_js_to_json_realworld(self): inp = '''{ 'clip':{'provider':'pseudo'} }''' self.assertEqual(js_to_json(inp), '''{ "clip":{"provider":"pseudo"} }''') json.loads(js_to_json(inp)) inp = '''{ 'playlist':[{'controls':{'all':null}}] }''' self.assertEqual(js_to_json(inp), '''{ "playlist":[{"controls":{"all":null}}] }''') inp = '''"The CW\\'s \\'Crazy Ex-Girlfriend\\'"''' self.assertEqual(js_to_json(inp), '''"The CW's 'Crazy Ex-Girlfriend'"''') inp = '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"' json_code = js_to_json(inp) self.assertEqual(json.loads(json_code), json.loads(inp)) inp = '''{ 0:{src:'skipped', type: 'application/dash+xml'}, 1:{src:'skipped', type: 'application/vnd.apple.mpegURL'}, }''' self.assertEqual(js_to_json(inp), '''{ "0":{"src":"skipped", "type": "application/dash+xml"}, "1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"} }''') inp = '''{"foo":101}''' self.assertEqual(js_to_json(inp), '''{"foo":101}''') inp = '''{"duration": "00:01:07"}''' self.assertEqual(js_to_json(inp), '''{"duration": "00:01:07"}''') inp = '''{segments: [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''' self.assertEqual(js_to_json(inp), '''{"segments": [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''') def test_js_to_json_edgecases(self): on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) on = js_to_json('{"abc": true}') self.assertEqual(json.loads(on), {'abc': True}) # Ignore JavaScript code as well on = js_to_json('''{ "x": 1, y: "a", z: some.code }''') d = json.loads(on) self.assertEqual(d['x'], 1) self.assertEqual(d['y'], 'a') # Just drop ! 
prefix for now though this results in a wrong value
        on = js_to_json('''{
            a: !0,
            b: !1,
            c: !!0,
            d: !!42.42,
            e: !!![],
            f: !"abc",
            g: !"",
            !42: 42
        }''')
        self.assertEqual(json.loads(on), {
            'a': 0,
            'b': 1,
            'c': 0,
            'd': 42.42,
            'e': [],
            'f': "abc",
            'g': "",
            '42': 42
        })

        on = js_to_json('["abc", "def",]')
        self.assertEqual(json.loads(on), ['abc', 'def'])

        on = js_to_json('[/*comment\n*/"abc"/*comment\n*/,/*comment\n*/"def",/*comment\n*/]')
        self.assertEqual(json.loads(on), ['abc', 'def'])

        on = js_to_json('[//comment\n"abc" //comment\n,//comment\n"def",//comment\n]')
        self.assertEqual(json.loads(on), ['abc', 'def'])

        on = js_to_json('{"abc": "def",}')
        self.assertEqual(json.loads(on), {'abc': 'def'})

        on = js_to_json('{/*comment\n*/"abc"/*comment\n*/:/*comment\n*/"def"/*comment\n*/,/*comment\n*/}')
        self.assertEqual(json.loads(on), {'abc': 'def'})

        on = js_to_json('{ 0: /* " \n */ ",]" , }')
        self.assertEqual(json.loads(on), {'0': ',]'})

        on = js_to_json('{ /*comment\n*/0/*comment\n*/: /* " \n */ ",]" , }')
        self.assertEqual(json.loads(on), {'0': ',]'})

        on = js_to_json('{ 0: // comment\n1 }')
        self.assertEqual(json.loads(on), {'0': 1})

        on = js_to_json(r'["<p>x<\/p>"]')
        self.assertEqual(json.loads(on), ['<p>x</p>'])

        on = js_to_json(r'["\xaa"]')
        self.assertEqual(json.loads(on), ['\u00aa'])

        on = js_to_json("['a\\\nb']")
        self.assertEqual(json.loads(on), ['ab'])

        on = js_to_json("/*comment\n*/[/*comment\n*/'a\\\nb'/*comment\n*/]/*comment\n*/")
        self.assertEqual(json.loads(on), ['ab'])

        on = js_to_json('{0xff:0xff}')
        self.assertEqual(json.loads(on), {'255': 255})

        on = js_to_json('{/*comment\n*/0xff/*comment\n*/:/*comment\n*/0xff/*comment\n*/}')
        self.assertEqual(json.loads(on), {'255': 255})

        on = js_to_json('{077:077}')
        self.assertEqual(json.loads(on), {'63': 63})

        on = js_to_json('{/*comment\n*/077/*comment\n*/:/*comment\n*/077/*comment\n*/}')
        self.assertEqual(json.loads(on), {'63': 63})

        on = js_to_json('{42:42}')
        self.assertEqual(json.loads(on), {'42': 42})

        on = js_to_json('{/*comment\n*/42/*comment\n*/:/*comment\n*/42/*comment\n*/}')
        self.assertEqual(json.loads(on), {'42': 42})

        on = js_to_json('{42:4.2e1}')
        self.assertEqual(json.loads(on), {'42': 42.0})

        on = js_to_json('{ "0x40": "0x40" }')
        self.assertEqual(json.loads(on), {'0x40': '0x40'})

        on = js_to_json('{ "040": "040" }')
        self.assertEqual(json.loads(on), {'040': '040'})

        on = js_to_json('[1,//{},\n2]')
        self.assertEqual(json.loads(on), [1, 2])

    def test_js_to_json_malformed(self):
        self.assertEqual(js_to_json('42a1'), '42"a1"')
        self.assertEqual(js_to_json('42a-1'), '42"a"-1')

    def test_extract_attributes(self):
        self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
        self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
        self.assertEqual(extract_attributes('<e x=y>'), {'x': 'y'})
        self.assertEqual(extract_attributes('<e x="a \'b\' c">'), {'x': "a 'b' c"})
        self.assertEqual(extract_attributes('<e x=\'a "b" c\'>'), {'x': 'a "b" c'})
        self.assertEqual(extract_attributes('<e x="&#121;">'), {'x': 'y'})
        self.assertEqual(extract_attributes('<e x="&#x79;">'), {'x': 'y'})
        self.assertEqual(extract_attributes('<e x="&amp;">'), {'x': '&'})  # XML
        self.assertEqual(extract_attributes('<e x="&quot;">'), {'x': '"'})
        self.assertEqual(extract_attributes('<e x="&pound;">'), {'x': '£'})  # HTML 3.2
        self.assertEqual(extract_attributes('<e x="&lambda;">'), {'x': 'λ'})  # HTML 4.0
        self.assertEqual(extract_attributes('<e x="&foo">'), {'x': '&foo'})
        self.assertEqual(extract_attributes('<e x="&#39;">'), {'x': "'"})
        self.assertEqual(extract_attributes('<e x=\'"\'>'), {'x': '"'})
        self.assertEqual(extract_attributes('<e x >'),
{'x': None}) self.assertEqual(extract_attributes('<e x=y a>'), {'x': 'y', 'a': None}) self.assertEqual(extract_attributes('<e x= y>'), {'x': 'y'}) self.assertEqual(extract_attributes('<e x=1 y=2 x=3>'), {'y': '2', 'x': '3'}) self.assertEqual(extract_attributes('<e \nx=\ny\n>'), {'x': 'y'}) self.assertEqual(extract_attributes('<e \nx=\n"y"\n>'), {'x': 'y'}) self.assertEqual(extract_attributes("<e \nx=\n'y'\n>"), {'x': 'y'}) self.assertEqual(extract_attributes('<e \nx="\ny\n">'), {'x': '\ny\n'}) self.assertEqual(extract_attributes('<e CAPS=x>'), {'caps': 'x'}) # Names lowercased self.assertEqual(extract_attributes('<e x=1 X=2>'), {'x': '2'}) self.assertEqual(extract_attributes('<e X=1 x=2>'), {'x': '2'}) self.assertEqual(extract_attributes('<e _:funny-name1=1>'), {'_:funny-name1': '1'}) self.assertEqual(extract_attributes('<e x="Fáilte 世界 \U0001f600">'), {'x': 'Fáilte 世界 \U0001f600'}) self.assertEqual(extract_attributes('<e x="décomposé">'), {'x': 'décompose\u0301'}) # "Narrow" Python builds don't support unicode code points outside BMP. try: chr(0x10000) supports_outside_bmp = True except ValueError: supports_outside_bmp = False if supports_outside_bmp: self.assertEqual(extract_attributes('<e x="Smile 😀!">'), {'x': 'Smile \U0001f600!'}) # Malformed HTML should not break attributes extraction on older Python self.assertEqual(extract_attributes('<mal"formed/>'), {}) def test_clean_html(self): self.assertEqual(clean_html('a:\nb'), 'a: b') self.assertEqual(clean_html('a:\n "b"'), 'a: "b"') self.assertEqual(clean_html('a<br>\xa0b'), 'a\nb') def test_intlist_to_bytes(self): self.assertEqual( intlist_to_bytes([0, 1, 127, 128, 255]), b'\x00\x01\x7f\x80\xff') def test_args_to_str(self): self.assertEqual( args_to_str(['foo', 'ba/r', '-baz', '2 be', '']), 'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""' ) def test_parse_filesize(self): self.assertEqual(parse_filesize(None), None) self.assertEqual(parse_filesize(''), None) self.assertEqual(parse_filesize('91 B'), 91) self.assertEqual(parse_filesize('foobar'), None) self.assertEqual(parse_filesize('2 MiB'), 2097152) self.assertEqual(parse_filesize('5 GB'), 5000000000) self.assertEqual(parse_filesize('1.2Tb'), 1200000000000) self.assertEqual(parse_filesize('1.2tb'), 1200000000000) self.assertEqual(parse_filesize('1,24 KB'), 1240) self.assertEqual(parse_filesize('1,24 kb'), 1240) self.assertEqual(parse_filesize('8.5 megabytes'), 8500000) def test_parse_count(self): self.assertEqual(parse_count(None), None) self.assertEqual(parse_count(''), None) self.assertEqual(parse_count('0'), 0) self.assertEqual(parse_count('1000'), 1000) self.assertEqual(parse_count('1.000'), 1000) self.assertEqual(parse_count('1.1k'), 1100) self.assertEqual(parse_count('1.1 k'), 1100) self.assertEqual(parse_count('1,1 k'), 1100) self.assertEqual(parse_count('1.1kk'), 1100000) self.assertEqual(parse_count('1.1kk '), 1100000) self.assertEqual(parse_count('1,1kk'), 1100000) self.assertEqual(parse_count('100 views'), 100) self.assertEqual(parse_count('1,100 views'), 1100) self.assertEqual(parse_count('1.1kk views'), 1100000) self.assertEqual(parse_count('10M views'), 10000000) self.assertEqual(parse_count('has 10M views'), 10000000) def test_parse_resolution(self): self.assertEqual(parse_resolution(None), {}) self.assertEqual(parse_resolution(''), {}) self.assertEqual(parse_resolution(' 1920x1080'), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('1920×1080 '), {'width': 1920, 'height': 1080}) 
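        # --- Illustrative aside (editor's sketch, not part of the upstream test suite) ---
        # Because parse_resolution() returns a plain (possibly empty) dict, its
        # result can be merged straight into a format dict. 'format_id' is a
        # hypothetical key.
        fmt = {'format_id': 'hd', **parse_resolution('1920x1080')}
        self.assertEqual(fmt, {'format_id': 'hd', 'width': 1920, 'height': 1080})
        # --- end aside ---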
self.assertEqual(parse_resolution('1920 x 1080'), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('720p'), {'height': 720}) self.assertEqual(parse_resolution('4k'), {'height': 2160}) self.assertEqual(parse_resolution('8K'), {'height': 4320}) self.assertEqual(parse_resolution('pre_1920x1080_post'), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('ep1x2'), {}) self.assertEqual(parse_resolution('1920, 1080'), {'width': 1920, 'height': 1080}) def test_parse_bitrate(self): self.assertEqual(parse_bitrate(None), None) self.assertEqual(parse_bitrate(''), None) self.assertEqual(parse_bitrate('300kbps'), 300) self.assertEqual(parse_bitrate('1500kbps'), 1500) self.assertEqual(parse_bitrate('300 kbps'), 300) def test_version_tuple(self): self.assertEqual(version_tuple('1'), (1,)) self.assertEqual(version_tuple('10.23.344'), (10, 23, 344)) self.assertEqual(version_tuple('10.1-6'), (10, 1, 6)) # avconv style def test_detect_exe_version(self): self.assertEqual(detect_exe_version('''ffmpeg version 1.2.1 built on May 27 2013 08:37:26 with gcc 4.7 (Debian 4.7.3-4) configuration: --prefix=/usr --extra-'''), '1.2.1') self.assertEqual(detect_exe_version('''ffmpeg version N-63176-g1fb4685 built on May 15 2014 22:09:06 with gcc 4.8.2 (GCC)'''), 'N-63176-g1fb4685') self.assertEqual(detect_exe_version('''X server found. dri2 connection failed! Trying to open render node... Success at /dev/dri/renderD128. ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') def test_age_restricted(self): self.assertFalse(age_restricted(None, 10)) # unrestricted content self.assertFalse(age_restricted(1, None)) # unrestricted policy self.assertFalse(age_restricted(8, 10)) self.assertTrue(age_restricted(18, 14)) self.assertFalse(age_restricted(18, 18)) def test_is_html(self): self.assertFalse(is_html(b'\x49\x44\x43<html')) self.assertTrue(is_html(b'<!DOCTYPE foo>\xaaa')) self.assertTrue(is_html( # UTF-8 with BOM b'\xef\xbb\xbf<!DOCTYPE foo>\xaaa')) self.assertTrue(is_html( # UTF-16-LE b'\xff\xfe<\x00h\x00t\x00m\x00l\x00>\x00\xe4\x00' )) self.assertTrue(is_html( # UTF-16-BE b'\xfe\xff\x00<\x00h\x00t\x00m\x00l\x00>\x00\xe4' )) self.assertTrue(is_html( # UTF-32-BE b'\x00\x00\xFE\xFF\x00\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4')) self.assertTrue(is_html( # UTF-32-LE b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00')) def test_render_table(self): self.assertEqual( render_table( ['a', 'empty', 'bcd'], [[123, '', 4], [9999, '', 51]]), 'a empty bcd\n' '123 4\n' '9999 51') self.assertEqual( render_table( ['a', 'empty', 'bcd'], [[123, '', 4], [9999, '', 51]], hide_empty=True), 'a bcd\n' '123 4\n' '9999 51') self.assertEqual( render_table( ['\ta', 'bcd'], [['1\t23', 4], ['\t9999', 51]]), ' a bcd\n' '1 23 4\n' '9999 51') self.assertEqual( render_table( ['a', 'bcd'], [[123, 4], [9999, 51]], delim='-'), 'a bcd\n' '--------\n' '123 4\n' '9999 51') self.assertEqual( render_table( ['a', 'bcd'], [[123, 4], [9999, 51]], delim='-', extra_gap=2), 'a bcd\n' '----------\n' '123 4\n' '9999 51') def test_match_str(self): # Unary self.assertFalse(match_str('xy', {'x': 1200})) self.assertTrue(match_str('!xy', {'x': 1200})) self.assertTrue(match_str('x', {'x': 1200})) self.assertFalse(match_str('!x', {'x': 1200})) self.assertTrue(match_str('x', {'x': 0})) self.assertTrue(match_str('is_live', {'is_live': True})) self.assertFalse(match_str('is_live', {'is_live': False})) 
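        # --- Illustrative aside (editor's sketch, not part of the upstream test suite) ---
        # match_str() implements the expression language that yt-dlp exposes on
        # the command line via --match-filter (an assumption worth verifying
        # against YoutubeDL.py): a filter string is evaluated against an info dict.
        info = {'duration': 300, 'is_live': False}
        self.assertTrue(match_str('duration > 60 & !is_live', info))
        self.assertFalse(match_str('duration > 600', info))
        # --- end aside ---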
self.assertFalse(match_str('is_live', {'is_live': None})) self.assertFalse(match_str('is_live', {})) self.assertFalse(match_str('!is_live', {'is_live': True})) self.assertTrue(match_str('!is_live', {'is_live': False})) self.assertTrue(match_str('!is_live', {'is_live': None})) self.assertTrue(match_str('!is_live', {})) self.assertTrue(match_str('title', {'title': 'abc'})) self.assertTrue(match_str('title', {'title': ''})) self.assertFalse(match_str('!title', {'title': 'abc'})) self.assertFalse(match_str('!title', {'title': ''})) # Numeric self.assertFalse(match_str('x>0', {'x': 0})) self.assertFalse(match_str('x>0', {})) self.assertTrue(match_str('x>?0', {})) self.assertTrue(match_str('x>1K', {'x': 1200})) self.assertFalse(match_str('x>2K', {'x': 1200})) self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200})) self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200})) self.assertTrue(match_str('x > 1:0:0', {'x': 3700})) # String self.assertFalse(match_str('y=a212', {'y': 'foobar42'})) self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'})) self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'})) self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'})) self.assertTrue(match_str('y^=foo', {'y': 'foobar42'})) self.assertFalse(match_str('y!^=foo', {'y': 'foobar42'})) self.assertFalse(match_str('y^=bar', {'y': 'foobar42'})) self.assertTrue(match_str('y!^=bar', {'y': 'foobar42'})) self.assertRaises(ValueError, match_str, 'x^=42', {'x': 42}) self.assertTrue(match_str('y*=bar', {'y': 'foobar42'})) self.assertFalse(match_str('y!*=bar', {'y': 'foobar42'})) self.assertFalse(match_str('y*=baz', {'y': 'foobar42'})) self.assertTrue(match_str('y!*=baz', {'y': 'foobar42'})) self.assertTrue(match_str('y$=42', {'y': 'foobar42'})) self.assertFalse(match_str('y$=43', {'y': 'foobar42'})) # And self.assertFalse(match_str( 'like_count > 100 & dislike_count <? 50 & description', {'like_count': 90, 'description': 'foo'})) self.assertTrue(match_str( 'like_count > 100 & dislike_count <? 50 & description', {'like_count': 190, 'description': 'foo'})) self.assertFalse(match_str( 'like_count > 100 & dislike_count <? 50 & description', {'like_count': 190, 'dislike_count': 60, 'description': 'foo'})) self.assertFalse(match_str( 'like_count > 100 & dislike_count <? 
50 & description', {'like_count': 190, 'dislike_count': 10}))

        # Regex
        self.assertTrue(match_str(r'x~=\bbar', {'x': 'foo bar'}))
        self.assertFalse(match_str(r'x~=\bbar.+', {'x': 'foo bar'}))
        self.assertFalse(match_str(r'x~=^FOO', {'x': 'foo bar'}))
        self.assertTrue(match_str(r'x~=(?i)^FOO', {'x': 'foo bar'}))

        # Quotes
        self.assertTrue(match_str(r'x^="foo"', {'x': 'foo "bar"'}))
        self.assertFalse(match_str(r'x^="foo  "', {'x': 'foo "bar"'}))
        self.assertFalse(match_str(r'x$="bar"', {'x': 'foo "bar"'}))
        self.assertTrue(match_str(r'x$=" \"bar\""', {'x': 'foo "bar"'}))

        # Escaping &
        self.assertFalse(match_str(r'x=foo & bar', {'x': 'foo & bar'}))
        self.assertTrue(match_str(r'x=foo \& bar', {'x': 'foo & bar'}))
        self.assertTrue(match_str(r'x=foo \& bar & x^=foo', {'x': 'foo & bar'}))
        self.assertTrue(match_str(r'x="foo \& bar" & x^=foo', {'x': 'foo & bar'}))

        # Example from docs
        self.assertTrue(match_str(
            r"!is_live & like_count>?100 & description~='(?i)\bcats \& dogs\b'",
            {'description': 'Raining Cats & Dogs'}))

        # Incomplete
        self.assertFalse(match_str('id!=foo', {'id': 'foo'}, True))
        self.assertTrue(match_str('x', {'id': 'foo'}, True))
        self.assertTrue(match_str('!x', {'id': 'foo'}, True))
        self.assertFalse(match_str('x', {'id': 'foo'}, False))

    def test_parse_dfxp_time_expr(self):
        self.assertEqual(parse_dfxp_time_expr(None), None)
        self.assertEqual(parse_dfxp_time_expr(''), None)
        self.assertEqual(parse_dfxp_time_expr('0.1'), 0.1)
        self.assertEqual(parse_dfxp_time_expr('0.1s'), 0.1)
        self.assertEqual(parse_dfxp_time_expr('00:00:01'), 1.0)
        self.assertEqual(parse_dfxp_time_expr('00:00:01.100'), 1.1)
        self.assertEqual(parse_dfxp_time_expr('00:00:01:100'), 1.1)

    def test_dfxp2srt(self):
        dfxp_data = '''<?xml version="1.0" encoding="UTF-8"?>
            <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
            <body>
                <div xml:lang="en">
                    <p begin="0" end="1">The following line contains Chinese characters and special symbols</p>
                    <p begin="1" end="2">第二行<br/>♪♪</p>
                    <p begin="2" dur="1"><span>Third<br/>Line</span></p>
                    <p begin="3" end="-1">Lines with invalid timestamps are ignored</p>
                    <p begin="-1" end="-1">Ignore, two</p>
                    <p begin="3" dur="-1">Ignored, three</p>
                </div>
            </body>
            </tt>'''.encode()
        srt_data = '''1
00:00:00,000 --> 00:00:01,000
The following line contains Chinese characters and special symbols

2
00:00:01,000 --> 00:00:02,000
第二行
♪♪

3
00:00:02,000 --> 00:00:03,000
Third
Line

'''
        self.assertEqual(dfxp2srt(dfxp_data), srt_data)

        dfxp_data_no_default_namespace = b'''<?xml version="1.0" encoding="UTF-8"?>
            <tt xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
            <body>
                <div xml:lang="en">
                    <p begin="0" end="1">The first line</p>
                </div>
            </body>
            </tt>'''
        srt_data = '''1
00:00:00,000 --> 00:00:01,000
The first line

'''
        self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)

        dfxp_data_with_style = b'''<?xml version="1.0" encoding="utf-8"?>
<tt xmlns="http://www.w3.org/2006/10/ttaf1" xmlns:ttp="http://www.w3.org/2006/10/ttaf1#parameter" ttp:timeBase="media" xmlns:tts="http://www.w3.org/2006/10/ttaf1#style" xml:lang="en" xmlns:ttm="http://www.w3.org/2006/10/ttaf1#metadata">
  <head>
    <styling>
      <style id="s2" style="s0" tts:color="cyan" tts:fontWeight="bold" />
      <style id="s1" style="s0" tts:color="yellow" tts:fontStyle="italic" />
      <style id="s3" style="s0" tts:color="lime" tts:textDecoration="underline" />
      <style id="s0" tts:backgroundColor="black" tts:fontStyle="normal" tts:fontSize="16" tts:fontFamily="sansSerif" tts:color="white" />
    </styling>
  </head>
  <body tts:textAlign="center" style="s0">
    <div>
      <p begin="00:00:02.08" id="p0" end="00:00:05.84">default style<span tts:color="red">custom style</span></p>
      <p style="s2" begin="00:00:02.08" id="p0" end="00:00:05.84"><span tts:color="lime">part 1<br /></span><span tts:color="cyan">part 2</span></p>
      <p style="s3" begin="00:00:05.84" id="p1" end="00:00:09.56">line 3<br />part 3</p>
      <p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p>
    </div>
  </body>
</tt>'''
        srt_data = '''1
00:00:02,080 --> 00:00:05,840
<font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font>

2
00:00:02,080 --> 00:00:05,840
<b><font color="cyan" face="sansSerif" size="16"><font color="lime">part 1
</font>part 2</font></b>

3
00:00:05,840 --> 00:00:09,560
<u><font color="lime">line 3
part 3</font></u>

4
00:00:09,560 --> 00:00:12,360
<i><u><font color="yellow"><font color="lime">inner
 </font>style</font></u></i>

'''
        self.assertEqual(dfxp2srt(dfxp_data_with_style), srt_data)

        dfxp_data_non_utf8 = '''<?xml version="1.0" encoding="UTF-16"?>
            <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
            <body>
                <div xml:lang="en">
                    <p begin="0" end="1">Line 1</p>
                    <p begin="1" end="2">第二行</p>
                </div>
            </body>
            </tt>'''.encode('utf-16')
        srt_data = '''1
00:00:00,000 --> 00:00:01,000
Line 1

2
00:00:01,000 --> 00:00:02,000
第二行

'''
        self.assertEqual(dfxp2srt(dfxp_data_non_utf8), srt_data)

    def test_cli_option(self):
        self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
        self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])
        self.assertEqual(cli_option({}, '--proxy', 'proxy'), [])
        self.assertEqual(cli_option({'retries': 10}, '--retries', 'retries'), ['--retries', '10'])

    def test_cli_valueless_option(self):
        self.assertEqual(cli_valueless_option(
            {'downloader': 'external'}, '--external-downloader', 'downloader', 'external'), ['--external-downloader'])
        self.assertEqual(cli_valueless_option(
            {'downloader': 'internal'}, '--external-downloader', 'downloader', 'external'), [])
        self.assertEqual(cli_valueless_option(
            {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'), ['--no-check-certificate'])
        self.assertEqual(cli_valueless_option(
            {'nocheckcertificate': False}, '--no-check-certificate', 'nocheckcertificate'), [])
        self.assertEqual(cli_valueless_option(
            {'checkcertificate': True}, '--no-check-certificate', 'checkcertificate', False), [])
        self.assertEqual(cli_valueless_option(
            {'checkcertificate': False}, '--no-check-certificate', 'checkcertificate', False), ['--no-check-certificate'])

    def test_cli_bool_option(self):
        self.assertEqual(
            cli_bool_option(
                {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'),
            ['--no-check-certificate', 'true'])
        self.assertEqual(
            cli_bool_option(
                {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate', separator='='),
            ['--no-check-certificate=true'])
        self.assertEqual(
            cli_bool_option(
                {'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true'),
            ['--check-certificate', 'false'])
        self.assertEqual(
            cli_bool_option(
                {'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
            ['--check-certificate=false'])
        self.assertEqual(
            cli_bool_option(
                {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true'),
            ['--check-certificate', 'true'])
        self.assertEqual(
            cli_bool_option(
                {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
            ['--check-certificate=true'])
        self.assertEqual(
            cli_bool_option(
                {}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
            [])

    def test_ohdave_rsa_encrypt(self):
        N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd
        e = 65537

        self.assertEqual(
            ohdave_rsa_encrypt(b'aa111222', e, N),
            '726664bd9a23fd0c70f9f1b84aab5e3905ce1e45a584e9cbcf9bcc7510338fc1986d6c599ff990d923aa43c51c0d9013cd572e13bc58f4ae48f2ed8c0b0ba881')

    def test_pkcs1pad(self):
        data = [1, 2, 3]
        padded_data = pkcs1pad(data, 32)
        self.assertEqual(padded_data[:2], [0, 2])
        self.assertEqual(padded_data[28:], [0, 1, 2, 3])

        self.assertRaises(ValueError, pkcs1pad, data, 8)

    def test_encode_base_n(self):
        self.assertEqual(encode_base_n(0, 30), '0')
        self.assertEqual(encode_base_n(80, 30), '2k')

        custom_table = '9876543210ZYXWVUTSRQPONMLKJIHGFEDCBA'
        self.assertEqual(encode_base_n(0, 30, custom_table), '9')
        self.assertEqual(encode_base_n(80, 30, custom_table), '7P')

        self.assertRaises(ValueError, encode_base_n, 0, 70)
        self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table)

    def test_caesar(self):
        self.assertEqual(caesar('ace', 'abcdef', 2), 'cea')
        self.assertEqual(caesar('cea', 'abcdef', -2), 'ace')
        self.assertEqual(caesar('ace', 'abcdef', -2), 'eac')
        self.assertEqual(caesar('eac', 'abcdef', 2), 'ace')
        self.assertEqual(caesar('ace', 'abcdef', 0), 'ace')
        self.assertEqual(caesar('xyz', 'abcdef', 2), 'xyz')
        self.assertEqual(caesar('abc', 'acegik', 2), 'ebg')
        self.assertEqual(caesar('ebg', 'acegik', -2), 'abc')

    def test_rot47(self):
        self.assertEqual(rot47('yt-dlp'), r'JE\5=A')
        self.assertEqual(rot47('YT-DLP'), r'*%\s{!')

    def test_urshift(self):
        self.assertEqual(urshift(3, 1), 1)
        self.assertEqual(urshift(-3, 1), 2147483646)

    GET_ELEMENT_BY_CLASS_TEST_STRING = '''
        <span class="foo bar">nice</span>
    '''

    def test_get_element_by_class(self):
        html = self.GET_ELEMENT_BY_CLASS_TEST_STRING

        self.assertEqual(get_element_by_class('foo', html), 'nice')
        self.assertEqual(get_element_by_class('no-such-class', html), None)

    def test_get_element_html_by_class(self):
        html = self.GET_ELEMENT_BY_CLASS_TEST_STRING

        self.assertEqual(get_element_html_by_class('foo', html), html.strip())
        self.assertEqual(get_element_by_class('no-such-class', html), None)

    GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING = '''
        <div itemprop="author" itemscope>foo</div>
    '''

    def test_get_element_by_attribute(self):
        html = self.GET_ELEMENT_BY_CLASS_TEST_STRING

        self.assertEqual(get_element_by_attribute('class', 'foo bar', html), 'nice')
        self.assertEqual(get_element_by_attribute('class', 'foo', html), None)
        self.assertEqual(get_element_by_attribute('class', 'no-such-foo', html), None)

        html = self.GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING

        self.assertEqual(get_element_by_attribute('itemprop', 'author', html), 'foo')

    def test_get_element_html_by_attribute(self):
        html = self.GET_ELEMENT_BY_CLASS_TEST_STRING

        self.assertEqual(get_element_html_by_attribute('class', 'foo bar', html), html.strip())
        self.assertEqual(get_element_html_by_attribute('class', 'foo', html), None)
        self.assertEqual(get_element_html_by_attribute('class', 'no-such-foo', html), None)

        html = self.GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING

        self.assertEqual(get_element_html_by_attribute('itemprop', 'author', html), html.strip())

    GET_ELEMENTS_BY_CLASS_TEST_STRING = '''
        <span class="foo bar">nice</span><span class="foo bar">also nice</span>
    '''
    GET_ELEMENTS_BY_CLASS_RES = ['<span class="foo bar">nice</span>', '<span class="foo bar">also nice</span>']

    def test_get_elements_by_class(self):
        html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING

        self.assertEqual(get_elements_by_class('foo', html), ['nice', 'also nice'])
        self.assertEqual(get_elements_by_class('no-such-class', html), [])

    def test_get_elements_html_by_class(self):
        html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING

        self.assertEqual(get_elements_html_by_class('foo', html), self.GET_ELEMENTS_BY_CLASS_RES)
        self.assertEqual(get_elements_html_by_class('no-such-class', html), [])

    def test_get_elements_by_attribute(self):
        html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING

        self.assertEqual(get_elements_by_attribute('class', 'foo bar', html), ['nice', 'also nice'])
        self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
        self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])

    def test_get_elements_html_by_attribute(self):
        html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING

        self.assertEqual(get_elements_html_by_attribute('class', 'foo bar', html), self.GET_ELEMENTS_BY_CLASS_RES)
        self.assertEqual(get_elements_html_by_attribute('class', 'foo', html), [])
        self.assertEqual(get_elements_html_by_attribute('class', 'no-such-foo', html), [])

    def test_get_elements_text_and_html_by_attribute(self):
        html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING

        self.assertEqual(
            list(get_elements_text_and_html_by_attribute('class', 'foo bar', html)),
            list(zip(['nice', 'also nice'], self.GET_ELEMENTS_BY_CLASS_RES)))
        self.assertEqual(list(get_elements_text_and_html_by_attribute('class', 'foo', html)), [])
        self.assertEqual(list(get_elements_text_and_html_by_attribute('class', 'no-such-foo', html)), [])

    GET_ELEMENT_BY_TAG_TEST_STRING = '''
    random text lorem ipsum</p>
    <div>
        this should be returned
        <span>this should also be returned</span>
        <div>
            this should also be returned
        </div>
        closing tag above should not trick, so this should also be returned
    </div>
    but this text should not be returned
    '''
    GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[32:276]
    GET_ELEMENT_BY_TAG_RES_OUTERDIV_TEXT = GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML[5:-6]
    GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[78:119]
    GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT = GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML[6:-7]

    def test_get_element_text_and_html_by_tag(self):
        html = self.GET_ELEMENT_BY_TAG_TEST_STRING

        self.assertEqual(
            get_element_text_and_html_by_tag('div', html),
            (self.GET_ELEMENT_BY_TAG_RES_OUTERDIV_TEXT, self.GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML))
        self.assertEqual(
            get_element_text_and_html_by_tag('span', html),
            (self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT, self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML))
        self.assertRaises(compat_HTMLParseError, get_element_text_and_html_by_tag, 'article', html)

    def test_iri_to_uri(self):
        self.assertEqual(
            iri_to_uri('https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b'),
            'https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b')  # Same
        self.assertEqual(
            iri_to_uri('https://www.google.com/search?q=Käsesoßenrührlöffel'),  # German for cheese sauce stirring spoon
            'https://www.google.com/search?q=K%C3%A4seso%C3%9Fenr%C3%BChrl%C3%B6ffel')
        self.assertEqual(
            iri_to_uri('https://www.google.com/search?q=lt<+gt>+eq%3D+amp%26+percent%25+hash%23+colon%3A+tilde~#trash=?&garbage=#'),
            'https://www.google.com/search?q=lt%3C+gt%3E+eq%3D+amp%26+percent%25+hash%23+colon%3A+tilde~#trash=?&garbage=#')
        self.assertEqual(
            iri_to_uri('http://правозащита38.рф/category/news/'),
            'http://xn--38-6kcaak9aj5chl4a3g.xn--p1ai/category/news/')
        self.assertEqual(
            iri_to_uri('http://www.правозащита38.рф/category/news/'),
            'http://www.xn--38-6kcaak9aj5chl4a3g.xn--p1ai/category/news/')
        self.assertEqual(
            iri_to_uri('https://i❤.ws/emojidomain/👍👏🤝💪'),
            'https://xn--i-7iq.ws/emojidomain/%F0%9F%91%8D%F0%9F%91%8F%F0%9F%A4%9D%F0%9F%92%AA')
        self.assertEqual(
            iri_to_uri('http://日本語.jp/'),
            'http://xn--wgv71a119e.jp/')
        self.assertEqual(
            iri_to_uri('http://导航.中国/'),
            'http://xn--fet810g.xn--fiqs8s/')

    def test_clean_podcast_url(self):
        self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
        self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')

    def test_LazyList(self):
        it = list(range(10))

        self.assertEqual(list(LazyList(it)), it)
        self.assertEqual(LazyList(it).exhaust(), it)
        self.assertEqual(LazyList(it)[5], it[5])

        self.assertEqual(LazyList(it)[5:], it[5:])
        self.assertEqual(LazyList(it)[:5], it[:5])
        self.assertEqual(LazyList(it)[::2], it[::2])
        self.assertEqual(LazyList(it)[1::2], it[1::2])
        self.assertEqual(LazyList(it)[5::-1], it[5::-1])
        self.assertEqual(LazyList(it)[6:2:-2], it[6:2:-2])
        self.assertEqual(LazyList(it)[::-1], it[::-1])

        self.assertTrue(LazyList(it))
        self.assertFalse(LazyList(range(0)))
        self.assertEqual(len(LazyList(it)), len(it))
        self.assertEqual(repr(LazyList(it)), repr(it))
        self.assertEqual(str(LazyList(it)), str(it))

        self.assertEqual(list(LazyList(it, reverse=True)), it[::-1])
        self.assertEqual(list(reversed(LazyList(it))[::-1]), it)
        self.assertEqual(list(reversed(LazyList(it))[1:3:7]), it[::-1][1:3:7])

    def test_LazyList_laziness(self):

        def test(ll, idx, val, cache):
            self.assertEqual(ll[idx], val)
            self.assertEqual(ll._cache, list(cache))

        ll = LazyList(range(10))
        test(ll, 0, 0, range(1))
        test(ll, 5, 5, range(6))
        test(ll, -3, 7, range(10))

        ll = LazyList(range(10), reverse=True)
        test(ll, -1, 0, range(1))
        test(ll, 3, 6, range(10))

        ll = LazyList(itertools.count())
        test(ll, 10, 10, range(11))
        ll = reversed(ll)
        test(ll, -15, 14, range(15))

    def test_format_bytes(self):
        self.assertEqual(format_bytes(0), '0.00B')
        self.assertEqual(format_bytes(1000), '1000.00B')
        self.assertEqual(format_bytes(1024), '1.00KiB')
        self.assertEqual(format_bytes(1024**2), '1.00MiB')
        self.assertEqual(format_bytes(1024**3), '1.00GiB')
        self.assertEqual(format_bytes(1024**4), '1.00TiB')
        self.assertEqual(format_bytes(1024**5), '1.00PiB')
        self.assertEqual(format_bytes(1024**6), '1.00EiB')
        self.assertEqual(format_bytes(1024**7), '1.00ZiB')
        self.assertEqual(format_bytes(1024**8), '1.00YiB')
        self.assertEqual(format_bytes(1024**9), '1024.00YiB')

    def test_hide_login_info(self):
        self.assertEqual(Config.hide_login_info(['-u', 'foo', '-p', 'bar']),
                         ['-u', 'PRIVATE', '-p', 'PRIVATE'])
        self.assertEqual(Config.hide_login_info(['-u']), ['-u'])
        self.assertEqual(Config.hide_login_info(['-u', 'foo', '-u', 'bar']),
                         ['-u', 'PRIVATE', '-u', 'PRIVATE'])
        self.assertEqual(Config.hide_login_info(['--username=foo']),
                         ['--username=PRIVATE'])

    def test_locked_file(self):
        TEXT = 'test_locked_file\n'
        FILE = 'test_locked_file.ytdl'
        MODES = 'war'  # Order is important

        try:
            for lock_mode in MODES:
                with locked_file(FILE, lock_mode, False) as f:
                    if lock_mode == 'r':
                        self.assertEqual(f.read(), TEXT * 2, 'Wrong file content')
                    else:
                        f.write(TEXT)
                    for test_mode in MODES:
                        testing_write = test_mode != 'r'
                        try:
                            with locked_file(FILE, test_mode, False):
                                pass
                        except (BlockingIOError, PermissionError):
                            if not testing_write:  # FIXME
                                print(f'Known issue: Exclusive lock ({lock_mode}) blocks read access ({test_mode})')
                                continue
                            self.assertTrue(testing_write, f'{test_mode} is blocked by {lock_mode}')
                        else:
                            self.assertFalse(testing_write, f'{test_mode} is not blocked by {lock_mode}')
        finally:
            with contextlib.suppress(OSError):
                os.remove(FILE)

    def test_determine_file_encoding(self):
        self.assertEqual(determine_file_encoding(b''), (None, 0))
        self.assertEqual(determine_file_encoding(b'--verbose -x --audio-format mkv\n'), (None, 0))

        self.assertEqual(determine_file_encoding(b'\xef\xbb\xbf'), ('utf-8', 3))
        self.assertEqual(determine_file_encoding(b'\x00\x00\xfe\xff'), ('utf-32-be', 4))
        self.assertEqual(determine_file_encoding(b'\xff\xfe'), ('utf-16-le', 2))

        self.assertEqual(determine_file_encoding(b'\xff\xfe# coding: utf-8\n--verbose'), ('utf-16-le', 2))

        self.assertEqual(determine_file_encoding(b'# coding: utf-8\n--verbose'), ('utf-8', 0))
        self.assertEqual(determine_file_encoding(b'# coding: someencodinghere-12345\n--verbose'), ('someencodinghere-12345', 0))

        self.assertEqual(determine_file_encoding(b'#coding:utf-8\n--verbose'), ('utf-8', 0))
        self.assertEqual(determine_file_encoding(b'# coding: utf-8 \r\n--verbose'), ('utf-8', 0))

        self.assertEqual(determine_file_encoding('# coding: utf-32-be'.encode('utf-32-be')), ('utf-32-be', 0))
        self.assertEqual(determine_file_encoding('# coding: utf-16-le'.encode('utf-16-le')), ('utf-16-le', 0))

    def test_get_compatible_ext(self):
        self.assertEqual(get_compatible_ext(
            vcodecs=[None], acodecs=[None, None], vexts=['mp4'], aexts=['m4a', 'm4a']), 'mkv')
        self.assertEqual(get_compatible_ext(
            vcodecs=[None], acodecs=[None], vexts=['flv'], aexts=['flv']), 'flv')

        self.assertEqual(get_compatible_ext(
            vcodecs=[None], acodecs=[None], vexts=['mp4'], aexts=['m4a']), 'mp4')
        self.assertEqual(get_compatible_ext(
            vcodecs=[None], acodecs=[None], vexts=['mp4'], aexts=['webm']), 'mkv')
        self.assertEqual(get_compatible_ext(
            vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['m4a']), 'mkv')
        self.assertEqual(get_compatible_ext(
            vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['webm']), 'webm')

        self.assertEqual(get_compatible_ext(
            vcodecs=['h264'], acodecs=['mp4a'], vexts=['mov'], aexts=['m4a']), 'mp4')
        self.assertEqual(get_compatible_ext(
            vcodecs=['av01.0.12M.08'], acodecs=['opus'], vexts=['mp4'], aexts=['webm']), 'webm')

        self.assertEqual(get_compatible_ext(
            vcodecs=['vp9'], acodecs=['opus'], vexts=['webm'], aexts=['webm'], preferences=['flv', 'mp4']), 'mp4')
        self.assertEqual(get_compatible_ext(
            vcodecs=['av1'], acodecs=['mp4a'], vexts=['webm'], aexts=['m4a'], preferences=('webm', 'mkv')), 'mkv')


if __name__ == '__main__':
    unittest.main()
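# A minimal usage sketch (an addition, assuming yt_dlp.utils.match_str is
# importable exactly as in the tests above): the mini-language exercised by
# test_match_str is the same filter syntax yt-dlp exposes via --match-filter.
#
#     from yt_dlp.utils import match_str
#     info = {'like_count': 190, 'description': 'Raining Cats & Dogs'}
#     assert match_str(r"like_count>?100 & description~='(?i)\bcats \& dogs\b'", info)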
yt-dlp-2022.08.19/test/test_verbose_output.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import subprocess

rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


class TestVerboseOutput(unittest.TestCase):
    def test_private_info_arg(self):
        outp = subprocess.Popen(
            [
                sys.executable, 'yt_dlp/__main__.py', '-v',
                '--ignore-config',
                '--username', 'johnsmith@gmail.com',
                '--password', 'my_secret_password',
            ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        sout, serr = outp.communicate()
        self.assertTrue(b'--username' in serr)
        self.assertTrue(b'johnsmith' not in serr)
        self.assertTrue(b'--password' in serr)
        self.assertTrue(b'my_secret_password' not in serr)

    def test_private_info_shortarg(self):
        outp = subprocess.Popen(
            [
                sys.executable, 'yt_dlp/__main__.py', '-v',
                '--ignore-config',
                '-u', 'johnsmith@gmail.com',
                '-p', 'my_secret_password',
            ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        sout, serr = outp.communicate()
        self.assertTrue(b'-u' in serr)
        self.assertTrue(b'johnsmith' not in serr)
        self.assertTrue(b'-p' in serr)
        self.assertTrue(b'my_secret_password' not in serr)

    def test_private_info_eq(self):
        outp = subprocess.Popen(
            [
                sys.executable, 'yt_dlp/__main__.py', '-v',
                '--ignore-config',
                '--username=johnsmith@gmail.com',
                '--password=my_secret_password',
            ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        sout, serr = outp.communicate()
        self.assertTrue(b'--username' in serr)
        self.assertTrue(b'johnsmith' not in serr)
        self.assertTrue(b'--password' in serr)
        self.assertTrue(b'my_secret_password' not in serr)

    def test_private_info_shortarg_eq(self):
        outp = subprocess.Popen(
            [
                sys.executable, 'yt_dlp/__main__.py', '-v',
                '--ignore-config',
                '-u=johnsmith@gmail.com',
                '-p=my_secret_password',
            ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        sout, serr = outp.communicate()
        self.assertTrue(b'-u' in serr)
        self.assertTrue(b'johnsmith' not in serr)
        self.assertTrue(b'-p' in serr)
        self.assertTrue(b'my_secret_password' not in serr)


if __name__ == '__main__':
    unittest.main()
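# A quick manual reproduction of what these tests assert (an assumption: run
# from the repository root; the exact verbose-header wording can vary between
# versions):
#
#     python yt_dlp/__main__.py -v --ignore-config -u user -p secret
#
# The credentials should surface on stderr only as PRIVATE in the [debug]
# command-line output, never in plain text.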
yt-dlp-2022.08.19/test/test_write_annotations.py.disabled

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import xml.etree.ElementTree

import yt_dlp.extractor
import yt_dlp.YoutubeDL
from test.helper import get_params, is_download_test, try_rm


class YoutubeDL(yt_dlp.YoutubeDL):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.to_stderr = self.to_screen


params = get_params({
    'writeannotations': True,
    'skip_download': True,
    'writeinfojson': False,
    'format': 'flv',
})


TEST_ID = 'gr51aVj-mLg'
ANNOTATIONS_FILE = TEST_ID + '.annotations.xml'
EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label']


@is_download_test
class TestAnnotations(unittest.TestCase):
    def setUp(self):
        # Clear old files
        self.tearDown()

    def test_info_json(self):
        expected = list(EXPECTED_ANNOTATIONS)  # Two annotations could have the same text.
        ie = yt_dlp.extractor.YoutubeIE()
        ydl = YoutubeDL(params)
        ydl.add_info_extractor(ie)
        ydl.download([TEST_ID])
        self.assertTrue(os.path.exists(ANNOTATIONS_FILE))
        annoxml = None
        with open(ANNOTATIONS_FILE, encoding='utf-8') as annof:
            annoxml = xml.etree.ElementTree.parse(annof)
        self.assertTrue(annoxml is not None, 'Failed to parse annotations XML')
        root = annoxml.getroot()
        self.assertEqual(root.tag, 'document')
        annotationsTag = root.find('annotations')
        self.assertEqual(annotationsTag.tag, 'annotations')
        annotations = annotationsTag.findall('annotation')

        # Not all the annotations have TEXT children and the annotations are returned unsorted.
        for a in annotations:
            self.assertEqual(a.tag, 'annotation')
            if a.get('type') == 'text':
                textTag = a.find('TEXT')
                text = textTag.text
                self.assertTrue(text in expected)  # assertIn only added in python 2.7
                # remove the first occurrence, there could be more than one annotation with the same text
                expected.remove(text)

        # We should have seen (and removed) all the expected annotation texts.
        self.assertEqual(len(expected), 0, 'Not all expected annotations were found.')

    def tearDown(self):
        try_rm(ANNOTATIONS_FILE)


if __name__ == '__main__':
    unittest.main()

yt-dlp-2022.08.19/test/test_youtube_lists.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from test.helper import FakeYDL, is_download_test
from yt_dlp.extractor import YoutubeIE, YoutubeTabIE


@is_download_test
class TestYoutubeLists(unittest.TestCase):
    def assertIsPlaylist(self, info):
        """Make sure the info has '_type' set to 'playlist'"""
        self.assertEqual(info['_type'], 'playlist')

    def test_youtube_playlist_noplaylist(self):
        dl = FakeYDL()
        dl.params['noplaylist'] = True
        ie = YoutubeTabIE(dl)
        result = ie.extract('https://www.youtube.com/watch?v=OmJ-4B-mS-Y&list=PLydZ2Hrp_gPRJViZjLFKaBMgCQOYEEkyp&index=2')
        self.assertEqual(result['_type'], 'url')
        self.assertEqual(result['ie_key'], YoutubeIE.ie_key())
        self.assertEqual(YoutubeIE.extract_id(result['url']), 'OmJ-4B-mS-Y')

    def test_youtube_mix(self):
        dl = FakeYDL()
        ie = YoutubeTabIE(dl)
        result = ie.extract('https://www.youtube.com/watch?v=tyITL_exICo&list=RDCLAK5uy_kLWIr9gv1XLlPbaDS965-Db4TrBoUTxQ8')
        entries = list(result['entries'])
        self.assertTrue(len(entries) >= 50)
        original_video = entries[0]
        self.assertEqual(original_video['id'], 'tyITL_exICo')

    def test_youtube_flat_playlist_extraction(self):
        dl = FakeYDL()
        dl.params['extract_flat'] = True
        ie = YoutubeTabIE(dl)
        result = ie.extract('https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc')
        self.assertIsPlaylist(result)
        entries = list(result['entries'])
        self.assertTrue(len(entries) == 1)
        video = entries[0]
        self.assertEqual(video['_type'], 'url')
        self.assertEqual(video['ie_key'], 'Youtube')
        self.assertEqual(video['id'], 'BaW_jenozKc')
        self.assertEqual(video['url'], 'https://www.youtube.com/watch?v=BaW_jenozKc')
        self.assertEqual(video['title'], 'youtube-dl test video "\'/\\ä↭𝕐')
        self.assertEqual(video['duration'], 10)
        self.assertEqual(video['uploader'], 'Philipp Hagemeister')


if __name__ == '__main__':
    unittest.main()
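# Usage sketch mirroring test_youtube_flat_playlist_extraction (an addition;
# assumes network access, and the playlist URL is purely illustrative):
#
#     from yt_dlp import YoutubeDL
#     with YoutubeDL({'extract_flat': True}) as ydl:
#         info = ydl.extract_info('https://www.youtube.com/playlist?list=...', download=False)
#     # info['_type'] == 'playlist'; each entry is a lightweight 'url' result,
#     # which is exactly the shape the test above asserts on.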
yt-dlp-2022.08.19/test/test_youtube_misc.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from yt_dlp.extractor import YoutubeIE


class TestYoutubeMisc(unittest.TestCase):
    def test_youtube_extract(self):
        assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
        assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
        assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
        assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
        assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc')
        assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
        assertExtractId('BaW_jenozKc', 'BaW_jenozKc')


if __name__ == '__main__':
    unittest.main()

yt-dlp-2022.08.19/test/test_youtube_signature.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import contextlib
import re
import string
import urllib.request

from test.helper import FakeYDL, is_download_test
from yt_dlp.extractor import YoutubeIE
from yt_dlp.jsinterp import JSInterpreter

_SIG_TESTS = [
    (
        'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
        86,
        '>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
    ),
    (
        'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
        85,
        '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
    ),
    (
        'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
        90,
        ']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
    ),
    (
        'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
        84,
        'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
    ),
    (
        'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
        '2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
        'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
    ),
    (
        'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
        84,
        '123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
    ),
    (
        'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
        83,
        '123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F'
    ),
    (
        'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js',
        '4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
        '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
    ),
    (
        'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
        '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
        '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
    )
]

_NSIG_TESTS = [
    (
        'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js',
        'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w',
    ),
    (
        'https://www.youtube.com/s/player/f8cb7a3b/player_ias.vflset/en_US/base.js',
        'oBo2h5euWy6osrUt', 'ivXHpm7qJjJN',
    ),
    (
        'https://www.youtube.com/s/player/2dfe380c/player_ias.vflset/en_US/base.js',
        'oBo2h5euWy6osrUt', '3DIBbn3qdQ',
    ),
    (
        'https://www.youtube.com/s/player/f1ca6900/player_ias.vflset/en_US/base.js',
        'cu3wyu6LQn2hse', 'jvxetvmlI9AN9Q',
    ),
    (
        'https://www.youtube.com/s/player/8040e515/player_ias.vflset/en_US/base.js',
        'wvOFaY-yjgDuIEg5', 'HkfBFDHmgw4rsw',
    ),
    (
        'https://www.youtube.com/s/player/e06dea74/player_ias.vflset/en_US/base.js',
        'AiuodmaDDYw8d3y4bf', 'ankd8eza2T6Qmw',
    ),
    (
        'https://www.youtube.com/s/player/5dd88d1d/player-plasma-ias-phone-en_US.vflset/base.js',
        'kSxKFLeqzv_ZyHSAt', 'n8gS8oRlHOxPFA',
    ),
    (
        'https://www.youtube.com/s/player/324f67b9/player_ias.vflset/en_US/base.js',
        'xdftNy7dh9QGnhW', '22qLGxrmX8F1rA',
    ),
    (
        'https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js',
        'TDCstCG66tEAO5pR9o', 'dbxNtZ14c-yWyw',
    ),
    (
        'https://www.youtube.com/s/player/c81bbb4a/player_ias.vflset/en_US/base.js',
        'gre3EcLurNY2vqp94', 'Z9DfGxWP115WTg',
    ),
    (
        'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js',
        'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw',
    ),
]


@is_download_test
class TestPlayerInfo(unittest.TestCase):
    def test_youtube_extract_player_info(self):
        PLAYER_URLS = (
            ('https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js', '4c3f79c5'),
            ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'),
            ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/fr_FR/base.js', '64dddad9'),
            ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'),
            ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-de_DE.vflset/base.js', '64dddad9'),
            ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-tablet-en_US.vflset/base.js', '64dddad9'),
            # obsolete
            ('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'),
            ('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'),
            ('https://www.youtube.com/yts/jsbin/player_ias-vflCPQUIL/en_US/base.js', 'vflCPQUIL'),
            ('https://www.youtube.com/yts/jsbin/player-vflzQZbt7/en_US/base.js', 'vflzQZbt7'),
            ('https://www.youtube.com/yts/jsbin/player-en_US-vflaxXRn1/base.js', 'vflaxXRn1'),
            ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'),
            ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'),
        )
        for player_url, expected_player_id in PLAYER_URLS:
            player_id = YoutubeIE._extract_player_info(player_url)
            self.assertEqual(player_id, expected_player_id)


@is_download_test
class TestSignature(unittest.TestCase):
    def setUp(self):
        TEST_DIR = os.path.dirname(os.path.abspath(__file__))
        self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata/sigs')
        if not os.path.exists(self.TESTDATA_DIR):
            os.mkdir(self.TESTDATA_DIR)

    def tearDown(self):
        with contextlib.suppress(OSError):
            for f in os.listdir(self.TESTDATA_DIR):
                os.remove(f)


def t_factory(name, sig_func, url_pattern):
    def make_tfunc(url, sig_input, expected_sig):
        m = url_pattern.match(url)
        assert m, '%r should follow URL format' % url
        test_id = m.group('id')

        def test_func(self):
            basename = f'player-{name}-{test_id}.js'
            fn = os.path.join(self.TESTDATA_DIR, basename)

            if not os.path.exists(fn):
                urllib.request.urlretrieve(url, fn)
            with open(fn, encoding='utf-8') as testf:
                jscode = testf.read()
            self.assertEqual(sig_func(jscode, sig_input), expected_sig)

        test_func.__name__ = f'test_{name}_js_{test_id}'
        setattr(TestSignature, test_func.__name__, test_func)
    return make_tfunc


def signature(jscode, sig_input):
    func = YoutubeIE(FakeYDL())._parse_sig_js(jscode)
    src_sig = (
        str(string.printable[:sig_input])
        if isinstance(sig_input, int) else sig_input)
    return func(src_sig)


def n_sig(jscode, sig_input):
    funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode)
    return JSInterpreter(jscode).call_function(funcname, sig_input)


make_sig_test = t_factory(
    'signature', signature, re.compile(r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$'))
for test_spec in _SIG_TESTS:
    make_sig_test(*test_spec)

make_nsig_test = t_factory(
    'nsig', n_sig, re.compile(r'.+/player/(?P<id>[a-zA-Z0-9_-]+)/.+.js$'))
for test_spec in _NSIG_TESTS:
    make_nsig_test(*test_spec)


if __name__ == '__main__':
    unittest.main()

yt-dlp-2022.08.19/test/testcert.pem

-----BEGIN PRIVATE KEY-----
MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDMF0bAzaHAdIyB
HRmnIp4vv40lGqEePmWqicCl0QZ0wsb5dNysSxSa7330M2QeQopGfdaUYF1uTcNp
Qx6ECgBSfg+RrOBI7r/u4F+sKX8MUXVaf/5QoBUrGNGSn/pp7HMGOuQqO6BVg4+h
A1ySSwUG8mZItLRry1ISyErmW8b9xlqfd97uLME/5tX+sMelRFjUbAx8A4CK58Ev
mMguHVTlXzx5RMdYcf1VScYcjlV/qA45uzP8zwI5aigfcmUD+tbGuQRhKxUhmw0J
aobtOR6+JSOAULW5gYa/egE4dWLwbyM6b6eFbdnjlQzEA1EW7ChMPAW/Mo83KyiP
tKMCSQulAgMBAAECggEALCfBDAexPjU5DNoh6bIorUXxIJzxTNzNHCdvgbCGiA54
BBKPh8s6qwazpnjT6WQWDIg/O5zZufqjE4wM9x4+0Zoqfib742ucJO9wY4way6x4
Clt0xzbLPabB+MoZ4H7ip+9n2+dImhe7pGdYyOHoNYeOL57BBi1YFW42Hj6u/8pd
63YCXisto3Rz1YvRQVjwsrS+cRKZlzAFQRviL30jav7Wh1aWEfcXxjj4zhm8pJdk
ITGtq6howz57M0NtX6hZnfe8ywzTnDFIGKIMA2cYHuYJcBh9bc4tCGubTvTKK9UE
8fM+f6UbfGqfpKCq1mcgs0XMoFDSzKS9+mSJn0+5JQKBgQD+OCKaeH3Yzw5zGnlw
XuQfMJGNcgNr+ImjmvzUAC2fAZUJLAcQueE5kzMv5Fmd+EFE2CEX1Vit3tg0SXvA
G+bq609doILHMA03JHnV1npO/YNIhG3AAtJlKYGxQNfWH9mflYj9mEui8ZFxG52o
zWhHYuifOjjZszUR+/eio6NPzwKBgQDNhUBTrT8LIX4SE/EFUiTlYmWIvOMgXYvN
8Cm3IRNQ/yyphZaXEU0eJzfX5uCDfSVOgd6YM/2pRah+t+1Hvey4H8e0GVTu5wMP
gkkqwKPGIR1YOmlw6ippqwvoJD7LuYrm6Q4D6e1PvkjwCq6lEndrOPmPrrXNd0JJ
XO60y3U2SwKBgQDLkyZarryQXxcCI6Q10Tc6pskYDMIit095PUbTeiUOXNT9GE28
Hi32ziLCakk9kCysNasii81MxtQ54tJ/f5iGbNMMddnkKl2a19Hc5LjjAm4cJzg/
98KGEhvyVqvAo5bBDZ06/rcrD+lZOzUglQS5jcIcqCIYa0LHWQ/wJLxFzwKBgFcZ
1SRhdSmDfUmuF+S4ZpistflYjC3IV5rk4NkS9HvMWaJS0nqdw4A3AMzItXgkjq4S
DkOVLTkTI5Do5HAWRv/VwC5M2hkR4NMu1VGAKSisGiKtRsirBWSZMEenLNHshbjN
Jrpz5rZ4H7NT46ZkCCZyFBpX4gb9NyOedjA7Via3AoGARF8RxbYjnEGGFuhnbrJB
FTPR0vaL4faY3lOgRZ8jOG9V2c9Hzi/y8a8TU4C11jnJSDqYCXBTd5XN28npYxtD
pjRsCwy6ze+yvYXPO7C978eMG3YRyj366NXUxnXN59ibwe/lxi2OD9z8J1LEdF6z
VJua1Wn8HKxnXMI61DhTCSo=
-----END PRIVATE KEY-----
-----BEGIN CERTIFICATE-----
MIIEEzCCAvugAwIBAgIJAK1haYi6gmSKMA0GCSqGSIb3DQEBCwUAMIGeMQswCQYD
VQQGEwJERTEMMAoGA1UECAwDTlJXMRQwEgYDVQQHDAtEdWVzc2VsZG9yZjEbMBkG
A1UECgwSeW91dHViZS1kbCBwcm9qZWN0MRkwFwYDVQQLDBB5b3V0dWJlLWRsIHRl
c3RzMRIwEAYDVQQDDAlsb2NhbGhvc3QxHzAdBgkqhkiG9w0BCQEWEHBoaWhhZ0Bw
aGloYWcuZGUwIBcNMTUwMTMwMDExNTA4WhgPMjExNTAxMDYwMTE1MDhaMIGeMQsw
CQYDVQQGEwJERTEMMAoGA1UECAwDTlJXMRQwEgYDVQQHDAtEdWVzc2VsZG9yZjEb
MBkGA1UECgwSeW91dHViZS1kbCBwcm9qZWN0MRkwFwYDVQQLDBB5b3V0dWJlLWRs
IHRlc3RzMRIwEAYDVQQDDAlsb2NhbGhvc3QxHzAdBgkqhkiG9w0BCQEWEHBoaWhh
Z0BwaGloYWcuZGUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDMF0bA
zaHAdIyBHRmnIp4vv40lGqEePmWqicCl0QZ0wsb5dNysSxSa7330M2QeQopGfdaU
YF1uTcNpQx6ECgBSfg+RrOBI7r/u4F+sKX8MUXVaf/5QoBUrGNGSn/pp7HMGOuQq
O6BVg4+hA1ySSwUG8mZItLRry1ISyErmW8b9xlqfd97uLME/5tX+sMelRFjUbAx8
A4CK58EvmMguHVTlXzx5RMdYcf1VScYcjlV/qA45uzP8zwI5aigfcmUD+tbGuQRh
KxUhmw0JaobtOR6+JSOAULW5gYa/egE4dWLwbyM6b6eFbdnjlQzEA1EW7ChMPAW/
Mo83KyiPtKMCSQulAgMBAAGjUDBOMB0GA1UdDgQWBBTBUZoqhQkzHQ6xNgZfFxOd
ZEVt8TAfBgNVHSMEGDAWgBTBUZoqhQkzHQ6xNgZfFxOdZEVt8TAMBgNVHRMEBTAD
AQH/MA0GCSqGSIb3DQEBCwUAA4IBAQCUOCl3T/J9B08Z+ijfOJAtkbUaEHuVZb4x
5EpZSy2ZbkLvtsftMFieHVNXn9dDswQc5qjYStCC4o60LKw4M6Y63FRsAZ/DNaqb
PY3jyCyuugZ8/sNf50vHYkAcF7SQYqOQFQX4TQsNUk2xMJIt7H0ErQFmkf/u3dg6
cy89zkT462IwxzSG7NNhIlRkL9o5qg+Y1mF9eZA1B0rcL6hO24PPTHOd90HDChBu
SZ6XMi/LzYQSTf0Vg2R+uMIVlzSlkdcZ6sqVnnqeLL8dFyIa4e9sj/D4ZCYP8Mqe
Z73H5/NNhmwCHRqVUTgm307xblQaWGhwAiDkaRvRW2aJQ0qGEdZK
-----END CERTIFICATE-----
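# How a combined key+certificate file like testcert.pem is typically consumed
# (a sketch using only the Python standard library, not yt-dlp internals):
#
#     import ssl
#     ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
#     ctx.load_cert_chain('test/testcert.pem')  # key and cert live in one file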
yt-dlp-2022.08.19/test/testdata/
yt-dlp-2022.08.19/test/testdata/certificate/

yt-dlp-2022.08.19/test/testdata/certificate/ca.crt

-----BEGIN CERTIFICATE-----
MIIBfDCCASOgAwIBAgIUUgngoxFpuWft8gjj3uEFoqJyoJowCgYIKoZIzj0EAwIw
FDESMBAGA1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEwMVoXDTM4MTAxNTAz
MDEwMVowFDESMBAGA1UEAwwJeXRkbHB0ZXN0MFkwEwYHKoZIzj0CAQYIKoZIzj0D
AQcDQgAEcTaKMtIn2/1kgid1zXFpLm87FMT5PP3/bltKVVH3DLO//0kUslCHYxFU
KpcCfVt9aueRyUFi1TNkkkEZ9D6fbqNTMFEwHQYDVR0OBBYEFBdY2rVNLFGM6r1F
iuamNDaiq0QoMB8GA1UdIwQYMBaAFBdY2rVNLFGM6r1FiuamNDaiq0QoMA8GA1Ud
EwEB/wQFMAMBAf8wCgYIKoZIzj0EAwIDRwAwRAIgXJg2jio1kow2g/iP54Qq+iI2
m4EAvZiY0Im/Ni3PHawCIC6KCl6QcHANbeq8ckOXNGusjl6OWhvEM3uPBPhqskq1
-----END CERTIFICATE-----

yt-dlp-2022.08.19/test/testdata/certificate/ca.key

-----BEGIN EC PRIVATE KEY-----
MHcCAQEEIG2L1bHdl3PnaLiJ7Zm8aAGCj4GiVbSbXQcrJAdL+yqOoAoGCCqGSM49
AwEHoUQDQgAEcTaKMtIn2/1kgid1zXFpLm87FMT5PP3/bltKVVH3DLO//0kUslCH
YxFUKpcCfVt9aueRyUFi1TNkkkEZ9D6fbg==
-----END EC PRIVATE KEY-----

yt-dlp-2022.08.19/test/testdata/certificate/ca.srl

4A260C33C4D34612646E6321E1E767DF1A95EF0B

yt-dlp-2022.08.19/test/testdata/certificate/client.crt

-----BEGIN CERTIFICATE-----
MIIBIzCBygIUSiYMM8TTRhJkbmMh4edn3xqV7wswCgYIKoZIzj0EAwIwFDESMBAG
A1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEyN1oXDTM4MTAxNTAzMDEyN1ow
FTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA
BKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq3ZuZ7rubyuMS
XNuH+2Cl9msSpJB2LhJs5kcwCgYIKoZIzj0EAwIDSAAwRQIhAMRr46vO25/5nUhD
aHp4L67AeSvrjvSFHfubyD3Kr5dwAiA8EfOgVxc8Qh6ozTcbXO/WnBfS48ZFRSQY
D0dB8M1kJw==
-----END CERTIFICATE-----

yt-dlp-2022.08.19/test/testdata/certificate/client.csr

-----BEGIN CERTIFICATE REQUEST-----
MIHQMHcCAQAwFTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqG
SM49AwEHA0IABKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq
3ZuZ7rubyuMSXNuH+2Cl9msSpJB2LhJs5kegADAKBggqhkjOPQQDAgNJADBGAiEA
1LZ72mtPmVxhGtdMvpZ0fyA68H2RC5IMHpLq18T55UcCIQDKpkXXVTvAzS0JioCq
6kiYq8Oxx6ZMoI+11k75/Kip1g==
-----END CERTIFICATE REQUEST-----
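# Inspection sketch (assumes the openssl CLI is available): the CSR above can
# be dumped in human-readable form with
#
#     openssl req -in client.csr -noout -text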
yt-dlp-2022.08.19/test/testdata/certificate/client.key

-----BEGIN EC PRIVATE KEY-----
MHcCAQEEIAW6h9hwT0Aha+JBukgmHnrKRPoqPNWYA86ic0UaKHs8oAoGCCqGSM49
AwEHoUQDQgAEpEQpUNZ8spmSfNiD4FSSZOfjd/amX8s1LIo+1ej9RXuGGnolcird
m5nuu5vK4xJc24f7YKX2axKkkHYuEmzmRw==
-----END EC PRIVATE KEY-----

yt-dlp-2022.08.19/test/testdata/certificate/clientencrypted.key

-----BEGIN EC PRIVATE KEY-----
Proc-Type: 4,ENCRYPTED
DEK-Info: AES-256-CBC,4B39160146F15544922E553E08299A35

96A7/iBkIfTVb8r2812ued2pS49FfVY4Ppz/45OGF0uFayMtMl8/GuEBCamuhFXS
rnOOpco96TTeeKZHqR45wnf4tgHM8IjoQ6H0EX3lVF19OHnArAgrGYtohWUGSyGn
IgLJFdUewIjdI7XApTJprQFE5E2tETXFA95mCz88u1c=
-----END EC PRIVATE KEY-----

yt-dlp-2022.08.19/test/testdata/certificate/clientwithencryptedkey.crt

-----BEGIN CERTIFICATE-----
MIIBIzCBygIUSiYMM8TTRhJkbmMh4edn3xqV7wswCgYIKoZIzj0EAwIwFDESMBAG
A1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEyN1oXDTM4MTAxNTAzMDEyN1ow
FTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA
BKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq3ZuZ7rubyuMS
XNuH+2Cl9msSpJB2LhJs5kcwCgYIKoZIzj0EAwIDSAAwRQIhAMRr46vO25/5nUhD
aHp4L67AeSvrjvSFHfubyD3Kr5dwAiA8EfOgVxc8Qh6ozTcbXO/WnBfS48ZFRSQY
D0dB8M1kJw==
-----END CERTIFICATE-----
-----BEGIN EC PRIVATE KEY-----
Proc-Type: 4,ENCRYPTED
DEK-Info: AES-256-CBC,4B39160146F15544922E553E08299A35

96A7/iBkIfTVb8r2812ued2pS49FfVY4Ppz/45OGF0uFayMtMl8/GuEBCamuhFXS
rnOOpco96TTeeKZHqR45wnf4tgHM8IjoQ6H0EX3lVF19OHnArAgrGYtohWUGSyGn
IgLJFdUewIjdI7XApTJprQFE5E2tETXFA95mCz88u1c=
-----END EC PRIVATE KEY-----
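# Usage sketch (an addition; option names taken from yt-dlp's client-certificate
# support of this era — verify against `yt-dlp --help` before relying on them):
#
#     yt-dlp --client-certificate client.crt --client-certificate-key client.key URL
#     yt-dlp --client-certificate clientwithencryptedkey.crt \
#            --client-certificate-password foobar URL
#
# The key password "foobar" is the one set in instructions.md below.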
yt-dlp-2022.08.19/test/testdata/certificate/clientwithkey.crt

-----BEGIN CERTIFICATE-----
MIIBIzCBygIUSiYMM8TTRhJkbmMh4edn3xqV7wswCgYIKoZIzj0EAwIwFDESMBAG
A1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEyN1oXDTM4MTAxNTAzMDEyN1ow
FTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA
BKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq3ZuZ7rubyuMS
XNuH+2Cl9msSpJB2LhJs5kcwCgYIKoZIzj0EAwIDSAAwRQIhAMRr46vO25/5nUhD
aHp4L67AeSvrjvSFHfubyD3Kr5dwAiA8EfOgVxc8Qh6ozTcbXO/WnBfS48ZFRSQY
D0dB8M1kJw==
-----END CERTIFICATE-----
-----BEGIN EC PRIVATE KEY-----
MHcCAQEEIAW6h9hwT0Aha+JBukgmHnrKRPoqPNWYA86ic0UaKHs8oAoGCCqGSM49
AwEHoUQDQgAEpEQpUNZ8spmSfNiD4FSSZOfjd/amX8s1LIo+1ej9RXuGGnolcird
m5nuu5vK4xJc24f7YKX2axKkkHYuEmzmRw==
-----END EC PRIVATE KEY-----

yt-dlp-2022.08.19/test/testdata/certificate/instructions.md

# Generate certificates for client cert tests

## CA

```sh
openssl ecparam -name prime256v1 -genkey -noout -out ca.key
openssl req -new -x509 -sha256 -days 6027 -key ca.key -out ca.crt -subj "/CN=ytdlptest"
```

## Client

```sh
openssl ecparam -name prime256v1 -genkey -noout -out client.key
openssl ec -in client.key -out clientencrypted.key -passout pass:foobar -aes256
openssl req -new -sha256 -key client.key -out client.csr -subj "/CN=ytdlptest2"
openssl x509 -req -in client.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out client.crt -days 6027 -sha256
cp client.crt clientwithkey.crt
cp client.crt clientwithencryptedkey.crt
cat client.key >> clientwithkey.crt
cat clientencrypted.key >> clientwithencryptedkey.crt
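# Optional sanity check (an addition, not part of the original recipe):
# confirm the issued client certificate verifies against the CA
openssl verify -CAfile ca.crt client.crt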
```

yt-dlp-2022.08.19/test/testdata/cookies/

yt-dlp-2022.08.19/test/testdata/cookies/httponly_cookies.txt

# Netscape HTTP Cookie File
# http://curl.haxx.se/rfc/cookie_spec.html
# This is a generated file!  Do not edit.

#HttpOnly_www.foobar.foobar	FALSE	/	TRUE	2147483647	HTTPONLY_COOKIE	HTTPONLY_COOKIE_VALUE
www.foobar.foobar	FALSE	/	TRUE	2147483647	JS_ACCESSIBLE_COOKIE	JS_ACCESSIBLE_COOKIE_VALUE

yt-dlp-2022.08.19/test/testdata/cookies/malformed_cookies.txt

# Netscape HTTP Cookie File
# http://curl.haxx.se/rfc/cookie_spec.html
# This is a generated file!  Do not edit.

# Cookie file entry with invalid number of fields - 6 instead of 7
www.foobar.foobar	FALSE	/	FALSE	0	COOKIE

# Cookie file entry with invalid expires at
www.foobar.foobar	FALSE	/	FALSE	1.7976931348623157e+308	COOKIE	VALUE

yt-dlp-2022.08.19/test/testdata/cookies/session_cookies.txt

# Netscape HTTP Cookie File
# http://curl.haxx.se/rfc/cookie_spec.html
# This is a generated file!  Do not edit.
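# (annotation) The two entries below leave the "expires" field empty or set it
# to 0; a Netscape-format cookie parser is expected to treat both as session
# cookies, as the names YoutubeDLExpiresEmpty/YoutubeDLExpires0 suggest.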
www.foobar.foobar	FALSE	/	TRUE		YoutubeDLExpiresEmpty	YoutubeDLExpiresEmptyValue
www.foobar.foobar	FALSE	/	TRUE	0	YoutubeDLExpires0	YoutubeDLExpires0Value

yt-dlp-2022.08.19/test/testdata/f4m/

yt-dlp-2022.08.19/test/testdata/f4m/custom_base_url.f4m

<?xml version="1.0" encoding="UTF-8"?>
<manifest xmlns="http://ns.adobe.com/f4m/1.0">
  <streamType>recorded</streamType>
  <baseURL>http://vod.livestream.com/events/0000000000673980/</baseURL>
  <duration>269.293</duration>
  <bootstrapInfo profile="named" id="bootstrap_1">AAAAm2Fic3QAAAAAAAAAAQAAAAPoAAAAAAAEG+0AAAAAAAAAAAAAAAAAAQAAABlhc3J0AAAAAAAAAAABAAAAAQAAAC4BAAAAVmFmcnQAAAAAAAAD6AAAAAAEAAAAAQAAAAAAAAAAAAAXcAAAAC0AAAAAAAQHQAAAE5UAAAAuAAAAAAAEGtUAAAEYAAAAAAAAAAAAAAAAAAAAAAA=</bootstrapInfo>
  <media url="b90f532f-b0f6-4f4e-8289-706d490b2fd8_2292" bootstrapInfoId="bootstrap_1" bitrate="2148" width="1280" height="720" videoCodec="avc1.4d401f" audioCodec="mp4a.40.2">
    <metadata>AgAKb25NZXRhRGF0YQgAAAAIAAhkdXJhdGlvbgBAcNSwIMSbpgAFd2lkdGgAQJQAAAAAAAAABmhlaWdodABAhoAAAAAAAAAJZnJhbWVyYXRlAEA4/7DoLwW3AA12aWRlb2RhdGFyYXRlAECe1DLgjcobAAx2aWRlb2NvZGVjaWQAQBwAAAAAAAAADWF1ZGlvZGF0YXJhdGUAQGSimlvaPKQADGF1ZGlvY29kZWNpZABAJAAAAAAAAAAACQ==</metadata>
  </media>
</manifest>

yt-dlp-2022.08.19/test/testdata/ism/

yt-dlp-2022.08.19/test/testdata/ism/sintel.Manifest

<?xml version="1.0" encoding="utf-8"?>
<!-- Created with Unified Streaming Platform (version=1.10.18-20255) -->
<SmoothStreamingMedia MajorVersion="2" MinorVersion="0" TimeScale="10000000" Duration="8880746666"> <StreamIndex Type="audio" QualityLevels="1" TimeScale="10000000" Name="audio" Chunks="445" Url="QualityLevels({bitrate})/Fragments(audio={start time})"> <QualityLevel Index="0" Bitrate="128001" CodecPrivateData="1190" SamplingRate="48000" Channels="2" BitsPerSample="16" PacketSize="4" AudioTag="255" FourCC="AACL" /> <c t="0" d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c 
d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c 
d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="746666" /> </StreamIndex> <StreamIndex Type="text" QualityLevels="1" TimeScale="10000000" Language="eng" Subtype="CAPT" Name="textstream_eng" Chunks="11" Url="QualityLevels({bitrate})/Fragments(textstream_eng={start time})"> <QualityLevel Index="0" Bitrate="1000" CodecPrivateData="" FourCC="TTML" /> <c t="0" d="600000000" /> <c d="600000000" /> <c d="600000000" /> <c d="600000000" /> <c d="600000000" /> <c d="600000000" /> <c d="600000000" /> <c d="600000000" /> <c d="600000000" /> <c d="600000000" /> <c d="240000000" /> </StreamIndex> <StreamIndex Type="video" QualityLevels="5" TimeScale="10000000" Name="video" Chunks="444" Url="QualityLevels({bitrate})/Fragments(video={start time})" MaxWidth="1688" MaxHeight="720" DisplayWidth="1689" DisplayHeight="720"> <QualityLevel Index="0" Bitrate="100000" CodecPrivateData="00000001674D401FDA0544EFFC2D002CBC40000003004000000C03C60CA80000000168EF32C8" MaxWidth="336" MaxHeight="144" FourCC="AVC1" /> <QualityLevel Index="1" Bitrate="326000" CodecPrivateData="00000001674D401FDA0241FE23FFC3BC83BA44000003000400000300C03C60CA800000000168EF32C8" MaxWidth="562" MaxHeight="240" FourCC="AVC1" /> <QualityLevel Index="2" Bitrate="698000" CodecPrivateData="00000001674D401FDA0350BFB97FF06AF06AD1000003000100000300300F1832A00000000168EF32C8" MaxWidth="844" MaxHeight="360" FourCC="AVC1" /> <QualityLevel Index="3" Bitrate="1493000" CodecPrivateData="00000001674D401FDA011C3DE6FFF0D890D871000003000100000300300F1832A00000000168EF32C8" MaxWidth="1126" MaxHeight="480" FourCC="AVC1" /> <QualityLevel Index="4" Bitrate="4482000" CodecPrivateData="00000001674D401FDA01A816F97FFC1ABC1AB440000003004000000C03C60CA80000000168EF32C8" MaxWidth="1688" MaxHeight="720" FourCC="AVC1" /> <c t="0" d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c 
d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c 
d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c 
d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> </StreamIndex> </SmoothStreamingMedia> �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/test/testdata/m3u8/���������������������������������������������������������������0000775�0000000�0000000�00000000000�14277552437�0016734�5����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/test/testdata/m3u8/bipbop_16x9.m3u8�����������������������������������������������0000664�0000000�0000000�00000006326�14277552437�0021523�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#EXTM3U #EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="bipbop_audio",LANGUAGE="eng",NAME="BipBop Audio 1",AUTOSELECT=YES,DEFAULT=YES #EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="bipbop_audio",LANGUAGE="eng",NAME="BipBop Audio 2",AUTOSELECT=NO,DEFAULT=NO,URI="alternate_audio_aac/prog_index.m3u8" #EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="English",DEFAULT=YES,AUTOSELECT=YES,FORCED=NO,LANGUAGE="en",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/eng/prog_index.m3u8" #EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="English (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="en",URI="subtitles/eng_forced/prog_index.m3u8" 
==> yt-dlp-2022.08.19/test/testdata/m3u8/bipbop_16x9.m3u8 <==
#EXTM3U
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="bipbop_audio",LANGUAGE="eng",NAME="BipBop Audio 1",AUTOSELECT=YES,DEFAULT=YES
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="bipbop_audio",LANGUAGE="eng",NAME="BipBop Audio 2",AUTOSELECT=NO,DEFAULT=NO,URI="alternate_audio_aac/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="English",DEFAULT=YES,AUTOSELECT=YES,FORCED=NO,LANGUAGE="en",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/eng/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="English (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="en",URI="subtitles/eng_forced/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Français",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="fr",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/fra/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Français (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="fr",URI="subtitles/fra_forced/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Español",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="es",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/spa/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Español (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="es",URI="subtitles/spa_forced/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="日本語",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="ja",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/jpn/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="日本語 (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="ja",URI="subtitles/jpn_forced/prog_index.m3u8"
#EXT-X-STREAM-INF:BANDWIDTH=263851,CODECS="mp4a.40.2, avc1.4d400d",RESOLUTION=416x234,AUDIO="bipbop_audio",SUBTITLES="subs"
gear1/prog_index.m3u8
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=28451,CODECS="avc1.4d400d",URI="gear1/iframe_index.m3u8"
#EXT-X-STREAM-INF:BANDWIDTH=577610,CODECS="mp4a.40.2, avc1.4d401e",RESOLUTION=640x360,AUDIO="bipbop_audio",SUBTITLES="subs"
gear2/prog_index.m3u8
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=181534,CODECS="avc1.4d401e",URI="gear2/iframe_index.m3u8"
#EXT-X-STREAM-INF:BANDWIDTH=915905,CODECS="mp4a.40.2, avc1.4d401f",RESOLUTION=960x540,AUDIO="bipbop_audio",SUBTITLES="subs"
gear3/prog_index.m3u8
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=297056,CODECS="avc1.4d401f",URI="gear3/iframe_index.m3u8"
#EXT-X-STREAM-INF:BANDWIDTH=1030138,CODECS="mp4a.40.2, avc1.4d401f",RESOLUTION=1280x720,AUDIO="bipbop_audio",SUBTITLES="subs"
gear4/prog_index.m3u8
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=339492,CODECS="avc1.4d401f",URI="gear4/iframe_index.m3u8"
#EXT-X-STREAM-INF:BANDWIDTH=1924009,CODECS="mp4a.40.2, avc1.4d401f",RESOLUTION=1920x1080,AUDIO="bipbop_audio",SUBTITLES="subs"
gear5/prog_index.m3u8
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=669554,CODECS="avc1.4d401f",URI="gear5/iframe_index.m3u8"
#EXT-X-STREAM-INF:BANDWIDTH=41457,CODECS="mp4a.40.2",AUDIO="bipbop_audio",SUBTITLES="subs"
gear0/prog_index.m3u8
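bipbop_16x9.m3u8 is Apple's classic HLS master-playlist sample, used here to exercise variant and rendition parsing. Since an m3u8 master is line-oriented — every #EXT-X-STREAM-INF attribute line is immediately followed by its URI line — a minimal stdlib-only sketch (illustrative names, not yt-dlp's API; quoted attribute values are returned with their quotes) can pair them like this:

import re

def parse_master(text):
    """Yield (bandwidth, resolution, uri) for each #EXT-X-STREAM-INF variant."""
    lines = [line.strip() for line in text.splitlines() if line.strip()]
    for i, line in enumerate(lines):
        if not line.startswith('#EXT-X-STREAM-INF:'):
            continue
        attrs = dict(re.findall(r'([A-Z0-9-]+)=("[^"]*"|[^",]+)', line.split(':', 1)[1]))
        yield (int(attrs['BANDWIDTH']),
               attrs.get('RESOLUTION'),  # absent for the audio-only gear0 variant
               lines[i + 1])             # in a master playlist the URI follows directly

Run over the playlist above, this produces six variants: the 416x234 gear1 stream up through the 1920x1080 gear5 stream, plus the audio-only gear0 entry.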
==> yt-dlp-2022.08.19/test/testdata/m3u8/img_bipbop_adv_example_fmp4.m3u8 <==
#EXTM3U
#EXT-X-VERSION:6
#EXT-X-INDEPENDENT-SEGMENTS
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=2168183,BANDWIDTH=2177116,CODECS="avc1.640020,mp4a.40.2",RESOLUTION=960x540,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
v5/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=7968416,BANDWIDTH=8001098,CODECS="avc1.64002a,mp4a.40.2",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
v9/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=6170000,BANDWIDTH=6312875,CODECS="avc1.64002a,mp4a.40.2",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
v8/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=4670769,BANDWIDTH=4943747,CODECS="avc1.64002a,mp4a.40.2",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
v7/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=3168702,BANDWIDTH=3216424,CODECS="avc1.640020,mp4a.40.2",RESOLUTION=1280x720,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
v6/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=1265132,BANDWIDTH=1268994,CODECS="avc1.64001e,mp4a.40.2",RESOLUTION=768x432,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
v4/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=895755,BANDWIDTH=902298,CODECS="avc1.64001e,mp4a.40.2",RESOLUTION=640x360,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
v3/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=530721,BANDWIDTH=541052,CODECS="avc1.640015,mp4a.40.2",RESOLUTION=480x270,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
v2/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=2390686,BANDWIDTH=2399619,CODECS="avc1.640020,ac-3",RESOLUTION=960x540,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
v5/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=8190919,BANDWIDTH=8223601,CODECS="avc1.64002a,ac-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
v9/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=6392503,BANDWIDTH=6535378,CODECS="avc1.64002a,ac-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
v8/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=4893272,BANDWIDTH=5166250,CODECS="avc1.64002a,ac-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
v7/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=3391205,BANDWIDTH=3438927,CODECS="avc1.640020,ac-3",RESOLUTION=1280x720,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
v6/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=1487635,BANDWIDTH=1491497,CODECS="avc1.64001e,ac-3",RESOLUTION=768x432,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
v4/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=1118258,BANDWIDTH=1124801,CODECS="avc1.64001e,ac-3",RESOLUTION=640x360,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
v3/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=753224,BANDWIDTH=763555,CODECS="avc1.640015,ac-3",RESOLUTION=480x270,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
v2/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=2198686,BANDWIDTH=2207619,CODECS="avc1.640020,ec-3",RESOLUTION=960x540,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
v5/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=7998919,BANDWIDTH=8031601,CODECS="avc1.64002a,ec-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
v9/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=6200503,BANDWIDTH=6343378,CODECS="avc1.64002a,ec-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
v8/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=4701272,BANDWIDTH=4974250,CODECS="avc1.64002a,ec-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
v7/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=3199205,BANDWIDTH=3246927,CODECS="avc1.640020,ec-3",RESOLUTION=1280x720,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
v6/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=1295635,BANDWIDTH=1299497,CODECS="avc1.64001e,ec-3",RESOLUTION=768x432,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
v4/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=926258,BANDWIDTH=932801,CODECS="avc1.64001e,ec-3",RESOLUTION=640x360,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
v3/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=561224,BANDWIDTH=571555,CODECS="avc1.640015,ec-3",RESOLUTION=480x270,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
v2/prog_index.m3u8
#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=183689,BANDWIDTH=187492,CODECS="avc1.64002a",RESOLUTION=1920x1080,URI="v7/iframe_index.m3u8"
#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=132672,BANDWIDTH=136398,CODECS="avc1.640020",RESOLUTION=1280x720,URI="v6/iframe_index.m3u8"
#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=97767,BANDWIDTH=101378,CODECS="avc1.640020",RESOLUTION=960x540,URI="v5/iframe_index.m3u8"
#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=75722,BANDWIDTH=77818,CODECS="avc1.64001e",RESOLUTION=768x432,URI="v4/iframe_index.m3u8"
#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=63522,BANDWIDTH=65091,CODECS="avc1.64001e",RESOLUTION=640x360,URI="v3/iframe_index.m3u8"
#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=39678,BANDWIDTH=40282,CODECS="avc1.640015",RESOLUTION=480x270,URI="v2/iframe_index.m3u8"
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aud1",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,CHANNELS="2",URI="a1/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aud2",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,CHANNELS="6",URI="a2/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aud3",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,CHANNELS="6",URI="a3/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=CLOSED-CAPTIONS,GROUP-ID="cc1",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,INSTREAM-ID="CC1"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="sub1",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,FORCED=NO,URI="s1/en/prog_index.m3u8"
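The fMP4 advanced-stream example above exercises multiple audio groups: stereo AAC in aud1, 6-channel AC-3 in aud2, and 6-channel E-AC-3 in aud3, with each variant's AUDIO attribute naming the group whose #EXT-X-MEDIA entry supplies the actual audio playlist. Assuming the attribute lists have already been parsed into plain dicts with quotes stripped (a hypothetical pre-processing step, not shown), resolving a variant to its audio rendition is just a group lookup:

def audio_uri_for_variant(stream_inf_attrs, media_entries):
    """Return the URI of the audio rendition matching a variant's AUDIO group."""
    group = stream_inf_attrs.get('AUDIO')  # e.g. 'aud2'
    for media in media_entries:
        if media.get('TYPE') == 'AUDIO' and media.get('GROUP-ID') == group:
            return media.get('URI')  # e.g. 'a2/prog_index.m3u8'
    return None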
==> yt-dlp-2022.08.19/test/testdata/mpd/float_duration.mpd <==
<?xml version="1.0" encoding="UTF-8"?>
<MPD xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="urn:mpeg:dash:schema:mpd:2011" type="static" minBufferTime="PT2S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" mediaPresentationDuration="PT6014S">
  <Period bitstreamSwitching="true">
    <AdaptationSet mimeType="audio/mp4" codecs="mp4a.40.2" startWithSAP="1" segmentAlignment="true">
      <SegmentTemplate timescale="1000000" presentationTimeOffset="0" initialization="ai_$RepresentationID$.mp4d" media="a_$RepresentationID$_$Number$.mp4d" duration="2000000.0" startNumber="0"></SegmentTemplate>
      <Representation id="318597" bandwidth="61587"></Representation>
    </AdaptationSet>
    <AdaptationSet mimeType="video/mp4" startWithSAP="1" segmentAlignment="true">
      <SegmentTemplate timescale="1000000" presentationTimeOffset="0" initialization="vi_$RepresentationID$.mp4d" media="v_$RepresentationID$_$Number$.mp4d" duration="2000000.0" startNumber="0"></SegmentTemplate>
      <Representation id="318597" codecs="avc1.42001f" width="340" height="192" bandwidth="318597"></Representation>
      <Representation id="638590" codecs="avc1.42001f" width="512" height="288" bandwidth="638590"></Representation>
      <Representation id="1022565" codecs="avc1.4d001f" width="688" height="384" bandwidth="1022565"></Representation>
      <Representation id="2046506" codecs="avc1.4d001f" width="1024" height="576" bandwidth="2046506"></Representation>
      <Representation id="3998017" codecs="avc1.640029" width="1280" height="720" bandwidth="3998017"></Representation>
      <Representation id="5997485" codecs="avc1.640032" width="1920" height="1080" bandwidth="5997485"></Representation>
    </AdaptationSet>
  </Period>
</MPD>
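float_duration.mpd exists to make sure a SegmentTemplate whose duration attribute is a float — duration="2000000.0" at timescale="1000000", i.e. 2-second segments — parses cleanly. The segment count then follows from the period length; a sketch of that arithmetic, using only values visible in the file above:

import math

timescale = 1_000_000
segment_duration = float('2000000.0') / timescale  # 2.0 s; must survive being a float string
period_seconds = 6014                              # from mediaPresentationDuration="PT6014S"
segment_count = math.ceil(period_seconds / segment_duration)
print(segment_count)  # 3007 segments, numbered from startNumber="0"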
==> yt-dlp-2022.08.19/test/testdata/mpd/subtitles.mpd <==
<?xml version="1.0" encoding="utf-8"?>
<!-- Created with Unified Streaming Platform (version=1.10.18-20255) -->
<MPD xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="urn:mpeg:dash:schema:mpd:2011" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 http://standards.iso.org/ittf/PubliclyAvailableStandards/MPEG-DASH_schema_files/DASH-MPD.xsd" type="static" mediaPresentationDuration="PT14M48S" maxSegmentDuration="PT1M" minBufferTime="PT10S" profiles="urn:mpeg:dash:profile:isoff-live:2011">
  <Period id="1" duration="PT14M48S">
    <BaseURL>dash/</BaseURL>
    <AdaptationSet id="1" group="1" contentType="audio" segmentAlignment="true" audioSamplingRate="48000" mimeType="audio/mp4" codecs="mp4a.40.2" startWithSAP="1">
      <AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2" />
      <Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" />
      <SegmentTemplate timescale="48000" initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash" media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
        <SegmentTimeline>
<S t="0" d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" />
<S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" />
<S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" />
<S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" />
<S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" />
<S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" />
<S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" />
<S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" />
<S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" />
<S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" />
<S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" />
<S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" />
<S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" />
<S d="96256" r="2" /> <S
d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="3584" /> </SegmentTimeline> </SegmentTemplate> <Representation id="audio=128001" bandwidth="128001"> </Representation> </AdaptationSet> <AdaptationSet id="2" group="3" contentType="text" lang="en" mimeType="application/mp4" codecs="stpp" startWithSAP="1"> <Role schemeIdUri="urn:mpeg:dash:role:2011" value="subtitle" /> <SegmentTemplate timescale="1000" initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash" media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash"> <SegmentTimeline> <S t="0" d="60000" r="9" /> <S d="24000" /> </SegmentTimeline> </SegmentTemplate> <Representation id="textstream_eng=1000" bandwidth="1000"> </Representation> </AdaptationSet> <AdaptationSet id="3" group="2" contentType="video" par="960:409" minBandwidth="100000" maxBandwidth="4482000" maxWidth="1689" maxHeight="720" segmentAlignment="true" mimeType="video/mp4" codecs="avc1.4D401F" startWithSAP="1"> <Role schemeIdUri="urn:mpeg:dash:role:2011" 
value="main" /> <SegmentTemplate timescale="12288" initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash" media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash"> <SegmentTimeline> <S t="0" d="24576" r="443" /> </SegmentTimeline> </SegmentTemplate> <Representation id="video=100000" bandwidth="100000" width="336" height="144" sar="2880:2863" scanType="progressive"> </Representation> <Representation id="video=326000" bandwidth="326000" width="562" height="240" sar="115200:114929" scanType="progressive"> </Representation> <Representation id="video=698000" bandwidth="698000" width="844" height="360" sar="86400:86299" scanType="progressive"> </Representation> <Representation id="video=1493000" bandwidth="1493000" width="1126" height="480" sar="230400:230267" scanType="progressive"> </Representation> <Representation id="video=4482000" bandwidth="4482000" width="1688" height="720" sar="86400:86299" scanType="progressive"> </Representation> </AdaptationSet> </Period> </MPD> ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/test/testdata/mpd/unfragmented.mpd������������������������������������������������0000664�0000000�0000000�00000003320�14277552437�0022077�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������<?xml version="1.0" encoding="UTF-8" standalone="yes"?> <MPD mediaPresentationDuration="PT54.915S" minBufferTime="PT1.500S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" type="static" xmlns="urn:mpeg:dash:schema:mpd:2011"> <Period duration="PT54.915S"> <AdaptationSet segmentAlignment="true" subsegmentAlignment="true" subsegmentStartsWithSAP="1"> <Representation bandwidth="804261" codecs="avc1.4d401e" frameRate="30" height="360" id="VIDEO-1" mimeType="video/mp4" startWithSAP="1" width="360"> <BaseURL>DASH_360</BaseURL> <SegmentBase indexRange="915-1114" indexRangeExact="true"> <Initialization range="0-914"/> </SegmentBase> </Representation> <Representation bandwidth="608000" codecs="avc1.4d401e" frameRate="30" height="240" id="VIDEO-2" mimeType="video/mp4" startWithSAP="1" width="240"> <BaseURL>DASH_240</BaseURL> <SegmentBase indexRange="913-1112" indexRangeExact="true"> <Initialization range="0-912"/> </SegmentBase> </Representation> </AdaptationSet> <AdaptationSet> <Representation audioSamplingRate="48000" bandwidth="129870" codecs="mp4a.40.2" id="AUDIO-1" mimeType="audio/mp4" startWithSAP="1"> <AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/> <BaseURL>audio</BaseURL> <SegmentBase indexRange="832-1007" indexRangeExact="true"> <Initialization range="0-831"/> </SegmentBase> </Representation> </AdaptationSet> </Period> </MPD> 
==> yt-dlp-2022.08.19/test/testdata/mpd/urls_only.mpd <==
<?xml version="1.0" ?>
<MPD maxSegmentDuration="PT0H0M10.000S" mediaPresentationDuration="PT0H4M1.728S" minBufferTime="PT1.500S" profiles="urn:mpeg:dash:profile:isoff-main:2011" type="static" xmlns="urn:mpeg:dash:schema:mpd:2011">
  <Period duration="PT0H4M1.728S">
    <AdaptationSet bitstreamSwitching="true" lang="und" maxHeight="1080" maxWidth="1920" par="16:9" segmentAlignment="true">
      <ContentComponent contentType="video" id="1"/>
      <Representation audioSamplingRate="44100" bandwidth="200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="144" id="h264_aac_144p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="256">
        <SegmentList duration="10000" timescale="1000">
          <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/init/432f65a0.mp4"/>
          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/0/432f65a0.m4s"/>
          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/1/432f65a0.m4s"/>
          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/2/432f65a0.m4s"/>
          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/3/432f65a0.m4s"/>
          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/4/432f65a0.m4s"/>
          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/5/432f65a0.m4s"/>
          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/6/432f65a0.m4s"/>
          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/7/432f65a0.m4s"/>
          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/8/432f65a0.m4s"/>
          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/9/432f65a0.m4s"/>
          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/10/432f65a0.m4s"/>
          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/11/432f65a0.m4s"/>
          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/12/432f65a0.m4s"/>
          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/13/432f65a0.m4s"/>
          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/14/432f65a0.m4s"/>
          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/15/432f65a0.m4s"/>
          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/16/432f65a0.m4s"/>
          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/17/432f65a0.m4s"/>
          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/18/432f65a0.m4s"/>
          <SegmentURL
media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/19/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/20/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/21/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/22/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/23/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/24/432f65a0.m4s"/> </SegmentList> </Representation> <Representation audioSamplingRate="44100" bandwidth="400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="240" id="h264_aac_240p_m4s" mimeType="video/mp4" sar="160:159" startWithSAP="1" width="424"> <SegmentList duration="10000" timescale="1000"> <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/init/432f65a0.mp4"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/0/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/1/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/2/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/3/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/4/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/5/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/6/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/7/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/8/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/9/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/10/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/11/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/12/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/13/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/14/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/15/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/16/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/17/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/18/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/19/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/20/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/21/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/22/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/23/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/24/432f65a0.m4s"/> </SegmentList> </Representation> <Representation audioSamplingRate="44100" bandwidth="800000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" 
height="360" id="h264_aac_360p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="640"> <SegmentList duration="10000" timescale="1000"> <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/init/432f65a0.mp4"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/0/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/1/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/2/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/3/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/4/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/5/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/6/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/7/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/8/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/9/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/10/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/11/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/12/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/13/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/14/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/15/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/16/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/17/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/18/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/19/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/20/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/21/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/22/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/23/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/24/432f65a0.m4s"/> </SegmentList> </Representation> <Representation audioSamplingRate="44100" bandwidth="1200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="480" id="h264_aac_480p_m4s" mimeType="video/mp4" sar="320:321" startWithSAP="1" width="856"> <SegmentList duration="10000" timescale="1000"> <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/init/432f65a0.mp4"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/0/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/1/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/2/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/3/432f65a0.m4s"/> <SegmentURL 
media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/4/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/5/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/6/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/7/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/8/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/9/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/10/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/11/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/12/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/13/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/14/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/15/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/16/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/17/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/18/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/19/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/20/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/21/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/22/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/23/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/24/432f65a0.m4s"/> </SegmentList> </Representation> <Representation audioSamplingRate="44100" bandwidth="1600000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="576" id="h264_aac_576p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1024"> <SegmentList duration="10000" timescale="1000"> <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/init/432f65a0.mp4"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/0/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/1/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/2/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/3/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/4/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/5/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/6/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/7/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/8/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/9/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/10/432f65a0.m4s"/> <SegmentURL 
media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/11/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/12/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/13/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/14/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/15/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/16/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/17/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/18/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/19/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/20/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/21/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/22/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/23/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/24/432f65a0.m4s"/> </SegmentList> </Representation> <Representation audioSamplingRate="44100" bandwidth="2400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="720" id="h264_aac_720p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1280"> <SegmentList duration="10000" timescale="1000"> <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/init/432f65a0.mp4"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/0/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/1/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/2/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/3/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/4/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/5/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/6/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/7/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/8/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/9/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/10/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/11/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/12/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/13/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/14/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/15/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/16/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/17/432f65a0.m4s"/> <SegmentURL 
media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/18/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/19/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/20/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/21/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/22/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/23/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/24/432f65a0.m4s"/> </SegmentList> </Representation> <Representation audioSamplingRate="44100" bandwidth="4400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="1080" id="h264_aac_1080p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1920"> <SegmentList duration="10000" timescale="1000"> <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/init/432f65a0.mp4"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/0/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/1/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/2/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/3/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/4/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/5/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/6/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/7/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/8/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/9/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/10/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/11/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/12/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/13/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/14/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/15/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/16/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/17/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/18/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/19/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/20/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/21/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/22/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/23/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/24/432f65a0.m4s"/> </SegmentList> 
</Representation> </AdaptationSet> </Period> </MPD> yt-dlp-2022.08.19/test/testdata/thumbnails/000077500000000000000000000000001427755243700203065ustar00rootroot00000000000000yt-dlp-2022.08.19/test/testdata/thumbnails/foo %d bar/000077500000000000000000000000001427755243700220675ustar00rootroot00000000000000yt-dlp-2022.08.19/test/testdata/thumbnails/foo %d bar/foo_%d.webp000066400000000000000000000075301427755243700241060ustar00rootroot00000000000000[binary WebP (RIFF/VP8) thumbnail data, 3928 bytes, omitted]
yt-dlp-2022.08.19/test/testdata/xspf/000077500000000000000000000000001427755243700171205ustar00rootroot00000000000000yt-dlp-2022.08.19/test/testdata/xspf/foo_xspf.xspf000066400000000000000000000025511427755243700216500ustar00rootroot00000000000000<?xml version="1.0" encoding="UTF-8"?> <playlist version="1" xmlns="http://xspf.org/ns/0/"> <date>2018-03-09T18:01:43Z</date> <trackList> <track> <location>cd1/track%201.mp3</location> <title>Pandemonium
"6a66d022ac8e1c13da284036288a133ec8dba003b7bd3a5179d0c0daca8c8196" ] }, "2013.01.06": { "bin": [ "http://youtube-dl.org/downloads/2013.01.06/youtube-dl", "64b6ed8865735c6302e836d4d832577321b4519aa02640dc508580c1ee824049" ], "exe": [ "http://youtube-dl.org/downloads/2013.01.06/youtube-dl.exe", "58609baf91e4389d36e3ba586e21dab882daaaee537e4448b1265392ae86ff84" ], "tar": [ "http://youtube-dl.org/downloads/2013.01.06/youtube-dl-2013.01.06.tar.gz", "fe77ab20a95d980ed17a659aa67e371fdd4d656d19c4c7950e7b720b0c2f1a86" ] } } }yt-dlp-2022.08.19/yt-dlp.cmd000066400000000000000000000000631427755243700152450ustar00rootroot00000000000000@py -bb -Werror -Xdev "%~dp0yt_dlp\__main__.py" %* yt-dlp-2022.08.19/yt-dlp.sh000077500000000000000000000001661427755243700151230ustar00rootroot00000000000000#!/usr/bin/env sh exec "${PYTHON:-python3}" -bb -Werror -Xdev "$(dirname "$(realpath "$0")")/yt_dlp/__main__.py" "$@" yt-dlp-2022.08.19/yt_dlp/000077500000000000000000000000001427755243700146435ustar00rootroot00000000000000yt-dlp-2022.08.19/yt_dlp/YoutubeDL.py000066400000000000000000005623251427755243700171060ustar00rootroot00000000000000import collections import contextlib import datetime import errno import fileinput import functools import io import itertools import json import locale import operator import os import random import re import shutil import subprocess import sys import tempfile import time import tokenize import traceback import unicodedata import urllib.request from string import ascii_letters from .cache import Cache from .compat import compat_os_name, compat_shlex_quote from .cookies import load_cookies from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name from .downloader.rtmp import rtmpdump_version from .extractor import gen_extractor_classes, get_info_extractor from .extractor.openload import PhantomJSwrapper from .minicurses import format_text from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors from .postprocessor import ( EmbedThumbnailPP, FFmpegFixupDuplicateMoovPP, FFmpegFixupDurationPP, FFmpegFixupM3u8PP, FFmpegFixupM4aPP, FFmpegFixupStretchedPP, FFmpegFixupTimestampPP, FFmpegMergerPP, FFmpegPostProcessor, FFmpegVideoConvertorPP, MoveFilesAfterDownloadPP, get_postprocessor, ) from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping from .update import detect_variant from .utils import ( DEFAULT_OUTTMPL, IDENTITY, LINK_TEMPLATES, MEDIA_EXTENSIONS, NO_DEFAULT, NUMBER_RE, OUTTMPL_TYPES, POSTPROCESS_WHEN, STR_FORMAT_RE_TMPL, STR_FORMAT_TYPES, ContentTooShortError, DateRange, DownloadCancelled, DownloadError, EntryNotInPlaylist, ExistingVideoReached, ExtractorError, GeoRestrictedError, HEADRequest, ISO3166Utils, LazyList, MaxDownloadsReached, Namespace, PagedList, PerRequestProxyHandler, PlaylistEntries, Popen, PostProcessingError, ReExtractInfo, RejectedVideoReached, SameFileError, UnavailableVideoError, UserNotLive, YoutubeDLCookieProcessor, YoutubeDLHandler, YoutubeDLRedirectHandler, age_restricted, args_to_str, bug_reports_message, date_from_str, determine_ext, determine_protocol, encode_compat_str, encodeFilename, error_to_compat_str, escapeHTML, expand_path, filter_dict, float_or_none, format_bytes, format_decimal_suffix, format_field, formatSeconds, get_compatible_ext, get_domain, int_or_none, iri_to_uri, join_nonempty, locked_file, make_archive_id, make_dir, make_HTTPS_handler, merge_headers, network_exceptions, number_of_digits, orderedSet, parse_filesize, preferredencoding, prepend_extension, register_socks_protocols, 
remove_terminal_sequences, render_table, replace_extension, sanitize_filename, sanitize_path, sanitize_url, sanitized_Request, std_headers, str_or_none, strftime_or_none, subtitles_filename, supports_terminal_sequences, system_identifier, timetuple_from_msec, to_high_limit_path, traverse_obj, try_call, try_get, url_basename, variadic, version_tuple, windows_enable_vt_mode, write_json_file, write_string, ) from .version import RELEASE_GIT_HEAD, VARIANT, __version__ if compat_os_name == 'nt': import ctypes class YoutubeDL: """YoutubeDL class. YoutubeDL objects are the ones responsible of downloading the actual video file and writing it to disk if the user has requested it, among some other tasks. In most cases there should be one per program. As, given a video URL, the downloader doesn't know how to extract all the needed information, task that InfoExtractors do, it has to pass the URL to one of them. For this, YoutubeDL objects have a method that allows InfoExtractors to be registered in a given order. When it is passed a URL, the YoutubeDL object handles it to the first InfoExtractor it finds that reports being able to handle it. The InfoExtractor extracts all the information about the video or videos the URL refers to, and YoutubeDL process the extracted information, possibly using a File Downloader to download the video. YoutubeDL objects accept a lot of parameters. In order not to saturate the object constructor with arguments, it receives a dictionary of options instead. These options are available through the params attribute for the InfoExtractors to use. The YoutubeDL also registers itself as the downloader in charge for the InfoExtractors that are added to it, so this is a "mutual registration". Available options: username: Username for authentication purposes. password: Password for authentication purposes. videopassword: Password for accessing a video. ap_mso: Adobe Pass multiple-system operator identifier. ap_username: Multiple-system operator account username. ap_password: Multiple-system operator account password. usenetrc: Use netrc for authentication instead. verbose: Print additional info to stdout. quiet: Do not print messages to stdout. no_warnings: Do not print out anything for warnings. forceprint: A dict with keys WHEN mapped to a list of templates to print to stdout. The allowed keys are video or any of the items in utils.POSTPROCESS_WHEN. For compatibility, a single list is also accepted print_to_file: A dict with keys WHEN (same as forceprint) mapped to a list of tuples with (template, filename) forcejson: Force printing info_dict as JSON. dump_single_json: Force printing the info_dict of the whole playlist (or video) as a single JSON line. force_write_download_archive: Force writing download archive regardless of 'skip_download' or 'simulate'. simulate: Do not download the video files. If unset (or None), simulate only if listsubtitles, listformats or list_thumbnails is used format: Video format code. see "FORMAT SELECTION" for more details. You can also pass a function. The function takes 'ctx' as argument and returns the formats to download. See "build_format_selector" for an implementation allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded. ignore_no_formats_error: Ignore "No video formats" error. Usefull for extracting metadata even if the video is not actually available for download (experimental) format_sort: A list of fields by which to sort the video formats. See "Sorting Formats" for more details. 
format_sort_force: Force the given format_sort. see "Sorting Formats" for more details. prefer_free_formats: Whether to prefer video formats with free containers over non-free ones of same quality. allow_multiple_video_streams: Allow multiple video streams to be merged into a single file allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file check_formats Whether to test if the formats are downloadable. Can be True (check all), False (check none), 'selected' (check selected formats), or None (check only if requested by extractor) paths: Dictionary of output paths. The allowed keys are 'home' 'temp' and the keys of OUTTMPL_TYPES (in utils.py) outtmpl: Dictionary of templates for output names. Allowed keys are 'default' and the keys of OUTTMPL_TYPES (in utils.py). For compatibility with youtube-dl, a single string can also be used outtmpl_na_placeholder: Placeholder for unavailable meta fields. restrictfilenames: Do not allow "&" and spaces in file names trim_file_name: Limit length of filename (extension excluded) windowsfilenames: Force the filenames to be windows compatible ignoreerrors: Do not stop on download/postprocessing errors. Can be 'only_download' to ignore only download errors. Default is 'only_download' for CLI, but False for API skip_playlist_after_errors: Number of allowed failures until the rest of the playlist is skipped force_generic_extractor: Force downloader to use the generic extractor overwrites: Overwrite all video and metadata files if True, overwrite only non-video files if None and don't overwrite any file if False For compatibility with youtube-dl, "nooverwrites" may also be used instead playlist_items: Specific indices of playlist to download. playlistrandom: Download playlist items in random order. lazy_playlist: Process playlist entries as they are received. matchtitle: Download only matching titles. rejecttitle: Reject downloads for matching titles. logger: Log messages to a logging.Logger instance. logtostderr: Log messages to stderr instead of stdout. consoletitle: Display progress in console window's titlebar. writedescription: Write the video description to a .description file writeinfojson: Write the video description to a .info.json file clean_infojson: Remove private fields from the infojson getcomments: Extract video comments. This will not be written to disk unless writeinfojson is also given writeannotations: Write the video annotations to a .annotations.xml file writethumbnail: Write the thumbnail image to a file allow_playlist_files: Whether to write playlists' description, infojson etc also to disk when using the 'write*' options write_all_thumbnails: Write all thumbnail formats to files writelink: Write an internet shortcut file, depending on the current platform (.url/.webloc/.desktop) writeurllink: Write a Windows internet shortcut file (.url) writewebloclink: Write a macOS internet shortcut file (.webloc) writedesktoplink: Write a Linux internet shortcut file (.desktop) writesubtitles: Write the video subtitles to a file writeautomaticsub: Write the automatically generated subtitles to a file listsubtitles: Lists all available subtitles for the video subtitlesformat: The format code for subtitles subtitleslangs: List of languages of the subtitles to download (can be regex). The list may contain "all" to refer to all the available subtitles. The language can be prefixed with a "-" to exclude it from the requested languages, e.g. 
['all', '-live_chat'] keepvideo: Keep the video file after post-processing daterange: A DateRange object, download only if the upload_date is in the range. skip_download: Skip the actual download of the video file cachedir: Location of the cache files in the filesystem. False to disable filesystem cache. noplaylist: Download single video instead of a playlist if in doubt. age_limit: An integer representing the user's age in years. Unsuitable videos for the given age are skipped. min_views: An integer representing the minimum view count the video must have in order to not be skipped. Videos without view count information are always downloaded. None for no limit. max_views: An integer representing the maximum view count. Videos that are more popular than that are not downloaded. Videos without view count information are always downloaded. None for no limit. download_archive: File name of a file where all downloads are recorded. Videos already present in the file are not downloaded again. break_on_existing: Stop the download process after attempting to download a file that is in the archive. break_on_reject: Stop the download process when encountering a video that has been filtered out. break_per_url: Whether break_on_reject and break_on_existing should act on each input URL as opposed to for the entire queue cookiefile: File name or text stream from where cookies should be read and dumped to cookiesfrombrowser: A tuple containing the name of the browser, the profile name/path from where cookies are loaded, and the name of the keyring, e.g. ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT') legacyserverconnect: Explicitly allow HTTPS connection to servers that do not support RFC 5746 secure renegotiation nocheckcertificate: Do not verify SSL certificates client_certificate: Path to client certificate file in PEM format. May include the private key client_certificate_key: Path to private key file for client certificate client_certificate_password: Password for client certificate private key, if encrypted. If not provided and the key is encrypted, yt-dlp will ask interactively prefer_insecure: Use HTTP instead of HTTPS to retrieve information. (Only supported by some extractors) http_headers: A dictionary of custom headers to be used for all requests proxy: URL of the proxy server to use geo_verification_proxy: URL of the proxy to use for IP address verification on geo-restricted sites. socket_timeout: Time to wait for unresponsive hosts, in seconds bidi_workaround: Work around buggy terminals without bidirectional text support, using fridibi debug_printtraffic:Print out sent and received HTTP traffic default_search: Prepend this string if an input url is not valid. 'auto' for elaborate guessing encoding: Use this encoding instead of the system-specified. extract_flat: Whether to resolve and process url_results further * False: Always process (default) * True: Never process * 'in_playlist': Do not process inside playlist/multi_video * 'discard': Always process, but don't return the result from inside playlist/multi_video * 'discard_in_playlist': Same as "discard", but only for playlists (not multi_video) wait_for_video: If given, wait for scheduled streams to become available. The value should be a tuple containing the range (min_secs, max_secs) to wait between retries postprocessors: A list of dictionaries, each with an entry * key: The name of the postprocessor. See yt_dlp/postprocessor/__init__.py for a list. * when: When to run the postprocessor. 
Allowed values are the entries of utils.POSTPROCESS_WHEN Assumed to be 'post_process' if not given progress_hooks: A list of functions that get called on download progress, with a dictionary with the entries * status: One of "downloading", "error", or "finished". Check this first and ignore unknown values. * info_dict: The extracted info_dict If status is one of "downloading", or "finished", the following properties may also be present: * filename: The final filename (always present) * tmpfilename: The filename we're currently writing to * downloaded_bytes: Bytes on disk * total_bytes: Size of the whole file, None if unknown * total_bytes_estimate: Guess of the eventual file size, None if unavailable. * elapsed: The number of seconds since download started. * eta: The estimated time in seconds, None if unknown * speed: The download speed in bytes/second, None if unknown * fragment_index: The counter of the currently downloaded video fragment. * fragment_count: The number of fragments (= individual files that will be merged) Progress hooks are guaranteed to be called at least once (with status "finished") if the download is successful. postprocessor_hooks: A list of functions that get called on postprocessing progress, with a dictionary with the entries * status: One of "started", "processing", or "finished". Check this first and ignore unknown values. * postprocessor: Name of the postprocessor * info_dict: The extracted info_dict Progress hooks are guaranteed to be called at least twice (with status "started" and "finished") if the processing is successful. merge_output_format: "/" separated list of extensions to use when merging formats. final_ext: Expected final extension; used to detect when the file was already downloaded and converted fixup: Automatically correct known faults of the file. One of: - "never": do nothing - "warn": only emit a warning - "detect_or_warn": check whether we can do anything about it, warn otherwise (default) source_address: Client-side IP address to bind to. sleep_interval_requests: Number of seconds to sleep between requests during extraction sleep_interval: Number of seconds to sleep before each download when used alone or a lower bound of a range for randomized sleep before each download (minimum possible number of seconds to sleep) when used along with max_sleep_interval. max_sleep_interval:Upper bound of a range for randomized sleep before each download (maximum possible number of seconds to sleep). Must only be used along with sleep_interval. Actual sleep time will be a random float from range [sleep_interval; max_sleep_interval]. sleep_interval_subtitles: Number of seconds to sleep before each subtitle download listformats: Print an overview of available video formats and exit. list_thumbnails: Print a table of all thumbnails and exit. match_filter: A function that gets called for every video with the signature (info_dict, *, incomplete: bool) -> Optional[str] For backward compatibility with youtube-dl, the signature (info_dict) -> Optional[str] is also allowed. - If it returns a message, the video is ignored. - If it returns None, the video is downloaded. - If it returns utils.NO_DEFAULT, the user is interactively asked whether to download the video. match_filter_func in utils.py is one example for this. no_color: Do not emit color codes in output. 
geo_bypass: Bypass geographic restriction via faking X-Forwarded-For HTTP header geo_bypass_country: Two-letter ISO 3166-2 country code that will be used for explicit geographic restriction bypassing via faking X-Forwarded-For HTTP header geo_bypass_ip_block: IP range in CIDR notation that will be used similarly to geo_bypass_country external_downloader: A dictionary of protocol keys and the executable of the external downloader to use for it. The allowed protocols are default|http|ftp|m3u8|dash|rtsp|rtmp|mms. Set the value to 'native' to use the native downloader compat_opts: Compatibility options. See "Differences in default behavior". The following options do not work when used through the API: filename, abort-on-error, multistreams, no-live-chat, format-sort no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json. Refer __init__.py for their implementation progress_template: Dictionary of templates for progress outputs. Allowed keys are 'download', 'postprocess', 'download-title' (console title) and 'postprocess-title'. The template is mapped on a dictionary with keys 'progress' and 'info' retry_sleep_functions: Dictionary of functions that takes the number of attempts as argument and returns the time to sleep in seconds. Allowed keys are 'http', 'fragment', 'file_access' download_ranges: A callback function that gets called for every video with the signature (info_dict, ydl) -> Iterable[Section]. Only the returned sections will be downloaded. Each Section is a dict with the following keys: * start_time: Start time of the section in seconds * end_time: End time of the section in seconds * title: Section title (Optional) * index: Section number (Optional) force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts noprogress: Do not print the progress bar live_from_start: Whether to download livestreams videos from the start The following parameters are not used by YoutubeDL itself, they are used by the downloader (see yt_dlp/downloader/common.py): nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize, max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries, continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size, external_downloader_args, concurrent_fragment_downloads. The following options are used by the post processors: ffmpeg_location: Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory. postprocessor_args: A dictionary of postprocessor/executable keys (in lower case) and a list of additional command-line arguments for the postprocessor/executable. The dict can also have "PP+EXE" keys which are used when the given exe is used by the given PP. Use 'default' as the name for arguments to passed to all PP For compatibility with youtube-dl, a single list of args can also be used The following options are used by the extractors: extractor_retries: Number of times to retry for known errors dynamic_mpd: Whether to process dynamic DASH manifests (default: True) hls_split_discontinuity: Split HLS playlists to different formats at discontinuities such as ad breaks (default: False) extractor_args: A dictionary of arguments to be passed to the extractors. See "EXTRACTOR ARGUMENTS" for details. E.g. {'youtube': {'skip': ['dash', 'hls']}} mark_watched: Mark videos watched (even with --simulate). Only for YouTube The following options are deprecated and may be removed in the future: playliststart: - Use playlist_items Playlist item to start at. 
playlistend: - Use playlist_items Playlist item to end at. playlistreverse: - Use playlist_items Download playlist items in reverse order. forceurl: - Use forceprint Force printing final URL. forcetitle: - Use forceprint Force printing title. forceid: - Use forceprint Force printing ID. forcethumbnail: - Use forceprint Force printing thumbnail URL. forcedescription: - Use forceprint Force printing description. forcefilename: - Use forceprint Force printing final filename. forceduration: - Use forceprint Force printing duration. allsubtitles: - Use subtitleslangs = ['all'] Downloads all the subtitles of the video (requires writesubtitles or writeautomaticsub) include_ads: - Doesn't work Download ads as well call_home: - Not implemented Boolean, true iff we are allowed to contact the yt-dlp servers for debugging. post_hooks: - Register a custom postprocessor A list of functions that get called as the final step for each video file, after all postprocessors have been called. The filename will be passed as the only argument. hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}. Use the native HLS downloader instead of ffmpeg/avconv if True, otherwise use ffmpeg/avconv if False, otherwise use downloader suggested by extractor if None. prefer_ffmpeg: - avconv support is deprecated If False, use avconv instead of ffmpeg if both are available, otherwise prefer ffmpeg. youtube_include_dash_manifest: - Use extractor_args If True (default), DASH manifests and related data will be downloaded and processed by extractor. You can reduce network I/O by disabling it if you don't care about DASH. (only for youtube) youtube_include_hls_manifest: - Use extractor_args If True (default), HLS manifests and related data will be downloaded and processed by extractor. You can reduce network I/O by disabling it if you don't care about HLS. (only for youtube) """ _NUMERIC_FIELDS = { 'width', 'height', 'asr', 'audio_channels', 'fps', 'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx', 'timestamp', 'release_timestamp', 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count', 'average_rating', 'comment_count', 'age_limit', 'start_time', 'end_time', 'chapter_number', 'season_number', 'episode_number', 'track_number', 'disc_number', 'release_year', } _format_fields = { # NB: Keep in sync with the docstring of extractor/common.py 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note', 'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels', 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'preference', 'language', 'language_preference', 'quality', 'source_preference', 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options', 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time' } _format_selection_exts = { 'audio': set(MEDIA_EXTENSIONS.common_audio), 'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )), 'storyboards': set(MEDIA_EXTENSIONS.storyboards), } def __init__(self, params=None, auto_init=True): """Create a FileDownloader object with the given options. @param auto_init Whether to load the default extractors and print header (if verbose). 
Set to 'no_verbose_header' to not print the header """ if params is None: params = {} self.params = params self._ies = {} self._ies_instances = {} self._pps = {k: [] for k in POSTPROCESS_WHEN} self._printed_messages = set() self._first_webpage_request = True self._post_hooks = [] self._progress_hooks = [] self._postprocessor_hooks = [] self._download_retcode = 0 self._num_downloads = 0 self._num_videos = 0 self._playlist_level = 0 self._playlist_urls = set() self.cache = Cache(self) windows_enable_vt_mode() stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout self._out_files = Namespace( out=stdout, error=sys.stderr, screen=sys.stderr if self.params.get('quiet') else stdout, console=None if compat_os_name == 'nt' else next( filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None) ) self._allow_colors = Namespace(**{ type_: not self.params.get('no_color') and supports_terminal_sequences(stream) for type_, stream in self._out_files.items_ if type_ != 'console' }) # The code is left like this to be reused for future deprecations MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7) current_version = sys.version_info[:2] if current_version < MIN_RECOMMENDED: msg = ('Support for Python version %d.%d has been deprecated. ' 'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.' '\n You will no longer receive updates on this version') if current_version < MIN_SUPPORTED: msg = 'Python version %d.%d is no longer supported' self.deprecation_warning( f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED)) if self.params.get('allow_unplayable_formats'): self.report_warning( f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. ' 'This is a developer option intended for debugging. \n' ' If you experience any issues while using this option, ' f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report') def check_deprecated(param, option, suggestion): if self.params.get(param) is not None: self.report_warning(f'{option} is deprecated. 
Use {suggestion} instead') return True return False if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'): if self.params.get('geo_verification_proxy') is None: self.params['geo_verification_proxy'] = self.params['cn_verification_proxy'] check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"') check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"') check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"') for msg in self.params.get('_warnings', []): self.report_warning(msg) for msg in self.params.get('_deprecation_warnings', []): self.deprecation_warning(msg) self.params['compat_opts'] = set(self.params.get('compat_opts', ())) if 'list-formats' in self.params['compat_opts']: self.params['listformats_table'] = False if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None: # nooverwrites was unnecessarily changed to overwrites # in 0c3d0f51778b153f65c21906031c2e091fcfb641 # This ensures compatibility with both keys self.params['overwrites'] = not self.params['nooverwrites'] elif self.params.get('overwrites') is None: self.params.pop('overwrites', None) else: self.params['nooverwrites'] = not self.params['overwrites'] self.params.setdefault('forceprint', {}) self.params.setdefault('print_to_file', {}) # Compatibility with older syntax if not isinstance(params['forceprint'], dict): self.params['forceprint'] = {'video': params['forceprint']} if self.params.get('bidi_workaround', False): try: import pty master, slave = pty.openpty() width = shutil.get_terminal_size().columns width_args = [] if width is None else ['-w', str(width)] sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error} try: self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs) except OSError: self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs) self._output_channel = os.fdopen(master, 'rb') except OSError as ose: if ose.errno == errno.ENOENT: self.report_warning( 'Could not find fribidi executable, ignoring --bidi-workaround. ' 'Make sure that fribidi is an executable file in one of the directories in your $PATH.') else: raise if auto_init: if auto_init != 'no_verbose_header': self.print_debug_header() self.add_default_info_extractors() if (sys.platform != 'win32' and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and not self.params.get('restrictfilenames', False)): # Unicode filesystem API will throw errors (#1474, #13027) self.report_warning( 'Assuming --restrict-filenames since file system encoding ' 'cannot encode all characters. 
' 'Set the LC_ALL environment variable to fix this.') self.params['restrictfilenames'] = True self._parse_outtmpl() # Creating format selector here allows us to catch syntax errors before the extraction self.format_selector = ( self.params.get('format') if self.params.get('format') in (None, '-') else self.params['format'] if callable(self.params['format']) else self.build_format_selector(self.params['format'])) # Set http_headers defaults according to std_headers self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {})) hooks = { 'post_hooks': self.add_post_hook, 'progress_hooks': self.add_progress_hook, 'postprocessor_hooks': self.add_postprocessor_hook, } for opt, fn in hooks.items(): for ph in self.params.get(opt, []): fn(ph) for pp_def_raw in self.params.get('postprocessors', []): pp_def = dict(pp_def_raw) when = pp_def.pop('when', 'post_process') self.add_post_processor( get_postprocessor(pp_def.pop('key'))(self, **pp_def), when=when) self._setup_opener() register_socks_protocols() def preload_download_archive(fn): """Preload the archive, if any is specified""" if fn is None: return False self.write_debug(f'Loading archive file {fn!r}') try: with locked_file(fn, 'r', encoding='utf-8') as archive_file: for line in archive_file: self.archive.add(line.strip()) except OSError as ioe: if ioe.errno != errno.ENOENT: raise return False return True self.archive = set() preload_download_archive(self.params.get('download_archive')) def warn_if_short_id(self, argv): # short YouTube ID starting with dash? idxs = [ i for i, a in enumerate(argv) if re.match(r'^-[0-9A-Za-z_-]{10}$', a)] if idxs: correct_argv = ( ['yt-dlp'] + [a for i, a in enumerate(argv) if i not in idxs] + ['--'] + [argv[i] for i in idxs] ) self.report_warning( 'Long argument string detected. ' 'Use -- to separate parameters and URLs, like this:\n%s' % args_to_str(correct_argv)) def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" ie_key = ie.ie_key() self._ies[ie_key] = ie if not isinstance(ie, type): self._ies_instances[ie_key] = ie ie.set_downloader(self) def _get_info_extractor_class(self, ie_key): ie = self._ies.get(ie_key) if ie is None: ie = get_info_extractor(ie_key) self.add_info_extractor(ie) return ie def get_info_extractor(self, ie_key): """ Get an instance of an IE with name ie_key, it will try to get one from the _ies list, if there's no instance it will create a new one and add it to the extractor list. 
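        Example (a sketch, assuming `ydl` is an initialized YoutubeDL instance;
        'Youtube' is an existing ie_key):
            ie = ydl.get_info_extractor('Youtube')  # repeat calls return the cached instance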
""" ie = self._ies_instances.get(ie_key) if ie is None: ie = get_info_extractor(ie_key)() self.add_info_extractor(ie) return ie def add_default_info_extractors(self): """ Add the InfoExtractors returned by gen_extractors to the end of the list """ for ie in gen_extractor_classes(): self.add_info_extractor(ie) def add_post_processor(self, pp, when='post_process'): """Add a PostProcessor object to the end of the chain.""" assert when in POSTPROCESS_WHEN, f'Invalid when={when}' self._pps[when].append(pp) pp.set_downloader(self) def add_post_hook(self, ph): """Add the post hook""" self._post_hooks.append(ph) def add_progress_hook(self, ph): """Add the download progress hook""" self._progress_hooks.append(ph) def add_postprocessor_hook(self, ph): """Add the postprocessing progress hook""" self._postprocessor_hooks.append(ph) for pps in self._pps.values(): for pp in pps: pp.add_progress_hook(ph) def _bidi_workaround(self, message): if not hasattr(self, '_output_channel'): return message assert hasattr(self, '_output_process') assert isinstance(message, str) line_count = message.count('\n') + 1 self._output_process.stdin.write((message + '\n').encode()) self._output_process.stdin.flush() res = ''.join(self._output_channel.readline().decode() for _ in range(line_count)) return res[:-len('\n')] def _write_string(self, message, out=None, only_once=False): if only_once: if message in self._printed_messages: return self._printed_messages.add(message) write_string(message, out=out, encoding=self.params.get('encoding')) def to_stdout(self, message, skip_eol=False, quiet=None): """Print message to stdout""" if quiet is not None: self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead') if skip_eol is not False: self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. 
Use "YoutubeDL.to_screen" instead') self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out) def to_screen(self, message, skip_eol=False, quiet=None): """Print message to screen if not in quiet mode""" if self.params.get('logger'): self.params['logger'].debug(message) return if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'): return self._write_string( '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')), self._out_files.screen) def to_stderr(self, message, only_once=False): """Print message to stderr""" assert isinstance(message, str) if self.params.get('logger'): self.params['logger'].error(message) else: self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once) def _send_console_code(self, code): if compat_os_name == 'nt' or not self._out_files.console: return self._write_string(code, self._out_files.console) def to_console_title(self, message): if not self.params.get('consoletitle', False): return message = remove_terminal_sequences(message) if compat_os_name == 'nt': if ctypes.windll.kernel32.GetConsoleWindow(): # c_wchar_p() might not be necessary if `message` is # already of type unicode() ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) else: self._send_console_code(f'\033]0;{message}\007') def save_console_title(self): if not self.params.get('consoletitle') or self.params.get('simulate'): return self._send_console_code('\033[22;0t') # Save the title on stack def restore_console_title(self): if not self.params.get('consoletitle') or self.params.get('simulate'): return self._send_console_code('\033[23;0t') # Restore the title from stack def __enter__(self): self.save_console_title() return self def __exit__(self, *args): self.restore_console_title() if self.params.get('cookiefile') is not None: self.cookiejar.save(ignore_discard=True, ignore_expires=True) def trouble(self, message=None, tb=None, is_error=True): """Determine action to take when a download problem appears. Depending on if the downloader has been configured to ignore download errors or not, this method may throw an exception or not when errors are found, after printing the message. @param tb If given, is additional traceback information @param is_error Whether to raise error according to ignorerrors """ if message is not None: self.to_stderr(message) if self.params.get('verbose'): if tb is None: if sys.exc_info()[0]: # if .trouble has been called from an except block tb = '' if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info)) tb += encode_compat_str(traceback.format_exc()) else: tb_data = traceback.format_list(traceback.extract_stack()) tb = ''.join(tb_data) if tb: self.to_stderr(tb) if not is_error: return if not self.params.get('ignoreerrors'): if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: exc_info = sys.exc_info()[1].exc_info else: exc_info = sys.exc_info() raise DownloadError(message, exc_info) self._download_retcode = 1 Styles = Namespace( HEADERS='yellow', EMPHASIS='light blue', FILENAME='green', ID='green', DELIM='blue', ERROR='red', WARNING='yellow', SUPPRESS='light black', ) def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False): text = str(text) if test_encoding: original_text = text # handle.encoding can be None. 
See https://github.com/yt-dlp/yt-dlp/issues/2711 encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii' text = text.encode(encoding, 'ignore').decode(encoding) if fallback is not None and text != original_text: text = fallback return format_text(text, f) if allow_colors else text if fallback is None else fallback def _format_out(self, *args, **kwargs): return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs) def _format_screen(self, *args, **kwargs): return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs) def _format_err(self, *args, **kwargs): return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs) def report_warning(self, message, only_once=False): ''' Print the message to stderr, it will be prefixed with 'WARNING:' If stderr is a tty file the 'WARNING:' will be colored ''' if self.params.get('logger') is not None: self.params['logger'].warning(message) else: if self.params.get('no_warnings'): return self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once) def deprecation_warning(self, message): if self.params.get('logger') is not None: self.params['logger'].warning(f'DeprecationWarning: {message}') else: self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True) def report_error(self, message, *args, **kwargs): ''' Do the same as trouble, but prefixes the message with 'ERROR:', colored in red if stderr is a tty file. ''' self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs) def write_debug(self, message, only_once=False): '''Log debug message or Print message to stderr''' if not self.params.get('verbose', False): return message = f'[debug] {message}' if self.params.get('logger'): self.params['logger'].debug(message) else: self.to_stderr(message, only_once) def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" try: self.to_screen('[download] %s has already been downloaded' % file_name) except UnicodeEncodeError: self.to_screen('[download] The file has already been downloaded') def report_file_delete(self, file_name): """Report that existing file will be deleted.""" try: self.to_screen('Deleting existing file %s' % file_name) except UnicodeEncodeError: self.to_screen('Deleting existing file') def raise_no_formats(self, info, forced=False, *, msg=None): has_drm = info.get('_has_drm') ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg) msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!' 
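        # The `or`/`and` chain above binds as `msg or ((has_drm and <DRM notice>) or <generic fallback>)`:
        # an explicit `msg` wins, then the DRM notice, then 'No video formats found!'. For instance,
        # (None or (True and 'drm') or 'generic') == 'drm', while
        # (None or (False and 'drm') or 'generic') == 'generic'.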
if forced or not ignored: raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'], expected=has_drm or ignored or expected) else: self.report_warning(msg) def parse_outtmpl(self): self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version') self._parse_outtmpl() return self.params['outtmpl'] def _parse_outtmpl(self): sanitize = IDENTITY if self.params.get('restrictfilenames'): # Remove spaces in the default template sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-') outtmpl = self.params.setdefault('outtmpl', {}) if not isinstance(outtmpl, dict): self.params['outtmpl'] = outtmpl = {'default': outtmpl} outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None}) def get_output_path(self, dir_type='', filename=None): paths = self.params.get('paths', {}) assert isinstance(paths, dict) path = os.path.join( expand_path(paths.get('home', '').strip()), expand_path(paths.get(dir_type, '').strip()) if dir_type else '', filename or '') return sanitize_path(path, force=self.params.get('windowsfilenames')) @staticmethod def _outtmpl_expandpath(outtmpl): # expand_path translates '%%' into '%' and '$$' into '$' # correspondingly that is not what we want since we need to keep # '%%' intact for template dict substitution step. Working around # with boundary-alike separator hack. sep = ''.join([random.choice(ascii_letters) for _ in range(32)]) outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$') # outtmpl should be expand_path'ed before template dict substitution # because meta fields may contain env variables we don't want to # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and # title "Hello $PATH", we don't want `$PATH` to be expanded. return expand_path(outtmpl).replace(sep, '') @staticmethod def escape_outtmpl(outtmpl): ''' Escape any remaining strings like %s, %abc% etc. ''' return re.sub( STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'), lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0), outtmpl) @classmethod def validate_outtmpl(cls, outtmpl): ''' @return None or Exception object ''' outtmpl = re.sub( STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'), lambda mobj: f'{mobj.group(0)[:-1]}s', cls._outtmpl_expandpath(outtmpl)) try: cls.escape_outtmpl(outtmpl) % collections.defaultdict(int) return None except ValueError as err: return err @staticmethod def _copy_infodict(info_dict): info_dict = dict(info_dict) info_dict.pop('__postprocessors', None) info_dict.pop('__pending_error', None) return info_dict def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False): """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict @param sanitize Whether to sanitize the output as a filename. 
For backward compatibility, a function can also be passed """ info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set info_dict = self._copy_infodict(info_dict) info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs formatSeconds(info_dict['duration'], '-' if sanitize else ':') if info_dict.get('duration', None) is not None else None) info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads) info_dict['video_autonumber'] = self._num_videos if info_dict.get('resolution') is None: info_dict['resolution'] = self.format_resolution(info_dict, default=None) # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences # of %(field)s to %(field)0Nd for backward compatibility field_size_compat_map = { 'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0), 'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0), 'autonumber': self.params.get('autonumber_size') or 5, } TMPL_DICT = {} EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]')) MATH_FUNCTIONS = { '+': float.__add__, '-': float.__sub__, } # Field is of the form key1.key2... # where keys (except first) can be string, int or slice FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)') MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})' MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys())) INTERNAL_FORMAT_RE = re.compile(rf'''(?x) (?P-)? (?P{FIELD_RE}) (?P(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*) (?:>(?P.+?))? (?P (?P(?.*?))? (?:\|(?P.*?))? )$''') def _traverse_infodict(k): k = k.split('.') if k[0] == '': k.pop(0) return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True) def get_value(mdict): # Object traversal value = _traverse_infodict(mdict['fields']) # Negative if mdict['negate']: value = float_or_none(value) if value is not None: value *= -1 # Do maths offset_key = mdict['maths'] if offset_key: value = float_or_none(value) operator = None while offset_key: item = re.match( MATH_FIELD_RE if operator else MATH_OPERATORS_RE, offset_key).group(0) offset_key = offset_key[len(item):] if operator is None: operator = MATH_FUNCTIONS[item] continue item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1) offset = float_or_none(item) if offset is None: offset = float_or_none(_traverse_infodict(item)) try: value = operator(value, multiplier * offset) except (TypeError, ZeroDivisionError): return None operator = None # Datetime formatting if mdict['strf_format']: value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ',')) # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485 if sanitize and value == '': value = None return value na = self.params.get('outtmpl_na_placeholder', 'NA') def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')): return sanitize_filename(str(value), restricted=restricted, is_id=( bool(re.search(r'(^|[_.])id(\.|$)', key)) if 'filename-sanitization' in self.params['compat_opts'] else NO_DEFAULT)) sanitizer = sanitize if callable(sanitize) else filename_sanitizer sanitize = bool(sanitize) def _dumpjson_default(obj): if isinstance(obj, (set, LazyList)): return list(obj) return repr(obj) def create_key(outer_mobj): if not outer_mobj.group('has_key'): return outer_mobj.group(0) key = outer_mobj.group('key') mobj = re.match(INTERNAL_FORMAT_RE, key) initial_field = mobj.group('fields') if mobj 
else '' value, replacement, default = None, None, na while mobj: mobj = mobj.groupdict() default = mobj['default'] if mobj['default'] is not None else default value = get_value(mobj) replacement = mobj['replacement'] if value is None and mobj['alternate']: mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:]) else: break fmt = outer_mobj.group('format') if fmt == 's' and value is not None and key in field_size_compat_map.keys(): fmt = f'0{field_size_compat_map[key]:d}d' value = default if value is None else value if replacement is None else replacement flags = outer_mobj.group('conversion') or '' str_fmt = f'{fmt[:-1]}s' if fmt[-1] == 'l': # list delim = '\n' if '#' in flags else ', ' value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt elif fmt[-1] == 'j': # json value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt elif fmt[-1] == 'h': # html value, fmt = escapeHTML(value), str_fmt elif fmt[-1] == 'q': # quoted value = map(str, variadic(value) if '#' in flags else [value]) value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt elif fmt[-1] == 'B': # bytes value = f'%{str_fmt}'.encode() % str(value).encode() value, fmt = value.decode('utf-8', 'ignore'), 's' elif fmt[-1] == 'U': # unicode normalized value, fmt = unicodedata.normalize( # "+" = compatibility equivalence, "#" = NFD 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'), value), str_fmt elif fmt[-1] == 'D': # decimal suffix num_fmt, fmt = fmt[:-1].replace('#', ''), 's' value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s', factor=1024 if '#' in flags else 1000) elif fmt[-1] == 'S': # filename sanitization value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt elif fmt[-1] == 'c': if value: value = str(value)[0] else: fmt = str_fmt elif fmt[-1] not in 'rs': # numeric value = float_or_none(value) if value is None: value, fmt = default, 's' if sanitize: if fmt[-1] == 'r': # If value is an object, sanitize might convert it to a string # So we convert it to repr first value, fmt = repr(value), str_fmt if fmt[-1] in 'csr': value = sanitizer(initial_field, value) key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format')) TMPL_DICT[key] = value return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix')) return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs): outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs) return self.escape_outtmpl(outtmpl) % info_dict def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None): assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive' if outtmpl is None: outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default']) try: outtmpl = self._outtmpl_expandpath(outtmpl) filename = self.evaluate_outtmpl(outtmpl, info_dict, True) if not filename: return None if tmpl_type in ('', 'temp'): final_ext, ext = self.params.get('final_ext'), info_dict.get('ext') if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'): filename = replace_extension(filename, ext, final_ext) elif tmpl_type: force_ext = OUTTMPL_TYPES[tmpl_type] if force_ext: filename = replace_extension(filename, force_ext, info_dict.get('ext')) # https://github.com/blackjack4494/youtube-dlc/issues/85 trim_file_name = self.params.get('trim_file_name', 
            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                no_ext, *ext = filename.rsplit('.', 2)
                filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err)
                              + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
        """Generate the output filename"""
        if outtmpl:
            assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
            dir_type = None
        filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
        if not filename and dir_type not in ('', 'temp'):
            return ''

        if warn:
            if not self.params.get('paths'):
                pass
            elif filename == '-':
                self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
            elif os.path.isabs(filename):
                self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
        if filename == '-' or not filename:
            return filename

        return self.get_output_path(dir_type, filename)

    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """Returns None if the file should be downloaded"""
        video_title = info_dict.get('title', info_dict.get('id', 'entry'))

        def check_filter():
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                try:
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
                if ret is NO_DEFAULT:
                    while True:
                        filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
                        reply = input(self._format_screen(
                            f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
                        if reply in {'y', ''}:
                            return None
                        elif reply == 'n':
                            return f'Skipping {video_title}'
                elif ret is not None:
                    return ret
            return None

        if self.in_download_archive(info_dict):
            reason = '%s has already been recorded in the archive' % video_title
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            reason = check_filter()
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                raise break_err()
        return reason

    @staticmethod
    def add_extra_info(info_dict, extra_info):
        '''Set the keys from extra_info in info dict if they are missing'''
        for key, value in extra_info.items():
            info_dict.setdefault(key, value)

    def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                     process=True, force_generic_extractor=False):
        """
        Return a list with a dictionary for each video extracted.

        Arguments:
        url -- URL to extract

        Keyword arguments:
        download -- whether to download videos during extraction
        ie_key -- extractor key hint
        extra_info -- dictionary containing the extra values to add to each result
        process -- whether to resolve all unresolved references (URLs, playlist items),
                   must be True for download to work.
        force_generic_extractor -- force using the generic extractor
        """

        if extra_info is None:
            extra_info = {}

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = {ie_key: self._get_info_extractor_class(ie_key)}
        else:
            ies = self._ies

        for ie_key, ie in ies.items():
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            temp_id = ie.get_temp_id(url)
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
                if self.params.get('break_on_existing', False):
                    raise ExistingVideoReached()
                break
            return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
        else:
            self.report_error('no suitable InfoExtractor for URL %s' % url)

    def _handle_extraction_exceptions(func):
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            while True:
                try:
                    return func(self, *args, **kwargs)
                except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                    raise
                except ReExtractInfo as e:
                    if e.expected:
                        self.to_screen(f'{e}; Re-extracting data')
                    else:
                        self.to_stderr('\r')
                        self.report_warning(f'{e}; Re-extracting data')
                    continue
                except GeoRestrictedError as e:
                    msg = e.msg
                    if e.countries:
                        msg += '\nThis video is available in %s.' % ', '.join(
                            map(ISO3166Utils.short2full, e.countries))
                    msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
                    self.report_error(msg)
                except ExtractorError as e:  # An error we somewhat expected
                    self.report_error(str(e), e.format_traceback())
                except Exception as e:
                    if self.params.get('ignoreerrors'):
                        self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
                    else:
                        raise
                break
        return wrapper

    def _wait_for_video(self, ie_result={}):
        if (not self.params.get('wait_for_video')
                or ie_result.get('_type', 'video') != 'video'
                or ie_result.get('formats') or ie_result.get('url')):
            return

        format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
        last_msg = ''

        def progress(msg):
            nonlocal last_msg
            full_msg = f'{msg}\n'
            if not self.params.get('noprogress'):
                full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
            elif last_msg:
                return
            self.to_screen(full_msg, skip_eol=True)
            last_msg = msg

        min_wait, max_wait = self.params.get('wait_for_video')
        diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
        if diff is None and ie_result.get('live_status') == 'is_upcoming':
            diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
            self.report_warning('Release time of video is not known')
        elif ie_result and (diff or 0) <= 0:
            self.report_warning('Video should already be available according to extracted info')
        diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
        self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')

        wait_till = time.time() + diff
        try:
            while True:
                diff = wait_till - time.time()
                if diff <= 0:
                    progress('')
                    raise ReExtractInfo('[wait] Wait period ended', expected=True)
                progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
                time.sleep(1)
        except KeyboardInterrupt:
            progress('')
            raise ReExtractInfo('[wait] Interrupted by user', expected=True)
        except BaseException as e:
            if not isinstance(e, ReExtractInfo):
                self.to_screen('')
            raise

    @_handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process):
        try:
            ie_result = ie.extract(url)
        except UserNotLive as e:
            if process:
                if self.params.get('wait_for_video'):
                    self.report_warning(e)
                    self._wait_for_video()
            raise
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
                '_type': 'compat_list',
                'entries': ie_result,
            }
        if extra_info.get('original_url'):
            ie_result.setdefault('original_url', extra_info['original_url'])
        self.add_default_extra_info(ie_result, ie, url)
        if process:
            self._wait_for_video(ie_result)
            return self.process_ie_result(ie_result, download, extra_info)
        else:
            return ie_result

    def add_default_extra_info(self, ie_result, ie, url):
        if url is not None:
            self.add_extra_info(ie_result, {
                'webpage_url': url,
                'original_url': url,
            })
        webpage_url = ie_result.get('webpage_url')
        if webpage_url:
            self.add_extra_info(ie_result, {
                'webpage_url_basename': url_basename(webpage_url),
                'webpage_url_domain': get_domain(webpage_url),
            })
        if ie is not None:
            self.add_extra_info(ie_result, {
                'extractor': ie.IE_NAME,
                'extractor_key': ie.ie_key(),
            })

    def process_ie_result(self, ie_result, download=True, extra_info=None):
        """
        Take the result of the ie (may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
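        # Overview (editor's note, hedged): the dispatch below handles each
        # documented result type -- 'video' is processed fully, 'url' and
        # 'url_transparent' trigger re-extraction (the latter merging the outer
        # metadata over the inner result), 'playlist'/'multi_video' are expanded
        # entry by entry, and legacy 'compat_list' results are fixed up in place.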
""" if extra_info is None: extra_info = {} result_type = ie_result.get('_type', 'video') if result_type in ('url', 'url_transparent'): ie_result['url'] = sanitize_url( ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https') if ie_result.get('original_url'): extra_info.setdefault('original_url', ie_result['original_url']) extract_flat = self.params.get('extract_flat', False) if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or extract_flat is True): info_copy = ie_result.copy() ie = try_get(ie_result.get('ie_key'), self.get_info_extractor) if ie and not ie_result.get('id'): info_copy['id'] = ie.get_temp_id(ie_result['url']) self.add_default_extra_info(info_copy, ie, ie_result['url']) self.add_extra_info(info_copy, extra_info) info_copy, _ = self.pre_process(info_copy) self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True) self._raise_pending_errors(info_copy) if self.params.get('force_write_download_archive', False): self.record_download_archive(info_copy) return ie_result if result_type == 'video': self.add_extra_info(ie_result, extra_info) ie_result = self.process_video_result(ie_result, download=download) self._raise_pending_errors(ie_result) additional_urls = (ie_result or {}).get('additional_urls') if additional_urls: # TODO: Improve MetadataParserPP to allow setting a list if isinstance(additional_urls, str): additional_urls = [additional_urls] self.to_screen( '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls))) self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls)) ie_result['additional_entries'] = [ self.extract_info( url, download, extra_info=extra_info, force_generic_extractor=self.params.get('force_generic_extractor')) for url in additional_urls ] return ie_result elif result_type == 'url': # We have to add extra_info to the results because it may be # contained in a playlist return self.extract_info( ie_result['url'], download, ie_key=ie_result.get('ie_key'), extra_info=extra_info) elif result_type == 'url_transparent': # Use the information from the embedding page info = self.extract_info( ie_result['url'], ie_key=ie_result.get('ie_key'), extra_info=extra_info, download=False, process=False) # extract_info may return None when ignoreerrors is enabled and # extraction failed with an error, don't crash and return early # in this case if not info: return info exempted_fields = {'_type', 'url', 'ie_key'} if not ie_result.get('section_end') and ie_result.get('section_start') is None: # For video clips, the id etc of the clip extractor should be used exempted_fields |= {'id', 'extractor', 'extractor_key'} new_result = info.copy() new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields)) # Extracted info may not be a video result (i.e. # info.get('_type', 'video') != video) but rather an url or # url_transparent. In such cases outer metadata (from ie_result) # should be propagated to inner one (info). For this to happen # _type of info should be overridden with url_transparent. This # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163. 
            if new_result.get('_type') == 'url':
                new_result['_type'] = 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type in ('playlist', 'multi_video'):
            # Protect from infinite recursion due to recursively nested playlists
            # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
            webpage_url = ie_result['webpage_url']
            if webpage_url in self._playlist_urls:
                self.to_screen(
                    '[download] Skipping already downloaded playlist: %s'
                    % (ie_result.get('title') or ie_result.get('id')))
                return

            self._playlist_level += 1
            self._playlist_urls.add(webpage_url)
            self._fill_common_fields(ie_result, False)
            self._sanitize_thumbnails(ie_result)
            try:
                return self.__process_playlist(ie_result, download)
            finally:
                self._playlist_level -= 1
                if not self._playlist_level:
                    self._playlist_urls.clear()
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                self.add_extra_info(r, {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'webpage_url_domain': get_domain(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                })
                return r

            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)

    def _ensure_dir_exists(self, path):
        return make_dir(path, self.report_error)

    @staticmethod
    def _playlist_infodict(ie_result, strict=False, **kwargs):
        info = {
            'playlist_count': ie_result.get('playlist_count'),
            'playlist': ie_result.get('title') or ie_result.get('id'),
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            **kwargs,
        }
        if strict:
            return info
        return {
            **info,
            'playlist_index': 0,
            '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'webpage_url_domain': get_domain(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

    def __process_playlist(self, ie_result, download):
        """Process each entry in the playlist"""
        assert ie_result['_type'] in ('playlist', 'multi_video')

        common_info = self._playlist_infodict(ie_result, strict=True)
        title = common_info.get('playlist') or '<Untitled Playlist>'
        if self._match_entry(common_info, incomplete=True) is not None:
            return
        self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

        all_entries = PlaylistEntries(self, ie_result)
        entries = orderedSet(all_entries.get_requested_items(), lazy=True)

        lazy = self.params.get('lazy_playlist')
        if lazy:
            resolved_entries, n_entries = [], 'N/A'
            ie_result['requested_entries'], ie_result['entries'] = None, None
        else:
            entries = resolved_entries = list(entries)
            n_entries = len(resolved_entries)
            ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
        if not ie_result.get('playlist_count'):
            # Better to do this after potentially exhausting entries
            ie_result['playlist_count'] = all_entries.get_full_count()

        extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
        ie_copy = collections.ChainMap(ie_result, extra)

        _infojson_written = False
        write_playlist_files = self.params.get('allow_playlist_files', True)
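        # Editor's note (hedged): with --lazy-playlist the entries generator is
        # consumed on the fly, so n_entries stays 'N/A' and options that need
        # the full list up front (--playlist-reverse, --playlist-random) are
        # rejected further below.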
        if write_playlist_files and self.params.get('list_thumbnails'):
            self.list_thumbnails(ie_result)
        if write_playlist_files and not self.params.get('simulate'):
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if lazy:
            if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
                self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
        elif self.params.get('playlistreverse'):
            entries.reverse()
        elif self.params.get('playlistrandom'):
            random.shuffle(entries)

        self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos'
                       f'{format_field(ie_result, "playlist_count", " of %s")}')

        keep_resolved_entries = self.params.get('extract_flat') != 'discard'
        if self.params.get('extract_flat') == 'discard_in_playlist':
            keep_resolved_entries = ie_result['_type'] != 'playlist'
        if keep_resolved_entries:
            self.write_debug('The information of all playlist entries will be held in memory')

        failures = 0
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, (playlist_index, entry) in enumerate(entries):
            if lazy:
                resolved_entries.append((playlist_index, entry))
            if not entry:
                continue

            entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
            if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
                playlist_index = ie_result['requested_entries'][i]

            entry_copy = collections.ChainMap(entry, {
                **common_info,
                'n_entries': int_or_none(n_entries),
                'playlist_index': playlist_index,
                'playlist_autonumber': i + 1,
            })

            if self._match_entry(entry_copy, incomplete=True) is not None:
                # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
                resolved_entries[i] = (playlist_index, NO_DEFAULT)
                continue

            self.to_screen('[download] Downloading video %s of %s' % (
                self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))

            extra.update({
                'playlist_index': playlist_index,
                'playlist_autonumber': i + 1,
            })
            entry_result = self.__process_iterable_entry(entry, download, extra)
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
                break
            if keep_resolved_entries:
                resolved_entries[i] = (playlist_index, entry_result)

        # Update with processed data
        ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
        ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]

        # Write the updated info to json
        if _infojson_written is True and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return

        ie_result = self.run_all_pps('playlist', ie_result)
        self.to_screen(f'[download] Finished downloading playlist: {title}')
        return ie_result

    @_handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        return self.process_ie_result(
            entry, download=download, extra_info=extra_info)

    def _build_format_filter(self, filter_spec):
        " Returns a function to filter the formats according to the filter_spec "

        OPERATORS = {
            '<': operator.lt,
            '<=': operator.le,
            '>': operator.gt,
            '>=': operator.ge,
            '=': operator.eq,
            '!=': operator.ne,
        }
        operator_rex = re.compile(r'''(?x)\s*
            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
            (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.fullmatch(filter_spec)
        if m:
            try:
                comparison_value = int(m.group('value'))
            except ValueError:
                comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid value %r in format specification %r' % (
                            m.group('value'), filter_spec))
            op = OPERATORS[m.group('op')]

        if not m:
            STR_OPERATORS = {
                '=': operator.eq,
                '^=': lambda attr, value: attr.startswith(value),
                '$=': lambda attr, value: attr.endswith(value),
                '*=': lambda attr, value: value in attr,
                '~=': lambda attr, value: value.search(attr) is not None
            }
            str_operator_rex = re.compile(r'''(?x)\s*
                (?P<key>[a-zA-Z0-9._-]+)\s*
                (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
                (?P<quote>["'])?
                (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
                (?(quote)(?P=quote))\s*
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.fullmatch(filter_spec)
            if m:
                if m.group('op') == '~=':
                    comparison_value = re.compile(m.group('value'))
                else:
                    comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
                str_op = STR_OPERATORS[m.group('op')]
                if m.group('negation'):
                    op = lambda attr, value: not str_op(attr, value)
                else:
                    op = str_op

        if not m:
            raise SyntaxError('Invalid filter specification %r' % filter_spec)

        def _filter(f):
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        return _filter

    def _check_formats(self, formats):
        for f in formats:
            self.to_screen('[info] Testing format %s' % f['format_id'])
            path = self.get_output_path('temp')
            if not self._ensure_dir_exists(f'{path}/'):
                continue
            temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
            temp_file.close()
            try:
                success, _ = self.dl(temp_file.name, f, test=True)
            except (DownloadError, OSError, ValueError) + network_exceptions:
                success = False
            finally:
                if os.path.exists(temp_file.name):
                    try:
                        os.remove(temp_file.name)
                    except OSError:
                        self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
            if success:
                yield f
            else:
                self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

    def _default_format_spec(self, info_dict, download=True):

        def can_merge():
            merger = FFmpegMergerPP(self)
            return merger.available and merger.can_merge()

        prefer_best = (
            not self.params.get('simulate')
            and download
            and (
                not can_merge()
                or info_dict.get('is_live') and not self.params.get('live_from_start')
                or self.params['outtmpl']['default'] == '-'))
        compat = (
            prefer_best
            or self.params.get('allow_multiple_audio_streams', False)
            or 'format-spec' in self.params['compat_opts'])

        return (
            'best/bestvideo+bestaudio' if prefer_best
            else 'bestvideo*+bestaudio/best' if not compat
            else 'bestvideo+bestaudio/best')

    def build_format_selector(self, format_spec):
        def syntax_error(note, start):
            message = (
                'Invalid format specification: '
                '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                                  'video': self.params.get('allow_multiple_video_streams', False)}

        check_formats = self.params.get('check_formats') == 'selected'

        def _parse_filter(tokens):
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings. E.g.
            # 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error(f'Operator not recognized: "{string}"', start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _merge(formats_pair):
            format_1, format_2 = formats_pair

            formats_info = []
            formats_info.extend(format_1.get('requested_formats', (format_1,)))
            formats_info.extend(format_2.get('requested_formats', (format_2,)))

            if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                get_no_more = {'video': False, 'audio': False}
                for (i, fmt_info) in enumerate(formats_info):
                    if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                        formats_info.pop(i)
                        continue
                    for aud_vid in ['audio', 'video']:
                        if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                            if get_no_more[aud_vid]:
                                formats_info.pop(i)
                                break
                            get_no_more[aud_vid] = True

            if len(formats_info) == 1:
                return formats_info[0]

            video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
            audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

            the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
            the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

            output_ext = get_compatible_ext(
                vcodecs=[f.get('vcodec') for f in video_fmts],
                acodecs=[f.get('acodec') for f in audio_fmts],
                vexts=[f['ext'] for f in video_fmts],
                aexts=[f['ext'] for f in audio_fmts],
                preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
                             or self.params.get('prefer_free_formats') and ('webm', 'mkv')))

            filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

            new_dict = {
                'requested_formats': formats_info,
                'format': '+'.join(filtered('format')),
                'format_id': '+'.join(filtered('format_id')),
                'ext': output_ext,
                'protocol': '+'.join(map(determine_protocol, formats_info)),
                'language': '+'.join(orderedSet(filtered('language'))) or None,
                'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
                'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
                'tbr': sum(filtered('tbr', 'vbr', 'abr')),
            }

            if the_only_video:
                new_dict.update({
                    'width': the_only_video.get('width'),
                    'height': the_only_video.get('height'),
                    'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                    'fps': the_only_video.get('fps'),
                    'dynamic_range': the_only_video.get('dynamic_range'),
                    'vcodec': the_only_video.get('vcodec'),
                    'vbr': the_only_video.get('vbr'),
                    'stretched_ratio': the_only_video.get('stretched_ratio'),
                })

            if the_only_audio:
                new_dict.update({
                    'acodec': the_only_audio.get('acodec'),
                    'abr': the_only_audio.get('abr'),
                    'asr': the_only_audio.get('asr'),
                    'audio_channels': the_only_audio.get('audio_channels')
                })

            return new_dict

        def _check_formats(formats):
            if not check_formats:
                yield from formats
                return
            yield from self._check_formats(formats)

        def _build_selector_function(selector):
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        yield from f(ctx)
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'][::-1])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        formats = list(_check_formats(
                            f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
                        if not formats:
                            return
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
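                    # Editor's note (hedged): this branch parses the documented
                    # best/worst atoms, e.g. 'b'/'best', 'bv', 'ba*', 'wv' or
                    # 'best.2' (second best); a bare 'b'/'w' without a type or
                    # '*' modifier may later fall back to single-stream formats
                    # when only incomplete formats are available.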
                    format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        if format_spec in self._format_selection_exts['audio']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                        elif format_spec in self._format_selection_exts['video']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                            separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
                        elif format_spec in self._format_selection_exts['storyboards']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                        else:
                            filter_f = lambda f: f.get('format_id') == format_spec  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if not matches:
                            if format_fallback and ctx['incomplete_formats']:
                                # for extractors with incomplete formats (audio only (soundcloud)
                                # or video only (imgur)) best/worst will fall back to
                                # best/worst {video,audio}-only format
                                matches = formats
                            elif separate_fallback and not ctx['has_merged_format']:
                                # for compatibility with youtube-dl when there is no pre-merged format
                                matches = list(filter(separate_fallback, formats))
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except LazyList.IndexError:
                            return

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                ctx_copy = dict(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode())
        try:
            tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator:
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)

    def _calc_headers(self, info_dict):
        res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})

        cookies = self._calc_cookies(info_dict['url'])
        if cookies:
            res['Cookie'] = cookies

        if 'X-Forwarded-For' not in res:
            x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
            if x_forwarded_for_ip:
                res['X-Forwarded-For'] = x_forwarded_for_ip

        return res

    def _calc_cookies(self, url):
        pr = sanitized_Request(url)
        self.cookiejar.add_cookie_header(pr)
        return pr.get_header('Cookie')

    def _sort_thumbnails(self, thumbnails):
        thumbnails.sort(key=lambda t: (
            t.get('preference') if t.get('preference') is not None else -1,
            t.get('width') if t.get('width') is not None else -1,
            t.get('height') if t.get('height') is not None else -1,
            t.get('id') if t.get('id') is not None else '',
            t.get('url')))

    def _sanitize_thumbnails(self, info_dict):
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if not thumbnails:
            return

        def check_thumbnails(thumbnails):
            for t in thumbnails:
                self.to_screen(f'[info] Testing thumbnail {t["id"]}')
                try:
                    self.urlopen(HEADRequest(t['url']))
                except network_exceptions as err:
                    self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
                    continue
                yield t

        self._sort_thumbnails(thumbnails)
        for i, t in enumerate(thumbnails):
            if t.get('id') is None:
                t['id'] = '%d' % i
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            t['url'] = sanitize_url(t['url'])

        if self.params.get('check_formats') is True:
            info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
        else:
            info_dict['thumbnails'] = thumbnails

    def _fill_common_fields(self, info_dict, is_video=True):
        # TODO: move sanitization here
        if is_video:
            # playlists are allowed to lack "title"
            title = info_dict.get('title', NO_DEFAULT)
            if title is NO_DEFAULT:
                raise ExtractorError('Missing "title" field in extractor result',
                                     video_id=info_dict['id'], ie=info_dict['extractor'])
            info_dict['fulltitle'] = title
            if not title:
                if title == '':
                    self.write_debug('Extractor gave empty title. Creating a generic title')
                else:
                    self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
                info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'

        if info_dict.get('duration') is not None:
            info_dict['duration_string'] = formatSeconds(info_dict['duration'])

        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
                ('modified_timestamp', 'modified_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                with contextlib.suppress(ValueError, OverflowError, OSError):
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')

        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                    break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)
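        # Illustrative example (editor's note): with chapter_number=3 and no
        # 'chapter' field, the loop below sets info_dict['chapter'] = 'Chapter 3'.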
        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

    def _raise_pending_errors(self, info):
        err = info.pop('__pending_error', None)
        if err:
            self.report_error(err, tb=False)

    def process_video_result(self, info_dict, download=True):
        assert info_dict.get('_type', 'video') == 'video'
        self._num_videos += 1

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
        elif not info_dict.get('id'):
            raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            field = info.get(string_field)
            if field is None or isinstance(field, str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = str(field)

        def sanitize_numeric_fields(info):
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, (int, float)):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)
        if info_dict.get('section_end') and info_dict.get('section_start') is not None:
            info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
        if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
            self.report_warning('"duration" field is negative, there is an error in extractor')

        chapters = info_dict.get('chapters') or []
        if chapters and chapters[0].get('start_time'):
            chapters.insert(0, {'start_time': 0})

        dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
        for idx, (prev, current, next_) in enumerate(zip(
                (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
            if current.get('start_time') is None:
                current['start_time'] = prev.get('end_time')
            if not current.get('end_time'):
                current['end_time'] = next_.get('start_time')
            if not current.get('title'):
                current['title'] = f'<Untitled Chapter {idx}>'

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        self._fill_common_fields(info_dict)

        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']
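        # Editor's note (hedged): when an extractor returns no 'formats' list,
        # the info_dict itself doubles as the only format, so the code below
        # can treat 'formats' uniformly as a list.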
        # or None ensures --clean-infojson removes it
        info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm')]
            if info_dict['_has_drm'] and formats and all(
                    f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
                self.report_warning(
                    'This video is DRM protected and only images are available for download. '
                    'Use --list-formats to see them')

        get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
        if not get_from_start:
            info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
        if info_dict.get('is_live') and formats:
            formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
            if get_from_start and not formats:
                self.raise_no_formats(info_dict, msg=(
                    '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
                    'If you want to download from the current time, use --no-live-from-start'))

        if not formats:
            self.raise_no_formats(info_dict)

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
        for format_id, ambiguous_formats in formats_dict.items():
            ambiguous_id = len(ambiguous_formats) > 1
            for i, format in enumerate(ambiguous_formats):
                if ambiguous_id:
                    format['format_id'] = '%s-%d' % (format_id, i)
                if format.get('ext') is None:
                    format['ext'] = determine_ext(format['url']).lower()
                # Ensure there is no conflict between id and ext in format selection
                # See https://github.com/yt-dlp/yt-dlp/issues/1282
                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                    format['format_id'] = 'f%s' % format['format_id']

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            if format.get('resolution') is None:
                format['resolution'] = self.format_resolution(format, default=None)
            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                format['dynamic_range'] = 'SDR'
            if (info_dict.get('duration') and format.get('tbr')
                    and not format.get('filesize') and not format.get('filesize_approx')):
                format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))

            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        if self.params.get('check_formats') is True:
            formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' field if the original info_dict lists them;
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
            return info_dict

        self.post_extract(info_dict)
        info_dict, _ = self.pre_process(info_dict, 'after_filter')

        # The pre-processors may have modified the formats
        formats = info_dict.get('formats', [info_dict])

        list_only = self.params.get('simulate') is None and (
            self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
        interactive_format_selection = not list_only and self.format_selector == '-'
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        if self.params.get('listformats') or interactive_format_selection:
            self.list_formats(info_dict)
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            return info_dict

        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        while True:
            if interactive_format_selection:
                req_format = input(
                    self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
                try:
                    format_selector = self.build_format_selector(req_format)
                except SyntaxError as err:
                    self.report_error(err, tb=False, is_error=False)
                    continue

            formats_to_download = list(format_selector({
                'formats': formats,
                'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
                'incomplete_formats': (
                    # All formats are video-only or
                    all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
                    # all formats are audio-only
                    or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
            }))
            if interactive_format_selection and not formats_to_download:
                self.report_error('Requested format is not available', tb=False, is_error=False)
                continue
            break

        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError(
                    'Requested format is not available. Use --list-formats for a list of available formats',
                    expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
            self.report_warning('Requested format is not available')
            # Process what we can, even without any available formats.
            formats_to_download = [{}]

        requested_ranges = self.params.get('download_ranges')
        if requested_ranges:
            requested_ranges = tuple(requested_ranges(info_dict, self))

        best_format, downloaded_formats = formats_to_download[-1], []
        if download:
            if best_format:
                def to_screen(*msg):
                    self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')

                to_screen(f'Downloading {len(formats_to_download)} format(s):',
                          (f['format_id'] for f in formats_to_download))
                if requested_ranges:
                    to_screen(f'Downloading {len(requested_ranges)} time ranges:',
                              (f'{int(c["start_time"])}-{int(c["end_time"])}' for c in requested_ranges))
            max_downloads_reached = False

            for fmt, chapter in itertools.product(formats_to_download, requested_ranges or [{}]):
                new_info = self._copy_infodict(info_dict)
                new_info.update(fmt)
                offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
                if chapter or offset:
                    new_info.update({
                        'section_start': offset + chapter.get('start_time', 0),
                        'section_end': offset + min(chapter.get('end_time', duration), duration),
                        'section_title': chapter.get('title'),
                        'section_number': chapter.get('index'),
                    })
                downloaded_formats.append(new_info)
                try:
                    self.process_info(new_info)
                except MaxDownloadsReached:
                    max_downloads_reached = True
                self._raise_pending_errors(new_info)
                # Remove copied info
                for key, val in tuple(new_info.items()):
                    if info_dict.get(key) == val:
                        new_info.pop(key)
                if max_downloads_reached:
                    break

            write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
            assert write_archive.issubset({True, False, 'ignore'})
            if True in write_archive and False not in write_archive:
                self.record_download_archive(info_dict)

            info_dict['requested_downloads'] = downloaded_formats
            info_dict = self.run_all_pps('after_video', info_dict)
            if max_downloads_reached:
                raise MaxDownloadsReached()

        # We update the info dict with the selected best quality format (backwards compatibility)
        info_dict.update(best_format)
        return info_dict

    def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
        """Select the requested subtitles and their format"""
        available_subs, normal_sub_langs = {}, []
        if normal_subtitles and self.params.get('writesubtitles'):
            available_subs.update(normal_subtitles)
            normal_sub_langs = tuple(normal_subtitles.keys())
        if automatic_captions and self.params.get('writeautomaticsub'):
            for lang, cap_info in automatic_captions.items():
                if lang not in available_subs:
                    available_subs[lang] = cap_info

        if (not self.params.get('writesubtitles') and not
                self.params.get('writeautomaticsub') or not
                available_subs):
            return None

        all_sub_langs = tuple(available_subs.keys())
        if self.params.get('allsubtitles', False):
            requested_langs = all_sub_langs
        elif self.params.get('subtitleslangs', False):
            # A list is used so that the order of languages will be the same as
            # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
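            # Illustrative example (editor's note): --sub-langs 'en.*,-live_chat'
            # first collects every language matching 'en.*', then discards
            # 'live_chat'; a leading '-' marks a discard pattern and 'all'
            # (re)sets the list to every available language.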
            requested_langs = []
            for lang_re in self.params.get('subtitleslangs'):
                discard = lang_re[0] == '-'
                if discard:
                    lang_re = lang_re[1:]
                if lang_re == 'all':
                    if discard:
                        requested_langs = []
                    else:
                        requested_langs.extend(all_sub_langs)
                    continue
                current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
                if discard:
                    for lang in current_langs:
                        while lang in requested_langs:
                            requested_langs.remove(lang)
                else:
                    requested_langs.extend(current_langs)
            requested_langs = orderedSet(requested_langs)
        elif normal_sub_langs:
            requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
        else:
            requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
        if requested_langs:
            self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

        formats_query = self.params.get('subtitlesformat', 'best')
        formats_preference = formats_query.split('/') if formats_query else []
        subs = {}
        for lang in requested_langs:
            formats = available_subs.get(lang)
            if formats is None:
                self.report_warning(f'{lang} subtitles not available for {video_id}')
                continue
            for ext in formats_preference:
                if ext == 'best':
                    f = formats[-1]
                    break
                matches = list(filter(lambda f: f['ext'] == ext, formats))
                if matches:
                    f = matches[-1]
                    break
            else:
                f = formats[-1]
                self.report_warning(
                    'No subtitle format found matching "%s" for language %s, '
                    'using %s' % (formats_query, lang, f['ext']))
            subs[lang] = f
        return subs

    def _forceprint(self, key, info_dict):
        if info_dict is None:
            return
        info_copy = info_dict.copy()
        info_copy['formats_table'] = self.render_formats_table(info_dict)
        info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
        info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
        info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))

        def format_tmpl(tmpl):
            mobj = re.match(r'\w+(=?)$', tmpl)
            if mobj and mobj.group(1):
                return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
            elif mobj:
                return f'%({tmpl})s'
            return tmpl

        for tmpl in self.params['forceprint'].get(key, []):
            self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))

        for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
            filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
            tmpl = format_tmpl(tmpl)
            self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
            if self._ensure_dir_exists(filename):
                with open(filename, 'a', encoding='utf-8') as f:
                    f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')

    def __forced_printings(self, info_dict, filename, incomplete):
        def print_mandatory(field, actual_field=None):
            if actual_field is None:
                actual_field = field
            if (self.params.get('force%s' % field, False)
                    and (not incomplete or info_dict.get(actual_field) is not None)):
                self.to_stdout(info_dict[actual_field])

        def print_optional(field):
            if (self.params.get('force%s' % field, False)
                    and info_dict.get(field) is not None):
                self.to_stdout(info_dict[field])

        info_dict = info_dict.copy()
        if filename is not None:
            info_dict['filename'] = filename
        if info_dict.get('requested_formats') is not None:
            # For RTMP URLs, also include the playpath
            info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
        elif info_dict.get('url'):
            info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

        if (self.params.get('forcejson')
                or self.params['forceprint'].get('video')
                or self.params['print_to_file'].get('video')):
            self.post_extract(info_dict)
        self._forceprint('video', info_dict)

        print_mandatory('title')
        print_mandatory('id')
        print_mandatory('url', 'urls')
        print_optional('thumbnail')
        print_optional('description')
        print_optional('filename')
        if self.params.get('forceduration') and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        print_mandatory('format')

        if self.params.get('forcejson'):
            self.to_stdout(json.dumps(self.sanitize_info(info_dict)))

    def dl(self, name, info, subtitle=False, test=False):
        if not info.get('url'):
            self.raise_no_formats(info, True)

        if test:
            verbose = self.params.get('verbose')
            params = {
                'test': True,
                'quiet': self.params.get('quiet') or not verbose,
                'verbose': verbose,
                'noprogress': not verbose,
                'nopart': True,
                'skip_unavailable_fragments': False,
                'keep_fragments': False,
                'overwrites': True,
                '_no_ytdl_file': True,
            }
        else:
            params = self.params

        fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
        if not test:
            for ph in self._progress_hooks:
                fd.add_progress_hook(ph)
            urls = '", "'.join(
                (f['url'].split(',')[0] + ',' if f['url'].startswith('data:') else f['url'])
                for f in info.get('requested_formats', []) or [info])
            self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')

        # Note: Ideally info should be deep-copied so that hooks cannot modify it.
        # But it may contain objects that are not deep-copyable
        new_info = self._copy_infodict(info)
        if new_info.get('http_headers') is None:
            new_info['http_headers'] = self._calc_headers(new_info)
        return fd.download(name, new_info, subtitle)

    def existing_file(self, filepaths, *, default_overwrite=True):
        existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
        if existing_files and not self.params.get('overwrites', default_overwrite):
            return existing_files[0]

        for file in existing_files:
            self.report_file_delete(file)
            os.remove(file)
        return None

    def process_info(self, info_dict):
        """Process a single resolved IE result. (Modifies it in-place)"""
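        # Overview (editor's note, hedged): this method runs the per-video
        # pipeline -- match filters, forced printings, writing of description/
        # subtitles/thumbnails/info-json/link files, the actual download, and
        # the post-processors -- mutating info_dict in place as it goes.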
        assert info_dict.get('_type', 'video') == 'video'
        original_infodict = info_dict

        if 'format' not in info_dict and 'ext' in info_dict:
            info_dict['format'] = info_dict['ext']

        # This is mostly just for backward compatibility of process_info
        # As a side-effect, this allows for format-specific filters
        if self._match_entry(info_dict) is not None:
            info_dict['__write_download_archive'] = 'ignore'
            return

        # Does nothing under normal operation - for backward compatibility of process_info
        self.post_extract(info_dict)
        self._num_downloads += 1

        # info_dict['_filename'] needs to be set for backward compatibility
        info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
        temp_filename = self.prepare_filename(info_dict, 'temp')
        files_to_move = {}

        # Forced printings
        self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

        def check_max_downloads():
            if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
                raise MaxDownloadsReached()

        if self.params.get('simulate'):
            info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
            check_max_downloads()
            return

        if full_filename is None:
            return
        if not self._ensure_dir_exists(encodeFilename(full_filename)):
            return
        if not self._ensure_dir_exists(encodeFilename(temp_filename)):
            return

        if self._write_description('video', info_dict,
                                   self.prepare_filename(info_dict, 'description')) is None:
            return

        sub_files = self._write_subtitles(info_dict, temp_filename)
        if sub_files is None:
            return
        files_to_move.update(dict(sub_files))

        thumb_files = self._write_thumbnails(
            'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
        if thumb_files is None:
            return
        files_to_move.update(dict(thumb_files))

        infofn = self.prepare_filename(info_dict, 'infojson')
        _infojson_written = self._write_info_json('video', info_dict, infofn)
        if _infojson_written:
            info_dict['infojson_filename'] = infofn
            # For backward compatibility, even though it was a private field
            info_dict['__infojson_filename'] = infofn
        elif _infojson_written is None:
            return

        # Note: Annotations are deprecated
        annofn = None
        if self.params.get('writeannotations', False):
            annofn = self.prepare_filename(info_dict, 'annotation')
        if annofn:
            if not self._ensure_dir_exists(encodeFilename(annofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            elif not info_dict.get('annotations'):
                self.report_warning('There are no annotations to write.')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                except OSError:
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        # Write internet shortcut files
        def _write_link_file(link_type):
            url = try_get(info_dict['webpage_url'], iri_to_uri)
            if not url:
                self.report_warning(
                    f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
                return True
            linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
            if not self._ensure_dir_exists(encodeFilename(linkfn)):
                return False
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
                self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
                return True
True try: self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}') with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline='\r\n' if link_type == 'url' else '\n') as linkfile: template_vars = {'url': url} if link_type == 'desktop': template_vars['filename'] = linkfn[:-(len(link_type) + 1)] linkfile.write(LINK_TEMPLATES[link_type] % template_vars) except OSError: self.report_error(f'Cannot write internet shortcut {linkfn}') return False return True write_links = { 'url': self.params.get('writeurllink'), 'webloc': self.params.get('writewebloclink'), 'desktop': self.params.get('writedesktoplink'), } if self.params.get('writelink'): link_type = ('webloc' if sys.platform == 'darwin' else 'desktop' if sys.platform.startswith('linux') else 'url') write_links[link_type] = True if any(should_write and not _write_link_file(link_type) for link_type, should_write in write_links.items()): return def replace_info_dict(new_info): nonlocal info_dict if new_info == info_dict: return info_dict.clear() info_dict.update(new_info) new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move) replace_info_dict(new_info) if self.params.get('skip_download'): info_dict['filepath'] = temp_filename info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) info_dict['__files_to_move'] = files_to_move replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)) info_dict['__write_download_archive'] = self.params.get('force_write_download_archive') else: # Download info_dict.setdefault('__postprocessors', []) try: def existing_video_file(*filepaths): ext = info_dict.get('ext') converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext) file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)), default_overwrite=False) if file: info_dict['ext'] = os.path.splitext(file)[1][1:] return file fd, success = None, True if info_dict.get('protocol') or info_dict.get('url'): fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-') if fd is not FFmpegFD and ( info_dict.get('section_start') or info_dict.get('section_end')): msg = ('This format cannot be partially downloaded' if FFmpegFD.available() else 'You have requested downloading the video partially, but ffmpeg is not installed') self.report_error(f'{msg}. 
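# --- Illustrative sketch (hypothetical `_demo_default_link_type`): --writelink
# picks the shortcut flavour from the running platform, as in the dispatch
# above; the dedicated --write-*-link flags force one flavour regardless of OS:
import sys

def _demo_default_link_type():
    if sys.platform == 'darwin':
        return 'webloc'     # macOS Finder shortcut (plist XML)
    if sys.platform.startswith('linux'):
        return 'desktop'    # freedesktop .desktop entry
    return 'url'            # Windows-style .url file, written with CRLF newlines

# _demo_default_link_type() -> 'url' on Windows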
Aborting') return if info_dict.get('requested_formats') is not None: requested_formats = info_dict['requested_formats'] old_ext = info_dict['ext'] if self.params.get('merge_output_format') is None: if (info_dict['ext'] == 'webm' and info_dict.get('thumbnails') # check with type instead of pp_key, __name__, or isinstance # since we dont want any custom PPs to trigger this and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721 info_dict['ext'] = 'mkv' self.report_warning( 'webm doesn\'t support embedding a thumbnail, mkv will be used') new_ext = info_dict['ext'] def correct_ext(filename, ext=new_ext): if filename == '-': return filename filename_real_ext = os.path.splitext(filename)[1][1:] filename_wo_ext = ( os.path.splitext(filename)[0] if filename_real_ext in (old_ext, new_ext) else filename) return f'{filename_wo_ext}.{ext}' # Ensure filename always has a correct extension for successful merge full_filename = correct_ext(full_filename) temp_filename = correct_ext(temp_filename) dl_filename = existing_video_file(full_filename, temp_filename) info_dict['__real_download'] = False merger = FFmpegMergerPP(self) downloaded = [] if dl_filename is not None: self.report_file_already_downloaded(dl_filename) elif fd: for f in requested_formats if fd != FFmpegFD else []: f['filepath'] = fname = prepend_extension( correct_ext(temp_filename, info_dict['ext']), 'f%s' % f['format_id'], info_dict['ext']) downloaded.append(fname) info_dict['url'] = '\n'.join(f['url'] for f in requested_formats) success, real_download = self.dl(temp_filename, info_dict) info_dict['__real_download'] = real_download else: if self.params.get('allow_unplayable_formats'): self.report_warning( 'You have requested merging of multiple formats ' 'while also allowing unplayable formats to be downloaded. ' 'The formats won\'t be merged to prevent data corruption.') elif not merger.available: msg = 'You have requested merging of multiple formats but ffmpeg is not installed' if not self.params.get('ignoreerrors'): self.report_error(f'{msg}. Aborting due to --abort-on-error') return self.report_warning(f'{msg}. The formats won\'t be merged') if temp_filename == '-': reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params) else 'but the formats are incompatible for simultaneous download' if merger.available else 'but ffmpeg is not installed') self.report_warning( f'You have requested downloading multiple formats to stdout {reason}. 
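# --- Illustrative sketch of the extension-correction rule above (hypothetical
# `_demo_correct_ext`): the old suffix is only stripped when it is one of the
# two known extensions, so a title that merely contains a dot stays intact and
# the merge target always ends in the negotiated extension:
import os.path

def _demo_correct_ext(filename, old_ext, new_ext):
    if filename == '-':
        return filename    # stdout pseudo-file: never rename
    real_ext = os.path.splitext(filename)[1][1:]
    base = os.path.splitext(filename)[0] if real_ext in (old_ext, new_ext) else filename
    return f'{base}.{new_ext}'

# _demo_correct_ext('clip.webm', 'webm', 'mkv')  -> 'clip.mkv'
# _demo_correct_ext('v1.0 demo', 'webm', 'mkv')  -> 'v1.0 demo.mkv'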
' 'The formats will be streamed one after the other') fname = temp_filename for f in requested_formats: new_info = dict(info_dict) del new_info['requested_formats'] new_info.update(f) if temp_filename != '-': fname = prepend_extension( correct_ext(temp_filename, new_info['ext']), 'f%s' % f['format_id'], new_info['ext']) if not self._ensure_dir_exists(fname): return f['filepath'] = fname downloaded.append(fname) partial_success, real_download = self.dl(fname, new_info) info_dict['__real_download'] = info_dict['__real_download'] or real_download success = success and partial_success if downloaded and merger.available and not self.params.get('allow_unplayable_formats'): info_dict['__postprocessors'].append(merger) info_dict['__files_to_merge'] = downloaded # Even if there were no downloads, it is being merged only now info_dict['__real_download'] = True else: for file in downloaded: files_to_move[file] = None else: # Just a single file dl_filename = existing_video_file(full_filename, temp_filename) if dl_filename is None or dl_filename == temp_filename: # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part. # So we should try to resume the download success, real_download = self.dl(temp_filename, info_dict) info_dict['__real_download'] = real_download else: self.report_file_already_downloaded(dl_filename) dl_filename = dl_filename or temp_filename info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) except network_exceptions as err: self.report_error('unable to download video data: %s' % error_to_compat_str(err)) return except OSError as err: raise UnavailableVideoError(err) except (ContentTooShortError, ) as err: self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})') return self._raise_pending_errors(info_dict) if success and full_filename != '-': def fixup(): do_fixup = True fixup_policy = self.params.get('fixup') vid = info_dict['id'] if fixup_policy in ('ignore', 'never'): return elif fixup_policy == 'warn': do_fixup = 'warn' elif fixup_policy != 'force': assert fixup_policy in ('detect_or_warn', None) if not info_dict.get('__real_download'): do_fixup = False def ffmpeg_fixup(cndn, msg, cls): if not (do_fixup and cndn): return elif do_fixup == 'warn': self.report_warning(f'{vid}: {msg}') return pp = cls(self) if pp.available: info_dict['__postprocessors'].append(pp) else: self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically') stretched_ratio = info_dict.get('stretched_ratio') ffmpeg_fixup(stretched_ratio not in (1, None), f'Non-uniform pixel ratio {stretched_ratio}', FFmpegFixupStretchedPP) downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None downloader = downloader.FD_NAME if downloader else None ext = info_dict.get('ext') postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any(( isinstance(pp, FFmpegVideoConvertorPP) and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None) ) for pp in self._pps['post_process']) if not postprocessed_by_ffmpeg: ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash', 'writing DASH m4a. 
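# --- Illustrative sketch of the fixup-policy dispatch above (hypothetical
# `_demo_fixup_action`; simplified -- the real code also checks that the
# ffmpeg postprocessor class is actually available): 'never'/'ignore' do
# nothing, 'warn' only reports, 'detect_or_warn' (the default) only touches
# freshly downloaded files, and 'force' always queues the fixup:
def _demo_fixup_action(policy, problem_detected, freshly_downloaded=True):
    if policy in ('ignore', 'never') or not problem_detected:
        return 'nothing'
    if policy == 'warn':
        return 'warn only'
    if policy in ('detect_or_warn', None) and not freshly_downloaded:
        return 'nothing'    # pre-existing files are trusted as-is
    return 'queue ffmpeg fixup postprocessor'

# _demo_fixup_action('detect_or_warn', problem_detected=True)
# -> 'queue ffmpeg fixup postprocessor'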
Only some players support this container', FFmpegFixupM4aPP) ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts') or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None, 'Possible MPEG-TS in MP4 container or malformed AAC timestamps', FFmpegFixupM3u8PP) ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD', 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP) ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP) ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP) fixup() try: replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move)) except PostProcessingError as err: self.report_error('Postprocessing: %s' % str(err)) return try: for ph in self._post_hooks: ph(info_dict['filepath']) except Exception as err: self.report_error('post hooks: %s' % str(err)) return info_dict['__write_download_archive'] = True assert info_dict is original_infodict # Make sure the info_dict was modified in-place if self.params.get('force_write_download_archive'): info_dict['__write_download_archive'] = True check_max_downloads() def __download_wrapper(self, func): @functools.wraps(func) def wrapper(*args, **kwargs): try: res = func(*args, **kwargs) except UnavailableVideoError as e: self.report_error(e) except DownloadCancelled as e: self.to_screen(f'[info] {e}') if not self.params.get('break_per_url'): raise else: if self.params.get('dump_single_json', False): self.post_extract(res) self.to_stdout(json.dumps(self.sanitize_info(res))) return wrapper def download(self, url_list): """Download a given list of URLs.""" url_list = variadic(url_list) # Passing a single URL is a common mistake outtmpl = self.params['outtmpl']['default'] if (len(url_list) > 1 and outtmpl != '-' and '%' not in outtmpl and self.params.get('max_downloads') != 1): raise SameFileError(outtmpl) for url in url_list: self.__download_wrapper(self.extract_info)( url, force_generic_extractor=self.params.get('force_generic_extractor', False)) return self._download_retcode def download_with_info_file(self, info_filename): with contextlib.closing(fileinput.FileInput( [info_filename], mode='r', openhook=fileinput.hook_encoded('utf-8'))) as f: # FileInput doesn't have a read method, we can't call json.load info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True)) try: self.__download_wrapper(self.process_ie_result)(info, download=True) except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e: if not isinstance(e, EntryNotInPlaylist): self.to_stderr('\r') webpage_url = info.get('webpage_url') if webpage_url is not None: self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}') return self.download([webpage_url]) else: raise return self._download_retcode @staticmethod def sanitize_info(info_dict, remove_private_keys=False): ''' Sanitize the infodict for converting to json ''' if info_dict is None: return info_dict info_dict.setdefault('epoch', int(time.time())) info_dict.setdefault('_type', 'video') if remove_private_keys: reject = lambda k, v: v is None or k.startswith('__') or k in { 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber', } else: reject = lambda k, v: False def filter_fn(obj): if isinstance(obj, dict): return {k: filter_fn(v) for k, v in obj.items() 
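# --- Illustrative sketch of the recursive sanitiser started above
# (hypothetical `_demo_filter`; the reject predicate is a simplified stand-in
# for the remove_private_keys branch): containers are walked, rejected keys are
# dropped, and anything not JSON-representable is replaced by its repr():
def _demo_filter(obj, reject=lambda k, v: v is None or k.startswith('__')):
    if isinstance(obj, dict):
        return {k: _demo_filter(v, reject) for k, v in obj.items() if not reject(k, v)}
    if isinstance(obj, (list, tuple, set)):
        return [_demo_filter(v, reject) for v in obj]
    if obj is None or isinstance(obj, (str, int, float, bool)):
        return obj
    return repr(obj)

# import datetime
# _demo_filter({'id': 'x', '__private': 1, 'when': datetime.date(2022, 8, 19)})
# -> {'id': 'x', 'when': 'datetime.date(2022, 8, 19)'}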
if not reject(k, v)} elif isinstance(obj, (list, tuple, set, LazyList)): return list(map(filter_fn, obj)) elif obj is None or isinstance(obj, (str, int, float, bool)): return obj else: return repr(obj) return filter_fn(info_dict) @staticmethod def filter_requested_info(info_dict, actually_filter=True): ''' Alias of sanitize_info for backward compatibility ''' return YoutubeDL.sanitize_info(info_dict, actually_filter) def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None): for filename in set(filter(None, files_to_delete)): if msg: self.to_screen(msg % filename) try: os.remove(filename) except OSError: self.report_warning(f'Unable to delete file {filename}') if filename in info.get('__files_to_move', []): # NB: Delete even if None del info['__files_to_move'][filename] @staticmethod def post_extract(info_dict): def actual_post_extract(info_dict): if info_dict.get('_type') in ('playlist', 'multi_video'): for video_dict in info_dict.get('entries', {}): actual_post_extract(video_dict or {}) return post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {}) info_dict.update(post_extractor()) actual_post_extract(info_dict or {}) def run_pp(self, pp, infodict): files_to_delete = [] if '__files_to_move' not in infodict: infodict['__files_to_move'] = {} try: files_to_delete, infodict = pp.run(infodict) except PostProcessingError as e: # Must be True and not 'only_download' if self.params.get('ignoreerrors') is True: self.report_error(e) return infodict raise if not files_to_delete: return infodict if self.params.get('keepvideo', False): for f in files_to_delete: infodict['__files_to_move'].setdefault(f, '') else: self._delete_downloaded_files( *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)') return infodict def run_all_pps(self, key, info, *, additional_pps=None): self._forceprint(key, info) for pp in (additional_pps or []) + self._pps[key]: info = self.run_pp(pp, info) return info def pre_process(self, ie_info, key='pre_process', files_to_move=None): info = dict(ie_info) info['__files_to_move'] = files_to_move or {} try: info = self.run_all_pps(key, info) except PostProcessingError as err: msg = f'Preprocessing: {err}' info.setdefault('__pending_error', msg) self.report_error(msg, is_error=False) return info, info.pop('__files_to_move', None) def post_process(self, filename, info, files_to_move=None): """Run all the postprocessors on the given file.""" info['filepath'] = filename info['__files_to_move'] = files_to_move or {} info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors')) info = self.run_pp(MoveFilesAfterDownloadPP(self), info) del info['__files_to_move'] return self.run_all_pps('after_move', info) def _make_archive_id(self, info_dict): video_id = info_dict.get('id') if not video_id: return # Future-proof against any change in case # and backwards compatibility with prior versions extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist if extractor is None: url = str_or_none(info_dict.get('url')) if not url: return # Try to find matching extractor for the URL and take its ie_key for ie_key, ie in self._ies.items(): if ie.suitable(url): extractor = ie_key break else: return return make_archive_id(extractor, video_id) def in_download_archive(self, info_dict): fn = self.params.get('download_archive') if fn is None: return False vid_ids = [self._make_archive_id(info_dict)] vid_ids.extend(info_dict.get('_old_archive_ids') or []) return any(id_ in self.archive for 
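# --- Illustrative sketch of the archive-id convention used above (hypothetical
# `_demo_make_archive_id`; assumption: mirrors utils.make_archive_id): an
# archive line is the lower-cased extractor key plus the video id, which keeps
# lookups case-stable and compatible with old youtube-dl archives:
def _demo_make_archive_id(extractor_key, video_id):
    return f'{extractor_key.lower()} {video_id}'

# _demo_make_archive_id('Youtube', 'dQw4w9WgXcQ') -> 'youtube dQw4w9WgXcQ'
# in_download_archive() then just tests membership of such ids in a set.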
id_ in vid_ids) def record_download_archive(self, info_dict): fn = self.params.get('download_archive') if fn is None: return vid_id = self._make_archive_id(info_dict) assert vid_id self.write_debug(f'Adding to archive: {vid_id}') with locked_file(fn, 'a', encoding='utf-8') as archive_file: archive_file.write(vid_id + '\n') self.archive.add(vid_id) @staticmethod def format_resolution(format, default='unknown'): if format.get('vcodec') == 'none' and format.get('acodec') != 'none': return 'audio only' if format.get('resolution') is not None: return format['resolution'] if format.get('width') and format.get('height'): return '%dx%d' % (format['width'], format['height']) elif format.get('height'): return '%sp' % format['height'] elif format.get('width'): return '%dx?' % format['width'] return default def _list_format_headers(self, *headers): if self.params.get('listformats_table', True) is not False: return [self._format_out(header, self.Styles.HEADERS) for header in headers] return headers def _format_note(self, fdict): res = '' if fdict.get('ext') in ['f4f', 'f4m']: res += '(unsupported)' if fdict.get('language'): if res: res += ' ' res += '[%s]' % fdict['language'] if fdict.get('format_note') is not None: if res: res += ' ' res += fdict['format_note'] if fdict.get('tbr') is not None: if res: res += ', ' res += '%4dk' % fdict['tbr'] if fdict.get('container') is not None: if res: res += ', ' res += '%s container' % fdict['container'] if (fdict.get('vcodec') is not None and fdict.get('vcodec') != 'none'): if res: res += ', ' res += fdict['vcodec'] if fdict.get('vbr') is not None: res += '@' elif fdict.get('vbr') is not None and fdict.get('abr') is not None: res += 'video@' if fdict.get('vbr') is not None: res += '%4dk' % fdict['vbr'] if fdict.get('fps') is not None: if res: res += ', ' res += '%sfps' % fdict['fps'] if fdict.get('acodec') is not None: if res: res += ', ' if fdict['acodec'] == 'none': res += 'video only' else: res += '%-5s' % fdict['acodec'] elif fdict.get('abr') is not None: if res: res += ', ' res += 'audio' if fdict.get('abr') is not None: res += '@%3dk' % fdict['abr'] if fdict.get('asr') is not None: res += ' (%5dHz)' % fdict['asr'] if fdict.get('filesize') is not None: if res: res += ', ' res += format_bytes(fdict['filesize']) elif fdict.get('filesize_approx') is not None: if res: res += ', ' res += '~' + format_bytes(fdict['filesize_approx']) return res def render_formats_table(self, info_dict): if not info_dict.get('formats') and not info_dict.get('url'): return None formats = info_dict.get('formats', [info_dict]) if not self.params.get('listformats_table', True) is not False: table = [ [ format_field(f, 'format_id'), format_field(f, 'ext'), self.format_resolution(f), self._format_note(f) ] for f in formats if f.get('preference') is None or f['preference'] >= -1000] return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1) def simplified_codec(f, field): assert field in ('acodec', 'vcodec') codec = f.get(field, 'unknown') if not codec: return 'unknown' elif codec != 'none': return '.'.join(codec.split('.')[:4]) if field == 'vcodec' and f.get('acodec') == 'none': return 'images' elif field == 'acodec' and f.get('vcodec') == 'none': return '' return self._format_out('audio only' if field == 'vcodec' else 'video only', self.Styles.SUPPRESS) delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True) table = [ [ self._format_out(format_field(f, 'format_id'), self.Styles.ID), format_field(f, 'ext'), format_field(f, 
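# --- Illustrative walk-through of format_resolution above (hypothetical
# `_demo_resolution`, same fall-through order): an explicit 'resolution'
# string wins, then WxH, then height-only ('720p'), then width-only:
def _demo_resolution(f, default='unknown'):
    if f.get('vcodec') == 'none' and f.get('acodec') != 'none':
        return 'audio only'
    if f.get('resolution') is not None:
        return f['resolution']
    if f.get('width') and f.get('height'):
        return '%dx%d' % (f['width'], f['height'])
    if f.get('height'):
        return '%sp' % f['height']
    if f.get('width'):
        return '%dx?' % f['width']
    return default

# _demo_resolution({'width': 1280, 'height': 720})       -> '1280x720'
# _demo_resolution({'vcodec': 'none', 'acodec': 'opus'}) -> 'audio only'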
func=self.format_resolution, ignore=('audio only', 'images')), format_field(f, 'fps', '\t%d', func=round), format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''), format_field(f, 'audio_channels', '\t%s'), delim, format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes), format_field(f, 'tbr', '\t%dk', func=round), shorten_protocol_name(f.get('protocol', '')), delim, simplified_codec(f, 'vcodec'), format_field(f, 'vbr', '\t%dk', func=round), simplified_codec(f, 'acodec'), format_field(f, 'abr', '\t%dk', func=round), format_field(f, 'asr', '\t%s', func=format_decimal_suffix), join_nonempty( self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None, format_field(f, 'language', '[%s]'), join_nonempty(format_field(f, 'format_note'), format_field(f, 'container', ignore=(None, f.get('ext'))), delim=', '), delim=' '), ] for f in formats if f.get('preference') is None or f['preference'] >= -1000] header_line = self._list_format_headers( 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO', delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO') return render_table( header_line, table, hide_empty=True, delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True)) def render_thumbnails_table(self, info_dict): thumbnails = list(info_dict.get('thumbnails') or []) if not thumbnails: return None return render_table( self._list_format_headers('ID', 'Width', 'Height', 'URL'), [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]) def render_subtitles_table(self, video_id, subtitles): def _row(lang, formats): exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats))) if len(set(names)) == 1: names = [] if names[0] == 'unknown' else names[:1] return [lang, ', '.join(names), ', '.join(exts)] if not subtitles: return None return render_table( self._list_format_headers('Language', 'Name', 'Formats'), [_row(lang, formats) for lang, formats in subtitles.items()], hide_empty=True) def __list_table(self, video_id, name, func, *args): table = func(*args) if not table: self.to_screen(f'{video_id} has no {name}') return self.to_screen(f'[info] Available {name} for {video_id}:') self.to_stdout(table) def list_formats(self, info_dict): self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict) def list_thumbnails(self, info_dict): self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict) def list_subtitles(self, video_id, subtitles, name='subtitles'): self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles) def urlopen(self, req): """ Start an HTTP download """ if isinstance(req, str): req = sanitized_Request(req) return self._opener.open(req, timeout=self._socket_timeout) def print_debug_header(self): if not self.params.get('verbose'): return # These imports can be slow. 
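# --- Illustrative sketch of simplified_codec's trimming above (hypothetical
# `_demo_simplified_codec`): codec tags are cut to at most four dot-separated
# fields so long ISO strings stay readable in the formats table:
def _demo_simplified_codec(codec):
    if not codec or codec == 'unknown':
        return 'unknown'
    if codec == 'none':
        return ''
    return '.'.join(codec.split('.')[:4])

# _demo_simplified_codec('avc1.640028')                    -> 'avc1.640028'
# _demo_simplified_codec('av01.0.12M.10.0.110.09.16.09.0') -> 'av01.0.12M.10'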
So import them only as needed from .extractor.extractors import _LAZY_LOADER from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors def get_encoding(stream): ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)) if not supports_terminal_sequences(stream): from .utils import WINDOWS_VT_MODE # Must be imported locally ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)' return ret encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % ( locale.getpreferredencoding(), sys.getfilesystemencoding(), self.get_encoding(), ', '.join( f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_ if stream is not None and key != 'console') ) logger = self.params.get('logger') if logger: write_debug = lambda msg: logger.debug(f'[debug] {msg}') write_debug(encoding_str) else: write_string(f'[debug] {encoding_str}\n', encoding=None) write_debug = lambda msg: self._write_string(f'[debug] {msg}\n') source = detect_variant() if VARIANT not in (None, 'pip'): source += '*' write_debug(join_nonempty( 'yt-dlp version', __version__, f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '', '' if source == 'unknown' else f'({source})', delim=' ')) if not _LAZY_LOADER: if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): write_debug('Lazy loading extractors is forcibly disabled') else: write_debug('Lazy loading extractors is disabled') if plugin_extractors or plugin_postprocessors: write_debug('Plugins: %s' % [ '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}') for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())]) if self.params['compat_opts']: write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts'])) if source == 'source': try: stdout, _, _ = Popen.run( ['git', 'rev-parse', '--short', 'HEAD'], text=True, cwd=os.path.dirname(os.path.abspath(__file__)), stdout=subprocess.PIPE, stderr=subprocess.PIPE) if re.fullmatch('[0-9a-f]+', stdout.strip()): write_debug(f'Git HEAD: {stdout.strip()}') except Exception: with contextlib.suppress(Exception): sys.exc_clear() write_debug(system_identifier()) exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self) ffmpeg_features = {key for key, val in ffmpeg_features.items() if val} if ffmpeg_features: exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features)) exe_versions['rtmpdump'] = rtmpdump_version() exe_versions['phantomjs'] = PhantomJSwrapper._version() exe_str = ', '.join( f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v ) or 'none' write_debug('exe versions: %s' % exe_str) from .compat.compat_utils import get_package_info from .dependencies import available_dependencies write_debug('Optional libraries: %s' % (', '.join(sorted({ join_nonempty(*get_package_info(m)) for m in available_dependencies.values() })) or 'none')) self._setup_opener() proxy_map = {} for handler in self._opener.handlers: if hasattr(handler, 'proxies'): proxy_map.update(handler.proxies) write_debug(f'Proxy map: {proxy_map}') # Not implemented if False and self.params.get('call_home'): ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode() write_debug('Public IP address: %s' % ipaddr) latest_version = self.urlopen( 'https://yt-dl.org/latest/version').read().decode() if version_tuple(latest_version) > version_tuple(__version__): self.report_warning( 'You are using an outdated version (newest version: %s)! ' 'See https://yt-dl.org/update if you need help updating.' 
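# --- Illustrative sketch of the "Git HEAD" probe above (hypothetical
# `_demo_git_head`; stdlib only): run git in the package directory and only
# report the hash when the output really looks like one:
import re
import subprocess

def _demo_git_head(cwd='.'):
    try:
        out = subprocess.run(['git', 'rev-parse', '--short', 'HEAD'],
                             cwd=cwd, text=True, capture_output=True).stdout.strip()
    except OSError:         # git not installed
        return None
    return out if re.fullmatch('[0-9a-f]+', out) else None

# _demo_git_head() -> e.g. '9d339c4' inside a checkout, else None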
% latest_version) def _setup_opener(self): if hasattr(self, '_opener'): return timeout_val = self.params.get('socket_timeout') self._socket_timeout = 20 if timeout_val is None else float(timeout_val) opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser') opts_cookiefile = self.params.get('cookiefile') opts_proxy = self.params.get('proxy') self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self) cookie_processor = YoutubeDLCookieProcessor(self.cookiejar) if opts_proxy is not None: if opts_proxy == '': proxies = {} else: proxies = {'http': opts_proxy, 'https': opts_proxy} else: proxies = urllib.request.getproxies() # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805) if 'http' in proxies and 'https' not in proxies: proxies['https'] = proxies['http'] proxy_handler = PerRequestProxyHandler(proxies) debuglevel = 1 if self.params.get('debug_printtraffic') else 0 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) redirect_handler = YoutubeDLRedirectHandler() data_handler = urllib.request.DataHandler() # When passing our own FileHandler instance, build_opener won't add the # default FileHandler and allows us to disable the file protocol, which # can be used for malicious purposes (see # https://github.com/ytdl-org/youtube-dl/issues/8227) file_handler = urllib.request.FileHandler() def file_open(*args, **kwargs): raise urllib.error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons') file_handler.file_open = file_open opener = urllib.request.build_opener( proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler) # Delete the default user-agent header, which would otherwise apply in # cases where our custom HTTP handler doesn't come into play # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details) opener.addheaders = [] self._opener = opener def encode(self, s): if isinstance(s, bytes): return s # Already encoded try: return s.encode(self.get_encoding()) except UnicodeEncodeError as err: err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.' 
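# --- Illustrative sketch (stdlib only, hypothetical `_demo_build_safe_opener`)
# of the file:// hardening above: passing our own FileHandler stops
# build_opener from installing the default one, and overriding file_open turns
# any file:// URL into an explicit error instead of a local-file read:
import urllib.error
import urllib.request

def _demo_build_safe_opener():
    file_handler = urllib.request.FileHandler()

    def file_open(*args, **kwargs):
        raise urllib.error.URLError('file:// scheme is disabled')

    file_handler.file_open = file_open
    opener = urllib.request.build_opener(file_handler)
    opener.addheaders = []   # also drop the default Python-urllib User-Agent
    return opener

# _demo_build_safe_opener().open('file:///etc/passwd')  # raises URLError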
raise def get_encoding(self): encoding = self.params.get('encoding') if encoding is None: encoding = preferredencoding() return encoding def _write_info_json(self, label, ie_result, infofn, overwrite=None): ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error ''' if overwrite is None: overwrite = self.params.get('overwrites', True) if not self.params.get('writeinfojson'): return False elif not infofn: self.write_debug(f'Skipping writing {label} infojson') return False elif not self._ensure_dir_exists(infofn): return None elif not overwrite and os.path.exists(infofn): self.to_screen(f'[info] {label.title()} metadata is already present') return 'exists' self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}') try: write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn) return True except OSError: self.report_error(f'Cannot write {label} metadata to JSON file {infofn}') return None def _write_description(self, label, ie_result, descfn): ''' Write description and returns True = written, False = skip, None = error ''' if not self.params.get('writedescription'): return False elif not descfn: self.write_debug(f'Skipping writing {label} description') return False elif not self._ensure_dir_exists(descfn): return None elif not self.params.get('overwrites', True) and os.path.exists(descfn): self.to_screen(f'[info] {label.title()} description is already present') elif ie_result.get('description') is None: self.report_warning(f'There\'s no {label} description to write') return False else: try: self.to_screen(f'[info] Writing {label} description to: {descfn}') with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: descfile.write(ie_result['description']) except OSError: self.report_error(f'Cannot write {label} description file {descfn}') return None return True def _write_subtitles(self, info_dict, filename): ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error''' ret = [] subtitles = info_dict.get('requested_subtitles') if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')): # subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE return ret sub_filename_base = self.prepare_filename(info_dict, 'subtitle') if not sub_filename_base: self.to_screen('[info] Skipping writing video subtitles') return ret for sub_lang, sub_info in subtitles.items(): sub_format = sub_info['ext'] sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext')) sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext')) existing_sub = self.existing_file((sub_filename_final, sub_filename)) if existing_sub: self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present') sub_info['filepath'] = existing_sub ret.append((existing_sub, sub_filename_final)) continue self.to_screen(f'[info] Writing video subtitles to: {sub_filename}') if sub_info.get('data') is not None: try: # Use newline='' to prevent conversion of newline characters # See https://github.com/ytdl-org/youtube-dl/issues/10268 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile: subfile.write(sub_info['data']) sub_info['filepath'] = sub_filename ret.append((sub_filename, sub_filename_final)) continue except OSError: self.report_error(f'Cannot write video subtitles file 
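# --- Illustrative note on the newline='' used for subtitle data above: text
# mode normally translates '\n' on write, which would corrupt payloads that
# already contain CRLF cue separators (ytdl-org/youtube-dl#10268); opening
# with newline='' writes the decoded data unmodified. Minimal sketch:
# with open('demo.srt', 'w', encoding='utf-8', newline='') as subfile:
#     subfile.write('1\r\n00:00:00,000 --> 00:00:01,000\r\nhello\r\n')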
{sub_filename}') return None try: sub_copy = sub_info.copy() sub_copy.setdefault('http_headers', info_dict.get('http_headers')) self.dl(sub_filename, sub_copy, subtitle=True) sub_info['filepath'] = sub_filename ret.append((sub_filename, sub_filename_final)) except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err: msg = f'Unable to download video subtitles for {sub_lang!r}: {err}' if self.params.get('ignoreerrors') is not True: # False or 'only_download' if not self.params.get('ignoreerrors'): self.report_error(msg) raise DownloadError(msg) self.report_warning(msg) return ret def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None): ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) ''' write_all = self.params.get('write_all_thumbnails', False) thumbnails, ret = [], [] if write_all or self.params.get('writethumbnail', False): thumbnails = info_dict.get('thumbnails') or [] multiple = write_all and len(thumbnails) > 1 if thumb_filename_base is None: thumb_filename_base = filename if thumbnails and not thumb_filename_base: self.write_debug(f'Skipping writing {label} thumbnail') return ret for idx, t in list(enumerate(thumbnails))[::-1]: thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg') thumb_display_id = f'{label} thumbnail {t["id"]}' thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext')) thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext')) existing_thumb = self.existing_file((thumb_filename_final, thumb_filename)) if existing_thumb: self.to_screen('[info] %s is already present' % ( thumb_display_id if multiple else f'{label} thumbnail').capitalize()) t['filepath'] = existing_thumb ret.append((existing_thumb, thumb_filename_final)) else: self.to_screen(f'[info] Downloading {thumb_display_id} ...') try: uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {}))) self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}') with open(encodeFilename(thumb_filename), 'wb') as thumbf: shutil.copyfileobj(uf, thumbf) ret.append((thumb_filename, thumb_filename_final)) t['filepath'] = thumb_filename except network_exceptions as err: thumbnails.pop(idx) self.report_warning(f'Unable to download {thumb_display_id}: {err}') if ret and not write_all: break return ret yt-dlp-2022.08.19/yt_dlp/__init__.py000066400000000000000000001232131427755243700167560ustar00rootroot00000000000000try: import contextvars # noqa: F401 except Exception: raise Exception( f'You are using an unsupported version of Python. 
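# --- Illustrative sketch of the thumbnail selection loop above (hypothetical
# `_demo_pick_thumbnails`; the real code catches network_exceptions, not bare
# OSError): candidates are tried from the last (highest-preference) entry
# backwards, failures are dropped from the list, and without
# --write-all-thumbnails the loop stops at the first success:
def _demo_pick_thumbnails(thumbnails, fetch, write_all=False):
    written = []
    for idx, t in list(enumerate(thumbnails))[::-1]:
        try:
            written.append(fetch(t['url']))
        except OSError:
            thumbnails.pop(idx)   # forget candidates that failed to download
            continue
        if not write_all:
            break
    return written

# _demo_pick_thumbnails([{'url': 'u1'}, {'url': 'u2'}], fetch=str.upper) -> ['U2']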
Only Python versions 3.7 and above are supported by yt-dlp') # noqa: F541 __license__ = 'Public Domain' import collections import getpass import itertools import optparse import os import re import sys from .compat import compat_shlex_quote from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS from .downloader import FileDownloader from .downloader.external import get_external_downloader from .extractor import list_extractor_classes from .extractor.adobepass import MSO_INFO from .extractor.common import InfoExtractor from .options import parseOpts from .postprocessor import ( FFmpegExtractAudioPP, FFmpegMergerPP, FFmpegPostProcessor, FFmpegSubtitlesConvertorPP, FFmpegThumbnailsConvertorPP, FFmpegVideoConvertorPP, FFmpegVideoRemuxerPP, MetadataFromFieldPP, MetadataParserPP, ) from .update import Updater from .utils import ( NO_DEFAULT, POSTPROCESS_WHEN, DateRange, DownloadCancelled, DownloadError, GeoUtils, PlaylistEntries, SameFileError, decodeOption, download_range_func, expand_path, float_or_none, format_field, int_or_none, match_filter_func, parse_duration, preferredencoding, read_batch_urls, read_stdin, render_table, setproctitle, std_headers, traverse_obj, variadic, write_string, ) from .YoutubeDL import YoutubeDL def _exit(status=0, *args): for msg in args: sys.stderr.write(msg) raise SystemExit(status) def get_urls(urls, batchfile, verbose): # Batch file verification batch_urls = [] if batchfile is not None: try: batch_urls = read_batch_urls( read_stdin('URLs') if batchfile == '-' else open(expand_path(batchfile), encoding='utf-8', errors='ignore')) if verbose: write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n') except OSError: _exit(f'ERROR: batch file {batchfile} could not be read') _enc = preferredencoding() return [ url.strip().decode(_enc, 'ignore') if isinstance(url, bytes) else url.strip() for url in batch_urls + urls] def print_extractor_information(opts, urls): # Importing GenericIE is currently slow since it imports other extractors # TODO: Move this back to module level after generalization of embed detection from .extractor.generic import GenericIE out = '' if opts.list_extractors: urls = dict.fromkeys(urls, False) for ie in list_extractor_classes(opts.age_limit): out += ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n' if ie == GenericIE: matched_urls = [url for url, matched in urls.items() if not matched] else: matched_urls = tuple(filter(ie.suitable, urls.keys())) urls.update(dict.fromkeys(matched_urls, True)) out += ''.join(f' {url}\n' for url in matched_urls) elif opts.list_extractor_descriptions: _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow') out = '\n'.join( ie.description(markdown=False, search_examples=_SEARCHES) for ie in list_extractor_classes(opts.age_limit) if ie.working() and ie.IE_DESC is not False) elif opts.ap_list_mso: out = 'Supported TV Providers:\n%s\n' % render_table( ['mso', 'mso name'], [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]) else: return False write_string(out, out=sys.stdout) return True def set_compat_opts(opts): def _unused_compat_opt(name): if name not in opts.compat_opts: return False opts.compat_opts.discard(name) opts.compat_opts.update(['*%s' % name]) return True def set_default_compat(compat_name, opt_name, default=True, remove_compat=True): attr = getattr(opts, opt_name) if compat_name in opts.compat_opts: if attr is None: setattr(opts, opt_name, not default) 
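# --- Illustrative note (assumption about intent): importing contextvars, new
# in Python 3.7, doubles as a cheap version gate, producing one clear error on
# older interpreters instead of scattered failures. An equivalent explicit
# check would be:
import sys
if sys.version_info < (3, 7):
    raise SystemExit('yt-dlp requires Python 3.7+')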
return True else: if remove_compat: _unused_compat_opt(compat_name) return False elif attr is None: setattr(opts, opt_name, default) return None set_default_compat('abort-on-error', 'ignoreerrors', 'only_download') set_default_compat('no-playlist-metafiles', 'allow_playlist_files') set_default_compat('no-clean-infojson', 'clean_infojson') if 'no-attach-info-json' in opts.compat_opts: if opts.embed_infojson: _unused_compat_opt('no-attach-info-json') else: opts.embed_infojson = False if 'format-sort' in opts.compat_opts: opts.format_sort.extend(InfoExtractor.FormatSort.ytdl_default) _video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False) _audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False) if _video_multistreams_set is False and _audio_multistreams_set is False: _unused_compat_opt('multistreams') if 'filename' in opts.compat_opts: if opts.outtmpl.get('default') is None: opts.outtmpl.update({'default': '%(title)s-%(id)s.%(ext)s'}) else: _unused_compat_opt('filename') def validate_options(opts): def validate(cndn, name, value=None, msg=None): if cndn: return True raise ValueError((msg or 'invalid {name} "{value}" given').format(name=name, value=value)) def validate_in(name, value, items, msg=None): return validate(value is None or value in items, name, value, msg) def validate_regex(name, value, regex): return validate(value is None or re.match(regex, value), name, value) def validate_positive(name, value, strict=False): return validate(value is None or value > 0 or (not strict and value == 0), name, value, '{name} "{value}" must be positive' + ('' if strict else ' or 0')) def validate_minmax(min_val, max_val, min_name, max_name=None): if max_val is None or min_val is None or max_val >= min_val: return if not max_name: min_name, max_name = f'min {min_name}', f'max {min_name}' raise ValueError(f'{max_name} "{max_val}" must be must be greater than or equal to {min_name} "{min_val}"') # Usernames and passwords validate(not opts.usenetrc or (opts.username is None and opts.password is None), '.netrc', msg='using {name} conflicts with giving username/password') validate(opts.password is None or opts.username is not None, 'account username', msg='{name} missing') validate(opts.ap_password is None or opts.ap_username is not None, 'TV Provider account username', msg='{name} missing') validate_in('TV Provider', opts.ap_mso, MSO_INFO, 'Unsupported {name} "{value}", use --ap-list-mso to get a list of supported TV Providers') # Numbers validate_positive('autonumber start', opts.autonumber_start) validate_positive('autonumber size', opts.autonumber_size, True) validate_positive('concurrent fragments', opts.concurrent_fragment_downloads, True) validate_positive('playlist start', opts.playliststart, True) if opts.playlistend != -1: validate_minmax(opts.playliststart, opts.playlistend, 'playlist start', 'playlist end') # Time ranges validate_positive('subtitles sleep interval', opts.sleep_interval_subtitles) validate_positive('requests sleep interval', opts.sleep_interval_requests) validate_positive('sleep interval', opts.sleep_interval) validate_positive('max sleep interval', opts.max_sleep_interval) if opts.sleep_interval is None: validate( opts.max_sleep_interval is None, 'min sleep interval', msg='{name} must be specified; use --min-sleep-interval') elif opts.max_sleep_interval is None: opts.max_sleep_interval = opts.sleep_interval else: validate_minmax(opts.sleep_interval, 
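# --- Illustrative sketch of the range validator defined above (hypothetical
# `_demo_validate_minmax`): None on either side means "unbounded", anything
# else must satisfy max >= min:
def _demo_validate_minmax(min_val, max_val, name):
    if max_val is None or min_val is None or max_val >= min_val:
        return
    raise ValueError(f'max {name} "{max_val}" must be greater than or equal to min {name} "{min_val}"')

# _demo_validate_minmax(5, 10, 'sleep interval')   # ok
# _demo_validate_minmax(10, 5, 'sleep interval')   # raises ValueError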
opts.max_sleep_interval, 'sleep interval') if opts.wait_for_video is not None: min_wait, max_wait, *_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None]) validate(min_wait is not None and not (max_wait is None and '-' in opts.wait_for_video), 'time range to wait for video', opts.wait_for_video) validate_minmax(min_wait, max_wait, 'time range to wait for video') opts.wait_for_video = (min_wait, max_wait) # Format sort for f in opts.format_sort: validate_regex('format sorting', f, InfoExtractor.FormatSort.regex) # Postprocessor formats validate_regex('merge output format', opts.merge_output_format, r'({0})(/({0}))*'.format('|'.join(map(re.escape, FFmpegMergerPP.SUPPORTED_EXTS)))) validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE) validate_in('subtitle format', opts.convertsubtitles, FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS) validate_regex('thumbnail format', opts.convertthumbnails, FFmpegThumbnailsConvertorPP.FORMAT_RE) validate_regex('recode video format', opts.recodevideo, FFmpegVideoConvertorPP.FORMAT_RE) validate_regex('remux video format', opts.remuxvideo, FFmpegVideoRemuxerPP.FORMAT_RE) if opts.audioquality: opts.audioquality = opts.audioquality.strip('k').strip('K') # int_or_none prevents inf, nan validate_positive('audio quality', int_or_none(float_or_none(opts.audioquality), default=0)) # Retries def parse_retries(name, value): if value is None: return None elif value in ('inf', 'infinite'): return float('inf') try: return int(value) except (TypeError, ValueError): validate(False, f'{name} retry count', value) opts.retries = parse_retries('download', opts.retries) opts.fragment_retries = parse_retries('fragment', opts.fragment_retries) opts.extractor_retries = parse_retries('extractor', opts.extractor_retries) opts.file_access_retries = parse_retries('file access', opts.file_access_retries) # Retry sleep function def parse_sleep_func(expr): NUMBER_RE = r'\d+(?:\.\d+)?' 
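# --- Illustrative sketch of the retry-count parser above (hypothetical
# `_demo_parse_retries`): 'inf'/'infinite' map to float('inf') so later
# comparisons against an attempt counter keep working, and anything
# non-numeric is rejected:
def _demo_parse_retries(value):
    if value is None:
        return None
    if value in ('inf', 'infinite'):
        return float('inf')
    try:
        return int(value)
    except (TypeError, ValueError):
        raise ValueError(f'invalid retry count {value!r}')

# _demo_parse_retries('10') -> 10; _demo_parse_retries('inf') -> inf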
op, start, limit, step, *_ = tuple(re.fullmatch( rf'(?:(linear|exp)=)?({NUMBER_RE})(?::({NUMBER_RE})?)?(?::({NUMBER_RE}))?', expr.strip()).groups()) + (None, None) if op == 'exp': return lambda n: min(float(start) * (float(step or 2) ** n), float(limit or 'inf')) else: default_step = start if op or limit else 0 return lambda n: min(float(start) + float(step or default_step) * n, float(limit or 'inf')) for key, expr in opts.retry_sleep.items(): if not expr: del opts.retry_sleep[key] continue try: opts.retry_sleep[key] = parse_sleep_func(expr) except AttributeError: raise ValueError(f'invalid {key} retry sleep expression {expr!r}') # Bytes def parse_bytes(name, value): if value is None: return None numeric_limit = FileDownloader.parse_bytes(value) validate(numeric_limit is not None, 'rate limit', value) return numeric_limit opts.ratelimit = parse_bytes('rate limit', opts.ratelimit) opts.throttledratelimit = parse_bytes('throttled rate limit', opts.throttledratelimit) opts.min_filesize = parse_bytes('min filesize', opts.min_filesize) opts.max_filesize = parse_bytes('max filesize', opts.max_filesize) opts.buffersize = parse_bytes('buffer size', opts.buffersize) opts.http_chunk_size = parse_bytes('http chunk size', opts.http_chunk_size) # Output templates def validate_outtmpl(tmpl, msg): err = YoutubeDL.validate_outtmpl(tmpl) if err: raise ValueError(f'invalid {msg} "{tmpl}": {err}') for k, tmpl in opts.outtmpl.items(): validate_outtmpl(tmpl, f'{k} output template') for type_, tmpl_list in opts.forceprint.items(): for tmpl in tmpl_list: validate_outtmpl(tmpl, f'{type_} print template') for type_, tmpl_list in opts.print_to_file.items(): for tmpl, file in tmpl_list: validate_outtmpl(tmpl, f'{type_} print to file template') validate_outtmpl(file, f'{type_} print to file filename') validate_outtmpl(opts.sponsorblock_chapter_title, 'SponsorBlock chapter title') for k, tmpl in opts.progress_template.items(): k = f'{k[:-6]} console title' if '-title' in k else f'{k} progress' validate_outtmpl(tmpl, f'{k} template') outtmpl_default = opts.outtmpl.get('default') if outtmpl_default == '': opts.skip_download = None del opts.outtmpl['default'] if outtmpl_default and not os.path.splitext(outtmpl_default)[1] and opts.extractaudio: raise ValueError( 'Cannot download a video and extract audio into the same file! ' f'Use "{outtmpl_default}.%(ext)s" instead of "{outtmpl_default}" as the output template') def parse_chapters(name, value): chapters, ranges = [], [] for regex in value or []: if regex.startswith('*'): for range in regex[1:].split(','): dur = tuple(map(parse_duration, range.strip().split('-'))) if len(dur) == 2 and all(t is not None for t in dur): ranges.append(dur) else: raise ValueError(f'invalid {name} time range "{regex}". 
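# --- Worked example of the --retry-sleep grammar parsed above (hypothetical
# `_demo_parse_sleep_func`, same regex): '[linear|exp=]START[:LIMIT][:STEP]',
# where linear growth is START + STEP*n and exponential is START * STEP**n,
# both capped at LIMIT:
import re

def _demo_parse_sleep_func(expr):
    NUMBER_RE = r'\d+(?:\.\d+)?'
    op, start, limit, step, *_ = tuple(re.fullmatch(
        rf'(?:(linear|exp)=)?({NUMBER_RE})(?::({NUMBER_RE})?)?(?::({NUMBER_RE}))?',
        expr.strip()).groups()) + (None, None)
    if op == 'exp':
        return lambda n: min(float(start) * (float(step or 2) ** n), float(limit or 'inf'))
    default_step = start if op or limit else 0
    return lambda n: min(float(start) + float(step or default_step) * n, float(limit or 'inf'))

# [_demo_parse_sleep_func('exp=1:20')(n) for n in range(6)]
# -> [1.0, 2.0, 4.0, 8.0, 16.0, 20.0]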
Must be of the form *start-end') continue try: chapters.append(re.compile(regex)) except re.error as err: raise ValueError(f'invalid {name} regex "{regex}" - {err}') return chapters, ranges opts.remove_chapters, opts.remove_ranges = parse_chapters('--remove-chapters', opts.remove_chapters) opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges)) # Cookies from browser if opts.cookiesfrombrowser: mobj = re.match(r'(?P[^+:]+)(\s*\+\s*(?P[^:]+))?(\s*:(?P.+))?', opts.cookiesfrombrowser) if mobj is None: raise ValueError(f'invalid cookies from browser arguments: {opts.cookiesfrombrowser}') browser_name, keyring, profile = mobj.group('name', 'keyring', 'profile') browser_name = browser_name.lower() if browser_name not in SUPPORTED_BROWSERS: raise ValueError(f'unsupported browser specified for cookies: "{browser_name}". ' f'Supported browsers are: {", ".join(sorted(SUPPORTED_BROWSERS))}') if keyring is not None: keyring = keyring.upper() if keyring not in SUPPORTED_KEYRINGS: raise ValueError(f'unsupported keyring specified for cookies: "{keyring}". ' f'Supported keyrings are: {", ".join(sorted(SUPPORTED_KEYRINGS))}') opts.cookiesfrombrowser = (browser_name, profile, keyring) # MetadataParser def metadataparser_actions(f): if isinstance(f, str): cmd = '--parse-metadata %s' % compat_shlex_quote(f) try: actions = [MetadataFromFieldPP.to_action(f)] except Exception as err: raise ValueError(f'{cmd} is invalid; {err}') else: cmd = '--replace-in-metadata %s' % ' '.join(map(compat_shlex_quote, f)) actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(',')) for action in actions: try: MetadataParserPP.validate_action(*action) except Exception as err: raise ValueError(f'{cmd} is invalid; {err}') yield action parse_metadata = opts.parse_metadata or [] if opts.metafromtitle is not None: parse_metadata.append('title:%s' % opts.metafromtitle) opts.parse_metadata = list(itertools.chain(*map(metadataparser_actions, parse_metadata))) # Other options if opts.playlist_items is not None: try: tuple(PlaylistEntries.parse_playlist_items(opts.playlist_items)) except Exception as err: raise ValueError(f'Invalid playlist-items {opts.playlist_items!r}: {err}') geo_bypass_code = opts.geo_bypass_ip_block or opts.geo_bypass_country if geo_bypass_code is not None: try: GeoUtils.random_ipv4(geo_bypass_code) except Exception: raise ValueError('unsupported geo-bypass country or ip-block') opts.match_filter = match_filter_func(opts.match_filter) if opts.download_archive is not None: opts.download_archive = expand_path(opts.download_archive) if opts.user_agent is not None: opts.headers.setdefault('User-Agent', opts.user_agent) if opts.referer is not None: opts.headers.setdefault('Referer', opts.referer) if opts.no_sponsorblock: opts.sponsorblock_mark = opts.sponsorblock_remove = set() default_downloader = None for proto, path in opts.external_downloader.items(): if path == 'native': continue ed = get_external_downloader(path) if ed is None: raise ValueError( f'No such {format_field(proto, None, "%s ", ignore="default")}external downloader "{path}"') elif ed and proto == 'default': default_downloader = ed.get_basename() warnings, deprecation_warnings = [], [] # Common mistake: -f best if opts.format == 'best': warnings.append('.\n '.join(( '"-f best" selects the best pre-merged format which is often not the best option', 'To let yt-dlp download and merge the best available formats, simply do not pass any format selection', 'If you know what you are doing and 
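# --- Illustrative sketch of the --cookies-from-browser grammar above
# (hypothetical `_demo_parse_cookies_arg`; note the named groups, which the
# flattened dump above lost): BROWSER[+KEYRING][:PROFILE]:
import re

def _demo_parse_cookies_arg(arg):
    mobj = re.match(r'(?P<name>[^+:]+)(\s*\+\s*(?P<keyring>[^:]+))?(\s*:(?P<profile>.+))?', arg)
    if mobj is None:
        raise ValueError(f'invalid cookies from browser arguments: {arg}')
    name, keyring, profile = mobj.group('name', 'keyring', 'profile')
    return name.lower(), keyring and keyring.upper(), profile

# _demo_parse_cookies_arg('chrome+gnomekeyring:Profile 1')
# -> ('chrome', 'GNOMEKEYRING', 'Profile 1'); the real code then validates the
# browser/keyring against SUPPORTED_BROWSERS / SUPPORTED_KEYRINGS.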
want only the best pre-merged format, use "-f b" instead to suppress this warning'))) # --(postprocessor/downloader)-args without name def report_args_compat(name, value, key1, key2=None, where=None): if key1 in value and key2 not in value: warnings.append(f'{name.title()} arguments given without specifying name. ' f'The arguments will be given to {where or f"all {name}s"}') return True return False if report_args_compat('external downloader', opts.external_downloader_args, 'default', where=default_downloader) and default_downloader: # Compat with youtube-dl's behavior. See https://github.com/ytdl-org/youtube-dl/commit/49c5293014bc11ec8c009856cd63cffa6296c1e1 opts.external_downloader_args.setdefault(default_downloader, opts.external_downloader_args.pop('default')) if report_args_compat('post-processor', opts.postprocessor_args, 'default-compat', 'default'): opts.postprocessor_args['default'] = opts.postprocessor_args.pop('default-compat') opts.postprocessor_args.setdefault('sponskrub', []) def report_conflict(arg1, opt1, arg2='--allow-unplayable-formats', opt2='allow_unplayable_formats', val1=NO_DEFAULT, val2=NO_DEFAULT, default=False): if val2 is NO_DEFAULT: val2 = getattr(opts, opt2) if not val2: return if val1 is NO_DEFAULT: val1 = getattr(opts, opt1) if val1: warnings.append(f'{arg1} is ignored since {arg2} was given') setattr(opts, opt1, default) # Conflicting options report_conflict('--playlist-reverse', 'playlist_reverse', '--playlist-random', 'playlist_random') report_conflict('--playlist-reverse', 'playlist_reverse', '--lazy-playlist', 'lazy_playlist') report_conflict('--playlist-random', 'playlist_random', '--lazy-playlist', 'lazy_playlist') report_conflict('--dateafter', 'dateafter', '--date', 'date', default=None) report_conflict('--datebefore', 'datebefore', '--date', 'date', default=None) report_conflict('--exec-before-download', 'exec_before_dl_cmd', '"--exec before_dl:"', 'exec_cmd', val2=opts.exec_cmd.get('before_dl')) report_conflict('--id', 'useid', '--output', 'outtmpl', val2=opts.outtmpl.get('default')) report_conflict('--remux-video', 'remuxvideo', '--recode-video', 'recodevideo') report_conflict('--sponskrub', 'sponskrub', '--remove-chapters', 'remove_chapters') report_conflict('--sponskrub', 'sponskrub', '--sponsorblock-mark', 'sponsorblock_mark') report_conflict('--sponskrub', 'sponskrub', '--sponsorblock-remove', 'sponsorblock_remove') report_conflict('--sponskrub-cut', 'sponskrub_cut', '--split-chapter', 'split_chapters', val1=opts.sponskrub and opts.sponskrub_cut) # Conflicts with --allow-unplayable-formats report_conflict('--add-metadata', 'addmetadata') report_conflict('--embed-chapters', 'addchapters') report_conflict('--embed-info-json', 'embed_infojson') report_conflict('--embed-subs', 'embedsubtitles') report_conflict('--embed-thumbnail', 'embedthumbnail') report_conflict('--extract-audio', 'extractaudio') report_conflict('--fixup', 'fixup', val1=opts.fixup not in (None, 'never', 'ignore'), default='never') report_conflict('--recode-video', 'recodevideo') report_conflict('--remove-chapters', 'remove_chapters', default=[]) report_conflict('--remux-video', 'remuxvideo') report_conflict('--sponskrub', 'sponskrub') report_conflict('--sponsorblock-remove', 'sponsorblock_remove', default=set()) report_conflict('--xattrs', 'xattrs') # Fully deprecated options def report_deprecation(val, old, new=None): if not val: return deprecation_warnings.append( f'{old} is deprecated and may be removed in a future version. 
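# --- Illustrative sketch of the conflict reporter above (hypothetical
# `_demo_report_conflict`, simplified from the getattr/setattr version): when
# the overriding option is set, the weaker one is warned about and reset to
# its default so later code sees a consistent configuration:
def _demo_report_conflict(opts, warnings, arg1, opt1, arg2, opt2, default=False):
    if opts.get(opt2) and opts.get(opt1):
        warnings.append(f'{arg1} is ignored since {arg2} was given')
        opts[opt1] = default

# opts, w = {'playlist_reverse': True, 'playlist_random': True}, []
# _demo_report_conflict(opts, w, '--playlist-reverse', 'playlist_reverse',
#                       '--playlist-random', 'playlist_random')
# -> opts['playlist_reverse'] is now False, w holds one warning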
Use {new} instead' if new else f'{old} is deprecated and may not work as expected') report_deprecation(opts.sponskrub, '--sponskrub', '--sponsorblock-mark or --sponsorblock-remove') report_deprecation(not opts.prefer_ffmpeg, '--prefer-avconv', 'ffmpeg') # report_deprecation(opts.include_ads, '--include-ads') # We may re-implement this in future # report_deprecation(opts.call_home, '--call-home') # We may re-implement this in future # report_deprecation(opts.writeannotations, '--write-annotations') # It's just that no website has it # Dependent options opts.date = DateRange.day(opts.date) if opts.date else DateRange(opts.dateafter, opts.datebefore) if opts.exec_before_dl_cmd: opts.exec_cmd['before_dl'] = opts.exec_before_dl_cmd if opts.useid: # --id is not deprecated in youtube-dl opts.outtmpl['default'] = '%(id)s.%(ext)s' if opts.overwrites: # --force-overwrites implies --no-continue opts.continue_dl = False if (opts.addmetadata or opts.sponsorblock_mark) and opts.addchapters is None: # Add chapters when adding metadata or marking sponsors opts.addchapters = True if opts.extractaudio and not opts.keepvideo and opts.format is None: # Do not unnecessarily download audio opts.format = 'bestaudio/best' if opts.getcomments and opts.writeinfojson is None and not opts.embed_infojson: # If JSON is not printed anywhere, but comments are requested, save it to file if not opts.dumpjson or opts.print_json or opts.dump_single_json: opts.writeinfojson = True if opts.allsubtitles and not (opts.embedsubtitles or opts.writeautomaticsub): # --all-sub automatically sets --write-sub if --write-auto-sub is not given opts.writesubtitles = True if opts.addmetadata and opts.embed_infojson is None: # If embedding metadata and infojson is present, embed it opts.embed_infojson = 'if_exists' # Ask for passwords if opts.username is not None and opts.password is None: opts.password = getpass.getpass('Type account password and press [Return]: ') if opts.ap_username is not None and opts.ap_password is None: opts.ap_password = getpass.getpass('Type TV provider account password and press [Return]: ') return warnings, deprecation_warnings def get_postprocessors(opts): yield from opts.add_postprocessors if opts.parse_metadata: yield { 'key': 'MetadataParser', 'actions': opts.parse_metadata, 'when': 'pre_process' } sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove if sponsorblock_query: yield { 'key': 'SponsorBlock', 'categories': sponsorblock_query, 'api': opts.sponsorblock_api, 'when': 'after_filter' } if opts.convertsubtitles: yield { 'key': 'FFmpegSubtitlesConvertor', 'format': opts.convertsubtitles, 'when': 'before_dl' } if opts.convertthumbnails: yield { 'key': 'FFmpegThumbnailsConvertor', 'format': opts.convertthumbnails, 'when': 'before_dl' } if opts.extractaudio: yield { 'key': 'FFmpegExtractAudio', 'preferredcodec': opts.audioformat, 'preferredquality': opts.audioquality, 'nopostoverwrites': opts.nopostoverwrites, } if opts.remuxvideo: yield { 'key': 'FFmpegVideoRemuxer', 'preferedformat': opts.remuxvideo, } if opts.recodevideo: yield { 'key': 'FFmpegVideoConvertor', 'preferedformat': opts.recodevideo, } # If ModifyChapters is going to remove chapters, subtitles must already be in the container. 
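# --- Illustrative sketch of the dependent-option cascade above (hypothetical
# `_demo_dependent_opts`, covering two of the rules): several defaults are
# derived from other flags rather than being set independently:
def _demo_dependent_opts(opts):
    if opts.get('overwrites'):        # --force-overwrites implies --no-continue
        opts['continue_dl'] = False
    if opts.get('extractaudio') and not opts.get('keepvideo') and opts.get('format') is None:
        opts['format'] = 'bestaudio/best'   # avoid downloading video needlessly
    return opts

# _demo_dependent_opts({'extractaudio': True})['format'] -> 'bestaudio/best'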
if opts.embedsubtitles: keep_subs = 'no-keep-subs' not in opts.compat_opts yield { 'key': 'FFmpegEmbedSubtitle', # already_have_subtitle = True prevents the file from being deleted after embedding 'already_have_subtitle': opts.writesubtitles and keep_subs } if not opts.writeautomaticsub and keep_subs: opts.writesubtitles = True # ModifyChapters must run before FFmpegMetadataPP if opts.remove_chapters or sponsorblock_query: yield { 'key': 'ModifyChapters', 'remove_chapters_patterns': opts.remove_chapters, 'remove_sponsor_segments': opts.sponsorblock_remove, 'remove_ranges': opts.remove_ranges, 'sponsorblock_chapter_title': opts.sponsorblock_chapter_title, 'force_keyframes': opts.force_keyframes_at_cuts } # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and # FFmpegExtractAudioPP as containers before conversion may not support # metadata (3gp, webm, etc.) # By default ffmpeg preserves metadata applicable for both # source and target containers. From this point the container won't change, # so metadata can be added here. if opts.addmetadata or opts.addchapters or opts.embed_infojson: yield { 'key': 'FFmpegMetadata', 'add_chapters': opts.addchapters, 'add_metadata': opts.addmetadata, 'add_infojson': opts.embed_infojson, } # Deprecated # This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment # but must be below EmbedSubtitle and FFmpegMetadata # See https://github.com/yt-dlp/yt-dlp/issues/204 , https://github.com/faissaloo/SponSkrub/issues/29 # If opts.sponskrub is None, sponskrub is used, but it silently fails if the executable can't be found if opts.sponskrub is not False: yield { 'key': 'SponSkrub', 'path': opts.sponskrub_path, 'args': opts.sponskrub_args, 'cut': opts.sponskrub_cut, 'force': opts.sponskrub_force, 'ignoreerror': opts.sponskrub is None, '_from_cli': True, } if opts.embedthumbnail: yield { 'key': 'EmbedThumbnail', # already_have_thumbnail = True prevents the file from being deleted after embedding 'already_have_thumbnail': opts.writethumbnail } if not opts.writethumbnail: opts.writethumbnail = True opts.outtmpl['pl_thumbnail'] = '' if opts.split_chapters: yield { 'key': 'FFmpegSplitChapters', 'force_keyframes': opts.force_keyframes_at_cuts, } # XAttrMetadataPP should be run after post-processors that may change file contents if opts.xattrs: yield {'key': 'XAttrMetadata'} if opts.concat_playlist != 'never': yield { 'key': 'FFmpegConcat', 'only_multi_video': opts.concat_playlist != 'always', 'when': 'playlist', } # Exec must be the last PP of each category for when, exec_cmd in opts.exec_cmd.items(): yield { 'key': 'Exec', 'exec_cmd': exec_cmd, 'when': when, } ParsedOptions = collections.namedtuple('ParsedOptions', ('parser', 'options', 'urls', 'ydl_opts')) def parse_options(argv=None): """@returns ParsedOptions(parser, opts, urls, ydl_opts)""" parser, opts, urls = parseOpts(argv) urls = get_urls(urls, opts.batchfile, opts.verbose) set_compat_opts(opts) try: warnings, deprecation_warnings = validate_options(opts) except ValueError as err: parser.error(f'{err}\n') postprocessors = list(get_postprocessors(opts)) print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[2:]) any_getting = any(getattr(opts, k) for k in ( 'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename', 'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl' )) playlist_pps = [pp for pp in postprocessors if pp.get('when') == 'playlist'] write_playlist_infojson = (opts.writeinfojson and not 
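# --- Illustrative sketch of the ordering constraints encoded by the generator
# above (hypothetical `_demo_pp_chain`; heavily condensed): postprocessors are
# yielded in dependency order -- e.g. ModifyChapters before FFmpegMetadata, and
# Exec last -- and the consumer just materialises the pipeline with list():
def _demo_pp_chain(remove_chapters=True, add_metadata=True, exec_cmd=None):
    if remove_chapters:
        yield {'key': 'ModifyChapters'}    # must run before FFmpegMetadata
    if add_metadata:
        yield {'key': 'FFmpegMetadata'}
    if exec_cmd:
        yield {'key': 'Exec', 'exec_cmd': exec_cmd}   # always the last PP

# list(_demo_pp_chain(exec_cmd='echo done')) -> ordered postprocessor specs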
opts.clean_infojson and opts.allow_playlist_files and opts.outtmpl.get('pl_infojson') != '') if not any(( opts.extract_flat, opts.dump_single_json, opts.forceprint.get('playlist'), opts.print_to_file.get('playlist'), write_playlist_infojson, )): if not playlist_pps: opts.extract_flat = 'discard' elif playlist_pps == [{'key': 'FFmpegConcat', 'only_multi_video': True, 'when': 'playlist'}]: opts.extract_flat = 'discard_in_playlist' final_ext = ( opts.recodevideo if opts.recodevideo in FFmpegVideoConvertorPP.SUPPORTED_EXTS else opts.remuxvideo if opts.remuxvideo in FFmpegVideoRemuxerPP.SUPPORTED_EXTS else opts.audioformat if (opts.extractaudio and opts.audioformat in FFmpegExtractAudioPP.SUPPORTED_EXTS) else None) return ParsedOptions(parser, opts, urls, { 'usenetrc': opts.usenetrc, 'netrc_location': opts.netrc_location, 'username': opts.username, 'password': opts.password, 'twofactor': opts.twofactor, 'videopassword': opts.videopassword, 'ap_mso': opts.ap_mso, 'ap_username': opts.ap_username, 'ap_password': opts.ap_password, 'client_certificate': opts.client_certificate, 'client_certificate_key': opts.client_certificate_key, 'client_certificate_password': opts.client_certificate_password, 'quiet': opts.quiet or any_getting or opts.print_json or bool(opts.forceprint), 'no_warnings': opts.no_warnings, 'forceurl': opts.geturl, 'forcetitle': opts.gettitle, 'forceid': opts.getid, 'forcethumbnail': opts.getthumbnail, 'forcedescription': opts.getdescription, 'forceduration': opts.getduration, 'forcefilename': opts.getfilename, 'forceformat': opts.getformat, 'forceprint': opts.forceprint, 'print_to_file': opts.print_to_file, 'forcejson': opts.dumpjson or opts.print_json, 'dump_single_json': opts.dump_single_json, 'force_write_download_archive': opts.force_write_download_archive, 'simulate': (print_only or any_getting or None) if opts.simulate is None else opts.simulate, 'skip_download': opts.skip_download, 'format': opts.format, 'allow_unplayable_formats': opts.allow_unplayable_formats, 'ignore_no_formats_error': opts.ignore_no_formats_error, 'format_sort': opts.format_sort, 'format_sort_force': opts.format_sort_force, 'allow_multiple_video_streams': opts.allow_multiple_video_streams, 'allow_multiple_audio_streams': opts.allow_multiple_audio_streams, 'check_formats': opts.check_formats, 'listformats': opts.listformats, 'listformats_table': opts.listformats_table, 'outtmpl': opts.outtmpl, 'outtmpl_na_placeholder': opts.outtmpl_na_placeholder, 'paths': opts.paths, 'autonumber_size': opts.autonumber_size, 'autonumber_start': opts.autonumber_start, 'restrictfilenames': opts.restrictfilenames, 'windowsfilenames': opts.windowsfilenames, 'ignoreerrors': opts.ignoreerrors, 'force_generic_extractor': opts.force_generic_extractor, 'ratelimit': opts.ratelimit, 'throttledratelimit': opts.throttledratelimit, 'overwrites': opts.overwrites, 'retries': opts.retries, 'file_access_retries': opts.file_access_retries, 'fragment_retries': opts.fragment_retries, 'extractor_retries': opts.extractor_retries, 'retry_sleep_functions': opts.retry_sleep, 'skip_unavailable_fragments': opts.skip_unavailable_fragments, 'keep_fragments': opts.keep_fragments, 'concurrent_fragment_downloads': opts.concurrent_fragment_downloads, 'buffersize': opts.buffersize, 'noresizebuffer': opts.noresizebuffer, 'http_chunk_size': opts.http_chunk_size, 'continuedl': opts.continue_dl, 'noprogress': opts.quiet if opts.noprogress is None else opts.noprogress, 'progress_with_newline': opts.progress_with_newline, 'progress_template': 
opts.progress_template, 'playliststart': opts.playliststart, 'playlistend': opts.playlistend, 'playlistreverse': opts.playlist_reverse, 'playlistrandom': opts.playlist_random, 'lazy_playlist': opts.lazy_playlist, 'noplaylist': opts.noplaylist, 'logtostderr': opts.outtmpl.get('default') == '-', 'consoletitle': opts.consoletitle, 'nopart': opts.nopart, 'updatetime': opts.updatetime, 'writedescription': opts.writedescription, 'writeannotations': opts.writeannotations, 'writeinfojson': opts.writeinfojson, 'allow_playlist_files': opts.allow_playlist_files, 'clean_infojson': opts.clean_infojson, 'getcomments': opts.getcomments, 'writethumbnail': opts.writethumbnail is True, 'write_all_thumbnails': opts.writethumbnail == 'all', 'writelink': opts.writelink, 'writeurllink': opts.writeurllink, 'writewebloclink': opts.writewebloclink, 'writedesktoplink': opts.writedesktoplink, 'writesubtitles': opts.writesubtitles, 'writeautomaticsub': opts.writeautomaticsub, 'allsubtitles': opts.allsubtitles, 'listsubtitles': opts.listsubtitles, 'subtitlesformat': opts.subtitlesformat, 'subtitleslangs': opts.subtitleslangs, 'matchtitle': decodeOption(opts.matchtitle), 'rejecttitle': decodeOption(opts.rejecttitle), 'max_downloads': opts.max_downloads, 'prefer_free_formats': opts.prefer_free_formats, 'trim_file_name': opts.trim_file_name, 'verbose': opts.verbose, 'dump_intermediate_pages': opts.dump_intermediate_pages, 'write_pages': opts.write_pages, 'load_pages': opts.load_pages, 'test': opts.test, 'keepvideo': opts.keepvideo, 'min_filesize': opts.min_filesize, 'max_filesize': opts.max_filesize, 'min_views': opts.min_views, 'max_views': opts.max_views, 'daterange': opts.date, 'cachedir': opts.cachedir, 'youtube_print_sig_code': opts.youtube_print_sig_code, 'age_limit': opts.age_limit, 'download_archive': opts.download_archive, 'break_on_existing': opts.break_on_existing, 'break_on_reject': opts.break_on_reject, 'break_per_url': opts.break_per_url, 'skip_playlist_after_errors': opts.skip_playlist_after_errors, 'cookiefile': opts.cookiefile, 'cookiesfrombrowser': opts.cookiesfrombrowser, 'legacyserverconnect': opts.legacy_server_connect, 'nocheckcertificate': opts.no_check_certificate, 'prefer_insecure': opts.prefer_insecure, 'http_headers': opts.headers, 'proxy': opts.proxy, 'socket_timeout': opts.socket_timeout, 'bidi_workaround': opts.bidi_workaround, 'debug_printtraffic': opts.debug_printtraffic, 'prefer_ffmpeg': opts.prefer_ffmpeg, 'include_ads': opts.include_ads, 'default_search': opts.default_search, 'dynamic_mpd': opts.dynamic_mpd, 'extractor_args': opts.extractor_args, 'youtube_include_dash_manifest': opts.youtube_include_dash_manifest, 'youtube_include_hls_manifest': opts.youtube_include_hls_manifest, 'encoding': opts.encoding, 'extract_flat': opts.extract_flat, 'live_from_start': opts.live_from_start, 'wait_for_video': opts.wait_for_video, 'mark_watched': opts.mark_watched, 'merge_output_format': opts.merge_output_format, 'final_ext': final_ext, 'postprocessors': postprocessors, 'fixup': opts.fixup, 'source_address': opts.source_address, 'call_home': opts.call_home, 'sleep_interval_requests': opts.sleep_interval_requests, 'sleep_interval': opts.sleep_interval, 'max_sleep_interval': opts.max_sleep_interval, 'sleep_interval_subtitles': opts.sleep_interval_subtitles, 'external_downloader': opts.external_downloader, 'download_ranges': opts.download_ranges, 'force_keyframes_at_cuts': opts.force_keyframes_at_cuts, 'list_thumbnails': opts.list_thumbnails, 'playlist_items': opts.playlist_items, 
'xattr_set_filesize': opts.xattr_set_filesize, 'match_filter': opts.match_filter, 'no_color': opts.no_color, 'ffmpeg_location': opts.ffmpeg_location, 'hls_prefer_native': opts.hls_prefer_native, 'hls_use_mpegts': opts.hls_use_mpegts, 'hls_split_discontinuity': opts.hls_split_discontinuity, 'external_downloader_args': opts.external_downloader_args, 'postprocessor_args': opts.postprocessor_args, 'cn_verification_proxy': opts.cn_verification_proxy, 'geo_verification_proxy': opts.geo_verification_proxy, 'geo_bypass': opts.geo_bypass, 'geo_bypass_country': opts.geo_bypass_country, 'geo_bypass_ip_block': opts.geo_bypass_ip_block, '_warnings': warnings, '_deprecation_warnings': deprecation_warnings, 'compat_opts': opts.compat_opts, }) def _real_main(argv=None): setproctitle('yt-dlp') parser, opts, all_urls, ydl_opts = parse_options(argv) # Dump user agent if opts.dump_user_agent: ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent']) write_string(f'{ua}\n', out=sys.stdout) return if print_extractor_information(opts, all_urls): return # We may need ffmpeg_location without having access to the YoutubeDL instance # See https://github.com/yt-dlp/yt-dlp/issues/2191 if opts.ffmpeg_location: FFmpegPostProcessor._ffmpeg_location.set(opts.ffmpeg_location) with YoutubeDL(ydl_opts) as ydl: pre_process = opts.update_self or opts.rm_cachedir actual_use = all_urls or opts.load_info_filename if opts.rm_cachedir: ydl.cache.remove() updater = Updater(ydl) if opts.update_self and updater.update() and actual_use: if updater.cmd: return updater.restart() # This code is reachable only for zip variant in py < 3.10 # It makes sense to exit here, but the old behavior is to continue ydl.report_warning('Restart yt-dlp to use the updated version') # return 100, 'ERROR: The program must exit for the update to complete' if not actual_use: if pre_process: return ydl._download_retcode ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv) parser.error( 'You must provide at least one URL.\n' 'Type yt-dlp --help to see a list of all options.') parser.destroy() try: if opts.load_info_filename is not None: return ydl.download_with_info_file(expand_path(opts.load_info_filename)) else: return ydl.download(all_urls) except DownloadCancelled: ydl.to_screen('Aborting remaining downloads') return 101 def main(argv=None): try: _exit(*variadic(_real_main(argv))) except DownloadError: _exit(1) except SameFileError as e: _exit(f'ERROR: {e}') except KeyboardInterrupt: _exit('\nERROR: Interrupted by user') except BrokenPipeError as e: # https://docs.python.org/3/library/signal.html#note-on-sigpipe devnull = os.open(os.devnull, os.O_WRONLY) os.dup2(devnull, sys.stdout.fileno()) _exit(f'\nERROR: {e}') except optparse.OptParseError as e: _exit(2, f'\n{e}') from .extractor import gen_extractors, list_extractors __all__ = [ 'main', 'YoutubeDL', 'parse_options', 'gen_extractors', 'list_extractors', ] yt-dlp-2022.08.19/yt_dlp/__main__.py000066400000000000000000000005471427755243700167430ustar00rootroot00000000000000#!/usr/bin/env python3 # Execute with # $ python -m yt_dlp import sys if __package__ is None and not hasattr(sys, 'frozen'): # direct call of __main__.py import os.path path = os.path.realpath(os.path.abspath(__file__)) sys.path.insert(0, os.path.dirname(os.path.dirname(path))) import yt_dlp if __name__ == '__main__': yt_dlp.main() yt-dlp-2022.08.19/yt_dlp/aes.py000066400000000000000000000531501427755243700157710ustar00rootroot00000000000000import base64 from math import ceil from .compat 
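# --- Editorial aside (hedged example; not part of the original source) ---
# parse_options() is exported via __all__ above, so embedders can reuse the
# CLI-to-ydl_opts translation instead of building the options dict by hand.
# A minimal sketch ('URL' is a placeholder argument):
#
#   from yt_dlp import YoutubeDL, parse_options
#   _, _, urls, ydl_opts = parse_options(['-f', 'bestaudio', '--no-playlist', 'URL'])
#   with YoutubeDL(ydl_opts) as ydl:
#       ydl.download(urls)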
import compat_ord from .dependencies import Cryptodome_AES from .utils import bytes_to_intlist, intlist_to_bytes if Cryptodome_AES: def aes_cbc_decrypt_bytes(data, key, iv): """ Decrypt bytes with AES-CBC using pycryptodome """ return Cryptodome_AES.new(key, Cryptodome_AES.MODE_CBC, iv).decrypt(data) def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce): """ Decrypt bytes with AES-GCM using pycryptodome """ return Cryptodome_AES.new(key, Cryptodome_AES.MODE_GCM, nonce).decrypt_and_verify(data, tag) else: def aes_cbc_decrypt_bytes(data, key, iv): """ Decrypt bytes with AES-CBC using native implementation since pycryptodome is unavailable """ return intlist_to_bytes(aes_cbc_decrypt(*map(bytes_to_intlist, (data, key, iv)))) def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce): """ Decrypt bytes with AES-GCM using native implementation since pycryptodome is unavailable """ return intlist_to_bytes(aes_gcm_decrypt_and_verify(*map(bytes_to_intlist, (data, key, tag, nonce)))) def aes_cbc_encrypt_bytes(data, key, iv, **kwargs): return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs)) def unpad_pkcs7(data): return data[:-compat_ord(data[-1])] BLOCK_SIZE_BYTES = 16 def pad_block(block, padding_mode): """ Pad a block with the given padding mode @param {int[]} block block to pad @param padding_mode padding mode """ padding_size = BLOCK_SIZE_BYTES - len(block) PADDING_BYTE = { 'pkcs7': padding_size, 'iso7816': 0x0, 'whitespace': 0x20, 'zero': 0x0, } if padding_size < 0: raise ValueError('Block size exceeded') elif padding_mode not in PADDING_BYTE: raise NotImplementedError(f'Padding mode {padding_mode} is not implemented') if padding_mode == 'iso7816' and padding_size: block = block + [0x80] # NB: += mutates list padding_size -= 1 return block + [PADDING_BYTE[padding_mode]] * padding_size def aes_ecb_encrypt(data, key, iv=None): """ Encrypt with aes in ECB mode @param {int[]} data cleartext @param {int[]} key 16/24/32-Byte cipher key @param {int[]} iv Unused for this mode @returns {int[]} encrypted data """ expanded_key = key_expansion(key) block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) encrypted_data = [] for i in range(block_count): block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] encrypted_data += aes_encrypt(block, expanded_key) encrypted_data = encrypted_data[:len(data)] return encrypted_data def aes_ecb_decrypt(data, key, iv=None): """ Decrypt with aes in ECB mode @param {int[]} data cleartext @param {int[]} key 16/24/32-Byte cipher key @param {int[]} iv Unused for this mode @returns {int[]} decrypted data """ expanded_key = key_expansion(key) block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) encrypted_data = [] for i in range(block_count): block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] encrypted_data += aes_decrypt(block, expanded_key) encrypted_data = encrypted_data[:len(data)] return encrypted_data def aes_ctr_decrypt(data, key, iv): """ Decrypt with aes in counter mode @param {int[]} data cipher @param {int[]} key 16/24/32-Byte cipher key @param {int[]} iv 16-Byte initialization vector @returns {int[]} decrypted data """ return aes_ctr_encrypt(data, key, iv) def aes_ctr_encrypt(data, key, iv): """ Encrypt with aes in counter mode @param {int[]} data cleartext @param {int[]} key 16/24/32-Byte cipher key @param {int[]} iv 16-Byte initialization vector @returns {int[]} encrypted data """ expanded_key = key_expansion(key) block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) counter = 
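# --- Editorial aside (hedged example; not part of the original source) ---
# Round trip for the byte-level CBC helpers defined above; pkcs7 is the
# default padding_mode of aes_cbc_encrypt, so unpad_pkcs7 undoes it. A sketch
# with a throwaway all-zero key/IV:
#
#   from yt_dlp.aes import aes_cbc_decrypt_bytes, aes_cbc_encrypt_bytes, unpad_pkcs7
#   key = iv = b'\x00' * 16
#   ct = aes_cbc_encrypt_bytes(b'secret message', key, iv)
#   assert unpad_pkcs7(aes_cbc_decrypt_bytes(ct, key, iv)) == b'secret message'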
iter_vector(iv) encrypted_data = [] for i in range(block_count): counter_block = next(counter) block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] block += [0] * (BLOCK_SIZE_BYTES - len(block)) cipher_counter_block = aes_encrypt(counter_block, expanded_key) encrypted_data += xor(block, cipher_counter_block) encrypted_data = encrypted_data[:len(data)] return encrypted_data def aes_cbc_decrypt(data, key, iv): """ Decrypt with aes in CBC mode @param {int[]} data cipher @param {int[]} key 16/24/32-Byte cipher key @param {int[]} iv 16-Byte IV @returns {int[]} decrypted data """ expanded_key = key_expansion(key) block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) decrypted_data = [] previous_cipher_block = iv for i in range(block_count): block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] block += [0] * (BLOCK_SIZE_BYTES - len(block)) decrypted_block = aes_decrypt(block, expanded_key) decrypted_data += xor(decrypted_block, previous_cipher_block) previous_cipher_block = block decrypted_data = decrypted_data[:len(data)] return decrypted_data def aes_cbc_encrypt(data, key, iv, *, padding_mode='pkcs7'): """ Encrypt with aes in CBC mode @param {int[]} data cleartext @param {int[]} key 16/24/32-Byte cipher key @param {int[]} iv 16-Byte IV @param padding_mode Padding mode to use @returns {int[]} encrypted data """ expanded_key = key_expansion(key) block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) encrypted_data = [] previous_cipher_block = iv for i in range(block_count): block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] block = pad_block(block, padding_mode) mixed_block = xor(block, previous_cipher_block) encrypted_block = aes_encrypt(mixed_block, expanded_key) encrypted_data += encrypted_block previous_cipher_block = encrypted_block return encrypted_data def aes_gcm_decrypt_and_verify(data, key, tag, nonce): """ Decrypt with aes in GCM mode and check authenticity using tag @param {int[]} data cipher @param {int[]} key 16-Byte cipher key @param {int[]} tag authentication tag @param {int[]} nonce IV (recommended 12-Byte) @returns {int[]} decrypted data """ # XXX: check aes, gcm param hash_subkey = aes_encrypt([0] * BLOCK_SIZE_BYTES, key_expansion(key)) if len(nonce) == 12: j0 = nonce + [0, 0, 0, 1] else: fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8 ghash_in = nonce + [0] * fill + bytes_to_intlist((8 * len(nonce)).to_bytes(8, 'big')) j0 = ghash(hash_subkey, ghash_in) # TODO: add nonce support to aes_ctr_decrypt # nonce_ctr = j0[:12] iv_ctr = inc(j0) decrypted_data = aes_ctr_decrypt(data, key, iv_ctr + [0] * (BLOCK_SIZE_BYTES - len(iv_ctr))) pad_len = len(data) // 16 * 16 s_tag = ghash( hash_subkey, data + [0] * (BLOCK_SIZE_BYTES - len(data) + pad_len) # pad + bytes_to_intlist((0 * 8).to_bytes(8, 'big') # length of associated data + ((len(data) * 8).to_bytes(8, 'big'))) # length of data ) if tag != aes_ctr_encrypt(s_tag, key, j0): raise ValueError("Mismatching authentication tag") return decrypted_data def aes_encrypt(data, expanded_key): """ Encrypt one block with aes @param {int[]} data 16-Byte state @param {int[]} expanded_key 176/208/240-Byte expanded key @returns {int[]} 16-Byte cipher """ rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) for i in range(1, rounds + 1): data = sub_bytes(data) data = shift_rows(data) if i != rounds: data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX)) data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) *
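# --- Editorial aside (hedged example; not part of the original source) ---
# The pure-python GCM path above can be sanity-checked against pycryptodome,
# assumed installed here only to produce a valid ciphertext/tag pair (yt-dlp
# itself treats it as optional):
#
#   from Crypto.Cipher import AES  # Cryptodome.Cipher on the pycryptodomex variant
#   from yt_dlp.aes import aes_gcm_decrypt_and_verify
#   from yt_dlp.utils import bytes_to_intlist, intlist_to_bytes
#   key, nonce = b'k' * 16, b'n' * 12
#   ct, tag = AES.new(key, AES.MODE_GCM, nonce=nonce).encrypt_and_digest(b'hello world!')
#   pt = aes_gcm_decrypt_and_verify(*map(bytes_to_intlist, (ct, key, tag, nonce)))
#   assert intlist_to_bytes(pt) == b'hello world!'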
BLOCK_SIZE_BYTES]) return data def aes_decrypt(data, expanded_key): """ Decrypt one block with aes @param {int[]} data 16-Byte cipher @param {int[]} expanded_key 176/208/240-Byte expanded key @returns {int[]} 16-Byte state """ rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 for i in range(rounds, 0, -1): data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]) if i != rounds: data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX_INV)) data = shift_rows_inv(data) data = sub_bytes_inv(data) data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) return data def aes_decrypt_text(data, password, key_size_bytes): """ Decrypt text - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter - The cipher key is retrieved by encrypting the first 16 Byte of 'password' with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's) - Mode of operation is 'counter' @param {str} data Base64 encoded string @param {str,unicode} password Password (will be encoded with utf-8) @param {int} key_size_bytes Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit @returns {str} Decrypted data """ NONCE_LENGTH_BYTES = 8 data = bytes_to_intlist(base64.b64decode(data)) password = bytes_to_intlist(password.encode()) key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password)) key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES) nonce = data[:NONCE_LENGTH_BYTES] cipher = data[NONCE_LENGTH_BYTES:] decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)) plaintext = intlist_to_bytes(decrypted_data) return plaintext RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36) SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16) SBOX_INV = (0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 
0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d) MIX_COLUMN_MATRIX = ((0x2, 0x3, 0x1, 0x1), (0x1, 0x2, 0x3, 0x1), (0x1, 0x1, 0x2, 0x3), (0x3, 0x1, 0x1, 0x2)) MIX_COLUMN_MATRIX_INV = ((0xE, 0xB, 0xD, 0x9), (0x9, 0xE, 0xB, 0xD), (0xD, 0x9, 0xE, 0xB), (0xB, 0xD, 0x9, 0xE)) RIJNDAEL_EXP_TABLE = (0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35, 0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA, 0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31, 0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC, 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD, 0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7, 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88, 0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F, 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A, 0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0, 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3, 0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC, 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0, 0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2, 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41, 0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0, 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75, 0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E, 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80, 0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF, 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54, 0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09, 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA, 0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91, 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E, 0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C, 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17, 0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD, 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01) RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03, 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1, 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78, 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e, 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, 0x13, 0x5c, 0xd2, 0xf1, 
0x40, 0x46, 0x83, 0x38, 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10, 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba, 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57, 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8, 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0, 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7, 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d, 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1, 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab, 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5, 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07) def key_expansion(data): """ Generate key schedule @param {int[]} data 16/24/32-Byte cipher key @returns {int[]} 176/208/240-Byte expanded key """ data = data[:] # copy rcon_iteration = 1 key_size_bytes = len(data) expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES while len(data) < expanded_key_size_bytes: temp = data[-4:] temp = key_schedule_core(temp, rcon_iteration) rcon_iteration += 1 data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) for _ in range(3): temp = data[-4:] data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) if key_size_bytes == 32: temp = data[-4:] temp = sub_bytes(temp) data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0): temp = data[-4:] data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) data = data[:expanded_key_size_bytes] return data def iter_vector(iv): while True: yield iv iv = inc(iv) def sub_bytes(data): return [SBOX[x] for x in data] def sub_bytes_inv(data): return [SBOX_INV[x] for x in data] def rotate(data): return data[1:] + [data[0]] def key_schedule_core(data, rcon_iteration): data = rotate(data) data = sub_bytes(data) data[0] = data[0] ^ RCON[rcon_iteration] return data def xor(data1, data2): return [x ^ y for x, y in zip(data1, data2)] def iter_mix_columns(data, matrix): for i in (0, 4, 8, 12): for row in matrix: mixed = 0 for j in range(4): # xor is (+) and (-) mixed ^= (0 if data[i:i + 4][j] == 0 or row[j] == 0 else RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[data[i + j]] + RIJNDAEL_LOG_TABLE[row[j]]) % 0xFF]) yield mixed def shift_rows(data): return [data[((column + row) & 0b11) * 4 + row] for column in range(4) for row in range(4)] def shift_rows_inv(data): return [data[((column - row) & 0b11) * 4 + row] for column in range(4) for row in range(4)] def shift_block(data): data_shifted = [] bit = 0 for n in data: if bit: n |= 0x100 bit = n & 1 n >>= 1 data_shifted.append(n) return data_shifted def inc(data): data = data[:] # copy for i in range(len(data) - 1, -1, -1): if data[i] == 255: data[i] = 0 else: data[i] = data[i] + 1 break return data def block_product(block_x, block_y): # NIST SP 800-38D, Algorithm 1 if len(block_x) != BLOCK_SIZE_BYTES or len(block_y) != BLOCK_SIZE_BYTES: raise ValueError("Length of blocks need to be %d bytes" % BLOCK_SIZE_BYTES) block_r = [0xE1] + [0] * (BLOCK_SIZE_BYTES - 1) block_v = block_y[:] block_z = [0] * BLOCK_SIZE_BYTES for i 
in block_x: for bit in range(7, -1, -1): if i & (1 << bit): block_z = xor(block_z, block_v) do_xor = block_v[-1] & 1 block_v = shift_block(block_v) if do_xor: block_v = xor(block_v, block_r) return block_z def ghash(subkey, data): # NIST SP 800-38D, Algorithm 2 if len(data) % BLOCK_SIZE_BYTES: raise ValueError("Length of data should be a multiple of %d bytes" % BLOCK_SIZE_BYTES) last_y = [0] * BLOCK_SIZE_BYTES for i in range(0, len(data), BLOCK_SIZE_BYTES): block = data[i: i + BLOCK_SIZE_BYTES] last_y = block_product(xor(last_y, block), subkey) return last_y __all__ = [ 'aes_cbc_decrypt', 'aes_cbc_decrypt_bytes', 'aes_ctr_decrypt', 'aes_decrypt_text', 'aes_decrypt', 'aes_ecb_decrypt', 'aes_gcm_decrypt_and_verify', 'aes_gcm_decrypt_and_verify_bytes', 'aes_cbc_encrypt', 'aes_cbc_encrypt_bytes', 'aes_ctr_encrypt', 'aes_ecb_encrypt', 'aes_encrypt', 'key_expansion', 'pad_block', 'unpad_pkcs7', ] yt-dlp-2022.08.19/yt_dlp/cache.py000066400000000000000000000055621427755243700162700ustar00rootroot00000000000000import contextlib import errno import json import os import re import shutil import traceback from .utils import expand_path, write_json_file class Cache: def __init__(self, ydl): self._ydl = ydl def _get_root_dir(self): res = self._ydl.params.get('cachedir') if res is None: cache_root = os.getenv('XDG_CACHE_HOME', '~/.cache') res = os.path.join(cache_root, 'yt-dlp') return expand_path(res) def _get_cache_fn(self, section, key, dtype): assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \ 'invalid section %r' % section assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key return os.path.join( self._get_root_dir(), section, f'{key}.{dtype}') @property def enabled(self): return self._ydl.params.get('cachedir') is not False def store(self, section, key, data, dtype='json'): assert dtype in ('json',) if not self.enabled: return fn = self._get_cache_fn(section, key, dtype) try: try: os.makedirs(os.path.dirname(fn)) except OSError as ose: if ose.errno != errno.EEXIST: raise self._ydl.write_debug(f'Saving {section}.{key} to cache') write_json_file(data, fn) except Exception: tb = traceback.format_exc() self._ydl.report_warning(f'Writing cache to {fn!r} failed: {tb}') def load(self, section, key, dtype='json', default=None): assert dtype in ('json',) if not self.enabled: return default cache_fn = self._get_cache_fn(section, key, dtype) with contextlib.suppress(OSError): try: with open(cache_fn, encoding='utf-8') as cachef: self._ydl.write_debug(f'Loading {section}.{key} from cache') return json.load(cachef) except ValueError: try: file_size = os.path.getsize(cache_fn) except OSError as oe: file_size = str(oe) self._ydl.report_warning(f'Cache retrieval from {cache_fn} failed ({file_size})') return default def remove(self): if not self.enabled: self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)') return cachedir = self._get_root_dir() if not any((term in cachedir) for term in ('cache', 'tmp')): raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir) self._ydl.to_screen( 'Removing cache dir %s .'
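# --- Editorial aside (hedged example; not part of the original source) ---
# How extractor code typically uses this cache (the section/key names below
# are hypothetical; `ydl` is a live YoutubeDL instance):
#
#   ydl.cache.store('example-section', 'example-key', {'token': 'abc'})
#   ydl.cache.load('example-section', 'example-key', default=None)  # -> {'token': 'abc'}
#
# Both become no-ops (load returns `default`) under --no-cache-dir, because
# params['cachedir'] is then False and the `enabled` property above is False.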
% cachedir, skip_eol=True) if os.path.exists(cachedir): self._ydl.to_screen('.', skip_eol=True) shutil.rmtree(cachedir) self._ydl.to_screen('.') yt-dlp-2022.08.19/yt_dlp/compat/000077500000000000000000000000001427755243700161265ustar00rootroot00000000000000yt-dlp-2022.08.19/yt_dlp/compat/__init__.py000066400000000000000000000047601427755243700202460ustar00rootroot00000000000000import os import sys import warnings import xml.etree.ElementTree as etree from ._deprecated import * # noqa: F401, F403 from .compat_utils import passthrough_module # XXX: Implement this the same way as other DeprecationWarnings without circular import passthrough_module(__name__, '._legacy', callback=lambda attr: warnings.warn( DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=3)) # HTMLParseError has been deprecated in Python 3.3 and removed in # Python 3.5. Introducing dummy exception for Python >3.5 for compatible # and uniform cross-version exception handling class compat_HTMLParseError(Exception): pass class _TreeBuilder(etree.TreeBuilder): def doctype(self, name, pubid, system): pass def compat_etree_fromstring(text): return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) compat_os_name = os._name if os.name == 'java' else os.name if compat_os_name == 'nt': def compat_shlex_quote(s): import re return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"') else: from shlex import quote as compat_shlex_quote # noqa: F401 def compat_ord(c): return c if isinstance(c, int) else ord(c) if compat_os_name == 'nt' and sys.version_info < (3, 8): # os.path.realpath on Windows does not follow symbolic links # prior to Python 3.8 (see https://bugs.python.org/issue9949) def compat_realpath(path): while os.path.islink(path): path = os.path.abspath(os.readlink(path)) return os.path.realpath(path) else: compat_realpath = os.path.realpath # Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl # See https://github.com/yt-dlp/yt-dlp/issues/792 # https://docs.python.org/3/library/os.path.html#os.path.expanduser if compat_os_name in ('nt', 'ce'): def compat_expanduser(path): HOME = os.environ.get('HOME') if not HOME: return os.path.expanduser(path) elif not path.startswith('~'): return path i = path.replace('\\', '/', 1).find('/') # ~user if i < 0: i = len(path) userhome = os.path.join(os.path.dirname(HOME), path[1:i]) if i > 1 else HOME return userhome + path[i:] else: compat_expanduser = os.path.expanduser # NB: Add modules that are imported dynamically here so that PyInstaller can find them # See https://github.com/pyinstaller/pyinstaller-hooks-contrib/issues/438 if False: from . import _legacy # noqa: F401 yt-dlp-2022.08.19/yt_dlp/compat/_deprecated.py000066400000000000000000000006561427755243700207460ustar00rootroot00000000000000"""Deprecated - New code should avoid these""" import base64 import urllib.error import urllib.parse compat_str = str compat_b64decode = base64.b64decode compat_HTTPError = urllib.error.HTTPError compat_urlparse = urllib.parse compat_parse_qs = urllib.parse.parse_qs compat_urllib_parse_unquote = urllib.parse.unquote compat_urllib_parse_urlencode = urllib.parse.urlencode compat_urllib_parse_urlparse = urllib.parse.urlparse yt-dlp-2022.08.19/yt_dlp/compat/_legacy.py000066400000000000000000000061271427755243700201110ustar00rootroot00000000000000""" Do not use! 
""" import collections import ctypes import getpass import html.entities import html.parser import http.client import http.cookiejar import http.cookies import http.server import itertools import os import shlex import shutil import socket import struct import tokenize import urllib.error import urllib.parse import urllib.request import xml.etree.ElementTree as etree from subprocess import DEVNULL # isort: split import asyncio # noqa: F401 import re # noqa: F401 from asyncio import run as compat_asyncio_run # noqa: F401 from re import Pattern as compat_Pattern # noqa: F401 from re import match as compat_Match # noqa: F401 from .compat_utils import passthrough_module from ..dependencies import Cryptodome_AES as compat_pycrypto_AES # noqa: F401 from ..dependencies import brotli as compat_brotli # noqa: F401 from ..dependencies import websockets as compat_websockets # noqa: F401 passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode')) # compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE # will not work since ctypes.WINFUNCTYPE does not exist in UNIX machines def compat_ctypes_WINFUNCTYPE(*args, **kwargs): return ctypes.WINFUNCTYPE(*args, **kwargs) def compat_setenv(key, value, env=os.environ): env[key] = value compat_basestring = str compat_chr = chr compat_collections_abc = collections.abc compat_cookiejar = http.cookiejar compat_cookiejar_Cookie = http.cookiejar.Cookie compat_cookies = http.cookies compat_cookies_SimpleCookie = http.cookies.SimpleCookie compat_etree_Element = etree.Element compat_etree_register_namespace = etree.register_namespace compat_filter = filter compat_get_terminal_size = shutil.get_terminal_size compat_getenv = os.getenv compat_getpass = getpass.getpass compat_html_entities = html.entities compat_html_entities_html5 = html.entities.html5 compat_HTMLParser = html.parser.HTMLParser compat_http_client = http.client compat_http_server = http.server compat_input = input compat_integer_types = (int, ) compat_itertools_count = itertools.count compat_kwargs = lambda kwargs: kwargs compat_map = map compat_numeric_types = (int, float, complex) compat_print = print compat_shlex_split = shlex.split compat_socket_create_connection = socket.create_connection compat_Struct = struct.Struct compat_struct_pack = struct.pack compat_struct_unpack = struct.unpack compat_subprocess_get_DEVNULL = lambda: DEVNULL compat_tokenize_tokenize = tokenize.tokenize compat_urllib_error = urllib.error compat_urllib_parse = urllib.parse compat_urllib_parse_quote = urllib.parse.quote compat_urllib_parse_quote_plus = urllib.parse.quote_plus compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes compat_urllib_parse_urlunparse = urllib.parse.urlunparse compat_urllib_request = urllib.request compat_urllib_request_DataHandler = urllib.request.DataHandler compat_urllib_response = urllib.response compat_urlretrieve = urllib.request.urlretrieve compat_xml_parse_error = etree.ParseError compat_xpath = lambda xpath: xpath compat_zip = zip workaround_optparse_bug9161 = lambda: None yt-dlp-2022.08.19/yt_dlp/compat/compat_utils.py000066400000000000000000000043561427755243700212130ustar00rootroot00000000000000import collections import contextlib import importlib import sys import types _NO_ATTRIBUTE = object() _Package = collections.namedtuple('Package', ('name', 'version')) def get_package_info(module): parent = module.__name__.split('.')[0] parent_module = None with contextlib.suppress(ImportError): 
parent_module = importlib.import_module(parent) for attr in ('__version__', 'version_string', 'version'): version = getattr(parent_module, attr, None) if version is not None: break return _Package(getattr(module, '_yt_dlp__identifier', parent), str(version)) def _is_package(module): try: module.__getattribute__('__path__') except AttributeError: return False return True def passthrough_module(parent, child, allowed_attributes=None, *, callback=lambda _: None): parent_module = importlib.import_module(parent) child_module = None # Import child module only as needed class PassthroughModule(types.ModuleType): def __getattr__(self, attr): if _is_package(parent_module): with contextlib.suppress(ImportError): return importlib.import_module(f'.{attr}', parent) ret = self.__from_child(attr) if ret is _NO_ATTRIBUTE: raise AttributeError(f'module {parent} has no attribute {attr}') callback(attr) return ret def __from_child(self, attr): if allowed_attributes is None: if attr.startswith('__') and attr.endswith('__'): return _NO_ATTRIBUTE elif attr not in allowed_attributes: return _NO_ATTRIBUTE nonlocal child_module child_module = child_module or importlib.import_module(child, parent) with contextlib.suppress(AttributeError): return getattr(child_module, attr) if _is_package(child_module): with contextlib.suppress(ImportError): return importlib.import_module(f'.{attr}', child) return _NO_ATTRIBUTE # Python 3.6 does not have module level __getattr__ # https://peps.python.org/pep-0562/ sys.modules[parent].__class__ = PassthroughModule yt-dlp-2022.08.19/yt_dlp/compat/functools.py000066400000000000000000000012301427755243700205100ustar00rootroot00000000000000# flake8: noqa: F405 from functools import * # noqa: F403 from .compat_utils import passthrough_module passthrough_module(__name__, 'functools') del passthrough_module try: cache # >= 3.9 except NameError: cache = lru_cache(maxsize=None) try: cached_property # >= 3.8 except NameError: class cached_property: def __init__(self, func): update_wrapper(self, func) self.func = func def __get__(self, instance, _): if instance is None: return self setattr(instance, self.func.__name__, self.func(instance)) return getattr(instance, self.func.__name__) yt-dlp-2022.08.19/yt_dlp/compat/imghdr.py000066400000000000000000000010741427755243700177540ustar00rootroot00000000000000tests = { 'webp': lambda h: h[0:4] == b'RIFF' and h[8:] == b'WEBP', 'png': lambda h: h[:8] == b'\211PNG\r\n\032\n', 'jpeg': lambda h: h[6:10] in (b'JFIF', b'Exif'), 'gif': lambda h: h[:6] in (b'GIF87a', b'GIF89a'), } def what(file=None, h=None): """Detect format of image (Currently supports jpeg, png, webp, gif only) Ref: https://github.com/python/cpython/blob/3.10/Lib/imghdr.py """ if h is None: with open(file, 'rb') as f: h = f.read(12) return next((type_ for type_, test in tests.items() if test(h)), None) yt-dlp-2022.08.19/yt_dlp/cookies.py000066400000000000000000001122431427755243700166540ustar00rootroot00000000000000import base64 import contextlib import http.cookiejar import json import os import shutil import struct import subprocess import sys import tempfile import time from datetime import datetime, timedelta, timezone from enum import Enum, auto from hashlib import pbkdf2_hmac from .aes import ( aes_cbc_decrypt_bytes, aes_gcm_decrypt_and_verify_bytes, unpad_pkcs7, ) from .dependencies import ( _SECRETSTORAGE_UNAVAILABLE_REASON, secretstorage, sqlite3, ) from .minicurses import MultilinePrinter, QuietMultilinePrinter from .utils import Popen, YoutubeDLCookieJar, error_to_str, 
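# --- Editorial aside (hedged example; not part of the original source) ---
# The imghdr replacement above matches on magic bytes alone; a quick check:
#
#   from yt_dlp.compat.imghdr import what
#   assert what(h=b'\x89PNG\r\n\x1a\n' + b'\x00' * 4) == 'png'
#   assert what(h=b'RIFF\x00\x00\x00\x00WEBP') == 'webp'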
expand_path CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'} SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'} class YDLLogger: def __init__(self, ydl=None): self._ydl = ydl def debug(self, message): if self._ydl: self._ydl.write_debug(message) def info(self, message): if self._ydl: self._ydl.to_screen(f'[Cookies] {message}') def warning(self, message, only_once=False): if self._ydl: self._ydl.report_warning(message, only_once) def error(self, message): if self._ydl: self._ydl.report_error(message) class ProgressBar(MultilinePrinter): _DELAY, _timer = 0.1, 0 def print(self, message): if time.time() - self._timer > self._DELAY: self.print_at_line(f'[Cookies] {message}', 0) self._timer = time.time() def progress_bar(self): """Return a context manager with a print method. (Optional)""" # Do not print to files/pipes, loggers, or when --no-progress is used if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'): return file = self._ydl._out_files.error try: if not file.isatty(): return except BaseException: return return self.ProgressBar(file, preserve_output=False) def _create_progress_bar(logger): if hasattr(logger, 'progress_bar'): printer = logger.progress_bar() if printer: return printer printer = QuietMultilinePrinter() printer.print = lambda _: None return printer def load_cookies(cookie_file, browser_specification, ydl): cookie_jars = [] if browser_specification is not None: browser_name, profile, keyring = _parse_browser_specification(*browser_specification) cookie_jars.append(extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring)) if cookie_file is not None: is_filename = YoutubeDLCookieJar.is_path(cookie_file) if is_filename: cookie_file = expand_path(cookie_file) jar = YoutubeDLCookieJar(cookie_file) if not is_filename or os.access(cookie_file, os.R_OK): jar.load(ignore_discard=True, ignore_expires=True) cookie_jars.append(jar) return _merge_cookie_jars(cookie_jars) def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None): if browser_name == 'firefox': return _extract_firefox_cookies(profile, logger) elif browser_name == 'safari': return _extract_safari_cookies(profile, logger) elif browser_name in CHROMIUM_BASED_BROWSERS: return _extract_chrome_cookies(browser_name, profile, keyring, logger) else: raise ValueError(f'unknown browser: {browser_name}') def _extract_firefox_cookies(profile, logger): logger.info('Extracting cookies from firefox') if not sqlite3: logger.warning('Cannot extract cookies from firefox without sqlite3 support. 
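# --- Editorial aside (hedged example; not part of the original source) ---
# extract_cookies_from_browser() above is what powers --cookies-from-browser;
# it can also be called directly when embedding (requires a readable browser
# profile on the local machine):
#
#   from yt_dlp.cookies import extract_cookies_from_browser
#   jar = extract_cookies_from_browser('firefox')  # returns a YoutubeDLCookieJar
#   print(f'{len(jar)} cookies extracted')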
' 'Please use a python interpreter compiled with sqlite3 support') return YoutubeDLCookieJar() if profile is None: search_root = _firefox_browser_dir() elif _is_path(profile): search_root = profile else: search_root = os.path.join(_firefox_browser_dir(), profile) cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger) if cookie_database_path is None: raise FileNotFoundError(f'could not find firefox cookies database in {search_root}') logger.debug(f'Extracting cookies from: "{cookie_database_path}"') with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir: cursor = None try: cursor = _open_database_copy(cookie_database_path, tmpdir) cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies') jar = YoutubeDLCookieJar() with _create_progress_bar(logger) as progress_bar: table = cursor.fetchall() total_cookie_count = len(table) for i, (host, name, value, path, expiry, is_secure) in enumerate(table): progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}') cookie = http.cookiejar.Cookie( version=0, name=name, value=value, port=None, port_specified=False, domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'), path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False, comment=None, comment_url=None, rest={}) jar.set_cookie(cookie) logger.info(f'Extracted {len(jar)} cookies from firefox') return jar finally: if cursor is not None: cursor.connection.close() def _firefox_browser_dir(): if sys.platform in ('cygwin', 'win32'): return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles') elif sys.platform == 'darwin': return os.path.expanduser('~/Library/Application Support/Firefox') return os.path.expanduser('~/.mozilla/firefox') def _get_chromium_based_browser_settings(browser_name): # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md if sys.platform in ('cygwin', 'win32'): appdata_local = os.path.expandvars('%LOCALAPPDATA%') appdata_roaming = os.path.expandvars('%APPDATA%') browser_dir = { 'brave': os.path.join(appdata_local, R'BraveSoftware\Brave-Browser\User Data'), 'chrome': os.path.join(appdata_local, R'Google\Chrome\User Data'), 'chromium': os.path.join(appdata_local, R'Chromium\User Data'), 'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'), 'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'), 'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'), }[browser_name] elif sys.platform == 'darwin': appdata = os.path.expanduser('~/Library/Application Support') browser_dir = { 'brave': os.path.join(appdata, 'BraveSoftware/Brave-Browser'), 'chrome': os.path.join(appdata, 'Google/Chrome'), 'chromium': os.path.join(appdata, 'Chromium'), 'edge': os.path.join(appdata, 'Microsoft Edge'), 'opera': os.path.join(appdata, 'com.operasoftware.Opera'), 'vivaldi': os.path.join(appdata, 'Vivaldi'), }[browser_name] else: config = _config_home() browser_dir = { 'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'), 'chrome': os.path.join(config, 'google-chrome'), 'chromium': os.path.join(config, 'chromium'), 'edge': os.path.join(config, 'microsoft-edge'), 'opera': os.path.join(config, 'opera'), 'vivaldi': os.path.join(config, 'vivaldi'), }[browser_name] # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE: # dbus-monitor "interface='org.kde.KWallet'" "type=method_return" keyring_name = { 'brave': 'Brave', 'chrome': 'Chrome', 'chromium': 'Chromium', 'edge': 'Microsoft 
Edge' if sys.platform == 'darwin' else 'Chromium', 'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium', 'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome', }[browser_name] browsers_without_profiles = {'opera'} return { 'browser_dir': browser_dir, 'keyring_name': keyring_name, 'supports_profiles': browser_name not in browsers_without_profiles } def _extract_chrome_cookies(browser_name, profile, keyring, logger): logger.info(f'Extracting cookies from {browser_name}') if not sqlite3: logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. ' 'Please use a python interpreter compiled with sqlite3 support') return YoutubeDLCookieJar() config = _get_chromium_based_browser_settings(browser_name) if profile is None: search_root = config['browser_dir'] elif _is_path(profile): search_root = profile config['browser_dir'] = os.path.dirname(profile) if config['supports_profiles'] else profile else: if config['supports_profiles']: search_root = os.path.join(config['browser_dir'], profile) else: logger.error(f'{browser_name} does not support profiles') search_root = config['browser_dir'] cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger) if cookie_database_path is None: raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"') logger.debug(f'Extracting cookies from: "{cookie_database_path}"') decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring) with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir: cursor = None try: cursor = _open_database_copy(cookie_database_path, tmpdir) cursor.connection.text_factory = bytes column_names = _get_column_names(cursor, 'cookies') secure_column = 'is_secure' if 'is_secure' in column_names else 'secure' cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies') jar = YoutubeDLCookieJar() failed_cookies = 0 unencrypted_cookies = 0 with _create_progress_bar(logger) as progress_bar: table = cursor.fetchall() total_cookie_count = len(table) for i, line in enumerate(table): progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}') is_encrypted, cookie = _process_chrome_cookie(decryptor, *line) if not cookie: failed_cookies += 1 continue elif not is_encrypted: unencrypted_cookies += 1 jar.set_cookie(cookie) if failed_cookies > 0: failed_message = f' ({failed_cookies} could not be decrypted)' else: failed_message = '' logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}') counts = decryptor._cookie_counts.copy() counts['unencrypted'] = unencrypted_cookies logger.debug(f'cookie version breakdown: {counts}') return jar finally: if cursor is not None: cursor.connection.close() def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure): host_key = host_key.decode() name = name.decode() value = value.decode() path = path.decode() is_encrypted = not value and encrypted_value if is_encrypted: value = decryptor.decrypt(encrypted_value) if value is None: return is_encrypted, None return is_encrypted, http.cookiejar.Cookie( version=0, name=name, value=value, port=None, port_specified=False, domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'), path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False, comment=None, comment_url=None, rest={}) class ChromeCookieDecryptor: """ Overview: Linux: - cookies 
are either v10 or v11 - v10: AES-CBC encrypted with a fixed key - v11: AES-CBC encrypted with an OS protected key (keyring) - v11 keys can be stored in various places depending on the active desktop environment [2] Mac: - cookies are either v10 or not v10 - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than Linux - not v10: 'old data' stored as plaintext Windows: - cookies are either v10 or not v10 - v10: AES-GCM encrypted with a key which is encrypted with DPAPI - not v10: encrypted with DPAPI Sources: - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/ - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc - KeyStorageLinux::CreateService """ _cookie_counts = {} def decrypt(self, encrypted_value): raise NotImplementedError('Must be implemented by subclasses') def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None): if sys.platform == 'darwin': return MacChromeCookieDecryptor(browser_keyring_name, logger) elif sys.platform in ('win32', 'cygwin'): return WindowsChromeCookieDecryptor(browser_root, logger) return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring) class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): def __init__(self, browser_keyring_name, logger, *, keyring=None): self._logger = logger self._v10_key = self.derive_key(b'peanuts') password = _get_linux_keyring_password(browser_keyring_name, keyring, logger) self._v11_key = None if password is None else self.derive_key(password) self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0} @staticmethod def derive_key(password): # values from # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16) def decrypt(self, encrypted_value): version = encrypted_value[:3] ciphertext = encrypted_value[3:] if version == b'v10': self._cookie_counts['v10'] += 1 return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger) elif version == b'v11': self._cookie_counts['v11'] += 1 if self._v11_key is None: self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True) return None return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger) else: self._cookie_counts['other'] += 1 return None class MacChromeCookieDecryptor(ChromeCookieDecryptor): def __init__(self, browser_keyring_name, logger): self._logger = logger password = _get_mac_keyring_password(browser_keyring_name, logger) self._v10_key = None if password is None else self.derive_key(password) self._cookie_counts = {'v10': 0, 'other': 0} @staticmethod def derive_key(password): # values from # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16) def decrypt(self, encrypted_value): version = encrypted_value[:3] ciphertext = encrypted_value[3:] if version == b'v10': self._cookie_counts['v10'] += 1 if self._v10_key is None: self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True) return None return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger) else: self._cookie_counts['other'] += 1 # other prefixes are considered 'old data' which were stored as plaintext # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm return encrypted_value class
WindowsChromeCookieDecryptor(ChromeCookieDecryptor): def __init__(self, browser_root, logger): self._logger = logger self._v10_key = _get_windows_v10_key(browser_root, logger) self._cookie_counts = {'v10': 0, 'other': 0} def decrypt(self, encrypted_value): version = encrypted_value[:3] ciphertext = encrypted_value[3:] if version == b'v10': self._cookie_counts['v10'] += 1 if self._v10_key is None: self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True) return None # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc # kNonceLength nonce_length = 96 // 8 # boringssl # EVP_AEAD_AES_GCM_TAG_LEN authentication_tag_length = 16 raw_ciphertext = ciphertext nonce = raw_ciphertext[:nonce_length] ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length] authentication_tag = raw_ciphertext[-authentication_tag_length:] return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger) else: self._cookie_counts['other'] += 1 # any other prefix means the data is DPAPI encrypted # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc return _decrypt_windows_dpapi(encrypted_value, self._logger).decode() def _extract_safari_cookies(profile, logger): if profile is not None: logger.error('safari does not support profiles') if sys.platform != 'darwin': raise ValueError(f'unsupported platform: {sys.platform}') cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies') if not os.path.isfile(cookies_path): logger.debug('Trying secondary cookie location') cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies') if not os.path.isfile(cookies_path): raise FileNotFoundError('could not find safari cookies database') with open(cookies_path, 'rb') as f: cookies_data = f.read() jar = parse_safari_cookies(cookies_data, logger=logger) logger.info(f'Extracted {len(jar)} cookies from safari') return jar class ParserError(Exception): pass class DataParser: def __init__(self, data, logger): self._data = data self.cursor = 0 self._logger = logger def read_bytes(self, num_bytes): if num_bytes < 0: raise ParserError(f'invalid read of {num_bytes} bytes') end = self.cursor + num_bytes if end > len(self._data): raise ParserError('reached end of input') data = self._data[self.cursor:end] self.cursor = end return data def expect_bytes(self, expected_value, message): value = self.read_bytes(len(expected_value)) if value != expected_value: raise ParserError(f'unexpected value: {value} != {expected_value} ({message})') def read_uint(self, big_endian=False): data_format = '>I' if big_endian else '<I' return struct.unpack(data_format, self.read_bytes(4))[0] def read_double(self, big_endian=False): data_format = '>d' if big_endian else '<d' return struct.unpack(data_format, self.read_bytes(8))[0] def read_cstring(self): buffer = [] while True: c = self.read_bytes(1) if c == b'\x00': return b''.join(buffer).decode() else: buffer.append(c) def skip(self, num_bytes, description='unknown'): if num_bytes > 0: self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}') elif num_bytes < 0: raise ParserError(f'invalid skip of {num_bytes} bytes') def skip_to(self, offset, description='unknown'): self.skip(offset - self.cursor, description) def skip_to_end(self, description='unknown'): self.skip_to(len(self._data), description) def _mac_absolute_time_to_posix(timestamp): return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp()) def _parse_safari_cookies_header(data, logger): p = DataParser(data, logger) p.expect_bytes(b'cook', 'database signature') number_of_pages = p.read_uint(big_endian=True) page_sizes = [p.read_uint(big_endian=True) for _ in range(number_of_pages)] return page_sizes, p.cursor def _parse_safari_cookies_page(data,
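# --- Editorial aside (hedged example; not part of the original source) ---
# DataParser in action on a made-up buffer shaped like the header parsed by
# _parse_safari_cookies_header above (magic, then a big-endian page count):
#
#   p = DataParser(b'cook\x00\x00\x00\x01', YDLLogger())
#   p.expect_bytes(b'cook', 'database signature')
#   assert p.read_uint(big_endian=True) == 1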
jar, logger): p = DataParser(data, logger) p.expect_bytes(b'\x00\x00\x01\x00', 'page signature') number_of_cookies = p.read_uint() record_offsets = [p.read_uint() for _ in range(number_of_cookies)] if number_of_cookies == 0: logger.debug(f'a cookies page of size {len(data)} has no cookies') return p.skip_to(record_offsets[0], 'unknown page header field') with _create_progress_bar(logger) as progress_bar: for i, record_offset in enumerate(record_offsets): progress_bar.print(f'Loading cookie {i: 6d}/{number_of_cookies: 6d}') p.skip_to(record_offset, 'space between records') record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger) p.read_bytes(record_length) p.skip_to_end('space in between pages') def _parse_safari_cookies_record(data, jar, logger): p = DataParser(data, logger) record_size = p.read_uint() p.skip(4, 'unknown record field 1') flags = p.read_uint() is_secure = bool(flags & 0x0001) p.skip(4, 'unknown record field 2') domain_offset = p.read_uint() name_offset = p.read_uint() path_offset = p.read_uint() value_offset = p.read_uint() p.skip(8, 'unknown record field 3') expiration_date = _mac_absolute_time_to_posix(p.read_double()) _creation_date = _mac_absolute_time_to_posix(p.read_double()) # noqa: F841 try: p.skip_to(domain_offset) domain = p.read_cstring() p.skip_to(name_offset) name = p.read_cstring() p.skip_to(path_offset) path = p.read_cstring() p.skip_to(value_offset) value = p.read_cstring() except UnicodeDecodeError: logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True) return record_size p.skip_to(record_size, 'space at the end of the record') cookie = http.cookiejar.Cookie( version=0, name=name, value=value, port=None, port_specified=False, domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'), path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False, comment=None, comment_url=None, rest={}) jar.set_cookie(cookie) return record_size def parse_safari_cookies(data, jar=None, logger=YDLLogger()): """ References: - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc - this data appears to be out of date but the important parts of the database structure is the same - there are a few bytes here and there which are skipped during parsing """ if jar is None: jar = YoutubeDLCookieJar() page_sizes, body_start = _parse_safari_cookies_header(data, logger) p = DataParser(data[body_start:], logger) for page_size in page_sizes: _parse_safari_cookies_page(p.read_bytes(page_size), jar, logger) p.skip_to_end('footer') return jar class _LinuxDesktopEnvironment(Enum): """ https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h DesktopEnvironment """ OTHER = auto() CINNAMON = auto() GNOME = auto() KDE = auto() PANTHEON = auto() UNITY = auto() XFCE = auto() class _LinuxKeyring(Enum): """ https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h SelectedLinuxBackend """ KWALLET = auto() GNOMEKEYRING = auto() BASICTEXT = auto() SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys() def _get_linux_desktop_environment(env): """ https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc GetDesktopEnvironment """ xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None) desktop_session = env.get('DESKTOP_SESSION', None) if xdg_current_desktop is not None: xdg_current_desktop = xdg_current_desktop.split(':')[0].strip() if 
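# Worked example (illustrative, not part of the module): Safari stores cookie
# timestamps as seconds since the Mac epoch (2001-01-01 UTC), which sits a fixed
# 978307200 seconds after the Unix epoch, so the conversion performed by
# _mac_absolute_time_to_posix above is a simple offset.
from datetime import datetime, timezone

_MAC_EPOCH_OFFSET = 978307200  # seconds from 1970-01-01 to 2001-01-01 (UTC)
assert int(datetime(2001, 1, 1, tzinfo=timezone.utc).timestamp()) == _MAC_EPOCH_OFFSET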
xdg_current_desktop == 'Unity': if desktop_session is not None and 'gnome-fallback' in desktop_session: return _LinuxDesktopEnvironment.GNOME else: return _LinuxDesktopEnvironment.UNITY elif xdg_current_desktop == 'GNOME': return _LinuxDesktopEnvironment.GNOME elif xdg_current_desktop == 'X-Cinnamon': return _LinuxDesktopEnvironment.CINNAMON elif xdg_current_desktop == 'KDE': return _LinuxDesktopEnvironment.KDE elif xdg_current_desktop == 'Pantheon': return _LinuxDesktopEnvironment.PANTHEON elif xdg_current_desktop == 'XFCE': return _LinuxDesktopEnvironment.XFCE elif desktop_session is not None: if desktop_session in ('mate', 'gnome'): return _LinuxDesktopEnvironment.GNOME elif 'kde' in desktop_session: return _LinuxDesktopEnvironment.KDE elif 'xfce' in desktop_session: return _LinuxDesktopEnvironment.XFCE else: if 'GNOME_DESKTOP_SESSION_ID' in env: return _LinuxDesktopEnvironment.GNOME elif 'KDE_FULL_SESSION' in env: return _LinuxDesktopEnvironment.KDE return _LinuxDesktopEnvironment.OTHER def _choose_linux_keyring(logger): """ https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc SelectBackend """ desktop_environment = _get_linux_desktop_environment(os.environ) logger.debug(f'detected desktop environment: {desktop_environment.name}') if desktop_environment == _LinuxDesktopEnvironment.KDE: linux_keyring = _LinuxKeyring.KWALLET elif desktop_environment == _LinuxDesktopEnvironment.OTHER: linux_keyring = _LinuxKeyring.BASICTEXT else: linux_keyring = _LinuxKeyring.GNOMEKEYRING return linux_keyring def _get_kwallet_network_wallet(logger): """ The name of the wallet used to store network passwords. https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc KWalletDBus::NetworkWallet which does a dbus call to the following function: https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html Wallet::NetworkWallet """ default_wallet = 'kdewallet' try: stdout, _, returncode = Popen.run([ 'dbus-send', '--session', '--print-reply=literal', '--dest=org.kde.kwalletd5', '/modules/kwalletd5', 'org.kde.KWallet.networkWallet' ], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) if returncode: logger.warning('failed to read NetworkWallet') return default_wallet else: logger.debug(f'NetworkWallet = "{stdout.strip()}"') return stdout.strip() except Exception as e: logger.warning(f'exception while obtaining NetworkWallet: {e}') return default_wallet def _get_kwallet_password(browser_keyring_name, logger): logger.debug('using kwallet-query to obtain password from kwallet') if shutil.which('kwallet-query') is None: logger.error('kwallet-query command not found. KWallet and kwallet-query ' 'must be installed to read from KWallet. kwallet-query should be ' 'included in the kwallet package for your distribution') return b'' network_wallet = _get_kwallet_network_wallet(logger) try: stdout, _, returncode = Popen.run([ 'kwallet-query', '--read-password', f'{browser_keyring_name} Safe Storage', '--folder', f'{browser_keyring_name} Keys', network_wallet ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) if returncode: logger.error(f'kwallet-query failed with return code {returncode}. ' 'Please consult the kwallet-query man page for details') return b'' else: if stdout.lower().startswith(b'failed to read'): logger.debug('failed to read password from kwallet. 
Using empty string instead') # this sometimes occurs in KDE because chrome does not check hasEntry and instead # just tries to read the value (which kwallet returns "") whereas kwallet-query # checks hasEntry. To verify this: # dbus-monitor "interface='org.kde.KWallet'" "type=method_return" # while starting chrome. # this may be a bug as the intended behaviour is to generate a random password and store # it, but that doesn't matter here. return b'' else: logger.debug('password found') return stdout.rstrip(b'\n') except Exception as e: logger.warning(f'exception running kwallet-query: {error_to_str(e)}') return b'' def _get_gnome_keyring_password(browser_keyring_name, logger): if not secretstorage: logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}') return b'' # the Gnome keyring does not seem to organise keys in the same way as KWallet, # using `dbus-monitor` during startup, it can be observed that chromium lists all keys # and presumably searches for its key in the list. It appears that we must do the same. # https://github.com/jaraco/keyring/issues/556 with contextlib.closing(secretstorage.dbus_init()) as con: col = secretstorage.get_default_collection(con) for item in col.get_all_items(): if item.get_label() == f'{browser_keyring_name} Safe Storage': return item.get_secret() else: logger.error('failed to read from keyring') return b'' def _get_linux_keyring_password(browser_keyring_name, keyring, logger): # note: chrome/chromium can be run with the following flags to determine which keyring backend # it has chosen to use # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_ # Chromium supports a flag: --password-store= so the automatic detection # will not be sufficient in all cases. keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger) logger.debug(f'Chosen keyring: {keyring.name}') if keyring == _LinuxKeyring.KWALLET: return _get_kwallet_password(browser_keyring_name, logger) elif keyring == _LinuxKeyring.GNOMEKEYRING: return _get_gnome_keyring_password(browser_keyring_name, logger) elif keyring == _LinuxKeyring.BASICTEXT: # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required) return None assert False, f'Unknown keyring {keyring}' def _get_mac_keyring_password(browser_keyring_name, logger): logger.debug('using find-generic-password to obtain password from OSX keychain') try: stdout, _, _ = Popen.run( ['security', 'find-generic-password', '-w', # write password to stdout '-a', browser_keyring_name, # match 'account' '-s', f'{browser_keyring_name} Safe Storage'], # match 'service' stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) return stdout.rstrip(b'\n') except Exception as e: logger.warning(f'exception running find-generic-password: {error_to_str(e)}') return None def _get_windows_v10_key(browser_root, logger): path = _find_most_recently_used_file(browser_root, 'Local State', logger) if path is None: logger.error('could not find local state file') return None logger.debug(f'Found local state file at "{path}"') with open(path, encoding='utf8') as f: data = json.load(f) try: base64_key = data['os_crypt']['encrypted_key'] except KeyError: logger.error('no encrypted key in Local State') return None encrypted_key = base64.b64decode(base64_key) prefix = b'DPAPI' if not encrypted_key.startswith(prefix): logger.error('invalid key') return None return _decrypt_windows_dpapi(encrypted_key[len(prefix):], logger) def pbkdf2_sha1(password, salt, iterations, key_length): return 
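# Hedged sketch mirroring _get_gnome_keyring_password above: the GNOME keyring
# exposes no lookup-by-name call, so every item is listed and matched on its
# label. Assumes the optional `secretstorage` package is installed; the exact
# label ('Chromium Safe Storage', 'Chrome Safe Storage', ...) is browser-dependent.
import contextlib

def _example_gnome_keyring_labels():
    import secretstorage  # optional dependency; may raise ImportError
    with contextlib.closing(secretstorage.dbus_init()) as con:
        collection = secretstorage.get_default_collection(con)
        return [item.get_label() for item in collection.get_all_items()]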
pbkdf2_hmac('sha1', password, salt, iterations, key_length) def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16): plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)) try: return plaintext.decode() except UnicodeDecodeError: logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True) return None def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger): try: plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce) except ValueError: logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True) return None try: return plaintext.decode() except UnicodeDecodeError: logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True) return None def _decrypt_windows_dpapi(ciphertext, logger): """ References: - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata """ import ctypes import ctypes.wintypes class DATA_BLOB(ctypes.Structure): _fields_ = [('cbData', ctypes.wintypes.DWORD), ('pbData', ctypes.POINTER(ctypes.c_char))] buffer = ctypes.create_string_buffer(ciphertext) blob_in = DATA_BLOB(ctypes.sizeof(buffer), buffer) blob_out = DATA_BLOB() ret = ctypes.windll.crypt32.CryptUnprotectData( ctypes.byref(blob_in), # pDataIn None, # ppszDataDescr: human readable description of pDataIn None, # pOptionalEntropy: salt? None, # pvReserved: must be NULL None, # pPromptStruct: information about prompts to display 0, # dwFlags ctypes.byref(blob_out) # pDataOut ) if not ret: logger.warning('failed to decrypt with DPAPI', only_once=True) return None result = ctypes.string_at(blob_out.pbData, blob_out.cbData) ctypes.windll.kernel32.LocalFree(blob_out.pbData) return result def _config_home(): return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config')) def _open_database_copy(database_path, tmpdir): # cannot open sqlite databases if they are already in use (e.g. 
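# Byte layout handled by the Windows v10 path above (illustrative helper, not
# part of the module): b'v10' prefix, 12-byte AES-GCM nonce, ciphertext, then a
# 16-byte authentication tag at the end of the blob.
def _example_split_windows_v10_blob(blob):
    assert blob[:3] == b'v10'
    payload = blob[3:]
    nonce, ciphertext, tag = payload[:12], payload[12:-16], payload[-16:]
    return nonce, ciphertext, tag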
by the browser) database_copy_path = os.path.join(tmpdir, 'temporary.sqlite') shutil.copy(database_path, database_copy_path) conn = sqlite3.connect(database_copy_path) return conn.cursor() def _get_column_names(cursor, table_name): table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall() return [row[1].decode() for row in table_info] def _find_most_recently_used_file(root, filename, logger): # if there are multiple browser profiles, take the most recently used one i, paths = 0, [] with _create_progress_bar(logger) as progress_bar: for curr_root, dirs, files in os.walk(root): for file in files: i += 1 progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched') if file == filename: paths.append(os.path.join(curr_root, file)) return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime) def _merge_cookie_jars(jars): output_jar = YoutubeDLCookieJar() for jar in jars: for cookie in jar: output_jar.set_cookie(cookie) if jar.filename is not None: output_jar.filename = jar.filename return output_jar def _is_path(value): return os.path.sep in value def _parse_browser_specification(browser_name, profile=None, keyring=None): if browser_name not in SUPPORTED_BROWSERS: raise ValueError(f'unsupported browser: "{browser_name}"') if keyring not in (None, *SUPPORTED_KEYRINGS): raise ValueError(f'unsupported keyring: "{keyring}"') if profile is not None and _is_path(profile): profile = os.path.expanduser(profile) return browser_name, profile, keyring yt-dlp-2022.08.19/yt_dlp/dependencies.py000066400000000000000000000047531427755243700176540ustar00rootroot00000000000000# flake8: noqa: F401 """Imports all optional dependencies for the project. An attribute "_yt_dlp__identifier" may be inserted into the module if it uses an ambiguous namespace""" try: import brotlicffi as brotli except ImportError: try: import brotli except ImportError: brotli = None try: import certifi except ImportError: certifi = None else: from os.path import exists as _path_exists # The certificate may not be bundled in executable if not _path_exists(certifi.where()): certifi = None try: from Cryptodome.Cipher import AES as Cryptodome_AES except ImportError: try: from Crypto.Cipher import AES as Cryptodome_AES except (ImportError, SyntaxError): # Old Crypto gives SyntaxError in newer Python Cryptodome_AES = None else: try: # In pycrypto, mode defaults to ECB. See: # https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode Cryptodome_AES.new(b'abcdefghijklmnop') except TypeError: pass else: Cryptodome_AES._yt_dlp__identifier = 'pycrypto' try: import mutagen except ImportError: mutagen = None secretstorage = None try: import secretstorage _SECRETSTORAGE_UNAVAILABLE_REASON = None except ImportError: _SECRETSTORAGE_UNAVAILABLE_REASON = ( 'as the `secretstorage` module is not installed. ' 'Please install by running `python3 -m pip install secretstorage`') except Exception as _err: _SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. {_err}' try: import sqlite3 except ImportError: # although sqlite3 is part of the standard library, it is possible to compile python without # sqlite support. 
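# Standalone sketch of the copy-then-open trick used by _open_database_copy
# above: browsers keep their cookie databases locked while running, so a
# temporary copy is made and queried instead. 'cookies.sqlite' is a placeholder path.
import os
import shutil
import sqlite3
import tempfile

def _example_read_locked_database(database_path='cookies.sqlite'):
    with tempfile.TemporaryDirectory() as tmpdir:
        copy_path = os.path.join(tmpdir, 'temporary.sqlite')
        shutil.copy(database_path, copy_path)
        conn = sqlite3.connect(copy_path)
        try:
            return conn.execute('SELECT name FROM sqlite_master').fetchall()
        finally:
            conn.close()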
See: https://github.com/yt-dlp/yt-dlp/issues/544 sqlite3 = None try: import websockets except (ImportError, SyntaxError): # websockets 3.10 on python 3.6 causes SyntaxError # See https://github.com/yt-dlp/yt-dlp/issues/2633 websockets = None try: import xattr # xattr or pyxattr except ImportError: xattr = None else: if hasattr(xattr, 'set'): # pyxattr xattr._yt_dlp__identifier = 'pyxattr' all_dependencies = {k: v for k, v in globals().items() if not k.startswith('_')} available_dependencies = {k: v for k, v in all_dependencies.items() if v} __all__ = [ 'all_dependencies', 'available_dependencies', *all_dependencies.keys(), ] yt-dlp-2022.08.19/yt_dlp/downloader/000077500000000000000000000000001427755243700170015ustar00rootroot00000000000000yt-dlp-2022.08.19/yt_dlp/downloader/__init__.py000066400000000000000000000105341427755243700211150ustar00rootroot00000000000000from ..utils import NO_DEFAULT, determine_protocol def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=None, to_stdout=False): info_dict['protocol'] = determine_protocol(info_dict) info_copy = info_dict.copy() info_copy['to_stdout'] = to_stdout protocols = (protocol or info_copy['protocol']).split('+') downloaders = [_get_suitable_downloader(info_copy, proto, params, default) for proto in protocols] if set(downloaders) == {FFmpegFD} and FFmpegFD.can_merge_formats(info_copy, params): return FFmpegFD elif (set(downloaders) == {DashSegmentsFD} and not (to_stdout and len(protocols) > 1) and set(protocols) == {'http_dash_segments_generator'}): return DashSegmentsFD elif len(downloaders) == 1: return downloaders[0] return None # Some of these require get_suitable_downloader from .common import FileDownloader from .dash import DashSegmentsFD from .external import FFmpegFD, get_external_downloader from .f4m import F4mFD from .fc2 import FC2LiveFD from .hls import HlsFD from .http import HttpFD from .ism import IsmFD from .mhtml import MhtmlFD from .niconico import NiconicoDmcFD from .rtmp import RtmpFD from .rtsp import RtspFD from .websocket import WebSocketFragmentFD from .youtube_live_chat import YoutubeLiveChatFD PROTOCOL_MAP = { 'rtmp': RtmpFD, 'rtmpe': RtmpFD, 'rtmp_ffmpeg': FFmpegFD, 'm3u8_native': HlsFD, 'm3u8': FFmpegFD, 'mms': RtspFD, 'rtsp': RtspFD, 'f4m': F4mFD, 'http_dash_segments': DashSegmentsFD, 'http_dash_segments_generator': DashSegmentsFD, 'ism': IsmFD, 'mhtml': MhtmlFD, 'niconico_dmc': NiconicoDmcFD, 'fc2_live': FC2LiveFD, 'websocket_frag': WebSocketFragmentFD, 'youtube_live_chat': YoutubeLiveChatFD, 'youtube_live_chat_replay': YoutubeLiveChatFD, } def shorten_protocol_name(proto, simplify=False): short_protocol_names = { 'm3u8_native': 'm3u8', 'm3u8': 'm3u8F', 'rtmp_ffmpeg': 'rtmpF', 'http_dash_segments': 'dash', 'http_dash_segments_generator': 'dashG', 'niconico_dmc': 'dmc', 'websocket_frag': 'WSfrag', } if simplify: short_protocol_names.update({ 'https': 'http', 'ftps': 'ftp', 'm3u8': 'm3u8', # Reverse above m3u8 mapping 'm3u8_native': 'm3u8', 'http_dash_segments_generator': 'dash', 'rtmp_ffmpeg': 'rtmp', 'm3u8_frag_urls': 'm3u8', 'dash_frag_urls': 'dash', }) return short_protocol_names.get(proto, proto) def _get_suitable_downloader(info_dict, protocol, params, default): """Get the downloader class that can handle the info dict.""" if default is NO_DEFAULT: default = HttpFD if (info_dict.get('section_start') or info_dict.get('section_end')) and FFmpegFD.can_download(info_dict): return FFmpegFD info_dict['protocol'] = protocol downloaders = params.get('external_downloader') 
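# Illustrative sketch (the helper name and config values are invented): the
# 'external_downloader' param consulted below may be a plain string applied to
# every protocol, or a dict keyed by shortened protocol name with 'default' as
# the fallback key, e.g. {'dash': 'aria2c', 'm3u8': 'native', 'default': 'ffmpeg'}.
def _example_resolve_external_downloader(downloaders, protocol):
    if downloaders is None or isinstance(downloaders, str):
        return downloaders
    return downloaders.get(shorten_protocol_name(protocol, True), downloaders.get('default'))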
external_downloader = ( downloaders if isinstance(downloaders, str) or downloaders is None else downloaders.get(shorten_protocol_name(protocol, True), downloaders.get('default'))) if external_downloader is None: if info_dict['to_stdout'] and FFmpegFD.can_merge_formats(info_dict, params): return FFmpegFD elif external_downloader.lower() != 'native': ed = get_external_downloader(external_downloader) if ed.can_download(info_dict, external_downloader): return ed if protocol == 'http_dash_segments': if info_dict.get('is_live') and (external_downloader or '').lower() != 'native': return FFmpegFD if protocol in ('m3u8', 'm3u8_native'): if info_dict.get('is_live'): return FFmpegFD elif (external_downloader or '').lower() == 'native': return HlsFD elif protocol == 'm3u8_native' and get_suitable_downloader( info_dict, params, None, protocol='m3u8_frag_urls', to_stdout=info_dict['to_stdout']): return HlsFD elif params.get('hls_prefer_native') is True: return HlsFD elif params.get('hls_prefer_native') is False: return FFmpegFD return PROTOCOL_MAP.get(protocol, default) __all__ = [ 'FileDownloader', 'get_suitable_downloader', 'shorten_protocol_name', ] yt-dlp-2022.08.19/yt_dlp/downloader/common.py000066400000000000000000000441761427755243700206570ustar00rootroot00000000000000import contextlib import errno import functools import os import random import re import time from ..minicurses import ( BreaklineStatusPrinter, MultilineLogger, MultilinePrinter, QuietMultilinePrinter, ) from ..utils import ( IDENTITY, NO_DEFAULT, NUMBER_RE, LockingUnsupportedError, Namespace, RetryManager, classproperty, decodeArgument, encodeFilename, format_bytes, join_nonempty, sanitize_open, shell_quote, timeconvert, timetuple_from_msec, try_call, ) class FileDownloader: """File Downloader class. File downloader objects are the ones responsible of downloading the actual video file and writing it to disk. File downloaders accept a lot of parameters. In order not to saturate the object constructor with arguments, it receives a dictionary of options instead. Available options: verbose: Print additional info to stdout. quiet: Do not print messages to stdout. ratelimit: Download speed limit, in bytes/sec. continuedl: Attempt to continue downloads if possible throttledratelimit: Assume the download is being throttled below this speed (bytes/sec) retries: Number of times to retry for HTTP error 5xx file_access_retries: Number of times to retry on file access error buffersize: Size of download buffer in bytes. noresizebuffer: Do not automatically resize the download buffer. continuedl: Try to continue downloads if possible. noprogress: Do not print the progress bar. nopart: Do not use temporary .part files. updatetime: Use the Last-modified header to set output file timestamps. test: Download only first bytes to test the downloader. min_filesize: Skip files smaller than this size max_filesize: Skip files larger than this size xattr_set_filesize: Set ytdl.filesize user xattribute with expected size. external_downloader_args: A dictionary of downloader keys (in lower case) and a list of additional command-line arguments for the executable. Use 'default' as the name for arguments to be passed to all downloaders. For compatibility with youtube-dl, a single list of args can also be used hls_use_mpegts: Use the mpegts container for HLS videos. http_chunk_size: Size of a chunk for chunk-based HTTP downloading. 
May be useful for bypassing bandwidth throttling imposed by a webserver (experimental) progress_template: See YoutubeDL.py retry_sleep_functions: See YoutubeDL.py Subclasses of this one must re-define the real_download method. """ _TEST_FILE_SIZE = 10241 params = None def __init__(self, ydl, params): """Create a FileDownloader object with the given options.""" self._set_ydl(ydl) self._progress_hooks = [] self.params = params self._prepare_multiline_status() self.add_progress_hook(self.report_progress) def _set_ydl(self, ydl): self.ydl = ydl for func in ( 'deprecation_warning', 'report_error', 'report_file_already_downloaded', 'report_warning', 'to_console_title', 'to_stderr', 'trouble', 'write_debug', ): if not hasattr(self, func): setattr(self, func, getattr(ydl, func)) def to_screen(self, *args, **kargs): self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs) __to_screen = to_screen @classproperty def FD_NAME(cls): return re.sub(r'(?<=[a-z])(?=[A-Z])', '_', cls.__name__[:-2]).lower() @staticmethod def format_seconds(seconds): if seconds is None: return ' Unknown' time = timetuple_from_msec(seconds * 1000) if time.hours > 99: return '--:--:--' if not time.hours: return '%02d:%02d' % time[1:-1] return '%02d:%02d:%02d' % time[:-1] format_eta = format_seconds @staticmethod def calc_percent(byte_counter, data_len): if data_len is None: return None return float(byte_counter) / float(data_len) * 100.0 @staticmethod def format_percent(percent): return ' N/A%' if percent is None else f'{percent:>5.1f}%' @staticmethod def calc_eta(start, now, total, current): if total is None: return None if now is None: now = time.time() dif = now - start if current == 0 or dif < 0.001: # One millisecond return None rate = float(current) / dif return int((float(total) - float(current)) / rate) @staticmethod def calc_speed(start, now, bytes): dif = now - start if bytes == 0 or dif < 0.001: # One millisecond return None return float(bytes) / dif @staticmethod def format_speed(speed): return ' Unknown B/s' if speed is None else f'{format_bytes(speed):>10s}/s' @staticmethod def format_retries(retries): return 'inf' if retries == float('inf') else int(retries) @staticmethod def best_block_size(elapsed_time, bytes): new_min = max(bytes / 2.0, 1.0) new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB if elapsed_time < 0.001: return int(new_max) rate = bytes / elapsed_time if rate > new_max: return int(new_max) if rate < new_min: return int(new_min) return int(rate) @staticmethod def parse_bytes(bytestr): """Parse a string indicating a byte quantity into an integer.""" matchobj = re.match(rf'(?i)^({NUMBER_RE})([kMGTPEZY]?)$', bytestr) if matchobj is None: return None number = float(matchobj.group(1)) multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) return int(round(number * multiplier)) def slow_down(self, start_time, now, byte_counter): """Sleep if the download speed is over the rate limit.""" rate_limit = self.params.get('ratelimit') if rate_limit is None or byte_counter == 0: return if now is None: now = time.time() elapsed = now - start_time if elapsed <= 0.0: return speed = float(byte_counter) / elapsed if speed > rate_limit: sleep_time = float(byte_counter) / rate_limit - elapsed if sleep_time > 0: time.sleep(sleep_time) def temp_name(self, filename): """Returns a temporary filename for the given filename.""" if self.params.get('nopart', False) or filename == '-' or \ (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))): return 
filename return filename + '.part' def undo_temp_name(self, filename): if filename.endswith('.part'): return filename[:-len('.part')] return filename def ytdl_filename(self, filename): return filename + '.ytdl' def wrap_file_access(action, *, fatal=False): def error_callback(err, count, retries, *, fd): return RetryManager.report_retry( err, count, retries, info=fd.__to_screen, warn=lambda e: (time.sleep(0.01), fd.to_screen(f'[download] Unable to {action} file: {e}')), error=None if fatal else lambda e: fd.report_error(f'Unable to {action} file: {e}'), sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access')) def wrapper(self, func, *args, **kwargs): for retry in RetryManager(self.params.get('file_access_retries'), error_callback, fd=self): try: return func(self, *args, **kwargs) except OSError as err: if err.errno in (errno.EACCES, errno.EINVAL): retry.error = err continue retry.error_callback(err, 1, 0) return functools.partial(functools.partialmethod, wrapper) @wrap_file_access('open', fatal=True) def sanitize_open(self, filename, open_mode): f, filename = sanitize_open(filename, open_mode) if not getattr(f, 'locked', None): self.write_debug(f'{LockingUnsupportedError.msg}. Proceeding without locking', only_once=True) return f, filename @wrap_file_access('remove') def try_remove(self, filename): os.remove(filename) @wrap_file_access('rename') def try_rename(self, old_filename, new_filename): if old_filename == new_filename: return os.replace(old_filename, new_filename) def try_utime(self, filename, last_modified_hdr): """Try to set the last-modified time of the given file.""" if last_modified_hdr is None: return if not os.path.isfile(encodeFilename(filename)): return timestr = last_modified_hdr if timestr is None: return filetime = timeconvert(timestr) if filetime is None: return filetime # Ignore obviously invalid dates if filetime == 0: return with contextlib.suppress(Exception): os.utime(filename, (time.time(), filetime)) return filetime def report_destination(self, filename): """Report destination filename.""" self.to_screen('[download] Destination: ' + filename) def _prepare_multiline_status(self, lines=1): if self.params.get('noprogress'): self._multiline = QuietMultilinePrinter() elif self.ydl.params.get('logger'): self._multiline = MultilineLogger(self.ydl.params['logger'], lines) elif self.params.get('progress_with_newline'): self._multiline = BreaklineStatusPrinter(self.ydl._out_files.out, lines) else: self._multiline = MultilinePrinter(self.ydl._out_files.out, lines, not self.params.get('quiet')) self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color') def _finish_multiline_status(self): self._multiline.end() ProgressStyles = Namespace( downloaded_bytes='light blue', percent='light blue', eta='yellow', speed='green', elapsed='bold white', total_bytes='', total_bytes_estimate='', ) def _report_progress_status(self, s, default_template): for name, style in self.ProgressStyles.items_: name = f'_{name}_str' if name not in s: continue s[name] = self._format_progress(s[name], style) s['_default_template'] = default_template % s progress_dict = s.copy() progress_dict.pop('info_dict') progress_dict = {'info': s['info_dict'], 'progress': progress_dict} progress_template = self.params.get('progress_template', {}) self._multiline.print_at_line(self.ydl.evaluate_outtmpl( progress_template.get('download') or '[download] %(progress._default_template)s', progress_dict), s.get('progress_idx') or 0) 
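# Minimal sketch (stdlib only, names invented) of the retry pattern that
# wrap_file_access above builds with RetryManager: transient file-access errors
# (EACCES/EINVAL, e.g. another process briefly holding a lock) are retried,
# anything else is re-raised immediately.
import errno
import os
import time

def _example_try_remove(path, retries=3):
    for attempt in range(retries + 1):
        try:
            return os.remove(path)
        except OSError as err:
            if err.errno not in (errno.EACCES, errno.EINVAL) or attempt == retries:
                raise
            time.sleep(0.01)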
self.to_console_title(self.ydl.evaluate_outtmpl( progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s', progress_dict)) def _format_progress(self, *args, **kwargs): return self.ydl._format_text( self._multiline.stream, self._multiline.allow_colors, *args, **kwargs) def report_progress(self, s): def with_fields(*tups, default=''): for *fields, tmpl in tups: if all(s.get(f) is not None for f in fields): return tmpl return default if s['status'] == 'finished': if self.params.get('noprogress'): self.to_screen('[download] Download completed') speed = try_call(lambda: s['total_bytes'] / s['elapsed']) s.update({ 'speed': speed, '_speed_str': self.format_speed(speed).strip(), '_total_bytes_str': format_bytes(s.get('total_bytes')), '_elapsed_str': self.format_seconds(s.get('elapsed')), '_percent_str': self.format_percent(100), }) self._report_progress_status(s, join_nonempty( '100%%', with_fields(('total_bytes', 'of %(_total_bytes_str)s')), with_fields(('elapsed', 'in %(_elapsed_str)s')), with_fields(('speed', 'at %(_speed_str)s')), delim=' ')) if s['status'] != 'downloading': return s.update({ '_eta_str': self.format_eta(s.get('eta')), '_speed_str': self.format_speed(s.get('speed')), '_percent_str': self.format_percent(try_call( lambda: 100 * s['downloaded_bytes'] / s['total_bytes'], lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'], lambda: s['downloaded_bytes'] == 0 and 0)), '_total_bytes_str': format_bytes(s.get('total_bytes')), '_total_bytes_estimate_str': format_bytes(s.get('total_bytes_estimate')), '_downloaded_bytes_str': format_bytes(s.get('downloaded_bytes')), '_elapsed_str': self.format_seconds(s.get('elapsed')), }) msg_template = with_fields( ('total_bytes', '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'), ('total_bytes_estimate', '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'), ('downloaded_bytes', 'elapsed', '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'), ('downloaded_bytes', '%(_downloaded_bytes_str)s at %(_speed_str)s'), default='%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s') msg_template += with_fields( ('fragment_index', 'fragment_count', ' (frag %(fragment_index)s/%(fragment_count)s)'), ('fragment_index', ' (frag %(fragment_index)s)')) self._report_progress_status(s, msg_template) def report_resuming_byte(self, resume_len): """Report attempt to resume at given byte.""" self.to_screen('[download] Resuming download at byte %s' % resume_len) def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True): """Report retry""" is_frag = False if frag_index is NO_DEFAULT else 'fragment' RetryManager.report_retry( err, count, retries, info=self.__to_screen, warn=lambda msg: self.__to_screen(f'[download] Got error: {msg}'), error=IDENTITY if not fatal else lambda e: self.report_error(f'\r[download] Got error: {e}'), sleep_func=self.params.get('retry_sleep_functions', {}).get(is_frag or 'http'), suffix=f'fragment{"s" if frag_index is None else f" {frag_index}"}' if is_frag else None) def report_unable_to_resume(self): """Report it was impossible to resume download.""" self.to_screen('[download] Unable to resume') @staticmethod def supports_manifest(manifest): """ Whether the downloader can download the fragments from the manifest. Redefine in subclasses if needed. 
""" pass def download(self, filename, info_dict, subtitle=False): """Download to a filename using the info from info_dict Return True on success and False otherwise """ nooverwrites_and_exists = ( not self.params.get('overwrites', True) and os.path.exists(encodeFilename(filename)) ) if not hasattr(filename, 'write'): continuedl_and_exists = ( self.params.get('continuedl', True) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False) ) # Check file already present if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists): self.report_file_already_downloaded(filename) self._hook_progress({ 'filename': filename, 'status': 'finished', 'total_bytes': os.path.getsize(encodeFilename(filename)), }, info_dict) self._finish_multiline_status() return True, False if subtitle: sleep_interval = self.params.get('sleep_interval_subtitles') or 0 else: min_sleep_interval = self.params.get('sleep_interval') or 0 sleep_interval = random.uniform( min_sleep_interval, self.params.get('max_sleep_interval') or min_sleep_interval) if sleep_interval > 0: self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...') time.sleep(sleep_interval) ret = self.real_download(filename, info_dict) self._finish_multiline_status() return ret, True def real_download(self, filename, info_dict): """Real download process. Redefine in subclasses.""" raise NotImplementedError('This method must be implemented by subclasses') def _hook_progress(self, status, info_dict): # Ideally we want to make a copy of the dict, but that is too slow status['info_dict'] = info_dict # youtube-dl passes the same status object to all the hooks. # Some third party scripts seems to be relying on this. # So keep this behavior if possible for ph in self._progress_hooks: ph(status) def add_progress_hook(self, ph): # See YoutubeDl.py (search for progress_hooks) for a description of # this interface self._progress_hooks.append(ph) def _debug_cmd(self, args, exe=None): if not self.params.get('verbose', False): return str_args = [decodeArgument(a) for a in args] if exe is None: exe = os.path.basename(str_args[0]) self.write_debug(f'{exe} command line: {shell_quote(str_args)}') yt-dlp-2022.08.19/yt_dlp/downloader/dash.py000066400000000000000000000057741427755243700203070ustar00rootroot00000000000000import time from . import get_suitable_downloader from .fragment import FragmentFD from ..utils import urljoin class DashSegmentsFD(FragmentFD): """ Download segments in a DASH manifest. 
External downloaders can take over the fragment downloads by supporting the 'dash_frag_urls' protocol """ FD_NAME = 'dashsegments' def real_download(self, filename, info_dict): if info_dict.get('is_live') and set(info_dict['protocol'].split('+')) != {'http_dash_segments_generator'}: self.report_error('Live DASH videos are not supported') real_start = time.time() real_downloader = get_suitable_downloader( info_dict, self.params, None, protocol='dash_frag_urls', to_stdout=(filename == '-')) requested_formats = [{**info_dict, **fmt} for fmt in info_dict.get('requested_formats', [])] args = [] for fmt in requested_formats or [info_dict]: try: fragment_count = 1 if self.params.get('test') else len(fmt['fragments']) except TypeError: fragment_count = None ctx = { 'filename': fmt.get('filepath') or filename, 'live': 'is_from_start' if fmt.get('is_from_start') else fmt.get('is_live'), 'total_frags': fragment_count, } if real_downloader: self._prepare_external_frag_download(ctx) else: self._prepare_and_start_frag_download(ctx, fmt) ctx['start'] = real_start fragments_to_download = self._get_fragments(fmt, ctx) if real_downloader: self.to_screen( f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}') info_dict['fragments'] = list(fragments_to_download) fd = real_downloader(self.ydl, self.params) return fd.real_download(filename, info_dict) args.append([ctx, fragments_to_download, fmt]) return self.download_and_append_fragments_multiple(*args) def _resolve_fragments(self, fragments, ctx): fragments = fragments(ctx) if callable(fragments) else fragments return [next(iter(fragments))] if self.params.get('test') else fragments def _get_fragments(self, fmt, ctx): fragment_base_url = fmt.get('fragment_base_url') fragments = self._resolve_fragments(fmt['fragments'], ctx) frag_index = 0 for i, fragment in enumerate(fragments): frag_index += 1 if frag_index <= ctx['fragment_index']: continue fragment_url = fragment.get('url') if not fragment_url: assert fragment_base_url fragment_url = urljoin(fragment_base_url, fragment['path']) yield { 'frag_index': frag_index, 'fragment_count': fragment.get('fragment_count'), 'index': i, 'url': fragment_url, } yt-dlp-2022.08.19/yt_dlp/downloader/external.py000066400000000000000000000523171427755243700212050ustar00rootroot00000000000000import enum import os.path import re import subprocess import sys import time from .fragment import FragmentFD from ..compat import functools from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor from ..utils import ( Popen, RetryManager, _configuration_args, check_executable, classproperty, cli_bool_option, cli_option, cli_valueless_option, determine_ext, encodeArgument, encodeFilename, handle_youtubedl_headers, remove_end, traverse_obj, ) class Features(enum.Enum): TO_STDOUT = enum.auto() MULTIPLE_FORMATS = enum.auto() class ExternalFD(FragmentFD): SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps') SUPPORTED_FEATURES = () _CAPTURE_STDERR = True def real_download(self, filename, info_dict): self.report_destination(filename) tmpfilename = self.temp_name(filename) try: started = time.time() retval = self._call_downloader(tmpfilename, info_dict) except KeyboardInterrupt: if not info_dict.get('is_live'): raise # Live stream downloading cancellation should be considered as # correct and expected termination thus all postprocessing # should take place retval = 0 self.to_screen('[%s] Interrupted by user' % self.get_basename()) if retval == 0: status = { 'filename': filename, 'status': 
'finished', 'elapsed': time.time() - started, } if filename != '-': fsize = os.path.getsize(encodeFilename(tmpfilename)) self.to_screen(f'\r[{self.get_basename()}] Downloaded {fsize} bytes') self.try_rename(tmpfilename, filename) status.update({ 'downloaded_bytes': fsize, 'total_bytes': fsize, }) self._hook_progress(status, info_dict) return True else: self.to_stderr('\n') self.report_error('%s exited with code %d' % ( self.get_basename(), retval)) return False @classmethod def get_basename(cls): return cls.__name__[:-2].lower() @classproperty def EXE_NAME(cls): return cls.get_basename() @functools.cached_property def exe(self): return self.EXE_NAME @classmethod def available(cls, path=None): path = check_executable( cls.EXE_NAME if path in (None, cls.get_basename()) else path, [cls.AVAILABLE_OPT]) if not path: return False cls.exe = path return path @classmethod def supports(cls, info_dict): return all(( not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES, '+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES, all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')), )) @classmethod def can_download(cls, info_dict, path=None): return cls.available(path) and cls.supports(info_dict) def _option(self, command_option, param): return cli_option(self.params, command_option, param) def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None): return cli_bool_option(self.params, command_option, param, true_value, false_value, separator) def _valueless_option(self, command_option, param, expected_value=True): return cli_valueless_option(self.params, command_option, param, expected_value) def _configuration_args(self, keys=None, *args, **kwargs): return _configuration_args( self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME, keys, *args, **kwargs) def _call_downloader(self, tmpfilename, info_dict): """ Either overwrite this or implement _make_cmd """ cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)] self._debug_cmd(cmd) if 'fragments' not in info_dict: _, stderr, returncode = Popen.run( cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None) if returncode and stderr: self.to_stderr(stderr) return returncode skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=None, fatal=not skip_unavailable_fragments) for retry in retry_manager: _, stderr, returncode = Popen.run(cmd, text=True, stderr=subprocess.PIPE) if not returncode: break # TODO: Decide whether to retry based on error code # https://aria2.github.io/manual/en/html/aria2c.html#exit-status if stderr: self.to_stderr(stderr) retry.error = Exception() continue if not skip_unavailable_fragments and retry_manager.error: return -1 decrypt_fragment = self.decrypter(info_dict) dest, _ = self.sanitize_open(tmpfilename, 'wb') for frag_index, fragment in enumerate(info_dict['fragments']): fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index) try: src, _ = self.sanitize_open(fragment_filename, 'rb') except OSError as err: if skip_unavailable_fragments and frag_index > 1: self.report_skip_fragment(frag_index, err) continue self.report_error(f'Unable to open fragment {frag_index}; {err}') return -1 dest.write(decrypt_fragment(fragment, src.read())) src.close() if not self.params.get('keep_fragments', False): 
self.try_remove(encodeFilename(fragment_filename)) dest.close() self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename)) return 0 class CurlFD(ExternalFD): AVAILABLE_OPT = '-V' _CAPTURE_STDERR = False # curl writes the progress to stderr def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed'] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): cmd += ['--header', f'{key}: {val}'] cmd += self._bool_option('--continue-at', 'continuedl', '-', '0') cmd += self._valueless_option('--silent', 'noprogress') cmd += self._valueless_option('--verbose', 'verbose') cmd += self._option('--limit-rate', 'ratelimit') retry = self._option('--retry', 'retries') if len(retry) == 2: if retry[1] in ('inf', 'infinite'): retry[1] = '2147483647' cmd += retry cmd += self._option('--max-filesize', 'max_filesize') cmd += self._option('--interface', 'source_address') cmd += self._option('--proxy', 'proxy') cmd += self._valueless_option('--insecure', 'nocheckcertificate') cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd class AxelFD(ExternalFD): AVAILABLE_OPT = '-V' def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-o', tmpfilename] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): cmd += ['-H', f'{key}: {val}'] cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd class WgetFD(ExternalFD): AVAILABLE_OPT = '--version' def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies', '--compression=auto'] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): cmd += ['--header', f'{key}: {val}'] cmd += self._option('--limit-rate', 'ratelimit') retry = self._option('--tries', 'retries') if len(retry) == 2: if retry[1] in ('inf', 'infinite'): retry[1] = '0' cmd += retry cmd += self._option('--bind-address', 'source_address') proxy = self.params.get('proxy') if proxy: for var in ('http_proxy', 'https_proxy'): cmd += ['--execute', f'{var}={proxy}'] cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate') cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd class Aria2cFD(ExternalFD): AVAILABLE_OPT = '-v' SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'dash_frag_urls', 'm3u8_frag_urls') @staticmethod def supports_manifest(manifest): UNSUPPORTED_FEATURES = [ r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [1] # 1. 
https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 ] check_results = (not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES) return all(check_results) def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-c', '--console-log-level=warn', '--summary-interval=0', '--download-result=hide', '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16'] if 'fragments' in info_dict: cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true'] else: cmd += ['--min-split-size', '1M'] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): cmd += ['--header', f'{key}: {val}'] cmd += self._option('--max-overall-download-limit', 'ratelimit') cmd += self._option('--interface', 'source_address') cmd += self._option('--all-proxy', 'proxy') cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=') cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=') cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=') cmd += self._configuration_args() # aria2c strips out spaces from the beginning/end of filenames and paths. # We work around this issue by adding a "./" to the beginning of the # filename and relative path, and adding a "/" at the end of the path. # See: https://github.com/yt-dlp/yt-dlp/issues/276 # https://github.com/ytdl-org/youtube-dl/issues/20312 # https://github.com/aria2/aria2/issues/1373 dn = os.path.dirname(tmpfilename) if dn: if not os.path.isabs(dn): dn = f'.{os.path.sep}{dn}' cmd += ['--dir', dn + os.path.sep] if 'fragments' not in info_dict: cmd += ['--out', f'.{os.path.sep}{os.path.basename(tmpfilename)}'] cmd += ['--auto-file-renaming=false'] if 'fragments' in info_dict: cmd += ['--file-allocation=none', '--uri-selector=inorder'] url_list_file = '%s.frag.urls' % tmpfilename url_list = [] for frag_index, fragment in enumerate(info_dict['fragments']): fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index) url_list.append('%s\n\tout=%s' % (fragment['url'], fragment_filename)) stream, _ = self.sanitize_open(url_list_file, 'wb') stream.write('\n'.join(url_list).encode()) stream.close() cmd += ['-i', url_list_file] else: cmd += ['--', info_dict['url']] return cmd class HttpieFD(ExternalFD): AVAILABLE_OPT = '--version' EXE_NAME = 'http' def _make_cmd(self, tmpfilename, info_dict): cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): cmd += [f'{key}:{val}'] return cmd class FFmpegFD(ExternalFD): SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'm3u8_native', 'rtsp', 'rtmp', 'rtmp_ffmpeg', 'mms', 'http_dash_segments') SUPPORTED_FEATURES = (Features.TO_STDOUT, Features.MULTIPLE_FORMATS) @classmethod def available(cls, path=None): # TODO: Fix path for ffmpeg # Fixme: This may be wrong when --ffmpeg-location is used return FFmpegPostProcessor().available def on_process_started(self, proc, stdin): """ Override this in subclasses """ pass @classmethod def can_merge_formats(cls, info_dict, params): return ( info_dict.get('requested_formats') and info_dict.get('protocol') and not params.get('allow_unplayable_formats') and 'no-direct-merge' not in params.get('compat_opts', []) and cls.can_download(info_dict)) def _call_downloader(self, tmpfilename, info_dict): urls = [f['url'] for f in info_dict.get('requested_formats', [])] or [info_dict['url']] ffpp = 
FFmpegPostProcessor(downloader=self) if not ffpp.available: self.report_error('m3u8 download detected but ffmpeg could not be found. Please install') return False ffpp.check_version() args = [ffpp.executable, '-y'] for log_level in ('quiet', 'verbose'): if self.params.get(log_level, False): args += ['-loglevel', log_level] break if not self.params.get('verbose'): args += ['-hide_banner'] args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args'), default=[]) # These exists only for compatibility. Extractors should use # info_dict['downloader_options']['ffmpeg_args'] instead args += info_dict.get('_ffmpeg_args') or [] seekable = info_dict.get('_seekable') if seekable is not None: # setting -seekable prevents ffmpeg from guessing if the server # supports seeking(by adding the header `Range: bytes=0-`), which # can cause problems in some cases # https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127 # http://trac.ffmpeg.org/ticket/6125#comment:10 args += ['-seekable', '1' if seekable else '0'] http_headers = None if info_dict.get('http_headers'): youtubedl_headers = handle_youtubedl_headers(info_dict['http_headers']) http_headers = [ # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. '-headers', ''.join(f'{key}: {val}\r\n' for key, val in youtubedl_headers.items()) ] env = None proxy = self.params.get('proxy') if proxy: if not re.match(r'^[\da-zA-Z]+://', proxy): proxy = 'http://%s' % proxy if proxy.startswith('socks'): self.report_warning( '%s does not support SOCKS proxies. Downloading is likely to fail. ' 'Consider adding --hls-prefer-native to your command.' % self.get_basename()) # Since December 2015 ffmpeg supports -http_proxy option (see # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd) # We could switch to the following code if we are able to detect version properly # args += ['-http_proxy', proxy] env = os.environ.copy() env['HTTP_PROXY'] = proxy env['http_proxy'] = proxy protocol = info_dict.get('protocol') if protocol == 'rtmp': player_url = info_dict.get('player_url') page_url = info_dict.get('page_url') app = info_dict.get('app') play_path = info_dict.get('play_path') tc_url = info_dict.get('tc_url') flash_version = info_dict.get('flash_version') live = info_dict.get('rtmp_live', False) conn = info_dict.get('rtmp_conn') if player_url is not None: args += ['-rtmp_swfverify', player_url] if page_url is not None: args += ['-rtmp_pageurl', page_url] if app is not None: args += ['-rtmp_app', app] if play_path is not None: args += ['-rtmp_playpath', play_path] if tc_url is not None: args += ['-rtmp_tcurl', tc_url] if flash_version is not None: args += ['-rtmp_flashver', flash_version] if live: args += ['-rtmp_live', 'live'] if isinstance(conn, list): for entry in conn: args += ['-rtmp_conn', entry] elif isinstance(conn, str): args += ['-rtmp_conn', conn] start_time, end_time = info_dict.get('section_start') or 0, info_dict.get('section_end') for i, url in enumerate(urls): if http_headers is not None and re.match(r'^https?://', url): args += http_headers if start_time: args += ['-ss', str(start_time)] if end_time: args += ['-t', str(end_time - start_time)] args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', url] if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'): args += ['-c', 'copy'] if info_dict.get('requested_formats') or protocol == 'http_dash_segments': for 
(i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]): stream_number = fmt.get('manifest_stream_number', 0) args.extend(['-map', f'{i}:{stream_number}']) if self.params.get('test', False): args += ['-fs', str(self._TEST_FILE_SIZE)] ext = info_dict['ext'] if protocol in ('m3u8', 'm3u8_native'): use_mpegts = (tmpfilename == '-') or self.params.get('hls_use_mpegts') if use_mpegts is None: use_mpegts = info_dict.get('is_live') if use_mpegts: args += ['-f', 'mpegts'] else: args += ['-f', 'mp4'] if (ffpp.basename == 'ffmpeg' and ffpp._features.get('needs_adtstoasc')) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')): args += ['-bsf:a', 'aac_adtstoasc'] elif protocol == 'rtmp': args += ['-f', 'flv'] elif ext == 'mp4' and tmpfilename == '-': args += ['-f', 'mpegts'] elif ext == 'unknown_video': ext = determine_ext(remove_end(tmpfilename, '.part')) if ext == 'unknown_video': self.report_warning( 'The video format is unknown and cannot be downloaded by ffmpeg. ' 'Explicitly set the extension in the filename to attempt download in that format') else: self.report_warning(f'The video format is unknown. Trying to download as {ext} according to the filename') args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)] else: args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)] args += self._configuration_args(('_o1', '_o', '')) args = [encodeArgument(opt) for opt in args] args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) self._debug_cmd(args) with Popen(args, stdin=subprocess.PIPE, env=env) as proc: if url in ('-', 'pipe:'): self.on_process_started(proc, proc.stdin) try: retval = proc.wait() except BaseException as e: # subprocces.run would send the SIGKILL signal to ffmpeg and the # mp4 file couldn't be played, but if we ask ffmpeg to quit it # produces a file that is playable (this is mostly useful for live # streams). Note that Windows is not affected and produces playable # files (see https://github.com/ytdl-org/youtube-dl/issues/8300). if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'): proc.communicate_or_kill(b'q') else: proc.kill(timeout=None) raise return retval class AVconvFD(FFmpegFD): pass _BY_NAME = { klass.get_basename(): klass for name, klass in globals().items() if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD') } _BY_EXE = {klass.EXE_NAME: klass for klass in _BY_NAME.values()} def list_external_downloaders(): return sorted(_BY_NAME.keys()) def get_external_downloader(external_downloader): """ Given the name of the executable, see whether we support the given downloader . 
""" # Drop .exe extension on Windows bn = os.path.splitext(os.path.basename(external_downloader))[0] return _BY_NAME.get(bn, _BY_EXE.get(bn)) yt-dlp-2022.08.19/yt_dlp/downloader/f4m.py000066400000000000000000000357621427755243700200560ustar00rootroot00000000000000import base64 import io import itertools import struct import time import urllib.error import urllib.parse from .fragment import FragmentFD from ..compat import compat_etree_fromstring from ..utils import fix_xml_ampersands, xpath_text class DataTruncatedError(Exception): pass class FlvReader(io.BytesIO): """ Reader for Flv files The file format is documented in https://www.adobe.com/devnet/f4v.html """ def read_bytes(self, n): data = self.read(n) if len(data) < n: raise DataTruncatedError( 'FlvReader error: need %d bytes while only %d bytes got' % ( n, len(data))) return data # Utility functions for reading numbers and strings def read_unsigned_long_long(self): return struct.unpack('!Q', self.read_bytes(8))[0] def read_unsigned_int(self): return struct.unpack('!I', self.read_bytes(4))[0] def read_unsigned_char(self): return struct.unpack('!B', self.read_bytes(1))[0] def read_string(self): res = b'' while True: char = self.read_bytes(1) if char == b'\x00': break res += char return res def read_box_info(self): """ Read a box and return the info as a tuple: (box_size, box_type, box_data) """ real_size = size = self.read_unsigned_int() box_type = self.read_bytes(4) header_end = 8 if size == 1: real_size = self.read_unsigned_long_long() header_end = 16 return real_size, box_type, self.read_bytes(real_size - header_end) def read_asrt(self): # version self.read_unsigned_char() # flags self.read_bytes(3) quality_entry_count = self.read_unsigned_char() # QualityEntryCount for i in range(quality_entry_count): self.read_string() segment_run_count = self.read_unsigned_int() segments = [] for i in range(segment_run_count): first_segment = self.read_unsigned_int() fragments_per_segment = self.read_unsigned_int() segments.append((first_segment, fragments_per_segment)) return { 'segment_run': segments, } def read_afrt(self): # version self.read_unsigned_char() # flags self.read_bytes(3) # time scale self.read_unsigned_int() quality_entry_count = self.read_unsigned_char() # QualitySegmentUrlModifiers for i in range(quality_entry_count): self.read_string() fragments_count = self.read_unsigned_int() fragments = [] for i in range(fragments_count): first = self.read_unsigned_int() first_ts = self.read_unsigned_long_long() duration = self.read_unsigned_int() if duration == 0: discontinuity_indicator = self.read_unsigned_char() else: discontinuity_indicator = None fragments.append({ 'first': first, 'ts': first_ts, 'duration': duration, 'discontinuity_indicator': discontinuity_indicator, }) return { 'fragments': fragments, } def read_abst(self): # version self.read_unsigned_char() # flags self.read_bytes(3) self.read_unsigned_int() # BootstrapinfoVersion # Profile,Live,Update,Reserved flags = self.read_unsigned_char() live = flags & 0x20 != 0 # time scale self.read_unsigned_int() # CurrentMediaTime self.read_unsigned_long_long() # SmpteTimeCodeOffset self.read_unsigned_long_long() self.read_string() # MovieIdentifier server_count = self.read_unsigned_char() # ServerEntryTable for i in range(server_count): self.read_string() quality_count = self.read_unsigned_char() # QualityEntryTable for i in range(quality_count): self.read_string() # DrmData self.read_string() # MetaData self.read_string() segments_count = self.read_unsigned_char() segments = [] for i 
in range(segments_count): box_size, box_type, box_data = self.read_box_info() assert box_type == b'asrt' segment = FlvReader(box_data).read_asrt() segments.append(segment) fragments_run_count = self.read_unsigned_char() fragments = [] for i in range(fragments_run_count): box_size, box_type, box_data = self.read_box_info() assert box_type == b'afrt' fragments.append(FlvReader(box_data).read_afrt()) return { 'segments': segments, 'fragments': fragments, 'live': live, } def read_bootstrap_info(self): total_size, box_type, box_data = self.read_box_info() assert box_type == b'abst' return FlvReader(box_data).read_abst() def read_bootstrap_info(bootstrap_bytes): return FlvReader(bootstrap_bytes).read_bootstrap_info() def build_fragments_list(boot_info): """ Return a list of (segment, fragment) for each fragment in the video """ res = [] segment_run_table = boot_info['segments'][0] fragment_run_entry_table = boot_info['fragments'][0]['fragments'] first_frag_number = fragment_run_entry_table[0]['first'] fragments_counter = itertools.count(first_frag_number) for segment, fragments_count in segment_run_table['segment_run']: # In some live HDS streams (e.g. Rai), `fragments_count` is # abnormal and causing out-of-memory errors. It's OK to change the # number of fragments for live streams as they are updated periodically if fragments_count == 4294967295 and boot_info['live']: fragments_count = 2 for _ in range(fragments_count): res.append((segment, next(fragments_counter))) if boot_info['live']: res = res[-2:] return res def write_unsigned_int(stream, val): stream.write(struct.pack('!I', val)) def write_unsigned_int_24(stream, val): stream.write(struct.pack('!I', val)[1:]) def write_flv_header(stream): """Writes the FLV header to stream""" # FLV header stream.write(b'FLV\x01') stream.write(b'\x05') stream.write(b'\x00\x00\x00\x09') stream.write(b'\x00\x00\x00\x00') def write_metadata_tag(stream, metadata): """Writes optional metadata tag to stream""" SCRIPT_TAG = b'\x12' FLV_TAG_HEADER_LEN = 11 if metadata: stream.write(SCRIPT_TAG) write_unsigned_int_24(stream, len(metadata)) stream.write(b'\x00\x00\x00\x00\x00\x00\x00') stream.write(metadata) write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata)) def remove_encrypted_media(media): return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and 'drmAdditionalHeaderSetId' not in e.attrib, media)) def _add_ns(prop, ver=1): return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop) def get_base_url(manifest): base_url = xpath_text( manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)], 'base URL', default=None) if base_url: base_url = base_url.strip() return base_url class F4mFD(FragmentFD): """ A downloader for f4m manifests or AdobeHDS. 
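Fragment URLs are formed by appending a 'Seg%d-Frag%d' name directly to the
media base URL path; a sketch with illustrative values only (see
real_download() below for the actual construction):

    name = 'Seg%d-Frag%d' % (1, 52)  # -> 'Seg1-Frag52'
    # a base URL of 'http://cdn.example.com/hds/720p' then yields
    # 'http://cdn.example.com/hds/720pSeg1-Frag52'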
""" def _get_unencrypted_media(self, doc): media = doc.findall(_add_ns('media')) if not media: self.report_error('No media found') if not self.params.get('allow_unplayable_formats'): for e in (doc.findall(_add_ns('drmAdditionalHeader')) + doc.findall(_add_ns('drmAdditionalHeaderSet'))): # If id attribute is missing it's valid for all media nodes # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute if 'id' not in e.attrib: self.report_error('Missing ID in f4m DRM') media = remove_encrypted_media(media) if not media: self.report_error('Unsupported DRM') return media def _get_bootstrap_from_url(self, bootstrap_url): bootstrap = self.ydl.urlopen(bootstrap_url).read() return read_bootstrap_info(bootstrap) def _update_live_fragments(self, bootstrap_url, latest_fragment): fragments_list = [] retries = 30 while (not fragments_list) and (retries > 0): boot_info = self._get_bootstrap_from_url(bootstrap_url) fragments_list = build_fragments_list(boot_info) fragments_list = [f for f in fragments_list if f[1] > latest_fragment] if not fragments_list: # Retry after a while time.sleep(5.0) retries -= 1 if not fragments_list: self.report_error('Failed to update fragments') return fragments_list def _parse_bootstrap_node(self, node, base_url): # Sometimes non empty inline bootstrap info can be specified along # with bootstrap url attribute (e.g. dummy inline bootstrap info # contains whitespace characters in [1]). We will prefer bootstrap # url over inline bootstrap info when present. # 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m bootstrap_url = node.get('url') if bootstrap_url: bootstrap_url = urllib.parse.urljoin( base_url, bootstrap_url) boot_info = self._get_bootstrap_from_url(bootstrap_url) else: bootstrap_url = None bootstrap = base64.b64decode(node.text) boot_info = read_bootstrap_info(bootstrap) return boot_info, bootstrap_url def real_download(self, filename, info_dict): man_url = info_dict['url'] requested_bitrate = info_dict.get('tbr') self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME) urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) man_url = urlh.geturl() # Some manifests may be malformed, e.g. prosiebensat1 generated manifests # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244 # and https://github.com/ytdl-org/youtube-dl/issues/7823) manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip() doc = compat_etree_fromstring(manifest) formats = [(int(f.attrib.get('bitrate', -1)), f) for f in self._get_unencrypted_media(doc)] if requested_bitrate is None or len(formats) == 1: # get the best format formats = sorted(formats, key=lambda f: f[0]) rate, media = formats[-1] else: rate, media = list(filter( lambda f: int(f[0]) == requested_bitrate, formats))[0] # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec. 
man_base_url = get_base_url(doc) or man_url base_url = urllib.parse.urljoin(man_base_url, media.attrib['url']) bootstrap_node = doc.find(_add_ns('bootstrapInfo')) boot_info, bootstrap_url = self._parse_bootstrap_node( bootstrap_node, man_base_url) live = boot_info['live'] metadata_node = media.find(_add_ns('metadata')) if metadata_node is not None: metadata = base64.b64decode(metadata_node.text) else: metadata = None fragments_list = build_fragments_list(boot_info) test = self.params.get('test', False) if test: # We only download the first fragment fragments_list = fragments_list[:1] total_frags = len(fragments_list) # For some akamai manifests we'll need to add a query to the fragment url akamai_pv = xpath_text(doc, _add_ns('pv-2.0')) ctx = { 'filename': filename, 'total_frags': total_frags, 'live': bool(live), } self._prepare_frag_download(ctx) dest_stream = ctx['dest_stream'] if ctx['complete_frags_downloaded_bytes'] == 0: write_flv_header(dest_stream) if not live: write_metadata_tag(dest_stream, metadata) base_url_parsed = urllib.parse.urlparse(base_url) self._start_frag_download(ctx, info_dict) frag_index = 0 while fragments_list: seg_i, frag_i = fragments_list.pop(0) frag_index += 1 if frag_index <= ctx['fragment_index']: continue name = 'Seg%d-Frag%d' % (seg_i, frag_i) query = [] if base_url_parsed.query: query.append(base_url_parsed.query) if akamai_pv: query.append(akamai_pv.strip(';')) if info_dict.get('extra_param_to_segment_url'): query.append(info_dict['extra_param_to_segment_url']) url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query)) try: success = self._download_fragment(ctx, url_parsed.geturl(), info_dict) if not success: return False down_data = self._read_fragment(ctx) reader = FlvReader(down_data) while True: try: _, box_type, box_data = reader.read_box_info() except DataTruncatedError: if test: # In tests, segments may be truncated, and thus # FlvReader may not be able to parse the whole # chunk. If so, write the segment as is # See https://github.com/ytdl-org/youtube-dl/issues/9214 dest_stream.write(down_data) break raise if box_type == b'mdat': self._append_fragment(ctx, box_data) break except urllib.error.HTTPError as err: if live and (err.code == 404 or err.code == 410): # We didn't keep up with the live window. Continue # with the next available fragment. msg = 'Fragment %d unavailable' % frag_i self.report_warning(msg) fragments_list = [] else: raise if not fragments_list and not test and live and bootstrap_url: fragments_list = self._update_live_fragments(bootstrap_url, frag_i) total_frags += len(fragments_list) if fragments_list and (fragments_list[0][1] > frag_i + 1): msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1)) self.report_warning(msg) self._finish_frag_download(ctx, info_dict) return True yt-dlp-2022.08.19/yt_dlp/downloader/fc2.py000066400000000000000000000024541427755243700200320ustar00rootroot00000000000000import threading from .common import FileDownloader from .external import FFmpegFD class FC2LiveFD(FileDownloader): """ Downloads FC2 live without being stopped.
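The download itself is delegated to FFmpegFD; this class only keeps the
accompanying WebSocket session alive by re-sending a heartbeat message on a
30-second threading.Timer (see heartbeat() below).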
Note, this is not a part of public API, and will be removed without notice. DO NOT USE """ def real_download(self, filename, info_dict): ws = info_dict['ws'] heartbeat_lock = threading.Lock() heartbeat_state = [None, 1] def heartbeat(): if heartbeat_state[1] < 0: return try: heartbeat_state[1] += 1 ws.send('{"name":"heartbeat","arguments":{},"id":%d}' % heartbeat_state[1]) except Exception: self.to_screen('[fc2:live] Heartbeat failed') with heartbeat_lock: heartbeat_state[0] = threading.Timer(30, heartbeat) heartbeat_state[0]._daemonic = True heartbeat_state[0].start() heartbeat() new_info_dict = info_dict.copy() new_info_dict.update({ 'ws': None, 'protocol': 'live_ffmpeg', }) try: return FFmpegFD(self.ydl, self.params or {}).download(filename, new_info_dict) finally: # stop heartbeating heartbeat_state[1] = -1 yt-dlp-2022.08.19/yt_dlp/downloader/fragment.py000066400000000000000000000531461427755243700211670ustar00rootroot00000000000000import concurrent.futures import contextlib import http.client import json import math import os import struct import time import urllib.error from .common import FileDownloader from .http import HttpFD from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 from ..compat import compat_os_name from ..utils import ( DownloadError, RetryManager, encodeFilename, sanitized_Request, traverse_obj, ) class HttpQuietDownloader(HttpFD): def to_screen(self, *args, **kargs): pass to_console_title = to_screen class FragmentFD(FileDownloader): """ A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests). Available options: fragment_retries: Number of times to retry a fragment for HTTP error (DASH and hlsnative only) skip_unavailable_fragments: Skip unavailable fragments (DASH and hlsnative only) keep_fragments: Keep downloaded fragments on disk after downloading is finished concurrent_fragment_downloads: The number of threads to use for native hls and dash downloads _no_ytdl_file: Don't use .ytdl file For each incomplete fragment download yt-dlp keeps on disk a special bookkeeping file with download state and metadata (in future such files will be used for any incomplete download handled by yt-dlp). This file is used to properly handle resuming, check download file consistency and detect potential errors. The file has a .ytdl extension and represents a standard JSON file of the following format: extractor: Dictionary of extractor related data. TBD. downloader: Dictionary of downloader related data. May contain following data: current_fragment: Dictionary with current (being downloaded) fragment data: index: 0-based index of current fragment among all fragments fragment_count: Total count of fragments This feature is experimental and file format may change in future. """ def report_retry_fragment(self, err, frag_index, count, retries): self.deprecation_warning( 'yt_dlp.downloader.FragmentFD.report_retry_fragment is deprecated. 
Use yt_dlp.downloader.FileDownloader.report_retry instead') return self.report_retry(err, count, retries, frag_index) def report_skip_fragment(self, frag_index, err=None): err = f' {err};' if err else '' self.to_screen(f'[download]{err} Skipping fragment {frag_index:d} ...') def _prepare_url(self, info_dict, url): headers = info_dict.get('http_headers') return sanitized_Request(url, None, headers) if headers else url def _prepare_and_start_frag_download(self, ctx, info_dict): self._prepare_frag_download(ctx) self._start_frag_download(ctx, info_dict) def __do_ytdl_file(self, ctx): return ctx['live'] is not True and ctx['tmpfilename'] != '-' and not self.params.get('_no_ytdl_file') def _read_ytdl_file(self, ctx): assert 'ytdl_corrupt' not in ctx stream, _ = self.sanitize_open(self.ytdl_filename(ctx['filename']), 'r') try: ytdl_data = json.loads(stream.read()) ctx['fragment_index'] = ytdl_data['downloader']['current_fragment']['index'] if 'extra_state' in ytdl_data['downloader']: ctx['extra_state'] = ytdl_data['downloader']['extra_state'] except Exception: ctx['ytdl_corrupt'] = True finally: stream.close() def _write_ytdl_file(self, ctx): frag_index_stream, _ = self.sanitize_open(self.ytdl_filename(ctx['filename']), 'w') try: downloader = { 'current_fragment': { 'index': ctx['fragment_index'], }, } if 'extra_state' in ctx: downloader['extra_state'] = ctx['extra_state'] if ctx.get('fragment_count') is not None: downloader['fragment_count'] = ctx['fragment_count'] frag_index_stream.write(json.dumps({'downloader': downloader})) finally: frag_index_stream.close() def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_data=None): fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index']) fragment_info_dict = { 'url': frag_url, 'http_headers': headers or info_dict.get('http_headers'), 'request_data': request_data, 'ctx_id': ctx.get('ctx_id'), } success, _ = ctx['dl'].download(fragment_filename, fragment_info_dict) if not success: return False if fragment_info_dict.get('filetime'): ctx['fragment_filetime'] = fragment_info_dict.get('filetime') ctx['fragment_filename_sanitized'] = fragment_filename return True def _read_fragment(self, ctx): if not ctx.get('fragment_filename_sanitized'): return None try: down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb') except FileNotFoundError: if ctx.get('live'): return None raise ctx['fragment_filename_sanitized'] = frag_sanitized frag_content = down.read() down.close() return frag_content def _append_fragment(self, ctx, frag_content): try: ctx['dest_stream'].write(frag_content) ctx['dest_stream'].flush() finally: if self.__do_ytdl_file(ctx): self._write_ytdl_file(ctx) if not self.params.get('keep_fragments', False): self.try_remove(encodeFilename(ctx['fragment_filename_sanitized'])) del ctx['fragment_filename_sanitized'] def _prepare_frag_download(self, ctx): if 'live' not in ctx: ctx['live'] = False if not ctx['live']: total_frags_str = '%d' % ctx['total_frags'] ad_frags = ctx.get('ad_frags', 0) if ad_frags: total_frags_str += ' (not including %d ad)' % ad_frags else: total_frags_str = 'unknown (live)' self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}') self.report_destination(ctx['filename']) dl = HttpQuietDownloader(self.ydl, { **self.params, 'noprogress': True, 'test': False, }) tmpfilename = self.temp_name(ctx['filename']) open_mode = 'wb' resume_len = 0 # Establish possible resume length if os.path.isfile(encodeFilename(tmpfilename)): open_mode = 'ab' resume_len = 
os.path.getsize(encodeFilename(tmpfilename)) # Should be initialized before ytdl file check ctx.update({ 'tmpfilename': tmpfilename, 'fragment_index': 0, }) if self.__do_ytdl_file(ctx): if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))): self._read_ytdl_file(ctx) is_corrupt = ctx.get('ytdl_corrupt') is True is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0 if is_corrupt or is_inconsistent: message = ( '.ytdl file is corrupt' if is_corrupt else 'Inconsistent state of incomplete fragment download') self.report_warning( '%s. Restarting from the beginning ...' % message) ctx['fragment_index'] = resume_len = 0 if 'ytdl_corrupt' in ctx: del ctx['ytdl_corrupt'] self._write_ytdl_file(ctx) else: self._write_ytdl_file(ctx) assert ctx['fragment_index'] == 0 dest_stream, tmpfilename = self.sanitize_open(tmpfilename, open_mode) ctx.update({ 'dl': dl, 'dest_stream': dest_stream, 'tmpfilename': tmpfilename, # Total complete fragments downloaded so far in bytes 'complete_frags_downloaded_bytes': resume_len, }) def _start_frag_download(self, ctx, info_dict): resume_len = ctx['complete_frags_downloaded_bytes'] total_frags = ctx['total_frags'] ctx_id = ctx.get('ctx_id') # This dict stores the download progress, it's updated by the progress # hook state = { 'status': 'downloading', 'downloaded_bytes': resume_len, 'fragment_index': ctx['fragment_index'], 'fragment_count': total_frags, 'filename': ctx['filename'], 'tmpfilename': ctx['tmpfilename'], } start = time.time() ctx.update({ 'started': start, 'fragment_started': start, # Amount of fragment's bytes downloaded by the time of the previous # frag progress hook invocation 'prev_frag_downloaded_bytes': 0, }) def frag_progress_hook(s): if s['status'] not in ('downloading', 'finished'): return if not total_frags and ctx.get('fragment_count'): state['fragment_count'] = ctx['fragment_count'] if ctx_id is not None and s.get('ctx_id') != ctx_id: return state['max_progress'] = ctx.get('max_progress') state['progress_idx'] = ctx.get('progress_idx') time_now = time.time() state['elapsed'] = time_now - start frag_total_bytes = s.get('total_bytes') or 0 s['fragment_info_dict'] = s.pop('info_dict', {}) if not ctx['live']: estimated_size = ( (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) / (state['fragment_index'] + 1) * total_frags) state['total_bytes_estimate'] = estimated_size if s['status'] == 'finished': state['fragment_index'] += 1 ctx['fragment_index'] = state['fragment_index'] state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes'] ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes'] ctx['speed'] = state['speed'] = self.calc_speed( ctx['fragment_started'], time_now, frag_total_bytes) ctx['fragment_started'] = time.time() ctx['prev_frag_downloaded_bytes'] = 0 else: frag_downloaded_bytes = s['downloaded_bytes'] state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes'] if not ctx['live']: state['eta'] = self.calc_eta( start, time_now, estimated_size - resume_len, state['downloaded_bytes'] - resume_len) ctx['speed'] = state['speed'] = self.calc_speed( ctx['fragment_started'], time_now, frag_downloaded_bytes) ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes self._hook_progress(state, info_dict) ctx['dl'].add_progress_hook(frag_progress_hook) return start def _finish_frag_download(self, ctx, info_dict): ctx['dest_stream'].close() if self.__do_ytdl_file(ctx): ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename'])) if 
os.path.isfile(ytdl_filename): self.try_remove(ytdl_filename) elapsed = time.time() - ctx['started'] if ctx['tmpfilename'] == '-': downloaded_bytes = ctx['complete_frags_downloaded_bytes'] else: self.try_rename(ctx['tmpfilename'], ctx['filename']) if self.params.get('updatetime', True): filetime = ctx.get('fragment_filetime') if filetime: with contextlib.suppress(Exception): os.utime(ctx['filename'], (time.time(), filetime)) downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename'])) self._hook_progress({ 'downloaded_bytes': downloaded_bytes, 'total_bytes': downloaded_bytes, 'filename': ctx['filename'], 'status': 'finished', 'elapsed': elapsed, 'ctx_id': ctx.get('ctx_id'), 'max_progress': ctx.get('max_progress'), 'progress_idx': ctx.get('progress_idx'), }, info_dict) def _prepare_external_frag_download(self, ctx): if 'live' not in ctx: ctx['live'] = False if not ctx['live']: total_frags_str = '%d' % ctx['total_frags'] ad_frags = ctx.get('ad_frags', 0) if ad_frags: total_frags_str += ' (not including %d ad)' % ad_frags else: total_frags_str = 'unknown (live)' self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}') tmpfilename = self.temp_name(ctx['filename']) # Should be initialized before ytdl file check ctx.update({ 'tmpfilename': tmpfilename, 'fragment_index': 0, }) def decrypter(self, info_dict): _key_cache = {} def _get_key(url): if url not in _key_cache: _key_cache[url] = self.ydl.urlopen(self._prepare_url(info_dict, url)).read() return _key_cache[url] def decrypt_fragment(fragment, frag_content): if frag_content is None: return decrypt_info = fragment.get('decrypt_info') if not decrypt_info or decrypt_info['METHOD'] != 'AES-128': return frag_content iv = decrypt_info.get('IV') or struct.pack('>8xq', fragment['media_sequence']) decrypt_info['KEY'] = decrypt_info.get('KEY') or _get_key(info_dict.get('_decryption_key_url') or decrypt_info['URI']) # Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded, # not what it decrypts to. if self.params.get('test', False): return frag_content return unpad_pkcs7(aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv)) return decrypt_fragment def download_and_append_fragments_multiple(self, *args, pack_func=None, finish_func=None): ''' @params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ... 
all args must be either tuple or list ''' interrupt_trigger = [True] max_progress = len(args) if max_progress == 1: return self.download_and_append_fragments(*args[0], pack_func=pack_func, finish_func=finish_func) max_workers = self.params.get('concurrent_fragment_downloads', 1) if max_progress > 1: self._prepare_multiline_status(max_progress) is_live = any(traverse_obj(args, (..., 2, 'is_live'), default=[])) def thread_func(idx, ctx, fragments, info_dict, tpe): ctx['max_progress'] = max_progress ctx['progress_idx'] = idx return self.download_and_append_fragments( ctx, fragments, info_dict, pack_func=pack_func, finish_func=finish_func, tpe=tpe, interrupt_trigger=interrupt_trigger) class FTPE(concurrent.futures.ThreadPoolExecutor): # has to stop this or it's going to wait on the worker thread itself def __exit__(self, exc_type, exc_val, exc_tb): pass if compat_os_name == 'nt': def future_result(future): while True: try: return future.result(0.1) except KeyboardInterrupt: raise except concurrent.futures.TimeoutError: continue else: def future_result(future): return future.result() def interrupt_trigger_iter(fg): for f in fg: if not interrupt_trigger[0]: break yield f spins = [] for idx, (ctx, fragments, info_dict) in enumerate(args): tpe = FTPE(math.ceil(max_workers / max_progress)) job = tpe.submit(thread_func, idx, ctx, interrupt_trigger_iter(fragments), info_dict, tpe) spins.append((tpe, job)) result = True for tpe, job in spins: try: result = result and future_result(job) except KeyboardInterrupt: interrupt_trigger[0] = False finally: tpe.shutdown(wait=True) if not interrupt_trigger[0] and not is_live: raise KeyboardInterrupt() # we expect the user wants to stop and DO WANT the preceding postprocessors to run; # so returning a intermediate result here instead of KeyboardInterrupt on live return result def download_and_append_fragments( self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None, tpe=None, interrupt_trigger=None): if not interrupt_trigger: interrupt_trigger = (True, ) is_fatal = ( ((lambda _: False) if info_dict.get('is_live') else (lambda idx: idx == 0)) if self.params.get('skip_unavailable_fragments', True) else (lambda _: True)) if not pack_func: pack_func = lambda frag_content, _: frag_content def download_fragment(fragment, ctx): if not interrupt_trigger[0]: return frag_index = ctx['fragment_index'] = fragment['frag_index'] ctx['last_error'] = None headers = info_dict.get('http_headers', {}).copy() byte_range = fragment.get('byte_range') if byte_range: headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1) # Never skip the first fragment fatal = is_fatal(fragment.get('index') or (frag_index - 1)) def error_callback(err, count, retries): if fatal and count > retries: ctx['dest_stream'].close() self.report_retry(err, count, retries, frag_index, fatal) ctx['last_error'] = err for retry in RetryManager(self.params.get('fragment_retries'), error_callback): try: ctx['fragment_count'] = fragment.get('fragment_count') if not self._download_fragment(ctx, fragment['url'], info_dict, headers): return except (urllib.error.HTTPError, http.client.IncompleteRead) as err: retry.error = err continue except DownloadError: # has own retry settings if fatal: raise def append_fragment(frag_content, frag_index, ctx): if frag_content: self._append_fragment(ctx, pack_func(frag_content, frag_index)) elif not is_fatal(frag_index - 1): self.report_skip_fragment(frag_index, 'fragment not found') else: ctx['dest_stream'].close() self.report_error(f'fragment 
{frag_index} not found, unable to continue') return False return True decrypt_fragment = self.decrypter(info_dict) max_workers = math.ceil( self.params.get('concurrent_fragment_downloads', 1) / ctx.get('max_progress', 1)) if max_workers > 1: def _download_fragment(fragment): ctx_copy = ctx.copy() download_fragment(fragment, ctx_copy) return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized') self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome') with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool: try: for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments): ctx.update({ 'fragment_filename_sanitized': frag_filename, 'fragment_index': frag_index, }) if not append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx): return False except KeyboardInterrupt: self._finish_multiline_status() self.report_error( 'Interrupted by user. Waiting for all threads to shutdown...', is_error=False, tb=False) pool.shutdown(wait=False) raise else: for fragment in fragments: if not interrupt_trigger[0]: break try: download_fragment(fragment, ctx) result = append_fragment( decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx) except KeyboardInterrupt: if info_dict.get('is_live'): break raise if not result: return False if finish_func is not None: ctx['dest_stream'].write(finish_func()) ctx['dest_stream'].flush() self._finish_frag_download(ctx, info_dict) return True yt-dlp-2022.08.19/yt_dlp/downloader/hls.py000066400000000000000000000403771427755243700201540ustar00rootroot00000000000000import binascii import io import re import urllib.parse from . import get_suitable_downloader from .external import FFmpegFD from .fragment import FragmentFD from .. import webvtt from ..dependencies import Cryptodome_AES from ..utils import bug_reports_message, parse_m3u8_attributes, update_url_query class HlsFD(FragmentFD): """ Download segments in a m3u8 manifest. External downloaders can take over the fragment downloads by supporting the 'm3u8_frag_urls' protocol and re-defining 'supports_manifest' function """ FD_NAME = 'hlsnative' @staticmethod def can_download(manifest, info_dict, allow_unplayable_formats=False): UNSUPPORTED_FEATURES = [ # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] # Live streams heuristic does not always work (e.g. geo restricted to Germany # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0) # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] # This heuristic also is not correct since segments may not be appended as well. # Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite # no segments will definitely be appended to the end of the playlist. # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of # # event media playlists [4] # r'#EXT-X-MAP:', # media initialization [5] # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5 # 5. 
https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5 ] if not allow_unplayable_formats: UNSUPPORTED_FEATURES += [ r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1] ] def check_results(): yield not info_dict.get('is_live') for feature in UNSUPPORTED_FEATURES: yield not re.search(feature, manifest) return all(check_results()) def real_download(self, filename, info_dict): man_url = info_dict['url'] self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) man_url = urlh.geturl() s = urlh.read().decode('utf-8', 'ignore') can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None if can_download: has_ffmpeg = FFmpegFD.available() no_crypto = not Cryptodome_AES and '#EXT-X-KEY:METHOD=AES-128' in s if no_crypto and has_ffmpeg: can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available' elif no_crypto: message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; ' 'Decryption will be performed natively, but will be extremely slow') elif info_dict.get('extractor_key') == 'Generic' and re.search(r'(?m)#EXT-X-MEDIA-SEQUENCE:(?!0$)', s): install_ffmpeg = '' if has_ffmpeg else 'install ffmpeg and ' message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, ' f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command') if not can_download: has_drm = re.search('|'.join([ r'#EXT-X-FAXS-CM:', # Adobe Flash Access r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay ]), s) if has_drm and not self.params.get('allow_unplayable_formats'): self.report_error( 'This video is DRM protected; Try selecting another format with --format or ' 'add --check-formats to automatically fallback to the next best format') return False message = message or 'Unsupported features have been detected' fd = FFmpegFD(self.ydl, self.params) self.report_warning(f'{message}; extraction will be delegated to {fd.get_basename()}') return fd.real_download(filename, info_dict) elif message: self.report_warning(message) is_webvtt = info_dict['ext'] == 'vtt' if is_webvtt: real_downloader = None # Packing the fragments is not currently supported for external downloader else: real_downloader = get_suitable_downloader( info_dict, self.params, None, protocol='m3u8_frag_urls', to_stdout=(filename == '-')) if real_downloader and not real_downloader.supports_manifest(s): real_downloader = None if real_downloader: self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}') def is_ad_fragment_start(s): return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')) def is_ad_fragment_end(s): return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')) fragments = [] media_frags = 0 ad_frags = 0 ad_frag_next = False for line in s.splitlines(): line = line.strip() if not line: continue if line.startswith('#'): if is_ad_fragment_start(line): ad_frag_next = True elif is_ad_fragment_end(line): ad_frag_next = False continue if ad_frag_next: ad_frags += 1 continue media_frags += 1 ctx = { 'filename': filename, 'total_frags': media_frags, 'ad_frags': ad_frags, } if real_downloader: self._prepare_external_frag_download(ctx) else: self._prepare_and_start_frag_download(ctx, info_dict) 
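# The manifest walk below relies on ..utils.parse_m3u8_attributes to split
# tag attribute lists into dicts; a hedged illustration (the attribute
# values are made up):
#   parse_m3u8_attributes('METHOD=AES-128,URI="key.bin",IV=0xDEADBEEF')
#   -> {'METHOD': 'AES-128', 'URI': 'key.bin', 'IV': '0xDEADBEEF'}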
extra_state = ctx.setdefault('extra_state', {}) format_index = info_dict.get('format_index') extra_query = None extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') if extra_param_to_segment_url: extra_query = urllib.parse.parse_qs(extra_param_to_segment_url) i = 0 media_sequence = 0 decrypt_info = {'METHOD': 'NONE'} byte_range = {} discontinuity_count = 0 frag_index = 0 ad_frag_next = False for line in s.splitlines(): line = line.strip() if line: if not line.startswith('#'): if format_index and discontinuity_count != format_index: continue if ad_frag_next: continue frag_index += 1 if frag_index <= ctx['fragment_index']: continue frag_url = ( line if re.match(r'^https?://', line) else urllib.parse.urljoin(man_url, line)) if extra_query: frag_url = update_url_query(frag_url, extra_query) fragments.append({ 'frag_index': frag_index, 'url': frag_url, 'decrypt_info': decrypt_info, 'byte_range': byte_range, 'media_sequence': media_sequence, }) media_sequence += 1 elif line.startswith('#EXT-X-MAP'): if format_index and discontinuity_count != format_index: continue if frag_index > 0: self.report_error( 'Initialization fragment found after media fragments, unable to download') return False frag_index += 1 map_info = parse_m3u8_attributes(line[11:]) frag_url = ( map_info.get('URI') if re.match(r'^https?://', map_info.get('URI')) else urllib.parse.urljoin(man_url, map_info.get('URI'))) if extra_query: frag_url = update_url_query(frag_url, extra_query) if map_info.get('BYTERANGE'): splitted_byte_range = map_info.get('BYTERANGE').split('@') sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end'] byte_range = { 'start': sub_range_start, 'end': sub_range_start + int(splitted_byte_range[0]), } fragments.append({ 'frag_index': frag_index, 'url': frag_url, 'decrypt_info': decrypt_info, 'byte_range': byte_range, 'media_sequence': media_sequence }) media_sequence += 1 elif line.startswith('#EXT-X-KEY'): decrypt_url = decrypt_info.get('URI') decrypt_info = parse_m3u8_attributes(line[11:]) if decrypt_info['METHOD'] == 'AES-128': if 'IV' in decrypt_info: decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32)) if not re.match(r'^https?://', decrypt_info['URI']): decrypt_info['URI'] = urllib.parse.urljoin( man_url, decrypt_info['URI']) if extra_query: decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) if decrypt_url != decrypt_info['URI']: decrypt_info['KEY'] = None elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): media_sequence = int(line[22:]) elif line.startswith('#EXT-X-BYTERANGE'): splitted_byte_range = line[17:].split('@') sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end'] byte_range = { 'start': sub_range_start, 'end': sub_range_start + int(splitted_byte_range[0]), } elif is_ad_fragment_start(line): ad_frag_next = True elif is_ad_fragment_end(line): ad_frag_next = False elif line.startswith('#EXT-X-DISCONTINUITY'): discontinuity_count += 1 i += 1 # We only download the first fragment during the test if self.params.get('test', False): fragments = [fragments[0] if fragments else None] if real_downloader: info_dict['fragments'] = fragments fd = real_downloader(self.ydl, self.params) # TODO: Make progress updates work without hooking twice # for ph in self._progress_hooks: # fd.add_progress_hook(ph) return fd.real_download(filename, info_dict) if is_webvtt: def pack_fragment(frag_content, frag_index): output = io.StringIO() adjust = 0 overflow = False 
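# MPEG-TS PES timestamps are 33-bit and wrap at 2**33 (roughly 26.5 hours
# at 90 kHz). The Magic-block handling below spots a wrap when a timestamp
# goes backwards and adds 1 << 33 to compensate, e.g. 8589934000 followed
# by 500 is read as 500 + (1 << 33) = 8589935092.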
mpegts_last = None for block in webvtt.parse_fragment(frag_content): if isinstance(block, webvtt.CueBlock): extra_state['webvtt_mpegts_last'] = mpegts_last if overflow: extra_state['webvtt_mpegts_adjust'] += 1 overflow = False block.start += adjust block.end += adjust dedup_window = extra_state.setdefault('webvtt_dedup_window', []) ready = [] i = 0 is_new = True while i < len(dedup_window): wcue = dedup_window[i] wblock = webvtt.CueBlock.from_json(wcue) i += 1 if wblock.hinges(block): wcue['end'] = block.end is_new = False continue if wblock == block: is_new = False continue if wblock.end > block.start: continue ready.append(wblock) i -= 1 del dedup_window[i] if is_new: dedup_window.append(block.as_json) for block in ready: block.write_into(output) # we only emit cues once they fall out of the duplicate window continue elif isinstance(block, webvtt.Magic): # take care of MPEG PES timestamp overflow if block.mpegts is None: block.mpegts = 0 extra_state.setdefault('webvtt_mpegts_adjust', 0) block.mpegts += extra_state['webvtt_mpegts_adjust'] << 33 if block.mpegts < extra_state.get('webvtt_mpegts_last', 0): overflow = True block.mpegts += 1 << 33 mpegts_last = block.mpegts if frag_index == 1: extra_state['webvtt_mpegts'] = block.mpegts or 0 extra_state['webvtt_local'] = block.local or 0 # XXX: block.local = block.mpegts = None ? else: if block.mpegts is not None and block.local is not None: adjust = ( (block.mpegts - extra_state.get('webvtt_mpegts', 0)) - (block.local - extra_state.get('webvtt_local', 0)) ) continue elif isinstance(block, webvtt.HeaderBlock): if frag_index != 1: # XXX: this should probably be silent as well # or verify that all segments contain the same data self.report_warning(bug_reports_message( 'Discarding a %s block found in the middle of the stream; ' 'if the subtitles display incorrectly,' % (type(block).__name__))) continue block.write_into(output) return output.getvalue().encode() def fin_fragments(): dedup_window = extra_state.get('webvtt_dedup_window') if not dedup_window: return b'' output = io.StringIO() for cue in dedup_window: webvtt.CueBlock.from_json(cue).write_into(output) return output.getvalue().encode() self.download_and_append_fragments( ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments) else: return self.download_and_append_fragments(ctx, fragments, info_dict) yt-dlp-2022.08.19/yt_dlp/downloader/http.py000066400000000000000000000410521427755243700203340ustar00rootroot00000000000000import http.client import os import random import socket import ssl import time import urllib.error from .common import FileDownloader from ..utils import ( ContentTooShortError, RetryManager, ThrottledDownload, XAttrMetadataError, XAttrUnavailableError, encodeFilename, int_or_none, parse_http_range, sanitized_Request, try_call, write_xattr, ) RESPONSE_READ_EXCEPTIONS = ( TimeoutError, socket.timeout, # compat: py < 3.10 ConnectionError, ssl.SSLError, http.client.HTTPException ) class HttpFD(FileDownloader): def real_download(self, filename, info_dict): url = info_dict['url'] request_data = info_dict.get('request_data', None) class DownloadContext(dict): __getattr__ = dict.get __setattr__ = dict.__setitem__ __delattr__ = dict.__delitem__ ctx = DownloadContext() ctx.filename = filename ctx.tmpfilename = self.temp_name(filename) ctx.stream = None # Do not include the Accept-Encoding header headers = {'Youtubedl-no-compression': 'True'} add_headers = info_dict.get('http_headers') if add_headers: headers.update(add_headers) is_test = 
self.params.get('test', False) chunk_size = self._TEST_FILE_SIZE if is_test else ( self.params.get('http_chunk_size') or info_dict.get('downloader_options', {}).get('http_chunk_size') or 0) ctx.open_mode = 'wb' ctx.resume_len = 0 ctx.block_size = self.params.get('buffersize', 1024) ctx.start_time = time.time() # parse given Range req_start, req_end, _ = parse_http_range(headers.get('Range')) if self.params.get('continuedl', True): # Establish possible resume length if os.path.isfile(encodeFilename(ctx.tmpfilename)): ctx.resume_len = os.path.getsize( encodeFilename(ctx.tmpfilename)) ctx.is_resume = ctx.resume_len > 0 class SucceedDownload(Exception): pass class RetryDownload(Exception): def __init__(self, source_error): self.source_error = source_error class NextFragment(Exception): pass def establish_connection(): ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size) if not is_test and chunk_size else chunk_size) if ctx.resume_len > 0: range_start = ctx.resume_len if req_start is not None: # offset the beginning of Range to be within request range_start += req_start if ctx.is_resume: self.report_resuming_byte(ctx.resume_len) ctx.open_mode = 'ab' elif req_start is not None: range_start = req_start elif ctx.chunk_size > 0: range_start = 0 else: range_start = None ctx.is_resume = False if ctx.chunk_size: chunk_aware_end = range_start + ctx.chunk_size - 1 # we're not allowed to download outside Range range_end = chunk_aware_end if req_end is None else min(chunk_aware_end, req_end) elif req_end is not None: # there's no need for chunked downloads, so download until the end of Range range_end = req_end else: range_end = None if try_call(lambda: range_start > range_end): ctx.resume_len = 0 ctx.open_mode = 'wb' raise RetryDownload(Exception(f'Conflicting range. (start={range_start} > end={range_end})')) if try_call(lambda: range_end >= ctx.content_len): range_end = ctx.content_len - 1 request = sanitized_Request(url, request_data, headers) has_range = range_start is not None if has_range: request.add_header('Range', f'bytes={int(range_start)}-{int_or_none(range_end) or ""}') # Establish connection try: ctx.data = self.ydl.urlopen(request) # When trying to resume, Content-Range HTTP header of response has to be checked # to match the value of requested Range HTTP header. This is due to a webservers # that don't support resuming and serve a whole file with no Content-Range # set in response despite of requested Range (see # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799) if has_range: content_range = ctx.data.headers.get('Content-Range') content_range_start, content_range_end, content_len = parse_http_range(content_range) # Content-Range is present and matches requested Range, resume is possible if range_start == content_range_start and ( # Non-chunked download not ctx.chunk_size # Chunked download and requested piece or # its part is promised to be served or content_range_end == range_end or content_len < range_end): ctx.content_len = content_len if content_len or req_end: ctx.data_len = min(content_len or req_end, req_end or content_len) - (req_start or 0) return # Content-Range is either not present or invalid. 
Assuming remote webserver is # trying to send the whole file, resume is not possible, so wiping the local file # and performing entire redownload self.report_unable_to_resume() ctx.resume_len = 0 ctx.open_mode = 'wb' ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None)) except urllib.error.HTTPError as err: if err.code == 416: # Unable to resume (requested range not satisfiable) try: # Open the connection again without the range header ctx.data = self.ydl.urlopen( sanitized_Request(url, request_data, headers)) content_length = ctx.data.info()['Content-Length'] except urllib.error.HTTPError as err: if err.code < 500 or err.code >= 600: raise else: # Examine the reported length if (content_length is not None and (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)): # The file had already been fully downloaded. # Explanation to the above condition: in issue #175 it was revealed that # YouTube sometimes adds or removes a few bytes from the end of the file, # changing the file size slightly and causing problems for some users. So # I decided to implement a suggested change and consider the file # completely downloaded if the file size differs less than 100 bytes from # the one in the hard drive. self.report_file_already_downloaded(ctx.filename) self.try_rename(ctx.tmpfilename, ctx.filename) self._hook_progress({ 'filename': ctx.filename, 'status': 'finished', 'downloaded_bytes': ctx.resume_len, 'total_bytes': ctx.resume_len, }, info_dict) raise SucceedDownload() else: # The length does not match, we start the download over self.report_unable_to_resume() ctx.resume_len = 0 ctx.open_mode = 'wb' return elif err.code < 500 or err.code >= 600: # Unexpected HTTP error raise raise RetryDownload(err) except urllib.error.URLError as err: if isinstance(err.reason, ssl.CertificateError): raise raise RetryDownload(err) # In urllib.request.AbstractHTTPHandler, the response is partially read on request. # Any errors that occur during this will not be wrapped by URLError except RESPONSE_READ_EXCEPTIONS as err: raise RetryDownload(err) def close_stream(): if ctx.stream is not None: if not ctx.tmpfilename == '-': ctx.stream.close() ctx.stream = None def download(): data_len = ctx.data.info().get('Content-length', None) # Range HTTP header may be ignored/unsupported by a webserver # (e.g. extractor/scivee.py, extractor/bambuser.py). # However, for a test we still would like to download just a piece of a file. # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control # block size when downloading a file. if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE): data_len = self._TEST_FILE_SIZE if data_len is not None: data_len = int(data_len) + ctx.resume_len min_data_len = self.params.get('min_filesize') max_data_len = self.params.get('max_filesize') if min_data_len is not None and data_len < min_data_len: self.to_screen( f'\r[download] File is smaller than min-filesize ({data_len} bytes < {min_data_len} bytes). Aborting.') return False if max_data_len is not None and data_len > max_data_len: self.to_screen( f'\r[download] File is larger than max-filesize ({data_len} bytes > {max_data_len} bytes). 
Aborting.') return False byte_counter = 0 + ctx.resume_len block_size = ctx.block_size start = time.time() # measure time over whole while-loop, so slow_down() and best_block_size() work together properly now = None # needed for slow_down() in the first loop run before = start # start measuring def retry(e): close_stream() ctx.resume_len = (byte_counter if ctx.tmpfilename == '-' else os.path.getsize(encodeFilename(ctx.tmpfilename))) raise RetryDownload(e) while True: try: # Download and write data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter)) except RESPONSE_READ_EXCEPTIONS as err: retry(err) byte_counter += len(data_block) # exit loop when download is finished if len(data_block) == 0: break # Open destination file just in time if ctx.stream is None: try: ctx.stream, ctx.tmpfilename = self.sanitize_open( ctx.tmpfilename, ctx.open_mode) assert ctx.stream is not None ctx.filename = self.undo_temp_name(ctx.tmpfilename) self.report_destination(ctx.filename) except OSError as err: self.report_error('unable to open for writing: %s' % str(err)) return False if self.params.get('xattr_set_filesize', False) and data_len is not None: try: write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode()) except (XAttrUnavailableError, XAttrMetadataError) as err: self.report_error('unable to set filesize xattr: %s' % str(err)) try: ctx.stream.write(data_block) except OSError as err: self.to_stderr('\n') self.report_error('unable to write data: %s' % str(err)) return False # Apply rate limit self.slow_down(start, now, byte_counter - ctx.resume_len) # end measuring of one loop run now = time.time() after = now # Adjust block size if not self.params.get('noresizebuffer', False): block_size = self.best_block_size(after - before, len(data_block)) before = after # Progress message speed = self.calc_speed(start, now, byte_counter - ctx.resume_len) if ctx.data_len is None: eta = None else: eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len) self._hook_progress({ 'status': 'downloading', 'downloaded_bytes': byte_counter, 'total_bytes': ctx.data_len, 'tmpfilename': ctx.tmpfilename, 'filename': ctx.filename, 'eta': eta, 'speed': speed, 'elapsed': now - ctx.start_time, 'ctx_id': info_dict.get('ctx_id'), }, info_dict) if data_len is not None and byte_counter == data_len: break if speed and speed < (self.params.get('throttledratelimit') or 0): # The speed must stay below the limit for 3 seconds # This prevents raising error when the speed temporarily goes down if ctx.throttle_start is None: ctx.throttle_start = now elif now - ctx.throttle_start > 3: if ctx.stream is not None and ctx.tmpfilename != '-': ctx.stream.close() raise ThrottledDownload() elif speed: ctx.throttle_start = None if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len: ctx.resume_len = byte_counter # ctx.block_size = block_size raise NextFragment() if ctx.stream is None: self.to_stderr('\n') self.report_error('Did not get any data blocks') return False if ctx.tmpfilename != '-': ctx.stream.close() if data_len is not None and byte_counter != data_len: err = ContentTooShortError(byte_counter, int(data_len)) retry(err) self.try_rename(ctx.tmpfilename, ctx.filename) # Update file modification time if self.params.get('updatetime', True): info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None)) self._hook_progress({ 'downloaded_bytes': byte_counter, 
'total_bytes': byte_counter, 'filename': ctx.filename, 'status': 'finished', 'elapsed': time.time() - ctx.start_time, 'ctx_id': info_dict.get('ctx_id'), }, info_dict) return True for retry in RetryManager(self.params.get('retries'), self.report_retry): try: establish_connection() return download() except RetryDownload as err: retry.error = err.source_error continue except NextFragment: retry.error = None retry.attempt -= 1 continue except SucceedDownload: return True except: # noqa: E722 close_stream() raise return False yt-dlp-2022.08.19/yt_dlp/downloader/ism.py000066400000000000000000000264351427755243700201550ustar00rootroot00000000000000import binascii import io import struct import time import urllib.error from .fragment import FragmentFD from ..utils import RetryManager u8 = struct.Struct('>B') u88 = struct.Struct('>Bx') u16 = struct.Struct('>H') u1616 = struct.Struct('>Hxx') u32 = struct.Struct('>I') u64 = struct.Struct('>Q') s88 = struct.Struct('>bx') s16 = struct.Struct('>h') s1616 = struct.Struct('>hxx') s32 = struct.Struct('>i') unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000) TRACK_ENABLED = 0x1 TRACK_IN_MOVIE = 0x2 TRACK_IN_PREVIEW = 0x4 SELF_CONTAINED = 0x1 def box(box_type, payload): return u32.pack(8 + len(payload)) + box_type + payload def full_box(box_type, version, flags, payload): return box(box_type, u8.pack(version) + u32.pack(flags)[1:] + payload) def write_piff_header(stream, params): track_id = params['track_id'] fourcc = params['fourcc'] duration = params['duration'] timescale = params.get('timescale', 10000000) language = params.get('language', 'und') height = params.get('height', 0) width = params.get('width', 0) stream_type = params['stream_type'] creation_time = modification_time = int(time.time()) ftyp_payload = b'isml' # major brand ftyp_payload += u32.pack(1) # minor version ftyp_payload += b'piff' + b'iso2' # compatible brands stream.write(box(b'ftyp', ftyp_payload)) # File Type Box mvhd_payload = u64.pack(creation_time) mvhd_payload += u64.pack(modification_time) mvhd_payload += u32.pack(timescale) mvhd_payload += u64.pack(duration) mvhd_payload += s1616.pack(1) # rate mvhd_payload += s88.pack(1) # volume mvhd_payload += u16.pack(0) # reserved mvhd_payload += u32.pack(0) * 2 # reserved mvhd_payload += unity_matrix mvhd_payload += u32.pack(0) * 6 # pre defined mvhd_payload += u32.pack(0xffffffff) # next track id moov_payload = full_box(b'mvhd', 1, 0, mvhd_payload) # Movie Header Box tkhd_payload = u64.pack(creation_time) tkhd_payload += u64.pack(modification_time) tkhd_payload += u32.pack(track_id) # track id tkhd_payload += u32.pack(0) # reserved tkhd_payload += u64.pack(duration) tkhd_payload += u32.pack(0) * 2 # reserved tkhd_payload += s16.pack(0) # layer tkhd_payload += s16.pack(0) # alternate group tkhd_payload += s88.pack(1 if stream_type == 'audio' else 0) # volume tkhd_payload += u16.pack(0) # reserved tkhd_payload += unity_matrix tkhd_payload += u1616.pack(width) tkhd_payload += u1616.pack(height) trak_payload = full_box(b'tkhd', 1, TRACK_ENABLED | TRACK_IN_MOVIE | TRACK_IN_PREVIEW, tkhd_payload) # Track Header Box mdhd_payload = u64.pack(creation_time) mdhd_payload += u64.pack(modification_time) mdhd_payload += u32.pack(timescale) mdhd_payload += u64.pack(duration) mdhd_payload += u16.pack(((ord(language[0]) - 0x60) << 10) | ((ord(language[1]) - 0x60) << 5) | (ord(language[2]) - 0x60)) mdhd_payload += u16.pack(0) # pre defined mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box hdlr_payload = 
u32.pack(0) # pre defined if stream_type == 'audio': # handler type hdlr_payload += b'soun' hdlr_payload += u32.pack(0) * 3 # reserved hdlr_payload += b'SoundHandler\0' # name elif stream_type == 'video': hdlr_payload += b'vide' hdlr_payload += u32.pack(0) * 3 # reserved hdlr_payload += b'VideoHandler\0' # name elif stream_type == 'text': hdlr_payload += b'subt' hdlr_payload += u32.pack(0) * 3 # reserved hdlr_payload += b'SubtitleHandler\0' # name else: assert False mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box if stream_type == 'audio': smhd_payload = s88.pack(0) # balance smhd_payload += u16.pack(0) # reserved media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header elif stream_type == 'video': vmhd_payload = u16.pack(0) # graphics mode vmhd_payload += u16.pack(0) * 3 # opcolor media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header elif stream_type == 'text': media_header_box = full_box(b'sthd', 0, 0, b'') # Subtitle Media Header else: assert False minf_payload = media_header_box dref_payload = u32.pack(1) # entry count dref_payload += full_box(b'url ', 0, SELF_CONTAINED, b'') # Data Entry URL Box dinf_payload = full_box(b'dref', 0, 0, dref_payload) # Data Reference Box minf_payload += box(b'dinf', dinf_payload) # Data Information Box stsd_payload = u32.pack(1) # entry count sample_entry_payload = u8.pack(0) * 6 # reserved sample_entry_payload += u16.pack(1) # data reference index if stream_type == 'audio': sample_entry_payload += u32.pack(0) * 2 # reserved sample_entry_payload += u16.pack(params.get('channels', 2)) sample_entry_payload += u16.pack(params.get('bits_per_sample', 16)) sample_entry_payload += u16.pack(0) # pre defined sample_entry_payload += u16.pack(0) # reserved sample_entry_payload += u1616.pack(params['sampling_rate']) if fourcc == 'AACL': sample_entry_box = box(b'mp4a', sample_entry_payload) elif stream_type == 'video': sample_entry_payload += u16.pack(0) # pre defined sample_entry_payload += u16.pack(0) # reserved sample_entry_payload += u32.pack(0) * 3 # pre defined sample_entry_payload += u16.pack(width) sample_entry_payload += u16.pack(height) sample_entry_payload += u1616.pack(0x48) # horiz resolution 72 dpi sample_entry_payload += u1616.pack(0x48) # vert resolution 72 dpi sample_entry_payload += u32.pack(0) # reserved sample_entry_payload += u16.pack(1) # frame count sample_entry_payload += u8.pack(0) * 32 # compressor name sample_entry_payload += u16.pack(0x18) # depth sample_entry_payload += s16.pack(-1) # pre defined codec_private_data = binascii.unhexlify(params['codec_private_data'].encode()) if fourcc in ('H264', 'AVC1'): sps, pps = codec_private_data.split(u32.pack(1))[1:] avcc_payload = u8.pack(1) # configuration version avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete representation (1) + reserved (11111) + length size minus one avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001) avcc_payload += u16.pack(len(sps)) avcc_payload += sps avcc_payload += u8.pack(1) # number of pps avcc_payload += u16.pack(len(pps)) avcc_payload += pps sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry else: assert False elif stream_type == 'text': if fourcc == 'TTML': sample_entry_payload += b'http://www.w3.org/ns/ttml\0' # namespace 
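# Per ISO/IEC 14496-30, an XMLSubtitleSampleEntry ('stpp') carries three
# null-terminated UTF-8 strings (namespace, schema_location and
# auxiliary_mime_types), written out in that order here with the latter
# two left empty.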
sample_entry_payload += b'\0' # schema location sample_entry_payload += b'\0' # auxilary mime types(??) sample_entry_box = box(b'stpp', sample_entry_payload) else: assert False else: assert False stsd_payload += sample_entry_box stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box stts_payload = u32.pack(0) # entry count stbl_payload += full_box(b'stts', 0, 0, stts_payload) # Decoding Time to Sample Box stsc_payload = u32.pack(0) # entry count stbl_payload += full_box(b'stsc', 0, 0, stsc_payload) # Sample To Chunk Box stco_payload = u32.pack(0) # entry count stbl_payload += full_box(b'stco', 0, 0, stco_payload) # Chunk Offset Box minf_payload += box(b'stbl', stbl_payload) # Sample Table Box mdia_payload += box(b'minf', minf_payload) # Media Information Box trak_payload += box(b'mdia', mdia_payload) # Media Box moov_payload += box(b'trak', trak_payload) # Track Box mehd_payload = u64.pack(duration) mvex_payload = full_box(b'mehd', 1, 0, mehd_payload) # Movie Extends Header Box trex_payload = u32.pack(track_id) # track id trex_payload += u32.pack(1) # default sample description index trex_payload += u32.pack(0) # default sample duration trex_payload += u32.pack(0) # default sample size trex_payload += u32.pack(0) # default sample flags mvex_payload += full_box(b'trex', 0, 0, trex_payload) # Track Extends Box moov_payload += box(b'mvex', mvex_payload) # Movie Extends Box stream.write(box(b'moov', moov_payload)) # Movie Box def extract_box_data(data, box_sequence): data_reader = io.BytesIO(data) while True: box_size = u32.unpack(data_reader.read(4))[0] box_type = data_reader.read(4) if box_type == box_sequence[0]: box_data = data_reader.read(box_size - 8) if len(box_sequence) == 1: return box_data return extract_box_data(box_data, box_sequence[1:]) data_reader.seek(box_size - 8, 1) class IsmFD(FragmentFD): """ Download segments in a ISM manifest """ def real_download(self, filename, info_dict): segments = info_dict['fragments'][:1] if self.params.get( 'test', False) else info_dict['fragments'] ctx = { 'filename': filename, 'total_frags': len(segments), } self._prepare_and_start_frag_download(ctx, info_dict) extra_state = ctx.setdefault('extra_state', { 'ism_track_written': False, }) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) frag_index = 0 for i, segment in enumerate(segments): frag_index += 1 if frag_index <= ctx['fragment_index']: continue retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=frag_index, fatal=not skip_unavailable_fragments) for retry in retry_manager: try: success = self._download_fragment(ctx, segment['url'], info_dict) if not success: return False frag_content = self._read_fragment(ctx) if not extra_state['ism_track_written']: tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd']) info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0] write_piff_header(ctx['dest_stream'], info_dict['_download_params']) extra_state['ism_track_written'] = True self._append_fragment(ctx, frag_content) except urllib.error.HTTPError as err: retry.error = err continue if retry_manager.error: if not skip_unavailable_fragments: return False self.report_skip_fragment(frag_index) self._finish_frag_download(ctx, info_dict) return True yt-dlp-2022.08.19/yt_dlp/downloader/mhtml.py000066400000000000000000000140631427755243700205000ustar00rootroot00000000000000import io import quopri import re import uuid from .fragment import FragmentFD from ..compat import imghdr 
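# imghdr (via ..compat) is used further down to sniff each fragment's image
# type for the MIME part's Content-type header, falling back to "jpeg".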
from ..utils import escapeHTML, formatSeconds, srt_subtitles_timecode, urljoin
from ..version import __version__ as YT_DLP_VERSION


class MhtmlFD(FragmentFD):
    _STYLESHEET = """\
html, body {
    margin: 0;
    padding: 0;
    height: 100vh;
}

html {
    overflow-y: scroll;
    scroll-snap-type: y mandatory;
}

body {
    scroll-snap-type: y mandatory;
    display: flex;
    flex-flow: column;
}

body > figure {
    max-width: 100vw;
    max-height: 100vh;
    scroll-snap-align: center;
}

body > figure > figcaption {
    text-align: center;
    height: 2.5em;
}

body > figure > img {
    display: block;
    margin: auto;
    max-width: 100%;
    max-height: calc(100vh - 5em);
}
"""
    _STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET)
    _STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET)

    @staticmethod
    def _escape_mime(s):
        return '=?utf-8?Q?' + (b''.join(
            bytes((b,)) if b >= 0x20 else b'=%02X' % b
            for b in quopri.encodestring(s.encode(), header=True)
        )).decode('us-ascii') + '?='

    def _gen_cid(self, i, fragment, frag_boundary):
        return '%u.%s@yt-dlp.github.io.invalid' % (i, frag_boundary)

    def _gen_stub(self, *, fragments, frag_boundary, title):
        output = io.StringIO()

        output.write((
            '<!DOCTYPE html>'
            '<html>'
            '<head>'
            ''  '<meta name="generator" content="yt-dlp {version}">'
            ''  '<title>{title}</title>'
            ''  '<style>{styles}</style>'
            '<body>'
        ).format(
            version=escapeHTML(YT_DLP_VERSION),
            styles=self._STYLESHEET,
            title=escapeHTML(title)
        ))

        t0 = 0
        for i, frag in enumerate(fragments):
            output.write('
<figure>')
            try:
                t1 = t0 + frag['duration']
                output.write((
                    '<figcaption>Slide #{num}: {t0} – {t1} (duration: {duration})</figcaption>'
                ).format(
                    num=i + 1,
                    t0=srt_subtitles_timecode(t0),
                    t1=srt_subtitles_timecode(t1),
                    duration=formatSeconds(frag['duration'], msec=True)
                ))
            except (KeyError, ValueError, TypeError):
                t1 = None
                output.write((
                    '<figcaption>Slide #{num}</figcaption>'
                ).format(num=i + 1))

            output.write('<img src="cid:{cid}">'.format(
                cid=self._gen_cid(i, frag, frag_boundary)))
            output.write('</figure>
') t0 = t1 return output.getvalue() def real_download(self, filename, info_dict): fragment_base_url = info_dict.get('fragment_base_url') fragments = info_dict['fragments'][:1] if self.params.get( 'test', False) else info_dict['fragments'] title = info_dict.get('title', info_dict['format_id']) origin = info_dict.get('webpage_url', info_dict['url']) ctx = { 'filename': filename, 'total_frags': len(fragments), } self._prepare_and_start_frag_download(ctx, info_dict) extra_state = ctx.setdefault('extra_state', { 'header_written': False, 'mime_boundary': str(uuid.uuid4()).replace('-', ''), }) frag_boundary = extra_state['mime_boundary'] if not extra_state['header_written']: stub = self._gen_stub( fragments=fragments, frag_boundary=frag_boundary, title=title ) ctx['dest_stream'].write(( 'MIME-Version: 1.0\r\n' 'From: \r\n' 'To: \r\n' 'Subject: {title}\r\n' 'Content-type: multipart/related; ' '' 'boundary="{boundary}"; ' '' 'type="text/html"\r\n' 'X.yt-dlp.Origin: {origin}\r\n' '\r\n' '--{boundary}\r\n' 'Content-Type: text/html; charset=utf-8\r\n' 'Content-Length: {length}\r\n' '\r\n' '{stub}\r\n' ).format( origin=origin, boundary=frag_boundary, length=len(stub), title=self._escape_mime(title), stub=stub ).encode()) extra_state['header_written'] = True for i, fragment in enumerate(fragments): if (i + 1) <= ctx['fragment_index']: continue fragment_url = fragment.get('url') if not fragment_url: assert fragment_base_url fragment_url = urljoin(fragment_base_url, fragment['path']) success = self._download_fragment(ctx, fragment_url, info_dict) if not success: continue frag_content = self._read_fragment(ctx) frag_header = io.BytesIO() frag_header.write( b'--%b\r\n' % frag_boundary.encode('us-ascii')) frag_header.write( b'Content-ID: <%b>\r\n' % self._gen_cid(i, fragment, frag_boundary).encode('us-ascii')) frag_header.write( b'Content-type: %b\r\n' % f'image/{imghdr.what(h=frag_content) or "jpeg"}'.encode()) frag_header.write( b'Content-length: %u\r\n' % len(frag_content)) frag_header.write( b'Content-location: %b\r\n' % fragment_url.encode('us-ascii')) frag_header.write( b'X.yt-dlp.Duration: %f\r\n' % fragment['duration']) frag_header.write(b'\r\n') self._append_fragment( ctx, frag_header.getvalue() + frag_content + b'\r\n') ctx['dest_stream'].write( b'--%b--\r\n\r\n' % frag_boundary.encode('us-ascii')) self._finish_frag_download(ctx, info_dict) return True yt-dlp-2022.08.19/yt_dlp/downloader/niconico.py000066400000000000000000000036021427755243700211550ustar00rootroot00000000000000import threading from . 
import get_suitable_downloader from .common import FileDownloader from ..utils import sanitized_Request class NiconicoDmcFD(FileDownloader): """ Downloading niconico douga from DMC with heartbeat """ def real_download(self, filename, info_dict): from ..extractor.niconico import NiconicoIE self.to_screen('[%s] Downloading from DMC' % self.FD_NAME) ie = NiconicoIE(self.ydl) info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict) fd = get_suitable_downloader(info_dict, params=self.params)(self.ydl, self.params) success = download_complete = False timer = [None] heartbeat_lock = threading.Lock() heartbeat_url = heartbeat_info_dict['url'] heartbeat_data = heartbeat_info_dict['data'].encode() heartbeat_interval = heartbeat_info_dict.get('interval', 30) request = sanitized_Request(heartbeat_url, heartbeat_data) def heartbeat(): try: self.ydl.urlopen(request).read() except Exception: self.to_screen('[%s] Heartbeat failed' % self.FD_NAME) with heartbeat_lock: if not download_complete: timer[0] = threading.Timer(heartbeat_interval, heartbeat) timer[0].start() heartbeat_info_dict['ping']() self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval)) try: heartbeat() if type(fd).__name__ == 'HlsFD': info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0]) success = fd.real_download(filename, info_dict) finally: if heartbeat_lock: with heartbeat_lock: timer[0].cancel() download_complete = True return success yt-dlp-2022.08.19/yt_dlp/downloader/rtmp.py000066400000000000000000000213331427755243700203370ustar00rootroot00000000000000import os import re import subprocess import time from .common import FileDownloader from ..utils import ( Popen, check_executable, encodeArgument, encodeFilename, get_exe_version, ) def rtmpdump_version(): return get_exe_version( 'rtmpdump', ['--help'], r'(?i)RTMPDump\s*v?([0-9a-zA-Z._-]+)') class RtmpFD(FileDownloader): def real_download(self, filename, info_dict): def run_rtmpdump(args): start = time.time() resume_percent = None resume_downloaded_data_len = None proc = Popen(args, stderr=subprocess.PIPE) cursor_in_new_line = True proc_stderr_closed = False try: while not proc_stderr_closed: # read line from stderr line = '' while True: char = proc.stderr.read(1) if not char: proc_stderr_closed = True break if char in [b'\r', b'\n']: break line += char.decode('ascii', 'replace') if not line: # proc_stderr_closed is True continue mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line) if mobj: downloaded_data_len = int(float(mobj.group(1)) * 1024) percent = float(mobj.group(2)) if not resume_percent: resume_percent = percent resume_downloaded_data_len = downloaded_data_len time_now = time.time() eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent) speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len) data_len = None if percent > 0: data_len = int(downloaded_data_len * 100 / percent) self._hook_progress({ 'status': 'downloading', 'downloaded_bytes': downloaded_data_len, 'total_bytes_estimate': data_len, 'tmpfilename': tmpfilename, 'filename': filename, 'eta': eta, 'elapsed': time_now - start, 'speed': speed, }, info_dict) cursor_in_new_line = False else: # no percent for live streams mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line) if mobj: downloaded_data_len = int(float(mobj.group(1)) * 1024) time_now = time.time() speed = self.calc_speed(start, time_now, 
downloaded_data_len) self._hook_progress({ 'downloaded_bytes': downloaded_data_len, 'tmpfilename': tmpfilename, 'filename': filename, 'status': 'downloading', 'elapsed': time_now - start, 'speed': speed, }, info_dict) cursor_in_new_line = False elif self.params.get('verbose', False): if not cursor_in_new_line: self.to_screen('') cursor_in_new_line = True self.to_screen('[rtmpdump] ' + line) if not cursor_in_new_line: self.to_screen('') return proc.wait() except BaseException: # Including KeyboardInterrupt proc.kill(timeout=None) raise url = info_dict['url'] player_url = info_dict.get('player_url') page_url = info_dict.get('page_url') app = info_dict.get('app') play_path = info_dict.get('play_path') tc_url = info_dict.get('tc_url') flash_version = info_dict.get('flash_version') live = info_dict.get('rtmp_live', False) conn = info_dict.get('rtmp_conn') protocol = info_dict.get('rtmp_protocol') real_time = info_dict.get('rtmp_real_time', False) no_resume = info_dict.get('no_resume', False) continue_dl = self.params.get('continuedl', True) self.report_destination(filename) tmpfilename = self.temp_name(filename) test = self.params.get('test', False) # Check for rtmpdump first if not check_executable('rtmpdump', ['-h']): self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install') return False # Download using rtmpdump. rtmpdump returns exit code 2 when # the connection was interrupted and resuming appears to be # possible. This is part of rtmpdump's normal usage, AFAIK. basic_args = [ 'rtmpdump', '--verbose', '-r', url, '-o', tmpfilename] if player_url is not None: basic_args += ['--swfVfy', player_url] if page_url is not None: basic_args += ['--pageUrl', page_url] if app is not None: basic_args += ['--app', app] if play_path is not None: basic_args += ['--playpath', play_path] if tc_url is not None: basic_args += ['--tcUrl', tc_url] if test: basic_args += ['--stop', '1'] if flash_version is not None: basic_args += ['--flashVer', flash_version] if live: basic_args += ['--live'] if isinstance(conn, list): for entry in conn: basic_args += ['--conn', entry] elif isinstance(conn, str): basic_args += ['--conn', conn] if protocol is not None: basic_args += ['--protocol', protocol] if real_time: basic_args += ['--realtime'] args = basic_args if not no_resume and continue_dl and not live: args += ['--resume'] if not live and continue_dl: args += ['--skip', '1'] args = [encodeArgument(a) for a in args] self._debug_cmd(args, exe='rtmpdump') RD_SUCCESS = 0 RD_FAILED = 1 RD_INCOMPLETE = 2 RD_NO_CONNECT = 3 started = time.time() try: retval = run_rtmpdump(args) except KeyboardInterrupt: if not info_dict.get('is_live'): raise retval = RD_SUCCESS self.to_screen('\n[rtmpdump] Interrupted by user') if retval == RD_NO_CONNECT: self.report_error('[rtmpdump] Could not connect to RTMP server.') return False while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live: prevsize = os.path.getsize(encodeFilename(tmpfilename)) self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize) time.sleep(5.0) # This seems to be needed args = basic_args + ['--resume'] if retval == RD_FAILED: args += ['--skip', '1'] args = [encodeArgument(a) for a in args] retval = run_rtmpdump(args) cursize = os.path.getsize(encodeFilename(tmpfilename)) if prevsize == cursize and retval == RD_FAILED: break # Some rtmp streams seem abort after ~ 99.8%. 
Don't complain for those if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024: self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.') retval = RD_SUCCESS break if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE): fsize = os.path.getsize(encodeFilename(tmpfilename)) self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize) self.try_rename(tmpfilename, filename) self._hook_progress({ 'downloaded_bytes': fsize, 'total_bytes': fsize, 'filename': filename, 'status': 'finished', 'elapsed': time.time() - started, }, info_dict) return True else: self.to_stderr('\n') self.report_error('rtmpdump exited with code %d' % retval) return False yt-dlp-2022.08.19/yt_dlp/downloader/rtsp.py000066400000000000000000000027451427755243700203530ustar00rootroot00000000000000import os import subprocess from .common import FileDownloader from ..utils import check_executable, encodeFilename class RtspFD(FileDownloader): def real_download(self, filename, info_dict): url = info_dict['url'] self.report_destination(filename) tmpfilename = self.temp_name(filename) if check_executable('mplayer', ['-h']): args = [ 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url] elif check_executable('mpv', ['-h']): args = [ 'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url] else: self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install one') return False self._debug_cmd(args) retval = subprocess.call(args) if retval == 0: fsize = os.path.getsize(encodeFilename(tmpfilename)) self.to_screen(f'\r[{args[0]}] {fsize} bytes') self.try_rename(tmpfilename, filename) self._hook_progress({ 'downloaded_bytes': fsize, 'total_bytes': fsize, 'filename': filename, 'status': 'finished', }, info_dict) return True else: self.to_stderr('\n') self.report_error('%s exited with code %d' % (args[0], retval)) return False yt-dlp-2022.08.19/yt_dlp/downloader/websocket.py000066400000000000000000000033541427755243700213460ustar00rootroot00000000000000import asyncio import contextlib import os import signal import threading from .common import FileDownloader from .external import FFmpegFD from ..dependencies import websockets class FFmpegSinkFD(FileDownloader): """ A sink to ffmpeg for downloading fragments in any form """ def real_download(self, filename, info_dict): info_copy = info_dict.copy() info_copy['url'] = '-' async def call_conn(proc, stdin): try: await self.real_connection(stdin, info_dict) except OSError: pass finally: with contextlib.suppress(OSError): stdin.flush() stdin.close() os.kill(os.getpid(), signal.SIGINT) class FFmpegStdinFD(FFmpegFD): @classmethod def get_basename(cls): return FFmpegFD.get_basename() def on_process_started(self, proc, stdin): thread = threading.Thread(target=asyncio.run, daemon=True, args=(call_conn(proc, stdin), )) thread.start() return FFmpegStdinFD(self.ydl, self.params or {}).download(filename, info_copy) async def real_connection(self, sink, info_dict): """ Override this in subclasses """ raise NotImplementedError('This method must be implemented by subclasses') class WebSocketFragmentFD(FFmpegSinkFD): async def real_connection(self, sink, info_dict): async with websockets.connect(info_dict['url'], extra_headers=info_dict.get('http_headers', {})) as ws: while True: recv = await ws.recv() if isinstance(recv, str): recv = recv.encode('utf8') sink.write(recv) 
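# Illustration (not part of the original module): FFmpegSinkFD is designed to
# be subclassed -- only the real_connection() coroutine needs overriding, as
# WebSocketFragmentFD does above.  A minimal hypothetical subclass that feeds
# a local file into ffmpeg's stdin could look like this (the class name and
# chunk size are made up for the example):

class _LocalFileSinkFD(FFmpegSinkFD):
    """Example only: pipes a local file at info_dict['url'] into ffmpeg"""

    async def real_connection(self, sink, info_dict):
        # chunked copy into ffmpeg's stdin, mirroring the ws.recv() loop above
        with open(info_dict['url'], 'rb') as src:
            for chunk in iter(lambda: src.read(8192), b''):
                sink.write(chunk)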
yt-dlp-2022.08.19/yt_dlp/downloader/youtube_live_chat.py000066400000000000000000000252561427755243700230770ustar00rootroot00000000000000import json import time import urllib.error from .fragment import FragmentFD from ..utils import ( RegexNotFoundError, RetryManager, dict_get, int_or_none, try_get, ) class YoutubeLiveChatFD(FragmentFD): """ Downloads YouTube live chats fragment by fragment """ def real_download(self, filename, info_dict): video_id = info_dict['video_id'] self.to_screen('[%s] Downloading live chat' % self.FD_NAME) if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat': self.report_warning('Live chat download runs until the livestream ends. ' 'If you wish to download the video simultaneously, run a separate yt-dlp instance') test = self.params.get('test', False) ctx = { 'filename': filename, 'live': True, 'total_frags': None, } from ..extractor.youtube import YoutubeBaseInfoExtractor ie = YoutubeBaseInfoExtractor(self.ydl) start_time = int(time.time() * 1000) def dl_fragment(url, data=None, headers=None): http_headers = info_dict.get('http_headers', {}) if headers: http_headers = http_headers.copy() http_headers.update(headers) return self._download_fragment(ctx, url, info_dict, http_headers, data) def parse_actions_replay(live_chat_continuation): offset = continuation_id = click_tracking_params = None processed_fragment = bytearray() for action in live_chat_continuation.get('actions', []): if 'replayChatItemAction' in action: replay_chat_item_action = action['replayChatItemAction'] offset = int(replay_chat_item_action['videoOffsetTimeMsec']) processed_fragment.extend( json.dumps(action, ensure_ascii=False).encode() + b'\n') if offset is not None: continuation = try_get( live_chat_continuation, lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict) if continuation: continuation_id = continuation.get('continuation') click_tracking_params = continuation.get('clickTrackingParams') self._append_fragment(ctx, processed_fragment) return continuation_id, offset, click_tracking_params def try_refresh_replay_beginning(live_chat_continuation): # choose the second option that contains the unfiltered live chat replay refresh_continuation = try_get( live_chat_continuation, lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict) if refresh_continuation: # no data yet but required to call _append_fragment self._append_fragment(ctx, b'') refresh_continuation_id = refresh_continuation.get('continuation') offset = 0 click_tracking_params = refresh_continuation.get('trackingParams') return refresh_continuation_id, offset, click_tracking_params return parse_actions_replay(live_chat_continuation) live_offset = 0 def parse_actions_live(live_chat_continuation): nonlocal live_offset continuation_id = click_tracking_params = None processed_fragment = bytearray() for action in live_chat_continuation.get('actions', []): timestamp = self.parse_live_timestamp(action) if timestamp is not None: live_offset = timestamp - start_time # compatibility with replay format pseudo_action = { 'replayChatItemAction': {'actions': [action]}, 'videoOffsetTimeMsec': str(live_offset), 'isLive': True, } processed_fragment.extend( json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n') continuation_data_getters = [ lambda x: x['continuations'][0]['invalidationContinuationData'], lambda x: x['continuations'][0]['timedContinuationData'], ] continuation_data = 
try_get(live_chat_continuation, continuation_data_getters, dict) if continuation_data: continuation_id = continuation_data.get('continuation') click_tracking_params = continuation_data.get('clickTrackingParams') timeout_ms = int_or_none(continuation_data.get('timeoutMs')) if timeout_ms is not None: time.sleep(timeout_ms / 1000) self._append_fragment(ctx, processed_fragment) return continuation_id, live_offset, click_tracking_params def download_and_parse_fragment(url, frag_index, request_data=None, headers=None): for retry in RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=frag_index): try: success = dl_fragment(url, request_data, headers) if not success: return False, None, None, None raw_fragment = self._read_fragment(ctx) try: data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) except RegexNotFoundError: data = None if not data: data = json.loads(raw_fragment) live_chat_continuation = try_get( data, lambda x: x['continuationContents']['liveChatContinuation'], dict) or {} func = (info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live or frag_index == 1 and try_refresh_replay_beginning or parse_actions_replay) return (True, *func(live_chat_continuation)) except urllib.error.HTTPError as err: retry.error = err continue return False, None, None, None self._prepare_and_start_frag_download(ctx, info_dict) success = dl_fragment(info_dict['url']) if not success: return False raw_fragment = self._read_fragment(ctx) try: data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) except RegexNotFoundError: return False continuation_id = try_get( data, lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']) # no data yet but required to call _append_fragment self._append_fragment(ctx, b'') ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace')) if not ytcfg: return False api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY']) innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT']) if not api_key or not innertube_context: return False visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str) if info_dict['protocol'] == 'youtube_live_chat_replay': url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id elif info_dict['protocol'] == 'youtube_live_chat': url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id frag_index = offset = 0 click_tracking_params = None while continuation_id is not None: frag_index += 1 request_data = { 'context': innertube_context, 'continuation': continuation_id, } if frag_index > 1: request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))} if click_tracking_params: request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params} headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data) headers.update({'content-type': 'application/json'}) fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n' success, continuation_id, offset, click_tracking_params = download_and_parse_fragment( url, frag_index, fragment_request_data, headers) else: success, continuation_id, offset, click_tracking_params 
= download_and_parse_fragment( chat_page_url, frag_index) if not success: return False if test: break self._finish_frag_download(ctx, info_dict) return True @staticmethod def parse_live_timestamp(action): action_content = dict_get( action, ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand']) if not isinstance(action_content, dict): return None item = dict_get(action_content, ['item', 'bannerRenderer']) if not isinstance(item, dict): return None renderer = dict_get(item, [ # text 'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer', 'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer', # ticker 'liveChatTickerPaidMessageItemRenderer', 'liveChatTickerSponsorItemRenderer', # banner 'liveChatBannerRenderer', ]) if not isinstance(renderer, dict): return None parent_item_getters = [ lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'], lambda x: x['contents'], ] parent_item = try_get(renderer, parent_item_getters, dict) if parent_item: renderer = dict_get(parent_item, [ 'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer', 'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer', ]) if not isinstance(renderer, dict): return None return int_or_none(renderer.get('timestampUsec'), 1000) yt-dlp-2022.08.19/yt_dlp/extractor/000077500000000000000000000000001427755243700166565ustar00rootroot00000000000000yt-dlp-2022.08.19/yt_dlp/extractor/__init__.py000066400000000000000000000025111427755243700207660ustar00rootroot00000000000000from ..compat.compat_utils import passthrough_module passthrough_module(__name__, '.extractors') del passthrough_module def gen_extractor_classes(): """ Return a list of supported extractors. The order does matter; the first extractor matched is the one handling the URL. """ from .extractors import _ALL_CLASSES return _ALL_CLASSES def gen_extractors(): """ Return a list of an instance of every supported extractor. The order does matter; the first extractor matched is the one handling the URL. """ return [klass() for klass in gen_extractor_classes()] def list_extractor_classes(age_limit=None): """Return a list of extractors that are suitable for the given age, sorted by extractor name""" from .generic import GenericIE yield from sorted(filter( lambda ie: ie.is_suitable(age_limit) and ie != GenericIE, gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower()) yield GenericIE def list_extractors(age_limit=None): """Return a list of extractor instances that are suitable for the given age, sorted by extractor name""" return [ie() for ie in list_extractor_classes(age_limit)] def get_info_extractor(ie_name): """Returns the info extractor class with the given ie_name""" from . 
import extractors return getattr(extractors, f'{ie_name}IE') yt-dlp-2022.08.19/yt_dlp/extractor/_extractors.py000066400000000000000000001361171427755243700215760ustar00rootroot00000000000000# flake8: noqa: F401 from .abc import ( ABCIE, ABCIViewIE, ABCIViewShowSeriesIE, ) from .abcnews import ( AbcNewsIE, AbcNewsVideoIE, ) from .abcotvs import ( ABCOTVSIE, ABCOTVSClipsIE, ) from .abematv import ( AbemaTVIE, AbemaTVTitleIE, ) from .academicearth import AcademicEarthCourseIE from .acast import ( ACastIE, ACastChannelIE, ) from .acfun import AcFunVideoIE, AcFunBangumiIE from .adn import ADNIE from .adobeconnect import AdobeConnectIE from .adobetv import ( AdobeTVEmbedIE, AdobeTVIE, AdobeTVShowIE, AdobeTVChannelIE, AdobeTVVideoIE, ) from .adultswim import AdultSwimIE from .aenetworks import ( AENetworksIE, AENetworksCollectionIE, AENetworksShowIE, HistoryTopicIE, HistoryPlayerIE, BiographyIE, ) from .afreecatv import ( AfreecaTVIE, AfreecaTVLiveIE, AfreecaTVUserIE, ) from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE from .amara import AmaraIE from .alura import ( AluraIE, AluraCourseIE ) from .amcnetworks import AMCNetworksIE from .amazon import AmazonStoreIE from .americastestkitchen import ( AmericasTestKitchenIE, AmericasTestKitchenSeasonIE, ) from .angel import AngelIE from .animeondemand import AnimeOnDemandIE from .anvato import AnvatoIE from .aol import AolIE from .allocine import AllocineIE from .aliexpress import AliExpressLiveIE from .alsace20tv import ( Alsace20TVIE, Alsace20TVEmbedIE, ) from .apa import APAIE from .aparat import AparatIE from .appleconnect import AppleConnectIE from .appletrailers import ( AppleTrailersIE, AppleTrailersSectionIE, ) from .applepodcasts import ApplePodcastsIE from .archiveorg import ( ArchiveOrgIE, YoutubeWebArchiveIE, ) from .arcpublishing import ArcPublishingIE from .arkena import ArkenaIE from .ard import ( ARDBetaMediathekIE, ARDIE, ARDMediathekIE, ) from .arte import ( ArteTVIE, ArteTVEmbedIE, ArteTVPlaylistIE, ArteTVCategoryIE, ) from .arnes import ArnesIE from .asiancrush import ( AsianCrushIE, AsianCrushPlaylistIE, ) from .atresplayer import AtresPlayerIE from .atscaleconf import AtScaleConfEventIE from .atttechchannel import ATTTechChannelIE from .atvat import ATVAtIE from .audimedia import AudiMediaIE from .audioboom import AudioBoomIE from .audiodraft import ( AudiodraftCustomIE, AudiodraftGenericIE, ) from .audiomack import AudiomackIE, AudiomackAlbumIE from .audius import ( AudiusIE, AudiusTrackIE, AudiusPlaylistIE, AudiusProfileIE, ) from .awaan import ( AWAANIE, AWAANVideoIE, AWAANLiveIE, AWAANSeasonIE, ) from .azmedien import AZMedienIE from .baidu import BaiduVideoIE from .banbye import ( BanByeIE, BanByeChannelIE, ) from .bandaichannel import BandaiChannelIE from .bandcamp import ( BandcampIE, BandcampAlbumIE, BandcampWeeklyIE, BandcampUserIE, ) from .bannedvideo import BannedVideoIE from .bbc import ( BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE, BBCIE, ) from .beeg import BeegIE from .behindkink import BehindKinkIE from .bellmedia import BellMediaIE from .beatport import BeatportIE from .bet import BetIE from .bfi import BFIPlayerIE from .bfmtv import ( BFMTVIE, BFMTVLiveIE, BFMTVArticleIE, ) from .bibeltv import BibelTVIE from .bigflix import BigflixIE from .bigo import BigoIE from .bild import BildIE from .bilibili import ( BiliBiliIE, BiliBiliSearchIE, BilibiliCategoryIE, BiliBiliBangumiIE, BilibiliAudioIE, 
BilibiliAudioAlbumIE, BiliBiliPlayerIE, BilibiliChannelIE, BiliIntlIE, BiliIntlSeriesIE, BiliLiveIE, ) from .biobiochiletv import BioBioChileTVIE from .bitchute import ( BitChuteIE, BitChuteChannelIE, ) from .bitwave import ( BitwaveReplayIE, BitwaveStreamIE, ) from .biqle import BIQLEIE from .blackboardcollaborate import BlackboardCollaborateIE from .bleacherreport import ( BleacherReportIE, BleacherReportCMSIE, ) from .blogger import BloggerIE from .bloomberg import BloombergIE from .bokecc import BokeCCIE from .bongacams import BongaCamsIE from .bostonglobe import BostonGlobeIE from .box import BoxIE from .bpb import BpbIE from .br import ( BRIE, BRMediathekIE, ) from .bravotv import BravoTVIE from .breakcom import BreakIE from .breitbart import BreitBartIE from .brightcove import ( BrightcoveLegacyIE, BrightcoveNewIE, ) from .businessinsider import BusinessInsiderIE from .buzzfeed import BuzzFeedIE from .byutv import BYUtvIE from .c56 import C56IE from .cableav import CableAVIE from .callin import CallinIE from .caltrans import CaltransIE from .cam4 import CAM4IE from .camdemy import ( CamdemyIE, CamdemyFolderIE ) from .cammodels import CamModelsIE from .camtasia import CamtasiaEmbedIE from .camwithher import CamWithHerIE from .canalalpha import CanalAlphaIE from .canalplus import CanalplusIE from .canalc2 import Canalc2IE from .canvas import ( CanvasIE, CanvasEenIE, VrtNUIE, DagelijkseKostIE, ) from .carambatv import ( CarambaTVIE, CarambaTVPageIE, ) from .cartoonnetwork import CartoonNetworkIE from .cbc import ( CBCIE, CBCPlayerIE, CBCGemIE, CBCGemPlaylistIE, CBCGemLiveIE, ) from .cbs import CBSIE from .cbslocal import ( CBSLocalIE, CBSLocalArticleIE, ) from .cbsinteractive import CBSInteractiveIE from .cbsnews import ( CBSNewsEmbedIE, CBSNewsIE, CBSNewsLiveVideoIE, ) from .cbssports import ( CBSSportsEmbedIE, CBSSportsIE, TwentyFourSevenSportsIE, ) from .ccc import ( CCCIE, CCCPlaylistIE, ) from .ccma import CCMAIE from .cctv import CCTVIE from .cda import CDAIE from .cellebrite import CellebriteIE from .ceskatelevize import CeskaTelevizeIE from .cgtn import CGTNIE from .channel9 import Channel9IE from .charlierose import CharlieRoseIE from .chaturbate import ChaturbateIE from .chilloutzone import ChilloutzoneIE from .chingari import ( ChingariIE, ChingariUserIE, ) from .chirbit import ( ChirbitIE, ChirbitProfileIE, ) from .cinchcast import CinchcastIE from .cinemax import CinemaxIE from .ciscolive import ( CiscoLiveSessionIE, CiscoLiveSearchIE, ) from .ciscowebex import CiscoWebexIE from .cjsw import CJSWIE from .cliphunter import CliphunterIE from .clippit import ClippitIE from .cliprs import ClipRsIE from .clipsyndicate import ClipsyndicateIE from .closertotruth import CloserToTruthIE from .cloudflarestream import CloudflareStreamIE from .cloudy import CloudyIE from .clubic import ClubicIE from .clyp import ClypIE from .cmt import CMTIE from .cnbc import ( CNBCIE, CNBCVideoIE, ) from .cnn import ( CNNIE, CNNBlogsIE, CNNArticleIE, ) from .coub import CoubIE from .comedycentral import ( ComedyCentralIE, ComedyCentralTVIE, ) from .commonmistakes import CommonMistakesIE, UnicodeBOMIE from .commonprotocols import ( MmsIE, RtmpIE, ViewSourceIE, ) from .condenast import CondeNastIE from .contv import CONtvIE from .corus import CorusIE from .cpac import ( CPACIE, CPACPlaylistIE, ) from .cozytv import CozyTVIE from .cracked import CrackedIE from .crackle import CrackleIE from .craftsy import CraftsyIE from .crooksandliars import CrooksAndLiarsIE from .crowdbunker import ( CrowdBunkerIE, 
CrowdBunkerChannelIE, ) from .crunchyroll import ( CrunchyrollIE, CrunchyrollShowPlaylistIE, CrunchyrollBetaIE, CrunchyrollBetaShowIE, ) from .cspan import CSpanIE, CSpanCongressIE from .ctsnews import CtsNewsIE from .ctv import CTVIE from .ctvnews import CTVNewsIE from .cultureunplugged import CultureUnpluggedIE from .curiositystream import ( CuriosityStreamIE, CuriosityStreamCollectionsIE, CuriosityStreamSeriesIE, ) from .cwtv import CWTVIE from .cybrary import ( CybraryIE, CybraryCourseIE ) from .daftsex import DaftsexIE from .dailymail import DailyMailIE from .dailymotion import ( DailymotionIE, DailymotionPlaylistIE, DailymotionUserIE, ) from .dailywire import ( DailyWireIE, DailyWirePodcastIE, ) from .damtomo import ( DamtomoRecordIE, DamtomoVideoIE, ) from .daum import ( DaumIE, DaumClipIE, DaumPlaylistIE, DaumUserIE, ) from .daystar import DaystarClipIE from .dbtv import DBTVIE from .dctp import DctpTvIE from .deezer import ( DeezerPlaylistIE, DeezerAlbumIE, ) from .democracynow import DemocracynowIE from .detik import Detik20IE from .dfb import DFBIE from .dhm import DHMIE from .digg import DiggIE from .dotsub import DotsubIE from .douyutv import ( DouyuShowIE, DouyuTVIE, ) from .dplay import ( DPlayIE, DiscoveryPlusIE, HGTVDeIE, GoDiscoveryIE, TravelChannelIE, CookingChannelIE, HGTVUsaIE, FoodNetworkIE, InvestigationDiscoveryIE, DestinationAmericaIE, AmHistoryChannelIE, ScienceChannelIE, DIYNetworkIE, DiscoveryLifeIE, AnimalPlanetIE, TLCIE, MotorTrendIE, DiscoveryPlusIndiaIE, DiscoveryNetworksDeIE, DiscoveryPlusItalyIE, DiscoveryPlusItalyShowIE, DiscoveryPlusIndiaShowIE, ) from .dreisat import DreiSatIE from .drbonanza import DRBonanzaIE from .drtuber import DrTuberIE from .drtv import ( DRTVIE, DRTVLiveIE, ) from .dtube import DTubeIE from .dvtv import DVTVIE from .duboku import ( DubokuIE, DubokuPlaylistIE ) from .dumpert import DumpertIE from .defense import DefenseGouvFrIE from .digitalconcerthall import DigitalConcertHallIE from .discovery import DiscoveryIE from .disney import DisneyIE from .dispeak import DigitallySpeakingIE from .doodstream import DoodStreamIE from .dropbox import DropboxIE from .dropout import ( DropoutSeasonIE, DropoutIE ) from .dw import ( DWIE, DWArticleIE, ) from .eagleplatform import EaglePlatformIE, ClipYouEmbedIE from .ebaumsworld import EbaumsWorldIE from .echomsk import EchoMskIE from .egghead import ( EggheadCourseIE, EggheadLessonIE, ) from .ehow import EHowIE from .eighttracks import EightTracksIE from .einthusan import EinthusanIE from .eitb import EitbIE from .ellentube import ( EllenTubeIE, EllenTubeVideoIE, EllenTubePlaylistIE, ) from .elonet import ElonetIE from .elpais import ElPaisIE from .embedly import EmbedlyIE from .engadget import EngadgetIE from .epicon import ( EpiconIE, EpiconSeriesIE, ) from .eporner import EpornerIE from .eroprofile import ( EroProfileIE, EroProfileAlbumIE, ) from .ertgr import ( ERTFlixCodenameIE, ERTFlixIE, ERTWebtvEmbedIE, ) from .escapist import EscapistIE from .espn import ( ESPNIE, WatchESPNIE, ESPNArticleIE, FiveThirtyEightIE, ESPNCricInfoIE, ) from .esri import EsriVideoIE from .europa import EuropaIE from .europeantour import EuropeanTourIE from .euscreen import EUScreenIE from .expotv import ExpoTVIE from .expressen import ExpressenIE from .extremetube import ExtremeTubeIE from .eyedotv import EyedoTVIE from .facebook import ( FacebookIE, FacebookPluginsVideoIE, FacebookRedirectURLIE, FacebookReelIE, ) from .fancode import ( FancodeVodIE, FancodeLiveIE ) from .faz import FazIE from .fc2 import ( 
FC2IE, FC2EmbedIE, FC2LiveIE, ) from .fczenit import FczenitIE from .fifa import FifaIE from .filmmodu import FilmmoduIE from .filmon import ( FilmOnIE, FilmOnChannelIE, ) from .filmweb import FilmwebIE from .firsttv import FirstTVIE from .fivetv import FiveTVIE from .flickr import FlickrIE from .folketinget import FolketingetIE from .footyroom import FootyRoomIE from .formula1 import Formula1IE from .fourtube import ( FourTubeIE, PornTubeIE, PornerBrosIE, FuxIE, ) from .fourzerostudio import ( FourZeroStudioArchiveIE, FourZeroStudioClipIE, ) from .fox import FOXIE from .fox9 import ( FOX9IE, FOX9NewsIE, ) from .foxgay import FoxgayIE from .foxnews import ( FoxNewsIE, FoxNewsArticleIE, ) from .foxsports import FoxSportsIE from .fptplay import FptplayIE from .franceinter import FranceInterIE from .francetv import ( FranceTVIE, FranceTVSiteIE, FranceTVInfoIE, ) from .freesound import FreesoundIE from .freespeech import FreespeechIE from .frontendmasters import ( FrontendMastersIE, FrontendMastersLessonIE, FrontendMastersCourseIE ) from .freetv import ( FreeTvIE, FreeTvMoviesIE, ) from .fujitv import FujiTVFODPlus7IE from .funimation import ( FunimationIE, FunimationPageIE, FunimationShowIE, ) from .funk import FunkIE from .fusion import FusionIE from .fuyintv import FuyinTVIE from .gab import ( GabTVIE, GabIE, ) from .gaia import GaiaIE from .gameinformer import GameInformerIE from .gamejolt import ( GameJoltIE, GameJoltUserIE, GameJoltGameIE, GameJoltGameSoundtrackIE, GameJoltCommunityIE, GameJoltSearchIE, ) from .gamespot import GameSpotIE from .gamestar import GameStarIE from .gaskrank import GaskrankIE from .gazeta import GazetaIE from .gdcvault import GDCVaultIE from .gedidigital import GediDigitalIE from .generic import GenericIE from .gettr import ( GettrIE, GettrStreamingIE, ) from .gfycat import GfycatIE from .giantbomb import GiantBombIE from .giga import GigaIE from .glide import GlideIE from .globo import ( GloboIE, GloboArticleIE, ) from .go import GoIE from .godtube import GodTubeIE from .gofile import GofileIE from .golem import GolemIE from .goodgame import GoodGameIE from .googledrive import ( GoogleDriveIE, GoogleDriveFolderIE, ) from .googlepodcasts import ( GooglePodcastsIE, GooglePodcastsFeedIE, ) from .googlesearch import GoogleSearchIE from .gopro import GoProIE from .goshgay import GoshgayIE from .gotostage import GoToStageIE from .gputechconf import GPUTechConfIE from .gronkh import ( GronkhIE, GronkhFeedIE, GronkhVodsIE ) from .groupon import GrouponIE from .harpodeon import HarpodeonIE from .hbo import HBOIE from .hearthisat import HearThisAtIE from .heise import HeiseIE from .hellporno import HellPornoIE from .helsinki import HelsinkiIE from .hentaistigma import HentaiStigmaIE from .hgtv import HGTVComShowIE from .hketv import HKETVIE from .hidive import HiDiveIE from .historicfilms import HistoricFilmsIE from .hitbox import HitboxIE, HitboxLiveIE from .hitrecord import HitRecordIE from .holodex import HolodexIE from .hotnewhiphop import HotNewHipHopIE from .hotstar import ( HotStarIE, HotStarPrefixIE, HotStarPlaylistIE, HotStarSeriesIE, ) from .howcast import HowcastIE from .howstuffworks import HowStuffWorksIE from .hrfensehen import HRFernsehenIE from .hrti import ( HRTiIE, HRTiPlaylistIE, ) from .hse import ( HSEShowIE, HSEProductIE, ) from .genericembeds import HTML5MediaEmbedIE from .huajiao import HuajiaoIE from .huya import HuyaLiveIE from .huffpost import HuffPostIE from .hungama import ( HungamaIE, HungamaSongIE, HungamaAlbumPlaylistIE, ) from .hypem 
import HypemIE from .hytale import HytaleIE from .icareus import IcareusIE from .ichinanalive import ( IchinanaLiveIE, IchinanaLiveClipIE, ) from .ign import ( IGNIE, IGNVideoIE, IGNArticleIE, ) from .iheart import ( IHeartRadioIE, IHeartRadioPodcastIE, ) from .imdb import ( ImdbIE, ImdbListIE ) from .imgur import ( ImgurIE, ImgurAlbumIE, ImgurGalleryIE, ) from .ina import InaIE from .inc import IncIE from .indavideo import IndavideoEmbedIE from .infoq import InfoQIE from .instagram import ( InstagramIE, InstagramIOSIE, InstagramUserIE, InstagramTagIE, InstagramStoryIE, ) from .internazionale import InternazionaleIE from .internetvideoarchive import InternetVideoArchiveIE from .iprima import ( IPrimaIE, IPrimaCNNIE ) from .iqiyi import ( IqiyiIE, IqIE, IqAlbumIE ) from .itprotv import ( ITProTVIE, ITProTVCourseIE ) from .itv import ( ITVIE, ITVBTCCIE, ) from .ivi import ( IviIE, IviCompilationIE ) from .ivideon import IvideonIE from .iwara import ( IwaraIE, IwaraPlaylistIE, IwaraUserIE, ) from .ixigua import IxiguaIE from .izlesene import IzleseneIE from .jable import ( JableIE, JablePlaylistIE, ) from .jamendo import ( JamendoIE, JamendoAlbumIE, ) from .jeuxvideo import JeuxVideoIE from .jove import JoveIE from .joj import JojIE from .jwplatform import JWPlatformIE from .kakao import KakaoIE from .kaltura import KalturaIE from .karaoketv import KaraoketvIE from .karrierevideos import KarriereVideosIE from .keezmovies import KeezMoviesIE from .kelbyone import KelbyOneIE from .ketnet import KetnetIE from .khanacademy import ( KhanAcademyIE, KhanAcademyUnitIE, ) from .kicker import KickerIE from .kickstarter import KickStarterIE from .kinja import KinjaEmbedIE from .kinopoisk import KinoPoiskIE from .kompas import KompasVideoIE from .konserthusetplay import KonserthusetPlayIE from .koo import KooIE from .kth import KTHIE from .krasview import KrasViewIE from .ku6 import Ku6IE from .kusi import KUSIIE from .kuwo import ( KuwoIE, KuwoAlbumIE, KuwoChartIE, KuwoSingerIE, KuwoCategoryIE, KuwoMvIE, ) from .la7 import ( LA7IE, LA7PodcastEpisodeIE, LA7PodcastIE, ) from .laola1tv import ( Laola1TvEmbedIE, Laola1TvIE, EHFTVIE, ITTFIE, ) from .lastfm import ( LastFMIE, LastFMPlaylistIE, LastFMUserIE, ) from .lbry import ( LBRYIE, LBRYChannelIE, ) from .lci import LCIIE from .lcp import ( LcpPlayIE, LcpIE, ) from .lecture2go import Lecture2GoIE from .lecturio import ( LecturioIE, LecturioCourseIE, LecturioDeCourseIE, ) from .leeco import ( LeIE, LePlaylistIE, LetvCloudIE, ) from .lego import LEGOIE from .lemonde import LemondeIE from .lenta import LentaIE from .libraryofcongress import LibraryOfCongressIE from .libsyn import LibsynIE from .lifenews import ( LifeNewsIE, LifeEmbedIE, ) from .likee import ( LikeeIE, LikeeUserIE ) from .limelight import ( LimelightMediaIE, LimelightChannelIE, LimelightChannelListIE, ) from .line import ( LineLiveIE, LineLiveChannelIE, ) from .linkedin import ( LinkedInIE, LinkedInLearningIE, LinkedInLearningCourseIE, ) from .linuxacademy import LinuxAcademyIE from .liputan6 import Liputan6IE from .litv import LiTVIE from .livejournal import LiveJournalIE from .livestream import ( LivestreamIE, LivestreamOriginalIE, LivestreamShortenerIE, ) from .livestreamfails import LivestreamfailsIE from .lnkgo import ( LnkGoIE, LnkIE, ) from .localnews8 import LocalNews8IE from .lovehomeporn import LoveHomePornIE from .lrt import ( LRTVODIE, LRTStreamIE ) from .lynda import ( LyndaIE, LyndaCourseIE ) from .m6 import M6IE from .magentamusik360 import MagentaMusik360IE from .mailru import 
( MailRuIE, MailRuMusicIE, MailRuMusicSearchIE, ) from .mainstreaming import MainStreamingIE from .malltv import MallTVIE from .mangomolo import ( MangomoloVideoIE, MangomoloLiveIE, ) from .manoto import ( ManotoTVIE, ManotoTVShowIE, ManotoTVLiveIE, ) from .manyvids import ManyVidsIE from .maoritv import MaoriTVIE from .markiza import ( MarkizaIE, MarkizaPageIE, ) from .massengeschmacktv import MassengeschmackTVIE from .masters import MastersIE from .matchtv import MatchTVIE from .mdr import MDRIE from .medaltv import MedalTVIE from .mediaite import MediaiteIE from .mediaklikk import MediaKlikkIE from .mediaset import ( MediasetIE, MediasetShowIE, ) from .mediasite import ( MediasiteIE, MediasiteCatalogIE, MediasiteNamedCatalogIE, ) from .medici import MediciIE from .megaphone import MegaphoneIE from .meipai import MeipaiIE from .melonvod import MelonVODIE from .meta import METAIE from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mgoon import MgoonIE from .mgtv import MGTVIE from .miaopai import MiaoPaiIE from .microsoftstream import MicrosoftStreamIE from .microsoftvirtualacademy import ( MicrosoftVirtualAcademyIE, MicrosoftVirtualAcademyCourseIE, ) from .mildom import ( MildomIE, MildomVodIE, MildomClipIE, MildomUserVodIE, ) from .minds import ( MindsIE, MindsChannelIE, MindsGroupIE, ) from .ministrygrid import MinistryGridIE from .minoto import MinotoIE from .miomio import MioMioIE from .mirrativ import ( MirrativIE, MirrativUserIE, ) from .mirrorcouk import MirrorCoUKIE from .mit import TechTVMITIE, OCWMITIE from .mitele import MiTeleIE from .mixch import ( MixchIE, MixchArchiveIE, ) from .mixcloud import ( MixcloudIE, MixcloudUserIE, MixcloudPlaylistIE, ) from .mlb import ( MLBIE, MLBVideoIE, MLBTVIE, ) from .mlssoccer import MLSSoccerIE from .mnet import MnetIE from .mocha import MochaVideoIE from .moevideo import MoeVideoIE from .mofosex import ( MofosexIE, MofosexEmbedIE, ) from .mojvideo import MojvideoIE from .morningstar import MorningstarIE from .motherless import ( MotherlessIE, MotherlessGroupIE ) from .motorsport import MotorsportIE from .movieclips import MovieClipsIE from .moviepilot import MoviepilotIE from .moview import MoviewPlayIE from .moviezine import MoviezineIE from .movingimage import MovingImageIE from .msn import MSNIE from .mtv import ( MTVIE, MTVVideoIE, MTVServicesEmbeddedIE, MTVDEIE, MTVJapanIE, MTVItaliaIE, MTVItaliaProgrammaIE, ) from .muenchentv import MuenchenTVIE from .murrtube import MurrtubeIE, MurrtubeUserIE from .musescore import MuseScoreIE from .musicdex import ( MusicdexSongIE, MusicdexAlbumIE, MusicdexArtistIE, MusicdexPlaylistIE, ) from .mwave import MwaveIE, MwaveMeetGreetIE from .mxplayer import ( MxplayerIE, MxplayerShowIE, ) from .mychannels import MyChannelsIE from .myspace import MySpaceIE, MySpaceAlbumIE from .myspass import MySpassIE from .myvi import ( MyviIE, MyviEmbedIE, ) from .myvideoge import MyVideoGeIE from .myvidster import MyVidsterIE from .n1 import ( N1InfoAssetIE, N1InfoIIE, ) from .nate import ( NateIE, NateProgramIE, ) from .nationalgeographic import ( NationalGeographicVideoIE, NationalGeographicTVIE, ) from .naver import ( NaverIE, NaverLiveIE, NaverNowIE, ) from .nba import ( NBAWatchEmbedIE, NBAWatchIE, NBAWatchCollectionIE, NBAEmbedIE, NBAIE, NBAChannelIE, ) from .nbc import ( NBCIE, NBCNewsIE, NBCOlympicsIE, NBCOlympicsStreamIE, NBCSportsIE, NBCSportsStreamIE, NBCSportsVPlayerIE, ) from .ndr import ( NDRIE, NJoyIE, NDREmbedBaseIE, NDREmbedIE, NJoyEmbedIE, ) from .ndtv import NDTVIE from 
.nebula import ( NebulaIE, NebulaSubscriptionsIE, NebulaChannelIE, ) from .nerdcubed import NerdCubedFeedIE from .netzkino import NetzkinoIE from .neteasemusic import ( NetEaseMusicIE, NetEaseMusicAlbumIE, NetEaseMusicSingerIE, NetEaseMusicListIE, NetEaseMusicMvIE, NetEaseMusicProgramIE, NetEaseMusicDjRadioIE, ) from .netverse import ( NetverseIE, NetversePlaylistIE, ) from .newgrounds import ( NewgroundsIE, NewgroundsPlaylistIE, NewgroundsUserIE, ) from .newstube import NewstubeIE from .newsy import NewsyIE from .nextmedia import ( NextMediaIE, NextMediaActionNewsIE, AppleDailyIE, NextTVIE, ) from .nexx import ( NexxIE, NexxEmbedIE, ) from .nfb import NFBIE from .nfhsnetwork import NFHSNetworkIE from .nfl import ( NFLIE, NFLArticleIE, ) from .nhk import ( NhkVodIE, NhkVodProgramIE, NhkForSchoolBangumiIE, NhkForSchoolSubjectIE, NhkForSchoolProgramListIE, ) from .nhl import NHLIE from .nick import ( NickIE, NickBrIE, NickDeIE, NickNightIE, NickRuIE, ) from .niconico import ( NiconicoIE, NiconicoPlaylistIE, NiconicoUserIE, NiconicoSeriesIE, NiconicoHistoryIE, NicovideoSearchDateIE, NicovideoSearchIE, NicovideoSearchURLIE, NicovideoTagURLIE, ) from .ninecninemedia import ( NineCNineMediaIE, CPTwentyFourIE, ) from .ninegag import NineGagIE from .ninenow import NineNowIE from .nintendo import NintendoIE from .nitter import NitterIE from .njpwworld import NJPWWorldIE from .nobelprize import NobelPrizeIE from .nonktube import NonkTubeIE from .noodlemagazine import NoodleMagazineIE from .noovo import NoovoIE from .normalboots import NormalbootsIE from .nosvideo import NosVideoIE from .nova import ( NovaEmbedIE, NovaIE, ) from .novaplay import NovaPlayIE from .nowness import ( NownessIE, NownessPlaylistIE, NownessSeriesIE, ) from .noz import NozIE from .npo import ( AndereTijdenIE, NPOIE, NPOLiveIE, NPORadioIE, NPORadioFragmentIE, SchoolTVIE, HetKlokhuisIE, VPROIE, WNLIE, ) from .npr import NprIE from .nrk import ( NRKIE, NRKPlaylistIE, NRKSkoleIE, NRKTVIE, NRKTVDirekteIE, NRKRadioPodkastIE, NRKTVEpisodeIE, NRKTVEpisodesIE, NRKTVSeasonIE, NRKTVSeriesIE, ) from .nrl import NRLTVIE from .ntvcojp import NTVCoJpCUIE from .ntvde import NTVDeIE from .ntvru import NTVRuIE from .nytimes import ( NYTimesIE, NYTimesArticleIE, NYTimesCookingIE, ) from .nuvid import NuvidIE from .nzherald import NZHeraldIE from .nzz import NZZIE from .odatv import OdaTVIE from .odnoklassniki import OdnoklassnikiIE from .oktoberfesttv import OktoberfestTVIE from .olympics import OlympicsReplayIE from .on24 import On24IE from .ondemandkorea import OnDemandKoreaIE from .onefootball import OneFootballIE from .onet import ( OnetIE, OnetChannelIE, OnetMVPIE, OnetPlIE, ) from .onionstudios import OnionStudiosIE from .ooyala import ( OoyalaIE, OoyalaExternalIE, ) from .opencast import ( OpencastIE, OpencastPlaylistIE, ) from .openrec import ( OpenRecIE, OpenRecCaptureIE, OpenRecMovieIE, ) from .ora import OraTVIE from .orf import ( ORFTVthekIE, ORFFM4StoryIE, ORFRadioIE, ORFIPTVIE, ) from .outsidetv import OutsideTVIE from .packtpub import ( PacktPubIE, PacktPubCourseIE, ) from .palcomp3 import ( PalcoMP3IE, PalcoMP3ArtistIE, PalcoMP3VideoIE, ) from .pandoratv import PandoraTVIE from .panopto import ( PanoptoIE, PanoptoListIE, PanoptoPlaylistIE ) from .paramountplus import ( ParamountPlusIE, ParamountPlusSeriesIE, ) from .parler import ParlerIE from .parlview import ParlviewIE from .patreon import ( PatreonIE, PatreonCampaignIE ) from .pbs import PBSIE from .pearvideo import PearVideoIE from .peekvids import PeekVidsIE, PlayVidsIE 
from .peertube import ( PeerTubeIE, PeerTubePlaylistIE, ) from .peertv import PeerTVIE from .peloton import ( PelotonIE, PelotonLiveIE ) from .people import PeopleIE from .performgroup import PerformGroupIE from .periscope import ( PeriscopeIE, PeriscopeUserIE, ) from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE from .piapro import PiaproIE from .picarto import ( PicartoIE, PicartoVodIE, ) from .piksel import PikselIE from .pinkbike import PinkbikeIE from .pinterest import ( PinterestIE, PinterestCollectionIE, ) from .pixivsketch import ( PixivSketchIE, PixivSketchUserIE, ) from .pladform import PladformIE from .planetmarathi import PlanetMarathiIE from .platzi import ( PlatziIE, PlatziCourseIE, ) from .playfm import PlayFMIE from .playplustv import PlayPlusTVIE from .plays import PlaysTVIE from .playstuff import PlayStuffIE from .playsuisse import PlaySuisseIE from .playtvak import PlaytvakIE from .playvid import PlayvidIE from .playwire import PlaywireIE from .plutotv import PlutoTVIE from .pluralsight import ( PluralsightIE, PluralsightCourseIE, ) from .podchaser import PodchaserIE from .podomatic import PodomaticIE from .pokemon import ( PokemonIE, PokemonWatchIE, ) from .pokergo import ( PokerGoIE, PokerGoCollectionIE, ) from .polsatgo import PolsatGoIE from .polskieradio import ( PolskieRadioIE, PolskieRadioCategoryIE, PolskieRadioPlayerIE, PolskieRadioPodcastIE, PolskieRadioPodcastListIE, PolskieRadioRadioKierowcowIE, ) from .popcorntimes import PopcorntimesIE from .popcorntv import PopcornTVIE from .porn91 import Porn91IE from .porncom import PornComIE from .pornflip import PornFlipIE from .pornhd import PornHdIE from .pornhub import ( PornHubIE, PornHubUserIE, PornHubPlaylistIE, PornHubPagedVideoListIE, PornHubUserVideosUploadIE, ) from .pornotube import PornotubeIE from .pornovoisines import PornoVoisinesIE from .pornoxo import PornoXOIE from .pornez import PornezIE from .puhutv import ( PuhuTVIE, PuhuTVSerieIE, ) from .premiershiprugby import PremiershipRugbyIE from .presstv import PressTVIE from .projectveritas import ProjectVeritasIE from .prosiebensat1 import ProSiebenSat1IE from .prx import ( PRXStoryIE, PRXSeriesIE, PRXAccountIE, PRXStoriesSearchIE, PRXSeriesSearchIE ) from .puls4 import Puls4IE from .pyvideo import PyvideoIE from .qqmusic import ( QQMusicIE, QQMusicSingerIE, QQMusicAlbumIE, QQMusicToplistIE, QQMusicPlaylistIE, ) from .r7 import ( R7IE, R7ArticleIE, ) from .radiko import RadikoIE, RadikoRadioIE from .radiocanada import ( RadioCanadaIE, RadioCanadaAudioVideoIE, ) from .radiode import RadioDeIE from .radiojavan import RadioJavanIE from .radiobremen import RadioBremenIE from .radiofrance import FranceCultureIE, RadioFranceIE from .radiozet import RadioZetPodcastIE from .radiokapital import ( RadioKapitalIE, RadioKapitalShowIE, ) from .radlive import ( RadLiveIE, RadLiveChannelIE, RadLiveSeasonIE, ) from .rai import ( RaiPlayIE, RaiPlayLiveIE, RaiPlayPlaylistIE, RaiPlaySoundIE, RaiPlaySoundLiveIE, RaiPlaySoundPlaylistIE, RaiNewsIE, RaiSudtirolIE, RaiIE, ) from .raywenderlich import ( RayWenderlichIE, RayWenderlichCourseIE, ) from .rbmaradio import RBMARadioIE from .rcs import ( RCSIE, RCSEmbedsIE, RCSVariousIE, ) from .rcti import ( RCTIPlusIE, RCTIPlusSeriesIE, RCTIPlusTVIE, ) from .rds import RDSIE from .redbee import ParliamentLiveUKIE, RTBFIE from .redbulltv import ( RedBullTVIE, RedBullEmbedIE, RedBullTVRrnContentIE, RedBullIE, ) from .reddit import RedditIE from .redgifs import ( 
RedGifsIE, RedGifsSearchIE, RedGifsUserIE, ) from .redtube import RedTubeIE from .regiotv import RegioTVIE from .rentv import ( RENTVIE, RENTVArticleIE, ) from .restudy import RestudyIE from .reuters import ReutersIE from .reverbnation import ReverbNationIE from .rice import RICEIE from .rmcdecouverte import RMCDecouverteIE from .rockstargames import RockstarGamesIE from .rokfin import ( RokfinIE, RokfinStackIE, RokfinChannelIE, RokfinSearchIE, ) from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE from .rottentomatoes import RottenTomatoesIE from .rozhlas import RozhlasIE from .rte import RteIE, RteRadioIE from .rtlnl import ( RtlNlIE, RTLLuTeleVODIE, RTLLuArticleIE, RTLLuLiveIE, RTLLuRadioIE, ) from .rtl2 import ( RTL2IE, RTL2YouIE, RTL2YouSeriesIE, ) from .rtnews import ( RTNewsIE, RTDocumentryIE, RTDocumentryPlaylistIE, RuptlyIE, ) from .rtp import RTPIE from .rtrfm import RTRFMIE from .rts import RTSIE from .rtve import ( RTVEALaCartaIE, RTVEAudioIE, RTVELiveIE, RTVEInfantilIE, RTVETelevisionIE, ) from .rtvnh import RTVNHIE from .rtvs import RTVSIE from .rtvslo import RTVSLOIE from .ruhd import RUHDIE from .rule34video import Rule34VideoIE from .rumble import ( RumbleEmbedIE, RumbleChannelIE, ) from .rutube import ( RutubeIE, RutubeChannelIE, RutubeEmbedIE, RutubeMovieIE, RutubePersonIE, RutubePlaylistIE, RutubeTagsIE, ) from .glomex import ( GlomexIE, GlomexEmbedIE, ) from .megatvcom import ( MegaTVComIE, MegaTVComEmbedIE, ) from .ant1newsgr import ( Ant1NewsGrWatchIE, Ant1NewsGrArticleIE, Ant1NewsGrEmbedIE, ) from .rutv import RUTVIE from .ruutu import RuutuIE from .ruv import ( RuvIE, RuvSpilaIE ) from .safari import ( SafariIE, SafariApiIE, SafariCourseIE, ) from .saitosan import SaitosanIE from .samplefocus import SampleFocusIE from .sapo import SapoIE from .savefrom import SaveFromIE from .sbs import SBSIE from .screencast import ScreencastIE from .screencastomatic import ScreencastOMaticIE from .scrippsnetworks import ( ScrippsNetworksWatchIE, ScrippsNetworksIE, ) from .scte import ( SCTEIE, SCTECourseIE, ) from .scrolller import ScrolllerIE from .seeker import SeekerIE from .senategov import SenateISVPIE, SenateGovIE from .sendtonews import SendtoNewsIE from .servus import ServusIE from .sevenplus import SevenPlusIE from .sexu import SexuIE from .seznamzpravy import ( SeznamZpravyIE, SeznamZpravyArticleIE, ) from .shahid import ( ShahidIE, ShahidShowIE, ) from .shared import ( SharedIE, VivoIE, ) from .sharevideos import ShareVideosEmbedIE from .shemaroome import ShemarooMeIE from .showroomlive import ShowRoomLiveIE from .simplecast import ( SimplecastIE, SimplecastEpisodeIE, SimplecastPodcastIE, ) from .sina import SinaIE from .sixplay import SixPlayIE from .skeb import SkebIE from .skyit import ( SkyItPlayerIE, SkyItVideoIE, SkyItVideoLiveIE, SkyItIE, SkyItAcademyIE, SkyItArteIE, CieloTVItIE, TV8ItIE, ) from .skylinewebcams import SkylineWebcamsIE from .skynewsarabia import ( SkyNewsArabiaIE, SkyNewsArabiaArticleIE, ) from .skynewsau import SkyNewsAUIE from .sky import ( SkyNewsIE, SkyNewsStoryIE, SkySportsIE, SkySportsNewsIE, ) from .slideshare import SlideshareIE from .slideslive import SlidesLiveIE from .slutload import SlutloadIE from .snotr import SnotrIE from .sohu import SohuIE from .sonyliv import ( SonyLIVIE, SonyLIVSeriesIE, ) from .soundcloud import ( SoundcloudEmbedIE, SoundcloudIE, SoundcloudSetIE, SoundcloudRelatedIE, SoundcloudUserIE, SoundcloudTrackStationIE, SoundcloudPlaylistIE, SoundcloudSearchIE, ) from .soundgasm import ( SoundgasmIE, 
SoundgasmProfileIE ) from .southpark import ( SouthParkIE, SouthParkDeIE, SouthParkDkIE, SouthParkEsIE, SouthParkLatIE, SouthParkNlIE ) from .sovietscloset import ( SovietsClosetIE, SovietsClosetPlaylistIE ) from .spankbang import ( SpankBangIE, SpankBangPlaylistIE, ) from .spankwire import SpankwireIE from .spiegel import SpiegelIE from .spike import ( BellatorIE, ParamountNetworkIE, ) from .startrek import StarTrekIE from .stitcher import ( StitcherIE, StitcherShowIE, ) from .sport5 import Sport5IE from .sportbox import SportBoxIE from .sportdeutschland import SportDeutschlandIE from .spotify import ( SpotifyIE, SpotifyShowIE, ) from .spreaker import ( SpreakerIE, SpreakerPageIE, SpreakerShowIE, SpreakerShowPageIE, ) from .springboardplatform import SpringboardPlatformIE from .sprout import SproutIE from .srgssr import ( SRGSSRIE, SRGSSRPlayIE, ) from .srmediathek import SRMediathekIE from .stanfordoc import StanfordOpenClassroomIE from .startv import StarTVIE from .steam import ( SteamIE, SteamCommunityBroadcastIE, ) from .storyfire import ( StoryFireIE, StoryFireUserIE, StoryFireSeriesIE, ) from .streamable import StreamableIE from .streamanity import StreamanityIE from .streamcloud import StreamcloudIE from .streamcz import StreamCZIE from .streamff import StreamFFIE from .streetvoice import StreetVoiceIE from .stretchinternet import StretchInternetIE from .stripchat import StripchatIE from .stv import STVPlayerIE from .substack import SubstackIE from .sunporno import SunPornoIE from .sverigesradio import ( SverigesRadioEpisodeIE, SverigesRadioPublicationIE, ) from .svt import ( SVTIE, SVTPageIE, SVTPlayIE, SVTSeriesIE, ) from .swrmediathek import SWRMediathekIE from .syvdk import SYVDKIE from .syfy import SyfyIE from .sztvhu import SztvHuIE from .tagesschau import TagesschauIE from .tass import TassIE from .tbs import TBSIE from .tdslifeway import TDSLifewayIE from .teachable import ( TeachableIE, TeachableCourseIE, ) from .teachertube import ( TeacherTubeIE, TeacherTubeUserIE, ) from .teachingchannel import TeachingChannelIE from .teamcoco import TeamcocoIE from .teamtreehouse import TeamTreeHouseIE from .techtalks import TechTalksIE from .ted import ( TedEmbedIE, TedPlaylistIE, TedSeriesIE, TedTalkIE, ) from .tele5 import Tele5IE from .tele13 import Tele13IE from .telebruxelles import TeleBruxellesIE from .telecinco import TelecincoIE from .telegraaf import TelegraafIE from .telegram import TelegramEmbedIE from .telemb import TeleMBIE from .telemundo import TelemundoIE from .telequebec import ( TeleQuebecIE, TeleQuebecSquatIE, TeleQuebecEmissionIE, TeleQuebecLiveIE, TeleQuebecVideoIE, ) from .teletask import TeleTaskIE from .telewebion import TelewebionIE from .tempo import TempoIE from .tennistv import TennisTVIE from .tenplay import TenPlayIE from .testurl import TestURLIE from .tf1 import TF1IE from .tfo import TFOIE from .theholetv import TheHoleTvIE from .theintercept import TheInterceptIE from .theplatform import ( ThePlatformIE, ThePlatformFeedIE, ) from .thestar import TheStarIE from .thesun import TheSunIE from .theta import ( ThetaVideoIE, ThetaStreamIE, ) from .theweatherchannel import TheWeatherChannelIE from .thisamericanlife import ThisAmericanLifeIE from .thisav import ThisAVIE from .thisoldhouse import ThisOldHouseIE from .threespeak import ( ThreeSpeakIE, ThreeSpeakUserIE, ) from .threeqsdn import ThreeQSDNIE from .tiktok import ( TikTokIE, TikTokUserIE, TikTokSoundIE, TikTokEffectIE, TikTokTagIE, TikTokVMIE, DouyinIE, ) from .tinypic import TinyPicIE from .tmz 
import TMZIE from .tnaflix import ( TNAFlixNetworkEmbedIE, TNAFlixIE, EMPFlixIE, MovieFapIE, ) from .toggle import ( ToggleIE, MeWatchIE, ) from .toggo import ( ToggoIE, ) from .tokentube import ( TokentubeIE, TokentubeChannelIE ) from .tonline import TOnlineIE from .toongoggles import ToonGogglesIE from .toutv import TouTvIE from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE from .trilulilu import TriluliluIE from .trovo import ( TrovoIE, TrovoVodIE, TrovoChannelVodIE, TrovoChannelClipIE, ) from .trueid import TrueIDIE from .trunews import TruNewsIE from .truth import TruthIE from .trutv import TruTVIE from .tube8 import Tube8IE from .tubetugraz import TubeTuGrazIE, TubeTuGrazSeriesIE from .tubitv import ( TubiTvIE, TubiTvShowIE, ) from .tumblr import TumblrIE from .tunein import ( TuneInClipIE, TuneInStationIE, TuneInProgramIE, TuneInTopicIE, TuneInShortenerIE, ) from .tunepk import TunePkIE from .turbo import TurboIE from .tv2 import ( TV2IE, TV2ArticleIE, KatsomoIE, MTVUutisetArticleIE, ) from .tv2dk import ( TV2DKIE, TV2DKBornholmPlayIE, ) from .tv2hu import ( TV2HuIE, TV2HuSeriesIE, ) from .tv4 import TV4IE from .tv5mondeplus import TV5MondePlusIE from .tv5unis import ( TV5UnisVideoIE, TV5UnisIE, ) from .tva import ( TVAIE, QubIE, ) from .tvanouvelles import ( TVANouvellesIE, TVANouvellesArticleIE, ) from .tvc import ( TVCIE, TVCArticleIE, ) from .tver import TVerIE from .tvigle import TvigleIE from .tviplayer import TVIPlayerIE from .tvland import TVLandIE from .tvn24 import TVN24IE from .tvnet import TVNetIE from .tvnoe import TVNoeIE from .tvnow import ( TVNowIE, TVNowFilmIE, TVNowNewIE, TVNowSeasonIE, TVNowAnnualIE, TVNowShowIE, ) from .tvopengr import ( TVOpenGrWatchIE, TVOpenGrEmbedIE, ) from .tvp import ( TVPEmbedIE, TVPIE, TVPStreamIE, TVPWebsiteIE, ) from .tvplay import ( TVPlayIE, ViafreeIE, TVPlayHomeIE, ) from .tvplayer import TVPlayerIE from .tweakers import TweakersIE from .twentyfourvideo import TwentyFourVideoIE from .twentymin import TwentyMinutenIE from .twentythreevideo import TwentyThreeVideoIE from .twitcasting import ( TwitCastingIE, TwitCastingLiveIE, TwitCastingUserIE, ) from .twitch import ( TwitchVodIE, TwitchCollectionIE, TwitchVideosIE, TwitchVideosClipsIE, TwitchVideosCollectionsIE, TwitchStreamIE, TwitchClipsIE, ) from .twitter import ( TwitterCardIE, TwitterIE, TwitterAmplifyIE, TwitterBroadcastIE, TwitterShortenerIE, ) from .udemy import ( UdemyIE, UdemyCourseIE ) from .udn import UDNEmbedIE from .ufctv import ( UFCTVIE, UFCArabiaIE, ) from .ukcolumn import UkColumnIE from .uktvplay import UKTVPlayIE from .digiteka import DigitekaIE from .dlive import ( DLiveVODIE, DLiveStreamIE, ) from .drooble import DroobleIE from .umg import UMGDeIE from .unistra import UnistraIE from .unity import UnityIE from .uol import UOLIE from .uplynk import ( UplynkIE, UplynkPreplayIE, ) from .urort import UrortIE from .urplay import URPlayIE from .usanetwork import USANetworkIE from .usatoday import USATodayIE from .ustream import UstreamIE, UstreamChannelIE from .ustudio import ( UstudioIE, UstudioEmbedIE, ) from .utreon import UtreonIE from .varzesh3 import Varzesh3IE from .vbox7 import Vbox7IE from .veehd import VeeHDIE from .veo import VeoIE from .veoh import VeohIE from .vesti import VestiIE from .vevo import ( VevoIE, VevoPlaylistIE, ) from .vgtv import ( BTArticleIE, BTVestlendingenIE, VGTVIE, ) from .vh1 import VH1IE from .vice import ( ViceIE, ViceArticleIE, ViceShowIE, ) from .vidbit import VidbitIE from .viddler import 
ViddlerIE from .videa import VideaIE from .videocampus_sachsen import ( VideocampusSachsenIE, ViMPPlaylistIE, ) from .videodetective import VideoDetectiveIE from .videofyme import VideofyMeIE from .videomore import ( VideomoreIE, VideomoreVideoIE, VideomoreSeasonIE, ) from .videopress import VideoPressIE from .vidio import ( VidioIE, VidioPremierIE, VidioLiveIE ) from .vidlii import VidLiiIE from .vier import VierIE, VierVideosIE from .viewlift import ( ViewLiftIE, ViewLiftEmbedIE, ) from .viidea import ViideaIE from .vimeo import ( VimeoIE, VimeoAlbumIE, VimeoChannelIE, VimeoGroupsIE, VimeoLikesIE, VimeoOndemandIE, VimeoReviewIE, VimeoUserIE, VimeoWatchLaterIE, VHXEmbedIE, ) from .vimm import ( VimmIE, VimmRecordingIE, ) from .vimple import VimpleIE from .vine import ( VineIE, VineUserIE, ) from .viki import ( VikiIE, VikiChannelIE, ) from .viqeo import ViqeoIE from .viu import ( ViuIE, ViuPlaylistIE, ViuOTTIE, ) from .vk import ( VKIE, VKUserVideosIE, VKWallPostIE, ) from .vlive import ( VLiveIE, VLivePostIE, VLiveChannelIE, ) from .vodlocker import VodlockerIE from .vodpl import VODPlIE from .vodplatform import VODPlatformIE from .voicerepublic import VoiceRepublicIE from .voicy import ( VoicyIE, VoicyChannelIE, ) from .voot import ( VootIE, VootSeriesIE, ) from .voxmedia import ( VoxMediaVolumeIE, VoxMediaIE, ) from .vrt import VRTIE from .vrak import VrakIE from .vrv import ( VRVIE, VRVSeriesIE, ) from .vshare import VShareIE from .vtm import VTMIE from .medialaan import MedialaanIE from .vuclip import VuClipIE from .vupload import VuploadIE from .vvvvid import ( VVVVIDIE, VVVVIDShowIE, ) from .vyborymos import VyboryMosIE from .vzaar import VzaarIE from .wakanim import WakanimIE from .walla import WallaIE from .washingtonpost import ( WashingtonPostIE, WashingtonPostArticleIE, ) from .wasdtv import ( WASDTVStreamIE, WASDTVRecordIE, WASDTVClipIE, ) from .wat import WatIE from .watchbox import WatchBoxIE from .watchindianporn import WatchIndianPornIE from .wdr import ( WDRIE, WDRPageIE, WDRElefantIE, WDRMobileIE, ) from .webcaster import ( WebcasterIE, WebcasterFeedIE, ) from .webofstories import ( WebOfStoriesIE, WebOfStoriesPlaylistIE, ) from .weibo import ( WeiboIE, WeiboMobileIE ) from .weiqitv import WeiqiTVIE from .wetv import WeTvEpisodeIE, WeTvSeriesIE from .wikimedia import WikimediaIE from .willow import WillowIE from .wimtv import WimTVIE from .whowatch import WhoWatchIE from .wistia import ( WistiaIE, WistiaPlaylistIE, ) from .worldstarhiphop import WorldStarHipHopIE from .wppilot import ( WPPilotIE, WPPilotChannelsIE, ) from .wsj import ( WSJIE, WSJArticleIE, ) from .wwe import WWEIE from .xbef import XBefIE from .xboxclips import XboxClipsIE from .xfileshare import XFileShareIE from .xhamster import ( XHamsterIE, XHamsterEmbedIE, XHamsterUserIE, ) from .xiami import ( XiamiSongIE, XiamiAlbumIE, XiamiArtistIE, XiamiCollectionIE ) from .ximalaya import ( XimalayaIE, XimalayaAlbumIE ) from .xinpianchang import XinpianchangIE from .xminus import XMinusIE from .xnxx import XNXXIE from .xstream import XstreamIE from .xtube import XTubeUserIE, XTubeIE from .xuite import XuiteIE from .xvideos import XVideosIE from .xxxymovies import XXXYMoviesIE from .yahoo import ( YahooIE, YahooSearchIE, YahooGyaOPlayerIE, YahooGyaOIE, YahooJapanNewsIE, ) from .yandexdisk import YandexDiskIE from .yandexmusic import ( YandexMusicTrackIE, YandexMusicAlbumIE, YandexMusicPlaylistIE, YandexMusicArtistTracksIE, YandexMusicArtistAlbumsIE, ) from .yandexvideo import ( YandexVideoIE, 
YandexVideoPreviewIE, ZenYandexIE, ZenYandexChannelIE, ) from .yapfiles import YapFilesIE from .yesjapan import YesJapanIE from .yinyuetai import YinYueTaiIE from .ynet import YnetIE from .youjizz import YouJizzIE from .youku import ( YoukuIE, YoukuShowIE, ) from .younow import ( YouNowLiveIE, YouNowChannelIE, YouNowMomentIE, ) from .youporn import YouPornIE from .yourporn import YourPornIE from .yourupload import YourUploadIE from .youtube import ( YoutubeIE, YoutubeClipIE, YoutubeFavouritesIE, YoutubeNotificationsIE, YoutubeHistoryIE, YoutubeTabIE, YoutubeLivestreamEmbedIE, YoutubePlaylistIE, YoutubeRecommendedIE, YoutubeSearchDateIE, YoutubeSearchIE, YoutubeSearchURLIE, YoutubeMusicSearchURLIE, YoutubeSubscriptionsIE, YoutubeStoriesIE, YoutubeTruncatedIDIE, YoutubeTruncatedURLIE, YoutubeYtBeIE, YoutubeYtUserIE, YoutubeWatchLaterIE, ) from .zapiks import ZapiksIE from .zattoo import ( BBVTVIE, BBVTVLiveIE, BBVTVRecordingsIE, EinsUndEinsTVIE, EinsUndEinsTVLiveIE, EinsUndEinsTVRecordingsIE, EWETVIE, EWETVLiveIE, EWETVRecordingsIE, GlattvisionTVIE, GlattvisionTVLiveIE, GlattvisionTVRecordingsIE, MNetTVIE, MNetTVLiveIE, MNetTVRecordingsIE, NetPlusTVIE, NetPlusTVLiveIE, NetPlusTVRecordingsIE, OsnatelTVIE, OsnatelTVLiveIE, OsnatelTVRecordingsIE, QuantumTVIE, QuantumTVLiveIE, QuantumTVRecordingsIE, SaltTVIE, SaltTVLiveIE, SaltTVRecordingsIE, SAKTVIE, SAKTVLiveIE, SAKTVRecordingsIE, VTXTVIE, VTXTVLiveIE, VTXTVRecordingsIE, WalyTVIE, WalyTVLiveIE, WalyTVRecordingsIE, ZattooIE, ZattooLiveIE, ZattooMoviesIE, ZattooRecordingsIE, ) from .zdf import ZDFIE, ZDFChannelIE from .zee5 import ( Zee5IE, Zee5SeriesIE, ) from .zhihu import ZhihuIE from .zingmp3 import ( ZingMp3IE, ZingMp3AlbumIE, ZingMp3ChartHomeIE, ZingMp3WeekChartIE, ZingMp3ChartMusicVideoIE, ZingMp3UserIE, ) from .zoom import ZoomIE from .zype import ZypeIE yt-dlp-2022.08.19/yt_dlp/extractor/abc.py000066400000000000000000000310101427755243700177500ustar00rootroot00000000000000import hashlib import hmac import re import time from .common import InfoExtractor from ..compat import compat_str from ..utils import ( dict_get, ExtractorError, js_to_json, int_or_none, parse_iso8601, str_or_none, try_get, unescapeHTML, update_url_query, ) class ABCIE(InfoExtractor): IE_NAME = 'abc.net.au' _VALID_URL = r'https?://(?:www\.)?abc\.net\.au/(?:news|btn)/(?:[^/]+/){1,4}(?P\d{5,})' _TESTS = [{ 'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334', 'md5': 'cb3dd03b18455a661071ee1e28344d9f', 'info_dict': { 'id': '5868334', 'ext': 'mp4', 'title': 'Australia to help staff Ebola treatment centre in Sierra Leone', 'description': 'md5:809ad29c67a05f54eb41f2a105693a67', }, 'skip': 'this video has expired', }, { 'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326', 'md5': '4ebd61bdc82d9a8b722f64f1f4b4d121', 'info_dict': { 'id': 'NvqvPeNZsHU', 'ext': 'mp4', 'upload_date': '20150816', 'uploader': 'ABC News (Australia)', 'description': 'Government backbencher Warren Entsch introduces a cross-party sponsored bill to legalise same-sex marriage, saying the bill is designed to promote "an inclusive Australia, not a divided one.". 
Read more here: http://ab.co/1Mwc6ef', 'uploader_id': 'NewsOnABC', 'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill', }, 'add_ie': ['Youtube'], 'skip': 'Not accessible from Travis CI server', }, { 'url': 'http://www.abc.net.au/news/2015-10-23/nab-lifts-interest-rates-following-westpac-and-cba/6880080', 'md5': 'b96eee7c9edf4fc5a358a0252881cc1f', 'info_dict': { 'id': '6880080', 'ext': 'mp3', 'title': 'NAB lifts interest rates, following Westpac and CBA', 'description': 'md5:f13d8edc81e462fce4a0437c7dc04728', }, }, { 'url': 'http://www.abc.net.au/news/2015-10-19/6866214', 'only_matching': True, }, { 'url': 'https://www.abc.net.au/btn/classroom/wwi-centenary/10527914', 'info_dict': { 'id': '10527914', 'ext': 'mp4', 'title': 'WWI Centenary', 'description': 'md5:c2379ec0ca84072e86b446e536954546', } }, { 'url': 'https://www.abc.net.au/news/programs/the-world/2020-06-10/black-lives-matter-protests-spawn-support-for/12342074', 'info_dict': { 'id': '12342074', 'ext': 'mp4', 'title': 'Black Lives Matter protests spawn support for Papuans in Indonesia', 'description': 'md5:2961a17dc53abc558589ccd0fb8edd6f', } }, { 'url': 'https://www.abc.net.au/btn/newsbreak/btn-newsbreak-20200814/12560476', 'info_dict': { 'id': 'tDL8Ld4dK_8', 'ext': 'mp4', 'title': 'Fortnite Banned From Apple and Google App Stores', 'description': 'md5:a6df3f36ce8f816b74af4bd6462f5651', 'upload_date': '20200813', 'uploader': 'Behind the News', 'uploader_id': 'behindthenews', } }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) mobj = re.search(r'[^"]+)"\s+data-duration="\d+"\s+title="Download audio directly">', webpage) if mobj: urls_info = mobj.groupdict() youtube = False video = False else: mobj = re.search(r'External Link:', webpage) if mobj is None: mobj = re.search(r'' xml_root = self._html_search_regex( PLAYER_REGEX, start_page, 'xml root', default=None) if xml_root is None: # Probably need to authenticate login_res = self._login(webpage_url, display_id) if login_res is None: self.report_warning('Could not login.') else: start_page = login_res # Grab the url from the authenticated page xml_root = self._html_search_regex( PLAYER_REGEX, start_page, 'xml root') xml_name = self._html_search_regex( r'', webpage): url = self._search_regex( r'src=(["\'])(?P.+?partnerplayer.+?)\1', iframe, 'player URL', default=None, group='url') if url: break if not url: url = self._og_search_url(webpage) mobj = re.match( self._VALID_URL, self._proto_relative_url(url.strip())) player_id = mobj.group('player_id') if not display_id: display_id = player_id if player_id: player_page = self._download_webpage( url, display_id, note='Downloading player page', errnote='Could not download player page') video_id = self._search_regex( r'\d+)' _TEST = { 'url': 'http://www.pearvideo.com/video_1076290', 'info_dict': { 'id': '1076290', 'ext': 'mp4', 'title': '小浣熊在主人家玻璃上滚石头:没砸', 'description': 'md5:01d576b747de71be0ee85eb7cac25f9d', 'timestamp': 1494275280, 'upload_date': '20170508', } } def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) quality = qualities( ('ldflv', 'ld', 'sdflv', 'sd', 'hdflv', 'hd', 'src')) formats = [{ 'url': mobj.group('url'), 'format_id': mobj.group('id'), 'quality': quality(mobj.group('id')), } for mobj in re.finditer( r'(?P[a-zA-Z]+)Url\s*=\s*(["\'])(?P(?:https?:)?//.+?)\2', webpage)] if not formats: info = self._download_json( 'https://www.pearvideo.com/videoStatus.jsp', video_id=video_id, 
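# The JSON from videoStatus.jsp keys its URLs to the server's systemTime; e.g. a hypothetical srcUrl such as
#   https://video.pearvideo.com/mp4/third/.../1660000000000-11899171.mp4
# only becomes playable once that timestamp prefix is replaced with 'cont-<video_id>', which is what the comprehension below does.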
query={'contId': video_id}, headers={'Referer': url}) formats = [{ 'format_id': k, 'url': v.replace(info['systemTime'], f'cont-{video_id}') if k == 'srcUrl' else v } for k, v in traverse_obj(info, ('videoInfo', 'videos'), default={}).items() if v] self._sort_formats(formats) title = self._search_regex( (r']+\bclass=(["\'])video-tt\1[^>]*>(?P[^<]+)', r'<[^>]+\bdata-title=(["\'])(?P(?:(?!\1).)+)\1'), webpage, 'title', group='value') description = self._search_regex( (r']+\bclass=(["\'])summary\1[^>]*>(?P[^<]+)', r'<[^>]+\bdata-summary=(["\'])(?P(?:(?!\1).)+)\1'), webpage, 'description', default=None, group='value') or self._html_search_meta('Description', webpage) timestamp = unified_timestamp(self._search_regex( r']+\bclass=["\']date["\'][^>]*>([^<]+)', webpage, 'timestamp', fatal=False)) return { 'id': video_id, 'title': title, 'description': description, 'timestamp': timestamp, 'formats': formats, } yt-dlp-2022.08.19/yt_dlp/extractor/peekvids.py000066400000000000000000000057541427755243700210550ustar00rootroot00000000000000from .common import InfoExtractor class PeekVidsIE(InfoExtractor): _VALID_URL = r'''(?x) https?://(?:www\.)?peekvids\.com/ (?:(?:[^/?#]+/){2}|embed/?\?(?:[^#]*&)?v=) (?P[^/?&#]*) ''' _TESTS = [{ 'url': 'https://peekvids.com/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp/BSyLMbN0YCd', 'md5': 'a00940646c428e232407e3e62f0e8ef5', 'info_dict': { 'id': 'BSyLMbN0YCd', 'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp, SEXYhub', 'ext': 'mp4', 'thumbnail': r're:^https?://.*\.jpg$', 'description': 'Watch Dane Jones - Cute redhead with perfect tits with Mini Vamp (7 min), uploaded by SEXYhub.com', 'timestamp': 1642579329, 'upload_date': '20220119', 'duration': 416, 'view_count': int, 'age_limit': 18, }, }] _DOMAIN = 'www.peekvids.com' def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) short_video_id = self._html_search_regex(r'\s*\s*([\d,.]+)', r'class=["\']views["\'][^>]*>

([\d,.]+)'), webpage, 'view count', fatal=False)) def extract_list(kind): s = self._search_regex( (r'(?s)%s:\s*</span>\s*(.+?)</div>' % kind.capitalize(), r'(?s)<div[^>]*>%s:(.+?)</div>' % kind.capitalize()), webpage, kind, fatal=False) return re.findall(r'<a[^>]+>([^<]+)</a>', s or '') return { 'id': video_id, 'display_id': display_id, 'title': title, 'thumbnail': thumbnail, 'duration': duration, 'view_count': view_count, 'formats': formats, 'age_limit': 18, 'categories': extract_list('categories'), 'tags': extract_list('tags'), } yt-dlp-2022.08.19/yt_dlp/extractor/pornez.py000066400000000000000000000032371427755243700205520ustar00rootroot00000000000000from .common import InfoExtractor from ..utils import int_or_none class PornezIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?pornez\.net/video(?P<id>[0-9]+)/' _TEST = { 'url': 'https://pornez.net/video344819/mistresst-funny_penis_names-wmv/', 'md5': '2e19a0a1cff3a5dbea0ef1b9e80bcbbc', 'info_dict': { 'id': '344819', 'ext': 'mp4', 'title': r'mistresst funny_penis_names wmv', 'thumbnail': r're:^https?://.*\.jpg$', 'age_limit': 18, } } def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) iframe_src = self._html_search_regex( r'<iframe[^>]+src="(https?://pornez\.net/player/\?[^"]+)"', webpage, 'iframe', fatal=True) title = self._html_search_meta(['name', 'twitter:title', 'og:title'], webpage, 'title', default=None) if title is None: title = self._search_regex(r'
<h1>
(.*?)
</h1>
', webpage, 'title', fatal=True) thumbnail = self._html_search_meta(['thumbnailUrl'], webpage, 'title', default=None) webpage = self._download_webpage(iframe_src, video_id) entries = self._parse_html5_media_entries(iframe_src, webpage, video_id)[0] for format in entries['formats']: height = self._search_regex(r'_(\d+)\.m3u8', format['url'], 'height') format['format_id'] = '%sp' % height format['height'] = int_or_none(height) entries.update({ 'id': video_id, 'title': title, 'thumbnail': thumbnail, 'age_limit': 18 }) return entries yt-dlp-2022.08.19/yt_dlp/extractor/pornflip.py000066400000000000000000000067201427755243700210660ustar00rootroot00000000000000from .common import InfoExtractor from ..utils import ( int_or_none, parse_duration, parse_iso8601 ) class PornFlipIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:(embed|sv|v)/)?(?P[^/]+)' _TESTS = [ { 'url': 'https://www.pornflip.com/dzv9Mtw1qj2/sv/brazzers-double-dare-two-couples-fucked-jenna-reid-maya-bijou', 'info_dict': { 'id': 'dzv9Mtw1qj2', 'ext': 'mp4', 'title': 'Brazzers - Double Dare Two couples fucked Jenna Reid Maya Bijou', 'description': 'md5:d2b69e6cc743c5fd158e162aa7f05821', 'duration': 476, 'like_count': int, 'dislike_count': int, 'view_count': int, 'timestamp': 1617846819, 'upload_date': '20210408', 'uploader': 'Brazzers', 'age_limit': 18, }, 'params': { 'skip_download': True, }, }, { 'url': 'https://www.pornflip.com/v/IrJEC40i21L', 'only_matching': True, }, { 'url': 'https://www.pornflip.com/Z3jzbChC5-P/sexintaxi-e-sereyna-gomez-czech-naked-couple', 'only_matching': True, }, { 'url': 'https://www.pornflip.com/embed/bLcDFxnrZnU', 'only_matching': True, }, ] _HOST = 'www.pornflip.com' def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( 'https://{}/sv/{}'.format(self._HOST, video_id), video_id, headers={'host': self._HOST}) description = self._html_search_regex(r'&p\[summary\]=(.*?)\s*&p', webpage, 'description', fatal=False) duration = self._search_regex(r'"duration":\s+"([^"]+)",', webpage, 'duration', fatal=False) view_count = self._search_regex(r'"interactionCount":\s+"([^"]+)"', webpage, 'view_count', fatal=False) title = self._html_search_regex(r'id="mediaPlayerTitleLink"[^>]*>(.+)', webpage, 'title', fatal=False) uploader = self._html_search_regex(r'class="title-chanel"[^>]*>[^<]*]*>([^<]+)<', webpage, 'uploader', fatal=False) upload_date = self._search_regex(r'"uploadDate":\s+"([^"]+)",', webpage, 'upload_date', fatal=False) likes = self._html_search_regex( r'class="btn btn-up-rating[^>]*>[^<]*]*>[^<]*[^>]*]*>[^0-9]*([0-9]+)[^<0-9]*<', webpage, 'like_count', fatal=False) dislikes = self._html_search_regex( r'class="btn btn-down-rating[^>]*>[^<]*]*>[^<]*[^>]*]*>[^0-9]*([0-9]+)[^<0-9]*<', webpage, 'dislike_count', fatal=False) mpd_url = self._search_regex(r'"([^"]+userscontent.net/dash/[0-9]+/manifest.mpd[^"]*)"', webpage, 'mpd_url').replace('&', '&') formats = self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash') self._sort_formats(formats) return { 'age_limit': 18, 'description': description, 'dislike_count': int_or_none(dislikes), 'duration': parse_duration(duration), 'formats': formats, 'id': video_id, 'like_count': int_or_none(likes), 'timestamp': parse_iso8601(upload_date), 'thumbnail': self._og_search_thumbnail(webpage), 'title': title, 'uploader': uploader, 'view_count': int_or_none(view_count), } yt-dlp-2022.08.19/yt_dlp/extractor/pornhd.py000066400000000000000000000107461427755243700205320ustar00rootroot00000000000000from .common import 
InfoExtractor from ..utils import ( determine_ext, ExtractorError, int_or_none, js_to_json, merge_dicts, urljoin, ) class PornHdIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P\d+)(?:/(?P.+))?' _TESTS = [{ 'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video', 'md5': '87f1540746c1d32ec7a2305c12b96b25', 'info_dict': { 'id': '9864', 'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video', 'ext': 'mp4', 'title': 'Restroom selfie masturbation', 'description': 'md5:3748420395e03e31ac96857a8f125b2b', 'thumbnail': r're:^https?://.*\.jpg', 'view_count': int, 'like_count': int, 'age_limit': 18, }, 'skip': 'HTTP Error 404: Not Found', }, { 'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video', 'md5': '1b7b3a40b9d65a8e5b25f7ab9ee6d6de', 'info_dict': { 'id': '1962', 'display_id': 'sierra-day-gets-his-cum-all-over-herself-hd-porn-video', 'ext': 'mp4', 'title': 'md5:98c6f8b2d9c229d0f0fde47f61a1a759', 'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294', 'thumbnail': r're:^https?://.*\.jpg', 'view_count': int, 'like_count': int, 'age_limit': 18, }, }] def _real_extract(self, url): mobj = self._match_valid_url(url) video_id = mobj.group('id') display_id = mobj.group('display_id') webpage = self._download_webpage(url, display_id or video_id) title = self._html_search_regex( [r']+class=["\']video-name["\'][^>]*>([^<]+)', r'(.+?) - .*?[Pp]ornHD.*?'], webpage, 'title') sources = self._parse_json(js_to_json(self._search_regex( r"(?s)sources'?\s*[:=]\s*(\{.+?\})", webpage, 'sources', default='{}')), video_id) info = {} if not sources: entries = self._parse_html5_media_entries(url, webpage, video_id) if entries: info = entries[0] if not sources and not info: message = self._html_search_regex( r'(?s)<(div|p)[^>]+class="no-video"[^>]*>(?P.+?)]+class=["\']video-description[^>]+>(?P.+?)', r'<(div|p)[^>]+class="description"[^>]*>(?P[^<]+)(?:(?!\1).)+)\1", webpage, 'thumbnail', default=None, group='url') like_count = int_or_none(self._search_regex( (r'(\d+)\s*likes', r'(\d+)\s*]+>(?: |\s)*\blikes', r'class=["\']save-count["\'][^>]*>\s*(\d+)'), webpage, 'like count', fatal=False)) return merge_dicts(info, { 'id': video_id, 'display_id': display_id, 'title': title, 'description': description, 'thumbnail': thumbnail, 'view_count': view_count, 'like_count': like_count, 'formats': formats, 'age_limit': 18, }) yt-dlp-2022.08.19/yt_dlp/extractor/pornhub.py000066400000000000000000000752031427755243700207140ustar00rootroot00000000000000import functools import itertools import math import operator import re import urllib.request from .common import InfoExtractor from .openload import PhantomJSwrapper from ..compat import compat_HTTPError, compat_str from ..utils import ( NO_DEFAULT, ExtractorError, clean_html, determine_ext, format_field, int_or_none, merge_dicts, orderedSet, remove_quotes, remove_start, str_to_int, update_url_query, url_or_none, urlencode_postdata, ) class PornHubBaseIE(InfoExtractor): _NETRC_MACHINE = 'pornhub' _PORNHUB_HOST_RE = r'(?:(?Ppornhub(?:premium)?\.(?:com|net|org))|pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd\.onion)' def _download_webpage_handle(self, *args, **kwargs): def dl(*args, **kwargs): return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs) ret = dl(*args, **kwargs) if not ret: return ret webpage, urlh = ret if any(re.search(p, webpage) for p in ( r']+\bonload=["\']go\(\)', 
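# These three patterns (matching what appears to be the site's JS challenge page: an onload go() hook that computes an RNKEY cookie and reloads) flag responses that need a real JS run; when one matches, the page is rendered once through PhantomJS below and the download is retried against the primed cookie jar.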
r'document\.cookie\s*=\s*["\']RNKEY=', r'document\.location\.reload\(true\)')): url_or_request = args[0] url = (url_or_request.get_full_url() if isinstance(url_or_request, urllib.request.Request) else url_or_request) phantom = PhantomJSwrapper(self, required_version='2.0') phantom.get(url, html=webpage) webpage, urlh = dl(*args, **kwargs) return webpage, urlh def _real_initialize(self): self._logged_in = False def _login(self, host): if self._logged_in: return site = host.split('.')[0] # Both sites pornhub and pornhubpremium have separate accounts # so there should be an option to provide credentials for both. # At the same time some videos are available under the same video id # on both sites so that we have to identify them as the same video. # For that purpose we have to keep both in the same extractor # but under different netrc machines. username, password = self._get_login_info(netrc_machine=site) if username is None: return login_url = 'https://www.%s/%slogin' % (host, 'premium/' if 'premium' in host else '') login_page = self._download_webpage( login_url, None, 'Downloading %s login page' % site) def is_logged(webpage): return any(re.search(p, webpage) for p in ( r'class=["\']signOut', r'>Sign\s+[Oo]ut\s*<')) if is_logged(login_page): self._logged_in = True return login_form = self._hidden_inputs(login_page) login_form.update({ 'username': username, 'password': password, }) response = self._download_json( 'https://www.%s/front/authenticate' % host, None, 'Logging in to %s' % site, data=urlencode_postdata(login_form), headers={ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', 'Referer': login_url, 'X-Requested-With': 'XMLHttpRequest', }) if response.get('success') == '1': self._logged_in = True return message = response.get('message') if message is not None: raise ExtractorError( 'Unable to login: %s' % message, expected=True) raise ExtractorError('Unable to log in') class PornHubIE(PornHubBaseIE): IE_DESC = 'PornHub and Thumbzilla' _VALID_URL = r'''(?x) https?:// (?: (?:[^/]+\.)? 
%s /(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| (?:www\.)?thumbzilla\.com/video/ ) (?P[\da-z]+) ''' % PornHubBaseIE._PORNHUB_HOST_RE _EMBED_REGEX = [r']+?src=["\'](?P(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)'] _TESTS = [{ 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', 'md5': 'a6391306d050e4547f62b3f485dd9ba9', 'info_dict': { 'id': '648719015', 'ext': 'mp4', 'title': 'Seductive Indian beauty strips down and fingers her pink pussy', 'uploader': 'Babes', 'upload_date': '20130628', 'timestamp': 1372447216, 'duration': 361, 'view_count': int, 'like_count': int, 'dislike_count': int, 'comment_count': int, 'age_limit': 18, 'tags': list, 'categories': list, 'cast': list, }, }, { # non-ASCII title 'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002', 'info_dict': { 'id': '1331683002', 'ext': 'mp4', 'title': '重庆婷婷女王足交', 'upload_date': '20150213', 'timestamp': 1423804862, 'duration': 1753, 'view_count': int, 'like_count': int, 'dislike_count': int, 'comment_count': int, 'age_limit': 18, 'tags': list, 'categories': list, }, 'params': { 'skip_download': True, }, 'skip': 'Video has been flagged for verification in accordance with our trust and safety policy', }, { # subtitles 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7', 'info_dict': { 'id': 'ph5af5fef7c2aa7', 'ext': 'mp4', 'title': 'BFFS - Cute Teen Girls Share Cock On the Floor', 'uploader': 'BFFs', 'duration': 622, 'view_count': int, 'like_count': int, 'dislike_count': int, 'comment_count': int, 'age_limit': 18, 'tags': list, 'categories': list, 'subtitles': { 'en': [{ "ext": 'srt' }] }, }, 'params': { 'skip_download': True, }, 'skip': 'This video has been disabled', }, { 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph601dc30bae19a', 'info_dict': { 'id': 'ph601dc30bae19a', 'uploader': 'Projekt Melody', 'uploader_id': 'projekt-melody', 'upload_date': '20210205', 'title': '"Welcome to My Pussy Mansion" - CB Stream (02/03/21)', 'thumbnail': r're:https?://.+', }, }, { 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d', 'only_matching': True, }, { # removed at the request of cam4.com 'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862', 'only_matching': True, }, { # removed at the request of the copyright owner 'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859', 'only_matching': True, }, { # removed by uploader 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111', 'only_matching': True, }, { # private video 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7', 'only_matching': True, }, { 'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex', 'only_matching': True, }, { 'url': 'http://www.pornhub.com/video/show?viewkey=648719015', 'only_matching': True, }, { 'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933', 'only_matching': True, }, { 'url': 'https://www.pornhub.org/view_video.php?viewkey=203640933', 'only_matching': True, }, { 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82', 'only_matching': True, }, { # Some videos are available with the same id on both premium # and non-premium sites (e.g. 
this and the following test) 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5f75b0f4b18e3', 'only_matching': True, }, { 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3', 'only_matching': True, }, { # geo restricted 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a9813bfa7156', 'only_matching': True, }, { 'url': 'http://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/view_video.php?viewkey=ph5a9813bfa7156', 'only_matching': True, }] def _extract_count(self, pattern, webpage, name): return str_to_int(self._search_regex(pattern, webpage, '%s count' % name, default=None)) def _real_extract(self, url): mobj = self._match_valid_url(url) host = mobj.group('host') or 'pornhub.com' video_id = mobj.group('id') self._login(host) self._set_cookie(host, 'age_verified', '1') def dl_webpage(platform): self._set_cookie(host, 'platform', platform) return self._download_webpage( 'https://www.%s/view_video.php?viewkey=%s' % (host, video_id), video_id, 'Downloading %s webpage' % platform) webpage = dl_webpage('pc') error_msg = self._html_search_regex( (r'(?s)]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P.+?)', r'(?s)]+class=["\']noVideo["\'][^>]*>(?P.+?)'), webpage, 'error message', default=None, group='error') if error_msg: error_msg = re.sub(r'\s+', ' ', error_msg) raise ExtractorError( 'PornHub said: %s' % error_msg, expected=True, video_id=video_id) if any(re.search(p, webpage) for p in ( r'class=["\']geoBlocked["\']', r'>\s*This content is unavailable in your country')): self.raise_geo_restricted() # video_title from flashvars contains whitespace instead of non-ASCII (see # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying # on that anymore. 
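# e.g. for the non-ASCII test above (viewkey=1331683002, title '重庆婷婷女王足交') flashvars only yields runs of spaces, while the twitter:title meta tag and the title <h1> markup keep the original characters, so those sources are tried in order below.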
title = self._html_search_meta( 'twitter:title', webpage, default=None) or self._html_search_regex( (r'(?s)]+class=["\']title["\'][^>]*>(?P.+?)</h1>', r'<div[^>]+data-video-title=(["\'])(?P<title>(?:(?!\1).)+)\1', r'shareTitle["\']\s*[=:]\s*(["\'])(?P<title>(?:(?!\1).)+)\1'), webpage, 'title', group='title') video_urls = [] video_urls_set = set() subtitles = {} flashvars = self._parse_json( self._search_regex( r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'), video_id) if flashvars: subtitle_url = url_or_none(flashvars.get('closedCaptionsFile')) if subtitle_url: subtitles.setdefault('en', []).append({ 'url': subtitle_url, 'ext': 'srt', }) thumbnail = flashvars.get('image_url') duration = int_or_none(flashvars.get('video_duration')) media_definitions = flashvars.get('mediaDefinitions') if isinstance(media_definitions, list): for definition in media_definitions: if not isinstance(definition, dict): continue video_url = definition.get('videoUrl') if not video_url or not isinstance(video_url, compat_str): continue if video_url in video_urls_set: continue video_urls_set.add(video_url) video_urls.append( (video_url, int_or_none(definition.get('quality')))) else: thumbnail, duration = [None] * 2 def extract_js_vars(webpage, pattern, default=NO_DEFAULT): assignments = self._search_regex( pattern, webpage, 'encoded url', default=default) if not assignments: return {} assignments = assignments.split(';') js_vars = {} def parse_js_value(inp): inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp) if '+' in inp: inps = inp.split('+') return functools.reduce( operator.concat, map(parse_js_value, inps)) inp = inp.strip() if inp in js_vars: return js_vars[inp] return remove_quotes(inp) for assn in assignments: assn = assn.strip() if not assn: continue assn = re.sub(r'var\s+', '', assn) vname, value = assn.split('=', 1) js_vars[vname] = parse_js_value(value) return js_vars def add_video_url(video_url): v_url = url_or_none(video_url) if not v_url: return if v_url in video_urls_set: return video_urls.append((v_url, None)) video_urls_set.add(v_url) def parse_quality_items(quality_items): q_items = self._parse_json(quality_items, video_id, fatal=False) if not isinstance(q_items, list): return for item in q_items: if isinstance(item, dict): add_video_url(item.get('url')) if not video_urls: FORMAT_PREFIXES = ('media', 'quality', 'qualityItems') js_vars = extract_js_vars( webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES), default=None) if js_vars: for key, format_url in js_vars.items(): if key.startswith(FORMAT_PREFIXES[-1]): parse_quality_items(format_url) elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]): add_video_url(format_url) if not video_urls and re.search( r'<[^>]+\bid=["\']lockedPlayer', webpage): raise ExtractorError( 'Video %s is locked' % video_id, expected=True) if not video_urls: js_vars = extract_js_vars( dl_webpage('tv'), r'(var.+?mediastring.+?)</script>') add_video_url(js_vars['mediastring']) for mobj in re.finditer( r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage): video_url = mobj.group('url') if video_url not in video_urls_set: video_urls.append((video_url, None)) video_urls_set.add(video_url) upload_date = None formats = [] def add_format(format_url, height=None): ext = determine_ext(format_url) if ext == 'mpd': formats.extend(self._extract_mpd_formats( format_url, video_id, mpd_id='dash', fatal=False)) return if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( format_url, video_id, 'mp4', 
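# the trailing arguments mark these HLS entries for the native fragment downloader ('m3u8_native') and keep a missing or broken manifest non-fatal for this format only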
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) return if not height: height = int_or_none(self._search_regex( r'(?P<height>\d+)[pP]?_\d+[kK]', format_url, 'height', default=None)) formats.append({ 'url': format_url, 'format_id': format_field(height, None, '%dp'), 'height': height, }) for video_url, height in video_urls: if not upload_date: upload_date = self._search_regex( r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None) if upload_date: upload_date = upload_date.replace('/', '') if '/video/get_media' in video_url: medias = self._download_json(video_url, video_id, fatal=False) if isinstance(medias, list): for media in medias: if not isinstance(media, dict): continue video_url = url_or_none(media.get('videoUrl')) if not video_url: continue height = int_or_none(media.get('quality')) add_format(video_url, height) continue add_format(video_url) # field_preference is unnecessary here, but kept for code-similarity with youtube-dl self._sort_formats( formats, field_preference=('height', 'width', 'fps', 'format_id')) model_profile = self._search_json( r'var\s+MODEL_PROFILE\s*=', webpage, 'model profile', video_id, fatal=False) video_uploader = self._html_search_regex( r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<', webpage, 'uploader', default=None) or model_profile.get('username') def extract_vote_count(kind, name): return self._extract_count( (r'<span[^>]+\bclass="votes%s"[^>]*>([\d,\.]+)</span>' % kind, r'<span[^>]+\bclass=["\']votes%s["\'][^>]*\bdata-rating=["\'](\d+)' % kind), webpage, name) view_count = self._extract_count( r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view') like_count = extract_vote_count('Up', 'like') dislike_count = extract_vote_count('Down', 'dislike') comment_count = self._extract_count( r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') def extract_list(meta_key): div = self._search_regex( r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>' % meta_key, webpage, meta_key, default=None) if div: return [clean_html(x).strip() for x in re.findall(r'(?s)<a[^>]+\bhref=[^>]+>.+?</a>', div)] info = self._search_json_ld(webpage, video_id, default={}) # description provided in JSON-LD is irrelevant info['description'] = None return merge_dicts({ 'id': video_id, 'uploader': video_uploader, 'uploader_id': remove_start(model_profile.get('modelProfileLink'), '/model/'), 'upload_date': upload_date, 'title': title, 'thumbnail': thumbnail, 'duration': duration, 'view_count': view_count, 'like_count': like_count, 'dislike_count': dislike_count, 'comment_count': comment_count, 'formats': formats, 'age_limit': 18, 'tags': extract_list('tags'), 'categories': extract_list('categories'), 'cast': extract_list('pornstars'), 'subtitles': subtitles, }, info) class PornHubPlaylistBaseIE(PornHubBaseIE): def _extract_page(self, url): return int_or_none(self._search_regex( r'\bpage=(\d+)', url, 'page', default=None)) def _extract_entries(self, webpage, host): # Only process container div with main playlist content skipping # drop-down menu that uses similar pattern for videos (see # https://github.com/ytdl-org/youtube-dl/issues/11594). 
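# Sketch of the assumed markup: the header drop-down carries viewkey links of its own, e.g.
#   <ul class="dropdown">... <a href="/view_video.php?viewkey=ph5...">...</a></ul>
#   <div class="container"> <a href="view_video.php?viewkey=ph5..." title="Video"> ...
# so the regex below first narrows the haystack to everything from the first container <div> onwards before collecting href/title pairs.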
container = self._search_regex( r'(?s)(<div[^>]+class=["\']container.+)', webpage, 'container', default=webpage) return [ self.url_result( 'http://www.%s/%s' % (host, video_url), PornHubIE.ie_key(), video_title=title) for video_url, title in orderedSet(re.findall( r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"', container)) ] class PornHubUserIE(PornHubPlaylistBaseIE): _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' % PornHubBaseIE._PORNHUB_HOST_RE _TESTS = [{ 'url': 'https://www.pornhub.com/model/zoe_ph', 'playlist_mincount': 118, }, { 'url': 'https://www.pornhub.com/pornstar/liz-vicious', 'info_dict': { 'id': 'liz-vicious', }, 'playlist_mincount': 118, }, { 'url': 'https://www.pornhub.com/users/russianveet69', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/channels/povd', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/model/zoe_ph?abc=1', 'only_matching': True, }, { # Unavailable via /videos page, but available with direct pagination # on pornstar page (see [1]), requires premium # 1. https://github.com/ytdl-org/youtube-dl/issues/27853 'url': 'https://www.pornhubpremium.com/pornstar/sienna-west', 'only_matching': True, }, { # Same as before, multi page 'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau', 'only_matching': True, }, { 'url': 'https://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/model/zoe_ph', 'only_matching': True, }] def _real_extract(self, url): mobj = self._match_valid_url(url) user_id = mobj.group('id') videos_url = '%s/videos' % mobj.group('url') page = self._extract_page(url) if page: videos_url = update_url_query(videos_url, {'page': page}) return self.url_result( videos_url, ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id) class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): @staticmethod def _has_more(webpage): return re.search( r'''(?x) <li[^>]+\bclass=["\']page_next| <link[^>]+\brel=["\']next| <button[^>]+\bid=["\']moreDataBtn ''', webpage) is not None def _entries(self, url, host, item_id): page = self._extract_page(url) VIDEOS = '/videos' def download_page(base_url, num, fallback=False): note = 'Downloading page %d%s' % (num, ' (switch to fallback)' if fallback else '') return self._download_webpage( base_url, item_id, note, query={'page': num}) def is_404(e): return isinstance(e.cause, compat_HTTPError) and e.cause.code == 404 base_url = url has_page = page is not None first_page = page if has_page else 1 for page_num in (first_page, ) if has_page else itertools.count(first_page): try: try: webpage = download_page(base_url, page_num) except ExtractorError as e: # Some sources may not be available via /videos page, # trying to fallback to main page pagination (see [1]) # 1. 
https://github.com/ytdl-org/youtube-dl/issues/27853 if is_404(e) and page_num == first_page and VIDEOS in base_url: base_url = base_url.replace(VIDEOS, '') webpage = download_page(base_url, page_num, fallback=True) else: raise except ExtractorError as e: if is_404(e) and page_num != first_page: break raise page_entries = self._extract_entries(webpage, host) if not page_entries: break for e in page_entries: yield e if not self._has_more(webpage): break def _real_extract(self, url): mobj = self._match_valid_url(url) host = mobj.group('host') item_id = mobj.group('id') self._login(host) return self.playlist_result(self._entries(url, host, item_id), item_id) class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): _VALID_URL = r'https?://(?:[^/]+\.)?%s/(?!playlist/)(?P<id>(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE _TESTS = [{ 'url': 'https://www.pornhub.com/model/zoe_ph/videos', 'only_matching': True, }, { 'url': 'http://www.pornhub.com/users/rushandlia/videos', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos', 'info_dict': { 'id': 'pornstar/jenny-blighe/videos', }, 'playlist_mincount': 149, }, { 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3', 'info_dict': { 'id': 'pornstar/jenny-blighe/videos', }, 'playlist_mincount': 40, }, { # default sorting as Top Rated Videos 'url': 'https://www.pornhub.com/channels/povd/videos', 'info_dict': { 'id': 'channels/povd/videos', }, 'playlist_mincount': 293, }, { # Top Rated Videos 'url': 'https://www.pornhub.com/channels/povd/videos?o=ra', 'only_matching': True, }, { # Most Recent Videos 'url': 'https://www.pornhub.com/channels/povd/videos?o=da', 'only_matching': True, }, { # Most Viewed Videos 'url': 'https://www.pornhub.com/channels/povd/videos?o=vi', 'only_matching': True, }, { 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', 'only_matching': True, }, { # Most Viewed Videos 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv', 'only_matching': True, }, { # Top Rated Videos 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=tr', 'only_matching': True, }, { # Longest Videos 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=lg', 'only_matching': True, }, { # Newest Videos 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/video', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/video?page=3', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/video/search?search=123', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/categories/teen', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/categories/teen?page=3', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/hd', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/hd?page=3', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/described-video', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/described-video?page=2', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn', 'only_matching': True, }, { 'url': 'https://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/model/zoe_ph/videos', 'only_matching': True, }] @classmethod def suitable(cls, url): return (False if PornHubIE.suitable(url) or 
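# ordering note: a URL like /model/zoe_ph/videos/upload would otherwise be swallowed by this catch-all paged-list IE, so every more specific PornHub extractor gets first refusal here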
PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) else super(PornHubPagedVideoListIE, cls).suitable(url)) class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' % PornHubBaseIE._PORNHUB_HOST_RE _TESTS = [{ 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', 'info_dict': { 'id': 'jenny-blighe', }, 'playlist_mincount': 129, }, { 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', 'only_matching': True, }, { 'url': 'http://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/pornstar/jenny-blighe/videos/upload', 'only_matching': True, }] class PornHubPlaylistIE(PornHubPlaylistBaseIE): _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/playlist/(?P<id>[^/?#&]+))' % PornHubBaseIE._PORNHUB_HOST_RE _TESTS = [{ 'url': 'https://www.pornhub.com/playlist/44121572', 'info_dict': { 'id': '44121572', }, 'playlist_count': 77, }, { 'url': 'https://www.pornhub.com/playlist/4667351', 'only_matching': True, }, { 'url': 'https://de.pornhub.com/playlist/4667351', 'only_matching': True, }, { 'url': 'https://de.pornhub.com/playlist/4667351?page=2', 'only_matching': True, }] def _entries(self, url, host, item_id): webpage = self._download_webpage(url, item_id, 'Downloading page 1') playlist_id = self._search_regex(r'var\s+playlistId\s*=\s*"([^"]+)"', webpage, 'playlist_id') video_count = int_or_none( self._search_regex(r'var\s+itemsCount\s*=\s*([0-9]+)\s*\|\|', webpage, 'video_count')) token = self._search_regex(r'var\s+token\s*=\s*"([^"]+)"', webpage, 'token') page_count = math.ceil((video_count - 36) / 40.) + 1 page_entries = self._extract_entries(webpage, host) def download_page(page_num): note = 'Downloading page {}'.format(page_num) page_url = 'https://www.{}/playlist/viewChunked'.format(host) return self._download_webpage(page_url, item_id, note, query={ 'id': playlist_id, 'page': page_num, 'token': token, }) for page_num in range(1, page_count + 1): if page_num > 1: webpage = download_page(page_num) page_entries = self._extract_entries(webpage, host) if not page_entries: break for e in page_entries: yield e def _real_extract(self, url): mobj = self._match_valid_url(url) host = mobj.group('host') item_id = mobj.group('id') self._login(host) return self.playlist_result(self._entries(mobj.group('url'), host, item_id), item_id) ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/pornotube.py�����������������������������������������������������0000664�0000000�0000000�00000006051�14277552437�0021247�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import json from .common import InfoExtractor from ..utils import int_or_none class PornotubeIE(InfoExtractor): _VALID_URL = r'https?://(?:\w+\.)?pornotube\.com/(?:[^?#]*?)/video/(?P<id>[0-9]+)' _TEST = { 'url': 
'http://www.pornotube.com/orientation/straight/video/4964/title/weird-hot-and-wet-science', 'md5': '60fc5a4f0d93a97968fc7999d98260c9', 'info_dict': { 'id': '4964', 'ext': 'mp4', 'upload_date': '20141203', 'title': 'Weird Hot and Wet Science', 'description': 'md5:a8304bef7ef06cb4ab476ca6029b01b0', 'categories': ['Adult Humor', 'Blondes'], 'uploader': 'Alpha Blue Archives', 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1417582800, 'age_limit': 18, } } def _real_extract(self, url): video_id = self._match_id(url) token = self._download_json( 'https://api.aebn.net/auth/v2/origins/authenticate', video_id, note='Downloading token', data=json.dumps({'credentials': 'Clip Application'}).encode('utf-8'), headers={ 'Content-Type': 'application/json', 'Origin': 'http://www.pornotube.com', })['tokenKey'] video_url = self._download_json( 'https://api.aebn.net/delivery/v1/clips/%s/MP4' % video_id, video_id, note='Downloading delivery information', headers={'Authorization': token})['mediaUrl'] FIELDS = ( 'title', 'description', 'startSecond', 'endSecond', 'publishDate', 'studios{name}', 'categories{name}', 'movieId', 'primaryImageNumber' ) info = self._download_json( 'https://api.aebn.net/content/v2/clips/%s?fields=%s' % (video_id, ','.join(FIELDS)), video_id, note='Downloading metadata', headers={'Authorization': token}) if isinstance(info, list): info = info[0] title = info['title'] timestamp = int_or_none(info.get('publishDate'), scale=1000) uploader = info.get('studios', [{}])[0].get('name') movie_id = info.get('movieId') primary_image_number = info.get('primaryImageNumber') thumbnail = None if movie_id and primary_image_number: thumbnail = 'http://pic.aebn.net/dis/t/%s/%s_%08d.jpg' % ( movie_id, movie_id, primary_image_number) start = int_or_none(info.get('startSecond')) end = int_or_none(info.get('endSecond')) duration = end - start if start and end else None categories = [c['name'] for c in info.get('categories', []) if c.get('name')] return { 'id': video_id, 'url': video_url, 'title': title, 'description': info.get('description'), 'duration': duration, 'timestamp': timestamp, 'uploader': uploader, 'thumbnail': thumbnail, 'categories': categories, 'age_limit': 18, } ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/pornovoisines.py�������������������������������������������������0000664�0000000�0000000�00000007533�14277552437�0022155�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import ( int_or_none, float_or_none, unified_strdate, ) class PornoVoisinesIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/videos/show/(?P<id>\d+)/(?P<display_id>[^/.]+)' _TEST = { 'url': 'http://www.pornovoisines.com/videos/show/919/recherche-appartement.html', 'md5': 
'6f8aca6a058592ab49fe701c8ba8317b', 'info_dict': { 'id': '919', 'display_id': 'recherche-appartement', 'ext': 'mp4', 'title': 'Recherche appartement', 'description': 'md5:fe10cb92ae2dd3ed94bb4080d11ff493', 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20140925', 'duration': 120, 'view_count': int, 'average_rating': float, 'categories': ['Débutante', 'Débutantes', 'Scénario', 'Sodomie'], 'age_limit': 18, 'subtitles': { 'fr': [{ 'ext': 'vtt', }] }, } } def _real_extract(self, url): mobj = self._match_valid_url(url) video_id = mobj.group('id') display_id = mobj.group('display_id') settings_url = self._download_json( 'http://www.pornovoisines.com/api/video/%s/getsettingsurl/' % video_id, video_id, note='Getting settings URL')['video_settings_url'] settings = self._download_json(settings_url, video_id)['data'] formats = [] for kind, data in settings['variants'].items(): if kind == 'HLS': formats.extend(self._extract_m3u8_formats( data, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls')) elif kind == 'MP4': for item in data: formats.append({ 'url': item['url'], 'height': item.get('height'), 'bitrate': item.get('bitrate'), }) self._sort_formats(formats) webpage = self._download_webpage(url, video_id) title = self._og_search_title(webpage) description = self._og_search_description(webpage) # The webpage has a bug - there's no space between "thumb" and src= thumbnail = self._html_search_regex( r'<img[^>]+class=([\'"])thumb\1[^>]*src=([\'"])(?P<url>[^"]+)\2', webpage, 'thumbnail', fatal=False, group='url') upload_date = unified_strdate(self._search_regex( r'Le\s*<b>([\d/]+)', webpage, 'upload date', fatal=False)) duration = settings.get('main', {}).get('duration') view_count = int_or_none(self._search_regex( r'(\d+) vues', webpage, 'view count', fatal=False)) average_rating = self._search_regex( r'Note\s*:\s*(\d+(?:,\d+)?)', webpage, 'average rating', fatal=False) if average_rating: average_rating = float_or_none(average_rating.replace(',', '.')) categories = self._html_search_regex( r'(?s)Catégories\s*:\s*<b>(.+?)</b>', webpage, 'categories', fatal=False) if categories: categories = [category.strip() for category in categories.split(',')] subtitles = {'fr': [{ 'url': subtitle, } for subtitle in settings.get('main', {}).get('vtt_tracks', {}).values()]} return { 'id': video_id, 'display_id': display_id, 'formats': formats, 'title': title, 'description': description, 'thumbnail': thumbnail, 'upload_date': upload_date, 'duration': duration, 'view_count': view_count, 'average_rating': average_rating, 'categories': categories, 'age_limit': 18, 'subtitles': subtitles, } ���������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/pornoxo.py�������������������������������������������������������0000664�0000000�0000000�00000003533�14277552437�0020740�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import ( str_to_int, ) class PornoXOIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html' _TEST = { 'url': 
'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html', 'md5': '582f28ecbaa9e6e24cb90f50f524ce87', 'info_dict': { 'id': '7564', 'ext': 'flv', 'title': 'Striptease From Sexy Secretary!', 'display_id': 'striptease-from-sexy-secretary', 'description': 'md5:0ee35252b685b3883f4a1d38332f9980', 'categories': list, # NSFW 'thumbnail': r're:https?://.*\.jpg$', 'age_limit': 18, } } def _real_extract(self, url): mobj = self._match_valid_url(url) video_id, display_id = mobj.groups() webpage = self._download_webpage(url, video_id) video_data = self._extract_jwplayer_data(webpage, video_id, require_title=False) title = self._html_search_regex( r'<title>([^<]+)\s*-\s*PornoXO', webpage, 'title') view_count = str_to_int(self._html_search_regex( r'[vV]iews:\s*([0-9,]+)', webpage, 'view count', fatal=False)) categories_str = self._html_search_regex( r'<meta name="description" content=".*featuring\s*([^"]+)"', webpage, 'categories', fatal=False) categories = ( None if categories_str is None else categories_str.split(',')) video_data.update({ 'id': video_id, 'title': title, 'display_id': display_id, 'description': self._html_search_meta('description', webpage), 'categories': categories, 'view_count': view_count, 'age_limit': 18, }) return video_data ���������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/premiershiprugby.py����������������������������������������������0000664�0000000�0000000�00000003453�14277552437�0022635�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import int_or_none, traverse_obj class PremiershipRugbyIE(InfoExtractor): _VALID_URL = r'https?://(?:\w+\.)premiershiprugby\.(?:com)/watch/(?P<id>[\w-]+)' _TESTS = [{ 'url': 'https://www.premiershiprugby.com/watch/full-match-harlequins-v-newcastle-falcons', 'info_dict': { 'id': '0_mbkb7ldt', 'title': 'Full Match: Harlequins v Newcastle Falcons', 'ext': 'mp4', 'thumbnail': 'https://open.http.mp.streamamg.com/p/3000914/sp/300091400/thumbnail/entry_id/0_mbkb7ldt//width/960/height/540/type/1/quality/75', 'duration': 6093.0, 'tags': ['video'], 'categories': ['Full Match', 'Harlequins', 'Newcastle Falcons', 'gallaher premiership'], } }] def _real_extract(self, url): display_id = self._match_id(url) json_data = self._download_json( f'https://article-cms-api.incrowdsports.com/v2/articles/slug/{display_id}', display_id, query={'clientId': 'PRL'})['data']['article'] formats, subs = self._extract_m3u8_formats_and_subtitles( json_data['heroMedia']['content']['videoLink'], display_id) return { 'id': json_data['heroMedia']['content']['sourceSystemId'], 'display_id': display_id, 'title': traverse_obj(json_data, ('heroMedia', 'title')), 'formats': formats, 'subtitles': subs, 'thumbnail': traverse_obj(json_data, ('heroMedia', 'content', 'videoThumbnail')), 'duration': int_or_none(traverse_obj(json_data, ('heroMedia', 'content', 'metadata', 'msDuration')), scale=1000), 'tags': json_data.get('tags'), 'categories': traverse_obj(json_data, ('categories', ..., 'text')), } 
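# A minimal sketch (not part of the archive) of the traverse_obj access pattern used above, assuming the incrowdsports payload shape implied by the test:
#   from yt_dlp.utils import traverse_obj
#   article = {'heroMedia': {'content': {'videoLink': 'https://example.com/master.m3u8',
#                                        'metadata': {'msDuration': 6093000}}}}
#   traverse_obj(article, ('heroMedia', 'content', 'videoLink'))  # -> the m3u8 URL
#   traverse_obj(article, ('heroMedia', 'content', 'missing'))    # -> None, not a KeyError
# which is why optional fields can be chained without try/except, and why msDuration is scaled into seconds via int_or_none(..., scale=1000).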

yt-dlp-2022.08.19/yt_dlp/extractor/presstv.py

from .common import InfoExtractor
from ..utils import remove_start


class PressTVIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?presstv\.ir/[^/]+/(?P<y>\d+)/(?P<m>\d+)/(?P<d>\d+)/(?P<id>\d+)/(?P<display_id>[^/]+)?'

    _TEST = {
        'url': 'http://www.presstv.ir/Detail/2016/04/09/459911/Australian-sewerage-treatment-facility-/',
        'md5': '5d7e3195a447cb13e9267e931d8dd5a5',
        'info_dict': {
            'id': '459911',
            'display_id': 'Australian-sewerage-treatment-facility-',
            'ext': 'mp4',
            'title': 'Organic mattresses used to clean waste water',
            'upload_date': '20160409',
            'thumbnail': r're:^https?://.*\.jpg',
            'description': 'md5:20002e654bbafb6908395a5c0cfcd125'
        }
    }

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id') or video_id

        webpage = self._download_webpage(url, display_id)

        # extract video URL from webpage
        video_url = self._hidden_inputs(webpage)['inpPlayback']

        # build list of available formats
        # specified in http://www.presstv.ir/Scripts/playback.js
        base_url = 'http://192.99.219.222:82/presstv'
        _formats = [
            (180, '_low200.mp4'),
            (360, '_low400.mp4'),
            (720, '_low800.mp4'),
            (1080, '.mp4')
        ]

        formats = [{
            'url': base_url + video_url[:-4] + extension,
            'format_id': '%dp' % height,
            'height': height,
        } for height, extension in _formats]

        # extract video metadata
        title = remove_start(
            self._html_search_meta('title', webpage, fatal=True), 'PressTV-')

        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage)

        upload_date = '%04d%02d%02d' % (
            int(mobj.group('y')),
            int(mobj.group('m')),
            int(mobj.group('d')),
        )

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'description': description
        }

yt-dlp-2022.08.19/yt_dlp/extractor/projectveritas.py

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    traverse_obj,
    unified_strdate,
)


class ProjectVeritasIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?projectveritas\.com/(?P<type>news|video)/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.projectveritas.com/news/exclusive-inside-the-new-york-and-new-jersey-hospitals-battling-coronavirus/',
        'info_dict': {
            'id': '51910aab-365a-5cf1-88f2-8eb1ca5fd3c6',
            'ext': 'mp4',
            'title': 'Exclusive: Inside The New York and New Jersey Hospitals Battling Coronavirus',
            'upload_date': '20200327',
            'thumbnail': 'md5:6076477fe50b03eb8708be9415e18e1c',
        }
    }, {
        'url': 'https://www.projectveritas.com/video/ilhan-omar-connected-ballot-harvester-in-cash-for-ballots-scheme-car-is-full/',
        'info_dict': {
            'id': 'c5aab304-a56b-54b1-9f0b-03b77bc5f2f6',
            'ext': 'mp4',
            'title': 'Ilhan Omar connected Ballot Harvester in cash-for-ballots scheme: "Car is full" of absentee ballots',
            'upload_date': '20200927',
            'thumbnail': 'md5:194b8edf0e2ba64f25500ff4378369a4',
        }
    }]

    def _real_extract(self, url):
        id, type = self._match_valid_url(url).group('id', 'type')
        api_url = f'https://www.projectveritas.com/page-data/{type}/{id}/page-data.json'
        data_json = self._download_json(api_url, id)['result']['data']
        main_data = traverse_obj(data_json, 'video', 'post')
        video_id = main_data['id']
        thumbnail = traverse_obj(main_data, ('image', 'ogImage', 'src'))
        mux_asset = traverse_obj(
            main_data, 'muxAsset',
            ('body', 'json', 'content', ..., 'data', 'target', 'fields', 'muxAsset'),
            get_all=False, expected_type=dict)
        if not mux_asset:
            raise ExtractorError('No video on the provided url.', expected=True)
        playback_id = traverse_obj(mux_asset, 'playbackId', ('en-US', 'playbackId'))
        formats = self._extract_m3u8_formats(f'https://stream.mux.com/{playback_id}.m3u8', video_id)
        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': main_data['title'],
            'upload_date': unified_strdate(main_data.get('date')),
            'thumbnail': thumbnail.replace('//', ''),
            'formats': formats,
        }
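
# The Mux flow above reduces to: locate a playbackId in the page data, then
# fetch HLS from stream.mux.com. A rough sketch (playback_id is hypothetical):
#
#     playback_id = 'abc123'
#     formats = self._extract_m3u8_formats(
#         f'https://stream.mux.com/{playback_id}.m3u8', video_id)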

yt-dlp-2022.08.19/yt_dlp/extractor/prosiebensat1.py

import re

from hashlib import sha1
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    ExtractorError,
    determine_ext,
    float_or_none,
    int_or_none,
    merge_dicts,
    unified_strdate,
)


class ProSiebenSat1BaseIE(InfoExtractor):
    _GEO_BYPASS = False
    _ACCESS_ID = None
    _SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear'
    _V4_BASE_URL = 'https://vas-v4.p7s1video.net/4.0/get'

    def _extract_video_info(self, url, clip_id):
        client_location = url

        video = self._download_json(
            'http://vas.sim-technik.de/vas/live/v2/videos',
            clip_id, 'Downloading videos JSON', query={
                'access_token': self._TOKEN,
                'client_location': client_location,
                'client_name': self._CLIENT_NAME,
                'ids': clip_id,
            })[0]

        if not self.get_param('allow_unplayable_formats') and video.get('is_protected') is True:
            self.report_drm(clip_id)

        formats = []
        if self._ACCESS_ID:
            raw_ct = self._ENCRYPTION_KEY + clip_id + self._IV + self._ACCESS_ID
            protocols = self._download_json(
                self._V4_BASE_URL + 'protocols', clip_id,
                'Downloading protocols JSON',
                headers=self.geo_verification_headers(), query={
                    'access_id': self._ACCESS_ID,
                    'client_token': sha1((raw_ct).encode()).hexdigest(),
                    'video_id': clip_id,
                }, fatal=False, expected_status=(403,)) or {}
            error = protocols.get('error') or {}
            if error.get('title') == 'Geo check failed':
                self.raise_geo_restricted(countries=['AT', 'CH', 'DE'])
            server_token = protocols.get('server_token')
            if server_token:
                urls = (self._download_json(
                    self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={
                        'access_id': self._ACCESS_ID,
                        'client_token': sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(),
                        'protocols': self._SUPPORTED_PROTOCOLS,
                        'server_token': server_token,
                        'video_id': clip_id,
                    }, fatal=False) or {}).get('urls') or {}
                for protocol, variant in urls.items():
                    source_url = variant.get('clear', {}).get('url')
                    if not source_url:
                        continue
                    if protocol == 'dash':
                        formats.extend(self._extract_mpd_formats(
                            source_url, clip_id, mpd_id=protocol, fatal=False))
                    elif protocol == 'hls':
                        formats.extend(self._extract_m3u8_formats(
                            source_url, clip_id, 'mp4', 'm3u8_native',
                            m3u8_id=protocol, fatal=False))
                    else:
                        formats.append({
                            'url': source_url,
                            'format_id': protocol,
                        })
        if not formats:
            source_ids = [compat_str(source['id']) for source in video['sources']]

            client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()

            sources = self._download_json(
                'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id,
                clip_id, 'Downloading sources JSON', query={
                    'access_token': self._TOKEN,
                    'client_id': client_id,
                    'client_location': client_location,
                    'client_name': self._CLIENT_NAME,
                })
            server_id = sources['server_id']

            def fix_bitrate(bitrate):
                bitrate = int_or_none(bitrate)
                if not bitrate:
                    return None
                return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate

            for source_id in source_ids:
                client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
                urls = self._download_json(
                    'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id,
                    clip_id, 'Downloading urls JSON', fatal=False, query={
                        'access_token': self._TOKEN,
                        'client_id': client_id,
                        'client_location': client_location,
                        'client_name': self._CLIENT_NAME,
                        'server_id': server_id,
                        'source_ids': source_id,
                    })
                if not urls:
                    continue
                if urls.get('status_code') != 0:
                    raise ExtractorError('This video is unavailable', expected=True)
                urls_sources = urls['sources']
                if isinstance(urls_sources, dict):
                    urls_sources = urls_sources.values()
                for source in urls_sources:
                    source_url = source.get('url')
                    if not source_url:
                        continue
                    protocol = source.get('protocol')
                    mimetype = source.get('mimetype')
                    if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
                        formats.extend(self._extract_f4m_formats(
                            source_url, clip_id, f4m_id='hds', fatal=False))
                    elif mimetype == 'application/x-mpegURL':
                        formats.extend(self._extract_m3u8_formats(
                            source_url, clip_id, 'mp4', 'm3u8_native',
                            m3u8_id='hls', fatal=False))
                    elif mimetype == 'application/dash+xml':
                        formats.extend(self._extract_mpd_formats(
                            source_url, clip_id, mpd_id='dash', fatal=False))
                    else:
                        tbr = fix_bitrate(source['bitrate'])

                        if protocol in ('rtmp', 'rtmpe'):
                            mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
                            if not mobj:
                                continue
                            path = mobj.group('path')
                            mp4colon_index = path.rfind('mp4:')
                            app = path[:mp4colon_index]
                            play_path = path[mp4colon_index:]
                            formats.append({
                                'url': '%s/%s' % (mobj.group('url'), app),
                                'app': app,
                                'play_path': play_path,
                                'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
                                'page_url': 'http://www.prosieben.de',
                                'tbr': tbr,
                                'ext': 'flv',
                                'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''),
                            })
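                            # Example (hypothetical URL): for rtmpe://host/a/b/mp4:video.mp4
                            # the path is 'a/b/mp4:video.mp4', so app == 'a/b/'
                            # and play_path == 'mp4:video.mp4'.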
                        else:
                            formats.append({
                                'url': source_url,
                                'tbr': tbr,
                                'format_id': 'http%s' % ('-%d' % tbr if tbr else ''),
                            })
        self._sort_formats(formats)

        return {
            'duration': float_or_none(video.get('duration')),
            'formats': formats,
        }


class ProSiebenSat1IE(ProSiebenSat1BaseIE):
    IE_NAME = 'prosiebensat1'
    IE_DESC = 'ProSiebenSat.1 Digital'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?
                        (?:
                            (?:beta\.)?
                            (?:
                                prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|advopedia
                            )\.(?:de|at|ch)|
                            ran\.de|fem\.com|advopedia\.de|galileo\.tv/video
                        )
                        /(?P<id>.+)
                    '''

    _TESTS = [
        {
            # Tests changes introduced in https://github.com/ytdl-org/youtube-dl/pull/6242
            # in response to fixing https://github.com/ytdl-org/youtube-dl/issues/6215:
            # - malformed f4m manifest support
            # - proper handling of URLs starting with `https?://` in 2.0 manifests
            # - recursive child f4m manifests extraction
            'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
            'info_dict': {
                'id': '2104602',
                'ext': 'mp4',
                'title': 'CIRCUS HALLIGALLI - Episode 18 - Staffel 2',
                'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
                'upload_date': '20131231',
                'duration': 5845.04,
                'series': 'CIRCUS HALLIGALLI',
                'season_number': 2,
                'episode': 'Episode 18 - Staffel 2',
                'episode_number': 18,
            },
        },
        {
            'url': 'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html',
            'info_dict': {
                'id': '2570327',
                'ext': 'mp4',
                'title': 'Lady-Umstyling für Audrina',
                'description': 'md5:4c16d0c17a3461a0d43ea4084e96319d',
                'upload_date': '20131014',
                'duration': 606.76,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'Seems to be broken',
        },
        {
            'url': 'http://www.prosiebenmaxx.de/tv/experience/video/144-countdown-fuer-die-autowerkstatt-ganze-folge',
            'info_dict': {
                'id': '2429369',
                'ext': 'mp4',
                'title': 'Countdown für die Autowerkstatt',
                'description': 'md5:809fc051a457b5d8666013bc40698817',
                'upload_date': '20140223',
                'duration': 2595.04,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'This video is unavailable',
        },
        {
            'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip',
            'info_dict': {
                'id': '2904997',
                'ext': 'mp4',
                'title': 'Sexy laufen in Ugg Boots',
                'description': 'md5:edf42b8bd5bc4e5da4db4222c5acb7d6',
                'upload_date': '20140122',
                'duration': 245.32,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'This video is unavailable',
        },
        {
            'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip',
            'info_dict': {
                'id': '2906572',
                'ext': 'mp4',
                'title': 'Im Interview: Kai Wiesinger',
                'description': 'md5:e4e5370652ec63b95023e914190b4eb9',
                'upload_date': '20140203',
                'duration': 522.56,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'This video is unavailable',
        },
        {
            'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge',
            'info_dict': {
                'id': '2992323',
                'ext': 'mp4',
                'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',
                'description': 'md5:2669cde3febe9bce13904f701e774eb6',
                'upload_date': '20141014',
                'duration': 2410.44,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'This video is unavailable',
        },
        {
            'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge',
            'info_dict': {
                'id': '3004256',
                'ext': 'mp4',
                'title': 'Schalke: Tönnies möchte Raul zurück',
                'description': 'md5:4b5b271d9bcde223b54390754c8ece3f',
                'upload_date': '20140226',
                'duration': 228.96,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'This video is unavailable',
        },
        {
            'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
            'info_dict': {
                'id': '2572814',
                'ext': 'mp4',
                'title': 'The Voice of Germany - Andreas Kümmert: Rocket Man',
                'description': 'md5:6ddb02b0781c6adf778afea606652e38',
                'timestamp': 1382041620,
                'upload_date': '20131017',
                'duration': 469.88,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.fem.com/videos/beauty-lifestyle/kurztrips-zum-valentinstag',
            'info_dict': {
                'id': '2156342',
                'ext': 'mp4',
                'title': 'Kurztrips zum Valentinstag',
                'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.',
                'duration': 307.24,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist',
            'info_dict': {
                'id': '439664',
                'title': 'Episode 8 - Ganze Folge - Playlist',
                'description': 'md5:63b8963e71f481782aeea877658dec84',
            },
            'playlist_count': 2,
            'skip': 'This video is unavailable',
        },
        {
            # title in <h2 class="subtitle">
            'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip',
            'info_dict': {
                'id': '4895826',
                'ext': 'mp4',
                'title': 'Jetzt erst enthüllt: Das Geheimnis von Emma Stones Oscar-Robe',
                'description': 'md5:e5ace2bc43fadf7b63adc6187e9450b9',
                'upload_date': '20170302',
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'geo restricted to Germany',
        },
        {
            # geo restricted to Germany
            'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge',
            'only_matching': True,
        },
        {
            # geo restricted to Germany
            'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge',
            'only_matching': True,
        },
        {
            # geo restricted to Germany
            'url': 'https://www.galileo.tv/video/diese-emojis-werden-oft-missverstanden',
            'only_matching': True,
        },
        {
            'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel',
            'only_matching': True,
        },
        {
            'url': 'http://www.advopedia.de/videos/lenssen-klaert-auf/lenssen-klaert-auf-folge-8-staffel-3-feiertage-und-freie-tage',
            'only_matching': True,
        },
    ]

    _TOKEN = 'prosieben'
    _SALT = '01!8d8F_)r9]4s[qeuXfP%'
    _CLIENT_NAME = 'kolibri-2.0.19-splec4'

    _ACCESS_ID = 'x_prosiebenmaxx-de'
    _ENCRYPTION_KEY = 'Eeyeey9oquahthainoofashoyoikosag'
    _IV = 'Aeluchoc6aevechuipiexeeboowedaok'

    _CLIPID_REGEXES = [
        r'"clip_id"\s*:\s+"(\d+)"',
        r'clipid: "(\d+)"',
        r'clip[iI]d=(\d+)',
        r'clip[iI][dD]\s*=\s*["\'](\d+)',
        r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
        r'proMamsId&quot;\s*:\s*&quot;(\d+)',
        r'proMamsId"\s*:\s*"(\d+)',
    ]
    _TITLE_REGEXES = [
        r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
        r'<header class="clearfix">\s*<h3>(.+?)</h3>',
        r'<!-- start video -->\s*<h1>(.+?)</h1>',
        r'<h1 class="att-name">\s*(.+?)</h1>',
        r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>',
        r'<h2 class="video-title" itemprop="name">\s*(.+?)</h2>',
        r'<div[^>]+id="veeseoTitle"[^>]*>(.+?)</div>',
        r'<h2[^>]+class="subtitle"[^>]*>([^<]+)</h2>',
    ]
    _DESCRIPTION_REGEXES = [
        r'<p itemprop="description">\s*(.+?)</p>',
        r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>',
        r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>',
        r'<p class="att-description">\s*(.+?)\s*</p>',
        r'<p class="video-description" itemprop="description">\s*(.+?)</p>',
        r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>',
    ]
    _UPLOAD_DATE_REGEXES = [
        r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"',
        r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr',
        r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
        r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',
    ]
    _PAGE_TYPE_REGEXES = [
        r'<meta name="page_type" content="([^"]+)">',
        r"'itemType'\s*:\s*'([^']*)'",
    ]
    _PLAYLIST_ID_REGEXES = [
        r'content[iI]d=(\d+)',
        r"'itemId'\s*:\s*'([^']*)'",
    ]
    _PLAYLIST_CLIP_REGEXES = [
        r'(?s)data-qvt=.+?<a href="([^"]+)"',
    ]

    def _extract_clip(self, url, webpage):
        clip_id = self._html_search_regex(
            self._CLIPID_REGEXES, webpage, 'clip id')
        title = self._html_search_regex(
            self._TITLE_REGEXES, webpage, 'title',
            default=None) or self._og_search_title(webpage)
        info = self._extract_video_info(url, clip_id)
        description = self._html_search_regex(
            self._DESCRIPTION_REGEXES, webpage, 'description', default=None)
        if description is None:
            description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        upload_date = unified_strdate(
            self._html_search_meta('og:published_time', webpage,
                                   'upload date', default=None)
            or self._html_search_regex(self._UPLOAD_DATE_REGEXES,
                                       webpage, 'upload date', default=None))

        json_ld = self._search_json_ld(webpage, clip_id, default={})

        return merge_dicts(info, {
            'id': clip_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }, json_ld)

    def _extract_playlist(self, url, webpage):
        playlist_id = self._html_search_regex(
            self._PLAYLIST_ID_REGEXES, webpage, 'playlist id')
        playlist = self._parse_json(
            self._search_regex(
                r'var\s+contentResources\s*=\s*(\[.+?\]);\s*</script',
                webpage, 'playlist'),
            playlist_id)
        entries = []
        for item in playlist:
            clip_id = item.get('id') or item.get('upc')
            if not clip_id:
                continue
            info = self._extract_video_info(url, clip_id)
            info.update({
                'id': clip_id,
                'title': item.get('title') or item.get('teaser', {}).get('headline'),
                'description': item.get('teaser', {}).get('description'),
                'thumbnail': item.get('poster'),
                'duration': float_or_none(item.get('duration')),
                'series': item.get('tvShowTitle'),
                'uploader': item.get('broadcastPublisher'),
            })
            entries.append(info)
        return self.playlist_result(entries, playlist_id)

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        page_type = self._search_regex(
            self._PAGE_TYPE_REGEXES, webpage,
            'page type', default='clip').lower()
        if page_type == 'clip':
            return self._extract_clip(url, webpage)
        elif page_type == 'playlist':
            return self._extract_playlist(url, webpage)
        else:
            raise ExtractorError(
                'Unsupported page type %s' % page_type, expected=True)

yt-dlp-2022.08.19/yt_dlp/extractor/prx.py

import itertools
from .common import InfoExtractor, SearchInfoExtractor
from ..utils import (
    urljoin,
    traverse_obj,
    int_or_none,
    mimetype2ext,
    clean_html,
    url_or_none,
    unified_timestamp,
    str_or_none,
)


class PRXBaseIE(InfoExtractor):
    PRX_BASE_URL_RE = r'https?://(?:(?:beta|listen)\.)?prx.org/%s'

    def _call_api(self, item_id, path, query=None, fatal=True, note='Downloading CMS API JSON'):
        return self._download_json(
            urljoin('https://cms.prx.org/api/v1/', path), item_id, query=query, fatal=fatal, note=note)

    @staticmethod
    def _get_prx_embed_response(response, section):
        return traverse_obj(response, ('_embedded', f'prx:{section}'))

    @staticmethod
    def _extract_file_link(response):
        return url_or_none(traverse_obj(
            response, ('_links', 'enclosure', 'href'), expected_type=str))

    @classmethod
    def _extract_image(cls, image_response):
        if not isinstance(image_response, dict):
            return
        return {
            'id': str_or_none(image_response.get('id')),
            'filesize': image_response.get('size'),
            'width': image_response.get('width'),
            'height': image_response.get('height'),
            'url': cls._extract_file_link(image_response)
        }

    @classmethod
    def _extract_base_info(cls, response):
        if not isinstance(response, dict):
            return
        item_id = str_or_none(response.get('id'))
        if not item_id:
            return
        thumbnail_dict = cls._extract_image(cls._get_prx_embed_response(response, 'image'))
        description = (
            clean_html(response.get('description'))
            or response.get('shortDescription'))
        return {
            'id': item_id,
            'title': response.get('title') or item_id,
            'thumbnails': [thumbnail_dict] if thumbnail_dict else None,
            'description': description,
            'release_timestamp': unified_timestamp(response.get('releasedAt')),
            'timestamp': unified_timestamp(response.get('createdAt')),
            'modified_timestamp': unified_timestamp(response.get('updatedAt')),
            'duration': int_or_none(response.get('duration')),
            'tags': response.get('tags'),
            'episode_number': int_or_none(response.get('episodeIdentifier')),
            'season_number': int_or_none(response.get('seasonIdentifier'))
        }

    @classmethod
    def _extract_series_info(cls, series_response):
        base_info = cls._extract_base_info(series_response)
        if not base_info:
            return
        account_info = cls._extract_account_info(
            cls._get_prx_embed_response(series_response, 'account')) or {}
        return {
            **base_info,
            'channel_id': account_info.get('channel_id'),
            'channel_url': account_info.get('channel_url'),
            'channel': account_info.get('channel'),
            'series': base_info.get('title'),
            'series_id': base_info.get('id'),
        }

    @classmethod
    def _extract_account_info(cls, account_response):
        base_info = cls._extract_base_info(account_response)
        if not base_info:
            return
        name = account_response.get('name')
        return {
            **base_info,
            'title': name,
            'channel_id': base_info.get('id'),
            'channel_url': 'https://beta.prx.org/accounts/%s' % base_info.get('id'),
            'channel': name,
        }

    @classmethod
    def _extract_story_info(cls, story_response):
        base_info = cls._extract_base_info(story_response)
        if not base_info:
            return
        series = cls._extract_series_info(
            cls._get_prx_embed_response(story_response, 'series')) or {}
        account = cls._extract_account_info(
            cls._get_prx_embed_response(story_response, 'account')) or {}
        return {
            **base_info,
            'series': series.get('series'),
            'series_id': series.get('series_id'),
            'channel_id': account.get('channel_id'),
            'channel_url': account.get('channel_url'),
            'channel': account.get('channel')
        }

    def _entries(self, item_id, endpoint, entry_func, query=None):
        """
        Extract entries from paginated list API
        @param entry_func: Function to generate entry from response item
        """
        total = 0
        for page in itertools.count(1):
            response = self._call_api(f'{item_id}: page {page}', endpoint, query={
                **(query or {}),
                'page': page,
                'per': 100
            })
            items = self._get_prx_embed_response(response, 'items')
            if not response or not items:
                break

            yield from filter(None, map(entry_func, items))

            total += response['count']
            if total >= response['total']:
                break

    def _story_playlist_entry(self, response):
        story = self._extract_story_info(response)
        if not story:
            return
        story.update({
            '_type': 'url',
            'url': 'https://beta.prx.org/stories/%s' % story['id'],
            'ie_key': PRXStoryIE.ie_key()
        })
        return story

    def _series_playlist_entry(self, response):
        series = self._extract_series_info(response)
        if not series:
            return
        series.update({
            '_type': 'url',
            'url': 'https://beta.prx.org/series/%s' % series['id'],
            'ie_key': PRXSeriesIE.ie_key()
        })
        return series
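
# A condensed sketch of the pagination contract that _entries() above relies
# on (field names from the CMS API; the values here are hypothetical):
#
#     page 1 -> {'count': 100, 'total': 250, '_embedded': {'prx:items': [...]}}
#     page 2 -> {'count': 100, 'total': 250, '_embedded': {'prx:items': [...]}}
#     page 3 -> {'count': 50,  'total': 250, '_embedded': {'prx:items': [...]}}
#     # after page 3: 250 items fetched >= total 250, so the generator stops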

class PRXStoryIE(PRXBaseIE):
    _VALID_URL = PRXBaseIE.PRX_BASE_URL_RE % r'stories/(?P<id>\d+)'

    _TESTS = [
        {
            # Story with season and episode details
            'url': 'https://beta.prx.org/stories/399200',
            'info_dict': {
                'id': '399200',
                'title': 'Fly Me To The Moon',
                'description': 'md5:43230168390b95d3322048d8a56bf2bb',
                'release_timestamp': 1640250000,
                'timestamp': 1640208972,
                'modified_timestamp': 1641318202,
                'duration': 1004,
                'tags': 'count:7',
                'episode_number': 8,
                'season_number': 5,
                'series': 'AirSpace',
                'series_id': '38057',
                'channel_id': '220986',
                'channel_url': 'https://beta.prx.org/accounts/220986',
                'channel': 'Air and Space Museum',
            },
            'playlist': [{
                'info_dict': {
                    'id': '399200_part1',
                    'title': 'Fly Me To The Moon',
                    'description': 'md5:43230168390b95d3322048d8a56bf2bb',
                    'release_timestamp': 1640250000,
                    'timestamp': 1640208972,
                    'modified_timestamp': 1641318202,
                    'duration': 530,
                    'tags': 'count:7',
                    'episode_number': 8,
                    'season_number': 5,
                    'series': 'AirSpace',
                    'series_id': '38057',
                    'channel_id': '220986',
                    'channel_url': 'https://beta.prx.org/accounts/220986',
                    'channel': 'Air and Space Museum',
                    'ext': 'mp3',
                    'upload_date': '20211222',
                    'episode': 'Episode 8',
                    'release_date': '20211223',
                    'season': 'Season 5',
                    'modified_date': '20220104'
                }
            }, {
                'info_dict': {
                    'id': '399200_part2',
                    'title': 'Fly Me To The Moon',
                    'description': 'md5:43230168390b95d3322048d8a56bf2bb',
                    'release_timestamp': 1640250000,
                    'timestamp': 1640208972,
                    'modified_timestamp': 1641318202,
                    'duration': 474,
                    'tags': 'count:7',
                    'episode_number': 8,
                    'season_number': 5,
                    'series': 'AirSpace',
                    'series_id': '38057',
                    'channel_id': '220986',
                    'channel_url': 'https://beta.prx.org/accounts/220986',
                    'channel': 'Air and Space Museum',
                    'ext': 'mp3',
                    'upload_date': '20211222',
                    'episode': 'Episode 8',
                    'release_date': '20211223',
                    'season': 'Season 5',
                    'modified_date': '20220104'
                }
            }
            ]
        }, {
            # Story with only split audio
            'url': 'https://beta.prx.org/stories/326414',
            'info_dict': {
                'id': '326414',
                'title': 'Massachusetts v EPA',
                'description': 'md5:744fffba08f19f4deab69fa8d49d5816',
                'timestamp': 1592509124,
                'modified_timestamp': 1592510457,
                'duration': 3088,
                'tags': 'count:0',
                'series': 'Outside/In',
                'series_id': '36252',
                'channel_id': '206',
                'channel_url': 'https://beta.prx.org/accounts/206',
                'channel': 'New Hampshire Public Radio',
            },
            'playlist_count': 4
        }, {
            # Story with single combined audio
            'url': 'https://beta.prx.org/stories/400404',
            'info_dict': {
                'id': '400404',
                'title': 'Cafe Chill (Episode 2022-01)',
                'thumbnails': 'count:1',
                'description': 'md5:9f1b5a3cbd64fb159d08c3baa31f1539',
                'timestamp': 1641233952,
                'modified_timestamp': 1641234248,
                'duration': 3540,
                'series': 'Café Chill',
                'series_id': '37762',
                'channel_id': '5767',
                'channel_url': 'https://beta.prx.org/accounts/5767',
                'channel': 'C89.5 - KNHC Seattle',
                'ext': 'mp3',
                'tags': 'count:0',
                'thumbnail': r're:https?://cms\.prx\.org/pub/\w+/0/web/story_image/767965/medium/Aurora_Over_Trees\.jpg',
                'upload_date': '20220103',
                'modified_date': '20220103'
            }
        }, {
            'url': 'https://listen.prx.org/stories/399200',
            'only_matching': True
        }
    ]

    def _extract_audio_pieces(self, audio_response):
        return [{
            'format_id': str_or_none(piece_response.get('id')),
            'format_note': str_or_none(piece_response.get('label')),
            'filesize': int_or_none(piece_response.get('size')),
            'duration': int_or_none(piece_response.get('duration')),
            'ext': mimetype2ext(piece_response.get('contentType')),
            'asr': int_or_none(piece_response.get('frequency'), scale=1000),
            'abr': int_or_none(piece_response.get('bitRate')),
            'url': self._extract_file_link(piece_response),
            'vcodec': 'none'
        } for piece_response in sorted(
            self._get_prx_embed_response(audio_response, 'items') or [],
            key=lambda p: int_or_none(p.get('position')))]

    def _extract_story(self, story_response):
        info = self._extract_story_info(story_response)
        if not info:
            return
        audio_pieces = self._extract_audio_pieces(
            self._get_prx_embed_response(story_response, 'audio'))
        if len(audio_pieces) == 1:
            return {
                'formats': audio_pieces,
                **info
            }

        entries = [{
            **info,
            'id': '%s_part%d' % (info['id'], (idx + 1)),
            'formats': [fmt],
        } for idx, fmt in enumerate(audio_pieces)]
        return {
            '_type': 'multi_video',
            'entries': entries,
            **info
        }

    def _real_extract(self, url):
        story_id = self._match_id(url)
        response = self._call_api(story_id, f'stories/{story_id}')
        return self._extract_story(response)


class PRXSeriesIE(PRXBaseIE):
    _VALID_URL = PRXBaseIE.PRX_BASE_URL_RE % r'series/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://beta.prx.org/series/36252',
            'info_dict': {
                'id': '36252',
                'title': 'Outside/In',
                'thumbnails': 'count:1',
                'description': 'md5:a6bedc5f810777bcb09ab30ff9059114',
                'timestamp': 1470684964,
                'modified_timestamp': 1582308830,
                'channel_id': '206',
                'channel_url': 'https://beta.prx.org/accounts/206',
                'channel': 'New Hampshire Public Radio',
                'series': 'Outside/In',
                'series_id': '36252'
            },
            'playlist_mincount': 39
        }, {
            # Blank series
            'url': 'https://beta.prx.org/series/25038',
            'info_dict': {
                'id': '25038',
                'title': '25038',
                'timestamp': 1207612800,
                'modified_timestamp': 1207612800,
                'channel_id': '206',
                'channel_url': 'https://beta.prx.org/accounts/206',
                'channel': 'New Hampshire Public Radio',
                'series': '25038',
                'series_id': '25038'
            },
            'playlist_count': 0
        }
    ]

    def _extract_series(self, series_response):
        info = self._extract_series_info(series_response)
        return {
            '_type': 'playlist',
            'entries': self._entries(info['id'], 'series/%s/stories' % info['id'], self._story_playlist_entry),
            **info
        }

    def _real_extract(self, url):
        series_id = self._match_id(url)
        response = self._call_api(series_id, f'series/{series_id}')
        return self._extract_series(response)


class PRXAccountIE(PRXBaseIE):
    _VALID_URL = PRXBaseIE.PRX_BASE_URL_RE % r'accounts/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://beta.prx.org/accounts/206',
        'info_dict': {
            'id': '206',
            'title': 'New Hampshire Public Radio',
            'description': 'md5:277f2395301d0aca563c80c70a18ee0a',
            'channel_id': '206',
            'channel_url': 'https://beta.prx.org/accounts/206',
            'channel': 'New Hampshire Public Radio',
            'thumbnails': 'count:1'
        },
        'playlist_mincount': 380
    }]

    def _extract_account(self, account_response):
        info = self._extract_account_info(account_response)
        series = self._entries(
            info['id'], f'accounts/{info["id"]}/series', self._series_playlist_entry)
        stories = self._entries(
            info['id'], f'accounts/{info["id"]}/stories', self._story_playlist_entry)
        return {
            '_type': 'playlist',
            'entries': itertools.chain(series, stories),
            **info
        }

    def _real_extract(self, url):
        account_id = self._match_id(url)
        response = self._call_api(account_id, f'accounts/{account_id}')
        return self._extract_account(response)


class PRXStoriesSearchIE(PRXBaseIE, SearchInfoExtractor):
    IE_DESC = 'PRX Stories Search'
    IE_NAME = 'prxstories:search'
    _SEARCH_KEY = 'prxstories'

    def _search_results(self, query):
        yield from self._entries(
            f'query {query}', 'stories/search', self._story_playlist_entry, query={'q': query})


class PRXSeriesSearchIE(PRXBaseIE, SearchInfoExtractor):
    IE_DESC = 'PRX Series Search'
    IE_NAME = 'prxseries:search'
    _SEARCH_KEY = 'prxseries'

    def _search_results(self, query):
        yield from self._entries(
            f'query {query}', 'series/search', self._series_playlist_entry, query={'q': query})
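
# The two SearchInfoExtractors above register the 'prxstories' and 'prxseries'
# search keys, so - assuming the standard yt-dlp <key><count>:<query> search
# syntax - something like the following should yield the first five matches:
#
#     yt-dlp "prxstories5:climate change"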

yt-dlp-2022.08.19/yt_dlp/extractor/puhutv.py

from .common import InfoExtractor
from ..compat import (
    compat_HTTPError,
    compat_str,
)
from ..utils import (
    ExtractorError,
    int_or_none,
    float_or_none,
    parse_resolution,
    str_or_none,
    try_get,
    unified_timestamp,
    url_or_none,
    urljoin,
)


class PuhuTVIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-izle'
    IE_NAME = 'puhutv'
    _TESTS = [{
        # film
        'url': 'https://puhutv.com/sut-kardesler-izle',
        'md5': 'a347470371d56e1585d1b2c8dab01c96',
        'info_dict': {
            'id': '5085',
            'display_id': 'sut-kardesler',
            'ext': 'mp4',
            'title': 'Süt Kardeşler',
            'description': 'md5:ca09da25b7e57cbb5a9280d6e48d17aa',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 4832.44,
            'creator': 'Arzu Film',
            'timestamp': 1561062602,
            'upload_date': '20190620',
            'release_year': 1976,
            'view_count': int,
            'tags': list,
        },
    }, {
        # episode, geo restricted, bypassable with --geo-verification-proxy
        'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle',
        'only_matching': True,
    }, {
        # 4k, with subtitles
        'url': 'https://puhutv.com/dip-1-bolum-izle',
        'only_matching': True,
    }]
    _SUBTITLE_LANGS = {
        'English': 'en',
        'Deutsch': 'de',
        'عربى': 'ar'
    }

    def _real_extract(self, url):
        display_id = self._match_id(url)

        info = self._download_json(
            urljoin(url, '/api/slug/%s-izle' % display_id),
            display_id)['data']

        video_id = compat_str(info['id'])
        show = info.get('title') or {}
        title = info.get('name') or show['name']
        if info.get('display_name'):
            title = '%s %s' % (title, info['display_name'])

        try:
            videos = self._download_json(
                'https://puhutv.com/api/assets/%s/videos' % video_id,
                display_id, 'Downloading video JSON',
                headers=self.geo_verification_headers())
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                self.raise_geo_restricted()
            raise

        urls = []
        formats = []

        for video in videos['data']['videos']:
            media_url = url_or_none(video.get('url'))
            if not media_url or media_url in urls:
                continue
            urls.append(media_url)

            playlist = video.get('is_playlist')
            if (video.get('stream_type') == 'hls' and playlist is True) or 'playlist.m3u8' in media_url:
                formats.extend(self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
                continue

            quality = int_or_none(video.get('quality'))
            f = {
                'url': media_url,
                'ext': 'mp4',
                'height': quality
            }
            video_format = video.get('video_format')
            is_hls = (video_format == 'hls' or '/hls/' in media_url or '/chunklist.m3u8' in media_url) and playlist is False
            if is_hls:
                format_id = 'hls'
                f['protocol'] = 'm3u8_native'
            elif video_format == 'mp4':
                format_id = 'http'
            else:
                continue
            if quality:
                format_id += '-%sp' % quality
            f['format_id'] = format_id
            formats.append(f)
        self._sort_formats(formats)

        creator = try_get(
            show, lambda x: x['producer']['name'], compat_str)

        content = info.get('content') or {}

        images = try_get(
            content, lambda x: x['images']['wide'], dict) or {}
        thumbnails = []
        for image_id, image_url in images.items():
            if not isinstance(image_url, compat_str):
                continue
            if not image_url.startswith(('http', '//')):
                image_url = 'https://%s' % image_url
            t = parse_resolution(image_id)
            t.update({
                'id': image_id,
                'url': image_url
            })
            thumbnails.append(t)

        tags = []
        for genre in show.get('genres') or []:
            if not isinstance(genre, dict):
                continue
            genre_name = genre.get('name')
            if genre_name and isinstance(genre_name, compat_str):
                tags.append(genre_name)

        subtitles = {}
        for subtitle in content.get('subtitles') or []:
            if not isinstance(subtitle, dict):
                continue
            lang = subtitle.get('language')
            sub_url = url_or_none(subtitle.get('url') or subtitle.get('file'))
            if not lang or not isinstance(lang, compat_str) or not sub_url:
                continue
            subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
                'url': sub_url
            }]

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': info.get('description') or show.get('description'),
            'season_id': str_or_none(info.get('season_id')),
            'season_number': int_or_none(info.get('season_number')),
            'episode_number': int_or_none(info.get('episode_number')),
            'release_year': int_or_none(show.get('released_at')),
            'timestamp': unified_timestamp(info.get('created_at')),
            'creator': creator,
            'view_count': int_or_none(content.get('watch_count')),
            'duration': float_or_none(content.get('duration_in_ms'), 1000),
            'tags': tags,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'formats': formats
        }
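
# parse_resolution() above extracts size hints from the thumbnail ids, e.g.
# (the id format here is illustrative):
#
#     parse_resolution('main_1170x658')  # -> {'width': 1170, 'height': 658}
#     parse_resolution('no-size-here')   # -> {}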

class PuhuTVSerieIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-detay'
    IE_NAME = 'puhutv:serie'
    _TESTS = [{
        'url': 'https://puhutv.com/deniz-yildizi-detay',
        'info_dict': {
            'title': 'Deniz Yıldızı',
            'id': 'deniz-yildizi',
        },
        'playlist_mincount': 205,
    }, {
        # a film detail page which is using same url with serie page
        'url': 'https://puhutv.com/kaybedenler-kulubu-detay',
        'only_matching': True,
    }]

    def _extract_entries(self, seasons):
        for season in seasons:
            season_id = season.get('id')
            if not season_id:
                continue
            page = 1
            has_more = True
            while has_more is True:
                season = self._download_json(
                    'https://galadriel.puhutv.com/seasons/%s' % season_id,
                    season_id, 'Downloading page %s' % page, query={
                        'page': page,
                        'per': 40,
                    })
                episodes = season.get('episodes')
                if isinstance(episodes, list):
                    for ep in episodes:
                        slug_path = str_or_none(ep.get('slugPath'))
                        if not slug_path:
                            continue
                        video_id = str_or_none(int_or_none(ep.get('id')))
                        yield self.url_result(
                            'https://puhutv.com/%s' % slug_path,
                            ie=PuhuTVIE.ie_key(), video_id=video_id,
                            video_title=ep.get('name') or ep.get('eventLabel'))
                page += 1
                has_more = season.get('hasMore')

    def _real_extract(self, url):
        playlist_id = self._match_id(url)

        info = self._download_json(
            urljoin(url, '/api/slug/%s-detay' % playlist_id),
            playlist_id)['data']

        seasons = info.get('seasons')
        if seasons:
            return self.playlist_result(
                self._extract_entries(seasons), playlist_id, info.get('name'))

        # For films, these are using same url with series
        video_id = info.get('slug') or info['assets'][0]['slug']
        return self.url_result(
            'https://puhutv.com/%s-izle' % video_id,
            PuhuTVIE.ie_key(), video_id)

yt-dlp-2022.08.19/yt_dlp/extractor/puls4.py

from .prosiebensat1 import ProSiebenSat1BaseIE
from ..compat import compat_str
from ..utils import parse_duration, unified_strdate


class Puls4IE(ProSiebenSat1BaseIE):
    _VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>[^?#&]+)'
    _TESTS = [{
        'url': 'http://www.puls4.com/2-minuten-2-millionen/staffel-3/videos/2min2miotalk/Tobias-Homberger-von-myclubs-im-2min2miotalk-118118',
        'md5': 'fd3c6b0903ac72c9d004f04bc6bb3e03',
        'info_dict': {
            'id': '118118',
            'ext': 'flv',
            'title': 'Tobias Homberger von myclubs im #2min2miotalk',
            'description': 'md5:f9def7c5e8745d6026d8885487d91955',
            'upload_date': '20160830',
            'uploader': 'PULS_4',
        },
    }, {
        'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident.-Norbert-Hofer',
        'only_matching': True,
    }, {
        'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident-Analyse-des-Interviews-mit-Norbert-Hofer-416598',
        'only_matching': True,
    }]
    _TOKEN = 'puls4'
    _SALT = '01!kaNgaiNgah1Ie4AeSha'
    _CLIENT_NAME = ''

    def _real_extract(self, url):
        path = self._match_id(url)
        content_path = self._download_json(
            'http://www.puls4.com/api/json-fe/page/' + path, path)['content'][0]['url']
        media = self._download_json(
            'http://www.puls4.com' + content_path,
            content_path)['mediaCurrent']
        player_content = media['playerContent']
        info = self._extract_video_info(url, player_content['id'])
        info.update({
            'id': compat_str(media['objectId']),
            'title': player_content['title'],
            'description': media.get('description'),
            'thumbnail': media.get('previewLink'),
            'upload_date': unified_strdate(media.get('date')),
            'duration': parse_duration(player_content.get('duration')),
            'episode': player_content.get('episodePartName'),
            'show': media.get('channel'),
            'season_id': player_content.get('seasonId'),
            'uploader': player_content.get('sourceCompany'),
        })
        return info

yt-dlp-2022.08.19/yt_dlp/extractor/pyvideo.py

import re

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import int_or_none


class PyvideoIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/(?P<category>[^/]+)/(?P<id>[^/?#&.]+)'

    _TESTS = [{
        'url': 'http://pyvideo.org/pycon-us-2013/become-a-logging-expert-in-30-minutes.html',
        'info_dict': {
            'id': 'become-a-logging-expert-in-30-minutes',
        },
        'playlist_count': 2,
    }, {
        'url': 'http://pyvideo.org/pygotham-2012/gloriajw-spotifywitherikbernhardsson182m4v.html',
        'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
        'info_dict': {
            'id': '2542',
            'ext': 'm4v',
            'title': 'Gloriajw-SpotifyWithErikBernhardsson182.m4v',
        },
    }]

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        category = mobj.group('category')
        video_id = mobj.group('id')

        entries = []

        data = self._download_json(
            'https://raw.githubusercontent.com/pyvideo/data/master/%s/videos/%s.json'
            % (category, video_id), video_id, fatal=False)

        if data:
            for video in data['videos']:
                video_url = video.get('url')
                if video_url:
                    if video.get('type') == 'youtube':
                        entries.append(self.url_result(video_url, 'Youtube'))
                    else:
                        entries.append({
                            'id': compat_str(data.get('id') or video_id),
                            'url': video_url,
                            'title': data['title'],
                            'description': data.get('description') or data.get('summary'),
                            'thumbnail': data.get('thumbnail_url'),
                            'duration': int_or_none(data.get('duration')),
                        })
        else:
            webpage = self._download_webpage(url, video_id)
            title = self._og_search_title(webpage)
            media_urls = self._search_regex(
                r'(?s)Media URL:(.+?)</li>', webpage, 'media urls')
            for m in re.finditer(
                    r'<a[^>]+href=(["\'])(?P<url>http.+?)\1', media_urls):
                media_url = m.group('url')
                if re.match(r'https?://www\.youtube\.com/watch\?v=.*', media_url):
                    entries.append(self.url_result(media_url, 'Youtube'))
                else:
                    entries.append({
                        'id': video_id,
                        'url': media_url,
                        'title': title,
                    })

        return self.playlist_result(entries, video_id)
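
# Both branches above collect either url_result() stubs (delegated to the
# Youtube extractor) or plain info dicts, and playlist_result() wraps them
# roughly as (a sketch, not the full returned dict):
#
#     {'_type': 'playlist', 'id': video_id, 'entries': [...]}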

yt-dlp-2022.08.19/yt_dlp/extractor/qqmusic.py

import random
import re
import time

from .common import InfoExtractor
from ..utils import (
    clean_html,
    ExtractorError,
    strip_jsonp,
    unescapeHTML,
)


class QQMusicIE(InfoExtractor):
    IE_NAME = 'qqmusic'
    IE_DESC = 'QQ音乐'
    _VALID_URL = r'https?://y\.qq\.com/n/yqq/song/(?P<id>[0-9A-Za-z]+)\.html'
    _TESTS = [{
        'url': 'https://y.qq.com/n/yqq/song/004295Et37taLD.html',
        'md5': '5f1e6cea39e182857da7ffc5ef5e6bb8',
        'info_dict': {
            'id': '004295Et37taLD',
            'ext': 'mp3',
            'title': '可惜没如果',
            'release_date': '20141227',
            'creator': '林俊杰',
            'description': 'md5:d85afb3051952ecc50a1ee8a286d1eac',
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }, {
        'note': 'There is no mp3-320 version of this song.',
        'url': 'https://y.qq.com/n/yqq/song/004MsGEo3DdNxV.html',
        'md5': 'fa3926f0c585cda0af8fa4f796482e3e',
        'info_dict': {
            'id': '004MsGEo3DdNxV',
            'ext': 'mp3',
            'title': '如果',
            'release_date': '20050626',
            'creator': '李季美',
            'description': 'md5:46857d5ed62bc4ba84607a805dccf437',
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }, {
        'note': 'lyrics not in .lrc format',
        'url': 'https://y.qq.com/n/yqq/song/001JyApY11tIp6.html',
        'info_dict': {
            'id': '001JyApY11tIp6',
            'ext': 'mp3',
            'title': 'Shadows Over Transylvania',
            'release_date': '19970225',
            'creator': 'Dark Funeral',
            'description': 'md5:c9b20210587cbcd6836a1c597bab4525',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            'skip_download': True,
        },
    }]

    _FORMATS = {
        'mp3-320': {'prefix': 'M800', 'ext': 'mp3', 'preference': 40, 'abr': 320},
        'mp3-128': {'prefix': 'M500', 'ext': 'mp3', 'preference': 30, 'abr': 128},
        'm4a': {'prefix': 'C200', 'ext': 'm4a', 'preference': 10}
    }

    # Reference: m_r_GetRUin() in top_player.js
    # http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
    @staticmethod
    def m_r_get_ruin():
        curMs = int(time.time() * 1000) % 1000
        return int(round(random.random() * 2147483647) * curMs % 1E10)

    def _real_extract(self, url):
        mid = self._match_id(url)

        detail_info_page = self._download_webpage(
            'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid,
            mid, note='Download song detail info',
            errnote='Unable to get song detail info', encoding='gbk')

        song_name = self._html_search_regex(
            r"songname:\s*'([^']+)'", detail_info_page, 'song name')

        publish_time = self._html_search_regex(
            r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page,
            'publish time', default=None)
        if publish_time:
            publish_time = publish_time.replace('-', '')

        singer = self._html_search_regex(
            r"singer:\s*'([^']+)", detail_info_page, 'singer', default=None)

        lrc_content = self._html_search_regex(
            r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>',
            detail_info_page, 'LRC lyrics', default=None)
        if lrc_content:
            lrc_content = lrc_content.replace('\\n', '\n')

        thumbnail_url = None
        albummid = self._search_regex(
            [r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'],
            detail_info_page, 'album mid', default=None)
        if albummid:
            thumbnail_url = 'http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg' \
                            % (albummid[-2:-1], albummid[-1], albummid)

        guid = self.m_r_get_ruin()

        vkey = self._download_json(
            'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid,
            mid, note='Retrieve vkey', errnote='Unable to get vkey',
            transform_source=strip_jsonp)['key']

        formats = []
        for format_id, details in self._FORMATS.items():
            formats.append({
                'url': 'http://cc.stream.qqmusic.qq.com/%s%s.%s?vkey=%s&guid=%s&fromtag=0'
                       % (details['prefix'], mid, details['ext'], vkey, guid),
                'format': format_id,
                'format_id': format_id,
                'quality': details['preference'],
                'abr': details.get('abr'),
            })
        self._check_formats(formats, mid)
        self._sort_formats(formats)

        actual_lrc_lyrics = ''.join(
            line + '\n' for line in re.findall(
                r'(?m)^(\[[0-9]{2}:[0-9]{2}(?:\.[0-9]{2,})?\][^\n]*|\[[^\]]*\])', lrc_content))

        info_dict = {
            'id': mid,
            'formats': formats,
            'title': song_name,
            'release_date': publish_time,
            'creator': singer,
            'description': lrc_content,
            'thumbnail': thumbnail_url
        }
        if actual_lrc_lyrics:
            info_dict['subtitles'] = {
                'origin': [{
                    'ext': 'lrc',
                    'data': actual_lrc_lyrics,
                }]
            }
        return info_dict
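
# The LRC filtering above keeps only bracketed/timestamped lines. A small
# sketch with made-up lyrics:
#
#     import re
#     lrc = '[ti:Title]\n[00:01.00]first line\nplain text\n[00:05.50]second'
#     re.findall(r'(?m)^(\[[0-9]{2}:[0-9]{2}(?:\.[0-9]{2,})?\][^\n]*|\[[^\]]*\])', lrc)
#     # -> ['[ti:Title]', '[00:01.00]first line', '[00:05.50]second']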
return self.playlist_result(entries, mid, singer_name, singer_desc) class QQMusicAlbumIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:album' IE_DESC = 'QQ音乐 - 专辑' _VALID_URL = r'https?://y\.qq\.com/n/yqq/album/(?P<id>[0-9A-Za-z]+)\.html' _TESTS = [{ 'url': 'https://y.qq.com/n/yqq/album/000gXCTb2AhRR1.html', 'info_dict': { 'id': '000gXCTb2AhRR1', 'title': '我们都是这样长大的', 'description': 'md5:179c5dce203a5931970d306aa9607ea6', }, 'playlist_count': 4, }, { 'url': 'https://y.qq.com/n/yqq/album/002Y5a3b3AlCu3.html', 'info_dict': { 'id': '002Y5a3b3AlCu3', 'title': '그리고...', 'description': 'md5:a48823755615508a95080e81b51ba729', }, 'playlist_count': 8, }] def _real_extract(self, url): mid = self._match_id(url) album = self._download_json( 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg?albummid=%s&format=json' % mid, mid, 'Download album page')['data'] entries = [ self.url_result( 'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid'] ) for song in album['list'] ] album_name = album.get('name') album_detail = album.get('desc') if album_detail is not None: album_detail = album_detail.strip() return self.playlist_result(entries, mid, album_name, album_detail) class QQMusicToplistIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:toplist' IE_DESC = 'QQ音乐 - 排行榜' _VALID_URL = r'https?://y\.qq\.com/n/yqq/toplist/(?P<id>[0-9]+)\.html' _TESTS = [{ 'url': 'https://y.qq.com/n/yqq/toplist/123.html', 'info_dict': { 'id': '123', 'title': '美国iTunes榜', 'description': 'md5:89db2335fdbb10678dee2d43fe9aba08', }, 'playlist_count': 100, }, { 'url': 'https://y.qq.com/n/yqq/toplist/3.html', 'info_dict': { 'id': '3', 'title': '巅峰榜·欧美', 'description': 'md5:5a600d42c01696b26b71f8c4d43407da', }, 'playlist_count': 100, }, { 'url': 'https://y.qq.com/n/yqq/toplist/106.html', 'info_dict': { 'id': '106', 'title': '韩国Mnet榜', 'description': 'md5:cb84b325215e1d21708c615cac82a6e7', }, 'playlist_count': 50, }] def _real_extract(self, url): list_id = self._match_id(url) toplist_json = self._download_json( 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg', list_id, note='Download toplist page', query={'type': 'toplist', 'topid': list_id, 'format': 'json'}) entries = [self.url_result( 'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + '.html', 'QQMusic', song['data']['songmid']) for song in toplist_json['songlist']] topinfo = toplist_json.get('topinfo', {}) list_name = topinfo.get('ListName') list_description = topinfo.get('info') return self.playlist_result(entries, list_id, list_name, list_description) class QQMusicPlaylistIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:playlist' IE_DESC = 'QQ音乐 - 歌单' _VALID_URL = r'https?://y\.qq\.com/n/yqq/playlist/(?P<id>[0-9]+)\.html' _TESTS = [{ 'url': 'http://y.qq.com/n/yqq/playlist/3462654915.html', 'info_dict': { 'id': '3462654915', 'title': '韩国5月新歌精选下旬', 'description': 'md5:d2c9d758a96b9888cf4fe82f603121d4', }, 'playlist_count': 40, 'skip': 'playlist gone', }, { 'url': 'https://y.qq.com/n/yqq/playlist/1374105607.html', 'info_dict': { 'id': '1374105607', 'title': '易入人心的华语民谣', 'description': '民谣的歌曲易于传唱、、歌词朗朗伤口、旋律简单温馨。属于那种才入耳孔。却上心头的感觉。没有太多的复杂情绪。简单而直接地表达乐者的情绪,就是这样的简单才易入人心。', }, 'playlist_count': 20, }] def _real_extract(self, url): list_id = self._match_id(url) list_json = self._download_json( 'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg', list_id, 'Download list page', query={'type': 1, 'json': 1, 'utf8': 1, 'onlysong': 0, 'disstid': list_id}, transform_source=strip_jsonp) if not len(list_json.get('cdlist', [])): if list_json.get('code'): raise 
ExtractorError( 'QQ Music said: error %d in fetching playlist info' % list_json['code'], expected=True) raise ExtractorError('Unable to get playlist info') cdlist = list_json['cdlist'][0] entries = [self.url_result( 'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid']) for song in cdlist['songlist']] list_name = cdlist.get('dissname') list_description = clean_html(unescapeHTML(cdlist.get('desc'))) return self.playlist_result(entries, list_id, list_name, list_description) ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/r7.py������������������������������������������������������������0000664�0000000�0000000�00000010677�14277552437�0017573�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import int_or_none class R7IE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/| noticias\.r7\.com(?:/[^/]+)+/[^/]+-| player\.r7\.com/video/i/ ) (?P<id>[\da-f]{24}) ''' _TESTS = [{ 'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html', 'md5': '403c4e393617e8e8ddc748978ee8efde', 'info_dict': { 'id': '54e7050b0cf2ff57e0279389', 'ext': 'mp4', 'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"', 'description': 'md5:01812008664be76a6479aa58ec865b72', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 98, 'like_count': int, 'view_count': int, }, }, { 'url': 'http://esportes.r7.com/videos/cigano-manda-recado-aos-fas/idmedia/4e176727b51a048ee6646a1b.html', 'only_matching': True, }, { 'url': 'http://noticias.r7.com/record-news/video/representante-do-instituto-sou-da-paz-fala-sobre-fim-do-estatuto-do-desarmamento-5480fc580cf2285b117f438d/', 'only_matching': True, }, { 'url': 'http://player.r7.com/video/i/54e7050b0cf2ff57e0279389?play=true&video=http://vsh.r7.com/54e7050b0cf2ff57e0279389/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-ATOS_copy.mp4&linkCallback=http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html&thumbnail=http://vtb.r7.com/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-thumb.jpg&idCategory=192&share=true&layout=full&full=true', 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) video = self._download_json( 'http://player-api.r7.com/video/i/%s' % video_id, video_id) title = video['title'] formats = [] media_url_hls = video.get('media_url_hls') if media_url_hls: formats.extend(self._extract_m3u8_formats( media_url_hls, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) media_url = video.get('media_url') if media_url: f = { 'url': media_url, 'format_id': 'http', } # m3u8 format always matches the http format, let's copy metadata from # one to another m3u8_formats = list(filter( lambda f: f.get('vcodec') != 'none', formats)) if len(m3u8_formats) == 1: f_copy = 
m3u8_formats[0].copy() f_copy.update(f) f_copy['protocol'] = 'http' f = f_copy formats.append(f) self._sort_formats(formats) description = video.get('description') thumbnail = video.get('thumb') duration = int_or_none(video.get('media_duration')) like_count = int_or_none(video.get('likes')) view_count = int_or_none(video.get('views')) return { 'id': video_id, 'title': title, 'description': description, 'thumbnail': thumbnail, 'duration': duration, 'like_count': like_count, 'view_count': view_count, 'formats': formats, } class R7ArticleIE(InfoExtractor): _VALID_URL = r'https?://(?:[a-zA-Z]+)\.r7\.com/(?:[^/]+/)+[^/?#&]+-(?P<id>\d+)' _TEST = { 'url': 'http://tv.r7.com/record-play/balanco-geral/videos/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-16102015', 'only_matching': True, } @classmethod def suitable(cls, url): return False if R7IE.suitable(url) else super(R7ArticleIE, cls).suitable(url) def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) video_id = self._search_regex( r'<div[^>]+(?:id=["\']player-|class=["\']embed["\'][^>]+id=["\'])([\da-f]{24})', webpage, 'video id') return self.url_result('http://player.r7.com/video/i/%s' % video_id, R7IE.ie_key()) �����������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/radiko.py��������������������������������������������������������0000664�0000000�0000000�00000020136�14277552437�0020503�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import base64 import re import urllib.parse from .common import InfoExtractor from ..utils import ( ExtractorError, clean_html, time_seconds, try_call, unified_timestamp, update_url_query, ) class RadikoBaseIE(InfoExtractor): _FULL_KEY = None def _auth_client(self): _, auth1_handle = self._download_webpage_handle( 'https://radiko.jp/v2/api/auth1', None, 'Downloading authentication page', headers={ 'x-radiko-app': 'pc_html5', 'x-radiko-app-version': '0.0.1', 'x-radiko-device': 'pc', 'x-radiko-user': 'dummy_user', }) auth1_header = auth1_handle.info() auth_token = auth1_header['X-Radiko-AuthToken'] kl = int(auth1_header['X-Radiko-KeyLength']) ko = int(auth1_header['X-Radiko-KeyOffset']) raw_partial_key = self._extract_full_key()[ko:ko + kl] partial_key = base64.b64encode(raw_partial_key).decode() area_id = self._download_webpage( 'https://radiko.jp/v2/api/auth2', None, 'Authenticating', headers={ 'x-radiko-device': 'pc', 'x-radiko-user': 'dummy_user', 'x-radiko-authtoken': auth_token, 'x-radiko-partialkey': partial_key, }).split(',')[0] auth_data = (auth_token, area_id) self.cache.store('radiko', 'auth_data', auth_data) return auth_data def _extract_full_key(self): if self._FULL_KEY: return self._FULL_KEY jscode = self._download_webpage( 'https://radiko.jp/apps/js/playerCommon.js', None, note='Downloading player js code') full_key = self._search_regex( (r"RadikoJSPlayer\([^,]*,\s*(['\"])pc_html5\1,\s*(['\"])(?P<fullkey>[0-9a-f]+)\2,\s*{"), jscode, 'full key', fatal=False, group='fullkey') if full_key: full_key = full_key.encode() else: # use full key ever known full_key = b'bcd151073c03b352e1ef2fd66c32209da9ca0afa' self._FULL_KEY = full_key return full_key def _find_program(self, video_id, 
station, cursor): station_program = self._download_xml( 'https://radiko.jp/v3/program/station/weekly/%s.xml' % station, video_id, note='Downloading radio program for %s station' % station) prog = None for p in station_program.findall('.//prog'): ft_str, to_str = p.attrib['ft'], p.attrib['to'] ft = unified_timestamp(ft_str, False) to = unified_timestamp(to_str, False) if ft <= cursor and cursor < to: prog = p break if not prog: raise ExtractorError('Cannot identify radio program to download!') assert ft, to return prog, station_program, ft, ft_str, to_str def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token, area_id, query): m3u8_playlist_data = self._download_xml( f'https://radiko.jp/v3/station/stream/pc_html5/{station}.xml', video_id, note='Downloading stream information') m3u8_urls = m3u8_playlist_data.findall('.//url') formats = [] found = set() for url_tag in m3u8_urls: pcu = url_tag.find('playlist_create_url') url_attrib = url_tag.attrib playlist_url = update_url_query(pcu.text, { 'station_id': station, **query, 'l': '15', 'lsid': '88ecea37e968c1f17d5413312d9f8003', 'type': 'b', }) if playlist_url in found: continue else: found.add(playlist_url) time_to_skip = None if is_onair else cursor - ft domain = urllib.parse.urlparse(playlist_url).netloc subformats = self._extract_m3u8_formats( playlist_url, video_id, ext='m4a', live=True, fatal=False, m3u8_id=domain, note=f'Downloading m3u8 information from {domain}', headers={ 'X-Radiko-AreaId': area_id, 'X-Radiko-AuthToken': auth_token, }) for sf in subformats: if re.fullmatch(r'[cf]-radiko\.smartstream\.ne\.jp', domain): # Prioritize live radio vs playback based on extractor sf['preference'] = 100 if is_onair else -100 if not is_onair and url_attrib['timefree'] == '1' and time_to_skip: sf['downloader_options'] = {'ffmpeg_args': ['-ss', time_to_skip]} formats.extend(subformats) self._sort_formats(formats) return formats class RadikoIE(RadikoBaseIE): _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)' _TESTS = [{ # QRR (文化放送) station provides <desc> 'url': 'https://radiko.jp/#!/ts/QRR/20210425101300', 'only_matching': True, }, { # FMT (TOKYO FM) station does not provide <desc> 'url': 'https://radiko.jp/#!/ts/FMT/20210810150000', 'only_matching': True, }, { 'url': 'https://radiko.jp/#!/ts/JOAK-FM/20210509090000', 'only_matching': True, }] def _real_extract(self, url): station, video_id = self._match_valid_url(url).groups() vid_int = unified_timestamp(video_id, False) prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int) auth_cache = self.cache.load('radiko', 'auth_data') for attempt in range(2): auth_token, area_id = (not attempt and auth_cache) or self._auth_client() formats = self._extract_formats( video_id=video_id, station=station, is_onair=False, ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id, query={ 'start_at': radio_begin, 'ft': radio_begin, 'end_at': radio_end, 'to': radio_end, 'seek': video_id, }) if formats: break return { 'id': video_id, 'title': try_call(lambda: prog.find('title').text), 'description': clean_html(try_call(lambda: prog.find('info').text)), 'uploader': try_call(lambda: station_program.find('.//name').text), 'uploader_id': station, 'timestamp': vid_int, 'formats': formats, 'is_live': True, } class RadikoRadioIE(RadikoBaseIE): _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/live/(?P<id>[A-Z0-9-]+)' _TESTS = [{ # QRR (文化放送) station provides <desc> 'url': 'https://radiko.jp/#!/live/QRR', 
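# --- illustrative sketch (not part of the source tree) ----------------------
# RadikoBaseIE._auth_client above slices X-Radiko-KeyOffset/X-Radiko-KeyLength
# bytes out of the static player key and sends the slice base64-encoded as
# x-radiko-partialkey. Hedged standalone model of that derivation; ko/kl are
# invented here, only the fallback key bytes come from the code above:
import base64

full_key = b'bcd151073c03b352e1ef2fd66c32209da9ca0afa'
ko, kl = 8, 16  # hypothetical values of the auth1 response headers
partial_key = base64.b64encode(full_key[ko:ko + kl]).decode()
assert base64.b64decode(partial_key) == full_key[8:24]
# -----------------------------------------------------------------------------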
'only_matching': True, }, { # FMT (TOKYO FM) station does not provide <desc> 'url': 'https://radiko.jp/#!/live/FMT', 'only_matching': True, }, { 'url': 'https://radiko.jp/#!/live/JOAK-FM', 'only_matching': True, }] def _real_extract(self, url): station = self._match_id(url) self.report_warning('Downloader will not stop at the end of the program! Press Ctrl+C to stop') auth_token, area_id = self._auth_client() # get current time in JST (GMT+9:00 w/o DST) vid_now = time_seconds(hours=9) prog, station_program, ft, _, _ = self._find_program(station, station, vid_now) title = prog.find('title').text description = clean_html(prog.find('info').text) station_name = station_program.find('.//name').text formats = self._extract_formats( video_id=station, station=station, is_onair=True, ft=ft, cursor=vid_now, auth_token=auth_token, area_id=area_id, query={}) return { 'id': station, 'title': title, 'description': description, 'uploader': station_name, 'uploader_id': station, 'timestamp': ft, 'formats': formats, 'is_live': True, } ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/radiobremen.py���������������������������������������������������0000664�0000000�0000000�00000004505�14277552437�0021523�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import re from .common import InfoExtractor from ..utils import parse_duration class RadioBremenIE(InfoExtractor): _VALID_URL = r'http?://(?:www\.)?radiobremen\.de/mediathek/(?:index\.html)?\?id=(?P<id>[0-9]+)' IE_NAME = 'radiobremen' _TEST = { 'url': 'http://www.radiobremen.de/mediathek/?id=141876', 'info_dict': { 'id': '141876', 'ext': 'mp4', 'duration': 178, 'width': 512, 'title': 'Druck auf Patrick Öztürk', 'thumbnail': r're:https?://.*\.jpg$', 'description': 'Gegen den SPD-Bürgerschaftsabgeordneten Patrick Öztürk wird wegen Beihilfe zum gewerbsmäßigen Betrug ermittelt. 
Am Donnerstagabend sollte er dem Vorstand des SPD-Unterbezirks Bremerhaven dazu Rede und Antwort stehen.', }, } def _real_extract(self, url): video_id = self._match_id(url) meta_url = 'http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s' % video_id meta_doc = self._download_webpage( meta_url, video_id, 'Downloading metadata') title = self._html_search_regex( r'<h1.*>(?P<title>.+)</h1>', meta_doc, 'title') description = self._html_search_regex( r'<p>(?P<description>.*)</p>', meta_doc, 'description', fatal=False) duration = parse_duration(self._html_search_regex( r'Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>', meta_doc, 'duration', fatal=False)) page_doc = self._download_webpage( url, video_id, 'Downloading video information') mobj = re.search( r"ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)", page_doc) video_url = ( "http://dl-ondemand.radiobremen.de/mediabase/%s/%s_%s_%s.mp4" % (video_id, video_id, mobj.group("secret"), mobj.group('width'))) formats = [{ 'url': video_url, 'ext': 'mp4', 'width': int(mobj.group('width')), }] return { 'id': video_id, 'title': title, 'description': description, 'duration': duration, 'formats': formats, 'thumbnail': mobj.group('thumbnail'), } �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/radiocanada.py���������������������������������������������������0000664�0000000�0000000�00000014205�14277552437�0021460�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( determine_ext, ExtractorError, int_or_none, unified_strdate, ) class RadioCanadaIE(InfoExtractor): IE_NAME = 'radiocanada' _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)' _TESTS = [ { 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272', 'info_dict': { 'id': '7184272', 'ext': 'mp4', 'title': 'Le parcours du tireur capté sur vidéo', 'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa', 'upload_date': '20141023', }, 'params': { # m3u8 download 'skip_download': True, } }, { # empty Title 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/', 'info_dict': { 'id': '7754998', 'ext': 'mp4', 'title': 'letelejournal22h', 'description': 'INTEGRALE WEB 22H-TJ', 'upload_date': '20170720', }, 'params': { # m3u8 download 'skip_download': True, }, }, { # with protectionType but not actually DRM protected 'url': 'radiocanada:toutv:140872', 'info_dict': { 'id': '140872', 'title': 'Épisode 1', 'series': 'District 31', }, 'only_matching': True, } ] _GEO_COUNTRIES = ['CA'] _access_token = None _claims = None def _call_api(self, path, video_id=None, app_code=None, query=None): if not query: query = {} query.update({ 'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb', 'output': 'json', }) if video_id: query.update({ 'appCode': app_code, 'idMedia': video_id, }) if 
self._access_token: query['access_token'] = self._access_token try: return self._download_json( 'https://services.radio-canada.ca/media/' + path, video_id, query=query) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 422): data = self._parse_json(e.cause.read().decode(), None) error = data.get('error_description') or data['errorMessage']['text'] raise ExtractorError(error, expected=True) raise def _extract_info(self, app_code, video_id): metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas'] def get_meta(name): for meta in metas: if meta.get('name') == name: text = meta.get('text') if text: return text # protectionType does not necessarily mean the video is DRM protected (see # https://github.com/ytdl-org/youtube-dl/pull/18609). if get_meta('protectionType'): self.report_warning('This video is probably DRM protected.') query = { 'connectionType': 'hd', 'deviceType': 'ipad', 'multibitrate': 'true', } if self._claims: query['claims'] = self._claims v_data = self._call_api('validation/v2/', video_id, app_code, query) v_url = v_data.get('url') if not v_url: error = v_data['message'] if error == "Le contenu sélectionné n'est pas disponible dans votre pays": raise self.raise_geo_restricted(error, self._GEO_COUNTRIES) if error == 'Le contenu sélectionné est disponible seulement en premium': self.raise_login_required(error) raise ExtractorError( '%s said: %s' % (self.IE_NAME, error), expected=True) formats = self._extract_m3u8_formats(v_url, video_id, 'mp4') self._sort_formats(formats) subtitles = {} closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5') if closed_caption_url: subtitles['fr'] = [{ 'url': closed_caption_url, 'ext': determine_ext(closed_caption_url, 'vtt'), }] return { 'id': video_id, 'title': get_meta('Title') or get_meta('AV-nomEmission'), 'description': get_meta('Description') or get_meta('ShortDescription'), 'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'), 'duration': int_or_none(get_meta('length')), 'series': get_meta('Emission'), 'season_number': int_or_none('SrcSaison'), 'episode_number': int_or_none('SrcEpisode'), 'upload_date': unified_strdate(get_meta('Date')), 'subtitles': subtitles, 'formats': formats, } def _real_extract(self, url): return self._extract_info(*self._match_valid_url(url).groups()) class RadioCanadaAudioVideoIE(InfoExtractor): IE_NAME = 'radiocanada:audiovideo' _VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)' _TESTS = [{ 'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam', 'info_dict': { 'id': '7527184', 'ext': 'mp4', 'title': 'Barack Obama au Vietnam', 'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam', 'upload_date': '20160523', }, 'params': { # m3u8 download 'skip_download': True, }, }, { 'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam', 'only_matching': True, }] def _real_extract(self, url): return self.url_result('radiocanada:medianet:%s' % self._match_id(url)) 
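# --- illustrative sketch (not part of the source tree) ----------------------
# RadioCanadaIE above resolves every field through a flat list of
# {'name': ..., 'text': ...} entries ("Metas"). A minimal hedged model of that
# lookup pattern; the sample entries are made up, not real API output:
def get_meta(metas, name):
    # Return the first non-empty 'text' whose 'name' matches, else None.
    for meta in metas:
        if meta.get('name') == name and meta.get('text'):
            return meta['text']

metas = [{'name': 'Title', 'text': 'Example'}, {'name': 'length', 'text': '98'}]
assert get_meta(metas, 'Title') == 'Example'
assert get_meta(metas, 'missing') is None
# -----------------------------------------------------------------------------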
yt-dlp-2022.08.19/yt_dlp/extractor/radiode.py
from .common import InfoExtractor


class RadioDeIE(InfoExtractor):
    IE_NAME = 'radio.de'
    _VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)'
    _TEST = {
        'url': 'http://ndr2.radio.de/',
        'info_dict': {
            'id': 'ndr2',
            'ext': 'mp3',
            'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 'md5:591c49c702db1a33751625ebfb67f273',
            'thumbnail': r're:^https?://.*\.png',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        radio_id = self._match_id(url)
        webpage = self._download_webpage(url, radio_id)
        jscode = self._search_regex(
            r"'components/station/stationService':\s*\{\s*'?station'?:\s*(\{.*?\s*\}),\n",
            webpage, 'broadcast')

        broadcast = self._parse_json(jscode, radio_id)
        title = broadcast['name']
        description = broadcast.get('description') or broadcast.get('shortDescription')
        thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') or broadcast.get('logo100x100')

        formats = [{
            'url': stream['streamUrl'],
            'ext': stream['streamContentFormat'].lower(),
            'acodec': stream['streamContentFormat'],
            'abr': stream['bitRate'],
            'asr': stream['sampleRate'],
        } for stream in broadcast['streamUrls']]
        self._sort_formats(formats)

        return {
            'id': radio_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'is_live': True,
            'formats': formats,
        }
yt-dlp-2022.08.19/yt_dlp/extractor/radiofrance.py
import re

from .common import InfoExtractor
from ..utils import parse_duration, unified_strdate


class RadioFranceIE(InfoExtractor):
    _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
    IE_NAME = 'radiofrance'

    _TEST = {
        'url': 'http://maison.radiofrance.fr/radiovisions/one-one',
        'md5': 'bdbb28ace95ed0e04faab32ba3160daf',
        'info_dict': {
            'id': 'one-one',
            'ext': 'ogg',
            'title': 'One to one',
            'description': "Plutôt que d'imaginer la radio de demain comme technologie ou comme
création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.", 'uploader': 'Thomas Hercouët', }, } def _real_extract(self, url): m = self._match_valid_url(url) video_id = m.group('id') webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title') description = self._html_search_regex( r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>', webpage, 'description', fatal=False) uploader = self._html_search_regex( r'<div class="credit">  © (.*?)</div>', webpage, 'uploader', fatal=False) formats_str = self._html_search_regex( r'class="jp-jplayer[^"]*" data-source="([^"]+)">', webpage, 'audio URLs') formats = [ { 'format_id': fm[0], 'url': fm[1], 'vcodec': 'none', 'quality': i, } for i, fm in enumerate(re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str)) ] self._sort_formats(formats) return { 'id': video_id, 'title': title, 'formats': formats, 'description': description, 'uploader': uploader, } class FranceCultureIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?radiofrance\.fr/(?:franceculture|fip|francemusique|mouv|franceinter)/podcasts/(?:[^?#]+/)?(?P<display_id>[^?#]+)-(?P<id>\d+)($|[?#])' _TESTS = [ { 'url': 'https://www.radiofrance.fr/franceculture/podcasts/science-en-questions/la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau-8440487', 'info_dict': { 'id': '8440487', 'display_id': 'la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau', 'ext': 'mp3', 'title': 'La physique d’Einstein aiderait-elle à comprendre le cerveau ?', 'description': 'Existerait-il un pont conceptuel entre la physique de l’espace-temps et les neurosciences ?', 'thumbnail': 'https://cdn.radiofrance.fr/s3/cruiser-production/2022/05/d184e7a3-4827-4494-bf94-04ed7b120db4/1200x630_gettyimages-200171095-001.jpg', 'upload_date': '20220514', 'duration': 2750, }, }, { 'url': 'https://www.radiofrance.fr/franceinter/podcasts/la-rafle-du-vel-d-hiv-une-affaire-d-etat/les-racines-du-crime-episode-1-3715507', 'only_matching': True, } ] def _real_extract(self, url): video_id, display_id = self._match_valid_url(url).group('id', 'display_id') webpage = self._download_webpage(url, display_id) # _search_json_ld doesn't correctly handle this. 
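# --- illustrative sketch (not part of the source tree) ----------------------
# RadioFranceIE above scrapes its format map out of inline JS with a single
# re.findall. Hedged standalone illustration; the sample markup string is
# invented, but the regex is the one the extractor uses:
import re

formats_str = "ogg: 'https://example.com/a.ogg', mp3: 'https://example.com/a.mp3'"
formats = [
    {'format_id': fm[0], 'url': fm[1], 'vcodec': 'none', 'quality': i}
    for i, fm in enumerate(re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str))
]
assert formats[1] == {'format_id': 'mp3', 'url': 'https://example.com/a.mp3',
                      'vcodec': 'none', 'quality': 1}
# -----------------------------------------------------------------------------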
See https://github.com/yt-dlp/yt-dlp/pull/3874#discussion_r891903846 video_data = self._search_json('', webpage, 'audio data', display_id, contains_pattern=r'\s*"@type"\s*:\s*"AudioObject"\s*.+') return { 'id': video_id, 'display_id': display_id, 'url': video_data['contentUrl'], 'ext': video_data.get('encodingFormat'), 'vcodec': 'none' if video_data.get('encodingFormat') == 'mp3' else None, 'duration': parse_duration(video_data.get('duration')), 'title': self._html_search_regex(r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>', webpage, 'title', default=self._og_search_title(webpage)), 'description': self._html_search_regex( r'(?s)<meta name="description"\s*content="([^"]+)', webpage, 'description', default=None), 'thumbnail': self._og_search_thumbnail(webpage), 'uploader': self._html_search_regex( r'(?s)<span class="author">(.*?)</span>', webpage, 'uploader', default=None), 'upload_date': unified_strdate(self._search_regex( r'"datePublished"\s*:\s*"([^"]+)', webpage, 'timestamp', fatal=False)) } ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/radiojavan.py����������������������������������������������������0000664�0000000�0000000�00000005240�14277552437�0021347�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import re from .common import InfoExtractor from ..utils import ( parse_resolution, str_to_int, unified_strdate, urlencode_postdata, urljoin, ) class RadioJavanIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?' 
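# --- illustrative sketch (not part of the source tree) ----------------------
# This extractor feeds format ids like '480p'/'720p' through
# yt_dlp.utils.parse_resolution to recover dimensions. A hedged standalone
# subset of that mapping (the real helper handles more shapes than this):
import re

def parse_resolution_like(format_id):
    # Map '720p' -> {'height': 720}; anything unrecognised -> {}.
    m = re.fullmatch(r'(\d+)[pP]', format_id)
    return {'height': int(m.group(1))} if m else {}

assert parse_resolution_like('720p') == {'height': 720}
assert parse_resolution_like('hls') == {}
# -----------------------------------------------------------------------------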
_TEST = { 'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam', 'md5': 'e85208ffa3ca8b83534fca9fe19af95b', 'info_dict': { 'id': 'chaartaar-ashoobam', 'ext': 'mp4', 'title': 'Chaartaar - Ashoobam', 'thumbnail': r're:^https?://.*\.jpe?g$', 'upload_date': '20150215', 'view_count': int, 'like_count': int, 'dislike_count': int, } } def _real_extract(self, url): video_id = self._match_id(url) download_host = self._download_json( 'https://www.radiojavan.com/videos/video_host', video_id, data=urlencode_postdata({'id': video_id}), headers={ 'Content-Type': 'application/x-www-form-urlencoded', 'Referer': url, }).get('host', 'https://host1.rjmusicmedia.com') webpage = self._download_webpage(url, video_id) formats = [] for format_id, _, video_path in re.findall( r'RJ\.video(?P<format_id>\d+[pPkK])\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2', webpage): f = parse_resolution(format_id) f.update({ 'url': urljoin(download_host, video_path), 'format_id': format_id, }) formats.append(f) self._sort_formats(formats) title = self._og_search_title(webpage) thumbnail = self._og_search_thumbnail(webpage) upload_date = unified_strdate(self._search_regex( r'class="date_added">Date added: ([^<]+)<', webpage, 'upload date', fatal=False)) view_count = str_to_int(self._search_regex( r'class="views">Plays: ([\d,]+)', webpage, 'view count', fatal=False)) like_count = str_to_int(self._search_regex( r'class="rating">([\d,]+) likes', webpage, 'like count', fatal=False)) dislike_count = str_to_int(self._search_regex( r'class="rating">([\d,]+) dislikes', webpage, 'dislike count', fatal=False)) return { 'id': video_id, 'title': title, 'thumbnail': thumbnail, 'upload_date': upload_date, 'view_count': view_count, 'like_count': like_count, 'dislike_count': dislike_count, 'formats': formats, } ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/radiokapital.py��������������������������������������������������0000664�0000000�0000000�00000006444�14277552437�0021704�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import ( clean_html, traverse_obj, unescapeHTML, ) import itertools from urllib.parse import urlencode class RadioKapitalBaseIE(InfoExtractor): def _call_api(self, resource, video_id, note='Downloading JSON metadata', qs={}): return self._download_json( f'https://www.radiokapital.pl/wp-json/kapital/v1/{resource}?{urlencode(qs)}', video_id, note=note) def _parse_episode(self, data): release = '%s%s%s' % (data['published'][6:11], data['published'][3:6], data['published'][:3]) return { '_type': 'url_transparent', 'url': data['mixcloud_url'], 'ie_key': 'Mixcloud', 'title': unescapeHTML(data['title']), 'description': clean_html(data.get('content')), 'tags': traverse_obj(data, ('tags', ..., 'name')), 'release_date': release, 'series': traverse_obj(data, ('show', 'title')), } class RadioKapitalIE(RadioKapitalBaseIE): IE_NAME = 'radiokapital' 
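# --- illustrative sketch (not part of the source tree) ----------------------
# RadioKapitalBaseIE._parse_episode above rebuilds release_date by re-slicing
# the 'published' string. Hedged model of that reordering; the DD-MM-YYYY
# input shape is an assumption, not confirmed by this archive:
published = '20-05-2021'  # hypothetical API value
release = published[6:10] + published[3:5] + published[:2]
assert release == '20210520'  # YYYYMMDD, the shape release_date expects
# -----------------------------------------------------------------------------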
_VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/[a-z\d-]+/(?P<id>[a-z\d-]+)' _TESTS = [{ 'url': 'https://radiokapital.pl/shows/tutaj-sa-smoki/5-its-okay-to-be-immaterial', 'info_dict': { 'id': 'radiokapital_radio-kapitał-tutaj-są-smoki-5-its-okay-to-be-immaterial-2021-05-20', 'ext': 'm4a', 'title': '#5: It’s okay to\xa0be\xa0immaterial', 'description': 'md5:2499da5fbfb0e88333b7d37ec8e9e4c4', 'uploader': 'Radio Kapitał', 'uploader_id': 'radiokapital', 'timestamp': 1621640164, 'upload_date': '20210521', }, }] def _real_extract(self, url): video_id = self._match_id(url) episode = self._call_api('episodes/%s' % video_id, video_id) return self._parse_episode(episode) class RadioKapitalShowIE(RadioKapitalBaseIE): IE_NAME = 'radiokapital:show' _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/(?P<id>[a-z\d-]+)/?(?:$|[?#])' _TESTS = [{ 'url': 'https://radiokapital.pl/shows/wesz', 'info_dict': { 'id': '100', 'title': 'WĘSZ', 'description': 'md5:3a557a1e0f31af612b0dcc85b1e0ca5c', }, 'playlist_mincount': 17, }] def _get_episode_list(self, series_id, page_no): return self._call_api( 'episodes', series_id, f'Downloading episode list page #{page_no}', qs={ 'show': series_id, 'page': page_no, }) def _entries(self, series_id): for page_no in itertools.count(1): episode_list = self._get_episode_list(series_id, page_no) yield from (self._parse_episode(ep) for ep in episode_list['items']) if episode_list['next'] is None: break def _real_extract(self, url): series_id = self._match_id(url) show = self._call_api(f'shows/{series_id}', series_id, 'Downloading show metadata') entries = self._entries(series_id) return { '_type': 'playlist', 'entries': entries, 'id': str(show['id']), 'title': show.get('title'), 'description': clean_html(show.get('content')), } ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/radiozet.py������������������������������������������������������0000664�0000000�0000000�00000004203�14277552437�0021050�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import ( traverse_obj, strip_or_none, ) class RadioZetPodcastIE(InfoExtractor): _VALID_URL = r'https?://player\.radiozet\.pl\/Podcasty/.*?/(?P<id>.+)' _TEST = { 'url': 'https://player.radiozet.pl/Podcasty/Nie-Ma-Za-Co/O-przedmiotach-szkolnych-ktore-przydaja-sie-w-zyciu', 'md5': 'e03665c316b4fbc5f6a8f232948bbba3', 'info_dict': { 'id': '42154', 'display_id': 'O-przedmiotach-szkolnych-ktore-przydaja-sie-w-zyciu', 'title': 'O przedmiotach szkolnych, które przydają się w życiu', 'description': 'md5:fa72bed49da334b09e5b2f79851f185c', 'release_timestamp': 1592985480, 'ext': 'mp3', 'thumbnail': r're:^https?://.*\.png$', 'duration': 83, 'series': 'Nie Ma Za Co', 'creator': 'Katarzyna Pakosińska', } } def _call_api(self, podcast_id, display_id): return self._download_json( f'https://player.radiozet.pl/api/podcasts/getPodcast/(node)/{podcast_id}/(station)/radiozet', display_id) def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) 
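# --- illustrative sketch (not part of the source tree) ----------------------
# The return dict below leans on yt_dlp.utils.traverse_obj to walk nested
# keys without KeyError handling. Hedged illustration; the dict shape here is
# invented, not real Radio ZET API output:
from yt_dlp.utils import traverse_obj

data = {'program': {'image': {'original': 'https://example.com/t.png'}}}
assert traverse_obj(data, ('program', 'image', 'original')) == 'https://example.com/t.png'
assert traverse_obj(data, ('player', 'stream')) is None  # missing path -> None
# -----------------------------------------------------------------------------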
podcast_id = self._html_search_regex(r'<div.*?\sid="player".*?\sdata-id=[\'"]([^\'"]+)[\'"]', webpage, 'podcast id') data = self._call_api(podcast_id, display_id)['data'][0] return { 'id': podcast_id, 'display_id': display_id, 'title': strip_or_none(data.get('title')), 'description': strip_or_none(traverse_obj(data, ('program', 'desc'))), 'release_timestamp': data.get('published_date'), 'url': traverse_obj(data, ('player', 'stream')), 'thumbnail': traverse_obj(data, ('program', 'image', 'original')), 'duration': traverse_obj(data, ('player', 'duration')), 'series': strip_or_none(traverse_obj(data, ('program', 'title'))), 'creator': strip_or_none(traverse_obj(data, ('presenter', 0, 'title'))), } ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/radlive.py�������������������������������������������������������0000664�0000000�0000000�00000015550�14277552437�0020664�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import json from ..utils import ( ExtractorError, format_field, traverse_obj, try_get, unified_timestamp ) from .common import InfoExtractor class RadLiveIE(InfoExtractor): IE_NAME = 'radlive' _VALID_URL = r'https?://(?:www\.)?rad\.live/content/(?P<content_type>feature|episode)/(?P<id>[a-f0-9-]+)' _TESTS = [{ 'url': 'https://rad.live/content/feature/dc5acfbc-761b-4bec-9564-df999905116a', 'md5': '6219d5d31d52de87d21c9cf5b7cb27ff', 'info_dict': { 'id': 'dc5acfbc-761b-4bec-9564-df999905116a', 'ext': 'mp4', 'title': 'Deathpact - Digital Mirage 2 [Full Set]', 'language': 'en', 'thumbnail': 'https://static.12core.net/cb65ae077a079c68380e38f387fbc438.png', 'description': '', 'release_timestamp': 1600185600.0, 'channel': 'Proximity', 'channel_id': '9ce6dd01-70a4-4d59-afb6-d01f807cd009', 'channel_url': 'https://rad.live/content/channel/9ce6dd01-70a4-4d59-afb6-d01f807cd009', } }, { 'url': 'https://rad.live/content/episode/bbcf66ec-0d02-4ca0-8dc0-4213eb2429bf', 'md5': '40b2175f347592125d93e9a344080125', 'info_dict': { 'id': 'bbcf66ec-0d02-4ca0-8dc0-4213eb2429bf', 'ext': 'mp4', 'title': 'E01: Bad Jokes 1', 'language': 'en', 'thumbnail': 'https://lsp.littlstar.com/channels/WHISTLE/BAD_JOKES/SEASON_1/BAD_JOKES_101/poster.jpg', 'description': 'Bad Jokes - Champions, Adam Pally, Super Troopers, Team Edge and 2Hype', 'release_timestamp': None, 'channel': None, 'channel_id': None, 'channel_url': None, 'episode': 'E01: Bad Jokes 1', 'episode_number': 1, 'episode_id': '336', }, }] def _real_extract(self, url): content_type, video_id = self._match_valid_url(url).groups() webpage = self._download_webpage(url, video_id) content_info = json.loads(self._search_regex( r'<script[^>]*type=([\'"])application/json\1[^>]*>(?P<json>{.+?})</script>', webpage, 'video info', group='json'))['props']['pageProps']['initialContentData'] video_info = content_info[content_type] if not video_info: raise ExtractorError('Unable to extract video info, make sure the 
URL is valid') formats = self._extract_m3u8_formats(video_info['assets']['videos'][0]['url'], video_id) self._sort_formats(formats) data = video_info.get('structured_data', {}) release_date = unified_timestamp(traverse_obj(data, ('releasedEvent', 'startDate'))) channel = next(iter(content_info.get('channels', [])), {}) channel_id = channel.get('lrn', '').split(':')[-1] or None result = { 'id': video_id, 'title': video_info['title'], 'formats': formats, 'language': traverse_obj(data, ('potentialAction', 'target', 'inLanguage')), 'thumbnail': traverse_obj(data, ('image', 'contentUrl')), 'description': data.get('description'), 'release_timestamp': release_date, 'channel': channel.get('name'), 'channel_id': channel_id, 'channel_url': format_field(channel_id, None, 'https://rad.live/content/channel/%s'), } if content_type == 'episode': result.update({ # TODO: Get season number when downloading single episode 'episode': video_info.get('title'), 'episode_number': video_info.get('number'), 'episode_id': video_info.get('id'), }) return result class RadLiveSeasonIE(RadLiveIE): IE_NAME = 'radlive:season' _VALID_URL = r'https?://(?:www\.)?rad\.live/content/season/(?P<id>[a-f0-9-]+)' _TESTS = [{ 'url': 'https://rad.live/content/season/08a290f7-c9ef-4e22-9105-c255995a2e75', 'md5': '40b2175f347592125d93e9a344080125', 'info_dict': { 'id': '08a290f7-c9ef-4e22-9105-c255995a2e75', 'title': 'Bad Jokes - Season 1', }, 'playlist_mincount': 5, }] @classmethod def suitable(cls, url): return False if RadLiveIE.suitable(url) else super(RadLiveSeasonIE, cls).suitable(url) def _real_extract(self, url): season_id = self._match_id(url) webpage = self._download_webpage(url, season_id) content_info = json.loads(self._search_regex( r'<script[^>]*type=([\'"])application/json\1[^>]*>(?P<json>{.+?})</script>', webpage, 'video info', group='json'))['props']['pageProps']['initialContentData'] video_info = content_info['season'] entries = [{ '_type': 'url_transparent', 'id': episode['structured_data']['url'].split('/')[-1], 'url': episode['structured_data']['url'], 'series': try_get(content_info, lambda x: x['series']['title']), 'season': video_info['title'], 'season_number': video_info.get('number'), 'season_id': video_info.get('id'), 'ie_key': RadLiveIE.ie_key(), } for episode in video_info['episodes']] return self.playlist_result(entries, season_id, video_info.get('title')) class RadLiveChannelIE(RadLiveIE): IE_NAME = 'radlive:channel' _VALID_URL = r'https?://(?:www\.)?rad\.live/content/channel/(?P<id>[a-f0-9-]+)' _TESTS = [{ 'url': 'https://rad.live/content/channel/5c4d8df4-6fa0-413c-81e3-873479b49274', 'md5': '625156a08b7f2b0b849f234e664457ac', 'info_dict': { 'id': '5c4d8df4-6fa0-413c-81e3-873479b49274', 'title': 'Whistle Sports', }, 'playlist_mincount': 7, }] _QUERY = ''' query WebChannelListing ($lrn: ID!) 
{ channel (id:$lrn) { name features { structured_data } } }''' @classmethod def suitable(cls, url): return False if RadLiveIE.suitable(url) else super(RadLiveChannelIE, cls).suitable(url) def _real_extract(self, url): channel_id = self._match_id(url) graphql = self._download_json( 'https://content.mhq.12core.net/graphql', channel_id, headers={'Content-Type': 'application/json'}, data=json.dumps({ 'query': self._QUERY, 'variables': {'lrn': f'lrn:12core:media:content:channel:{channel_id}'} }).encode('utf-8')) data = traverse_obj(graphql, ('data', 'channel')) if not data: raise ExtractorError('Unable to extract video info, make sure the URL is valid') entries = [{ '_type': 'url_transparent', 'url': feature['structured_data']['url'], 'ie_key': RadLiveIE.ie_key(), } for feature in data['features']] return self.playlist_result(entries, channel_id, data.get('name')) ��������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/rai.py�����������������������������������������������������������0000664�0000000�0000000�00000077224�14277552437�0020017�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import re from .common import InfoExtractor from ..compat import ( compat_str, compat_urlparse, ) from ..utils import ( clean_html, determine_ext, ExtractorError, filter_dict, find_xpath_attr, fix_xml_ampersands, GeoRestrictedError, HEADRequest, int_or_none, join_nonempty, parse_duration, remove_start, strip_or_none, traverse_obj, try_get, unified_strdate, unified_timestamp, update_url_query, urljoin, xpath_text, ) class RaiBaseIE(InfoExtractor): _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}' _GEO_COUNTRIES = ['IT'] _GEO_BYPASS = False def _extract_relinker_info(self, relinker_url, video_id, audio_only=False): if not re.match(r'https?://', relinker_url): return {'formats': [{'url': relinker_url}]} formats = [] geoprotection = None is_live = None duration = None for platform in ('mon', 'flash', 'native'): relinker = self._download_xml( relinker_url, video_id, note=f'Downloading XML metadata for platform {platform}', transform_source=fix_xml_ampersands, query={'output': 45, 'pl': platform}, headers=self.geo_verification_headers()) if xpath_text(relinker, './license_url', default='{}') != '{}': self.report_drm(video_id) if not geoprotection: geoprotection = xpath_text( relinker, './geoprotection', default=None) == 'Y' if not is_live: is_live = xpath_text( relinker, './is_live', default=None) == 'Y' if not duration: duration = parse_duration(xpath_text( relinker, './duration', default=None)) url_elem = find_xpath_attr(relinker, './url', 'type', 'content') if url_elem is None: continue media_url = url_elem.text # This does not imply geo restriction (e.g. 
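# --- illustrative sketch (not part of the source tree) ----------------------
# _extract_relinker_info here asks the relinker once per platform
# ('mon', 'flash', 'native') and skips placeholder media. Hedged model of
# that selection loop with a stubbed fetch instead of the XML round-trip:
def pick_media_url(fetch, platforms=('mon', 'flash', 'native')):
    for platform in platforms:
        url = fetch(platform)  # stand-in for the per-platform relinker call
        if url and '/video_no_available.mp4' not in url:
            return url, platform
    return None, None

url, platform = pick_media_url({'flash': 'https://example.com/manifest.f4m'}.get)
assert platform == 'flash'
# -----------------------------------------------------------------------------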
# http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html) if '/video_no_available.mp4' in media_url: continue ext = determine_ext(media_url) if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'): continue if ext == 'mp3': formats.append({ 'url': media_url, 'vcodec': 'none', 'acodec': 'mp3', 'format_id': 'http-mp3', }) break elif ext == 'm3u8' or 'format=m3u8' in media_url or platform == 'mon': formats.extend(self._extract_m3u8_formats( media_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) elif ext == 'f4m' or platform == 'flash': manifest_url = update_url_query( media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'), {'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'}) formats.extend(self._extract_f4m_formats( manifest_url, video_id, f4m_id='hds', fatal=False)) else: bitrate = int_or_none(xpath_text(relinker, 'bitrate')) formats.append({ 'url': media_url, 'tbr': bitrate if bitrate > 0 else None, 'format_id': f'http-{bitrate if bitrate > 0 else "http"}', }) if not formats and geoprotection is True: self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True) if not audio_only: formats.extend(self._create_http_urls(relinker_url, formats)) return filter_dict({ 'is_live': is_live, 'duration': duration, 'formats': formats, }) def _create_http_urls(self, relinker_url, fmts): _RELINKER_REG = r'https?://(?P<host>[^/]+?)/(?:i/)?(?P<extra>[^/]+?)/(?P<path>.+?)/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4|/playlist\.m3u8).+?' _MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s' _QUALITY = { # tbr: w, h '250': [352, 198], '400': [512, 288], '700': [512, 288], '800': [700, 394], '1200': [736, 414], '1800': [1024, 576], '2400': [1280, 720], '3200': [1440, 810], '3600': [1440, 810], '5000': [1920, 1080], '10000': [1920, 1080], } def test_url(url): resp = self._request_webpage( HEADRequest(url), None, headers={'User-Agent': 'Rai'}, fatal=False, errnote=False, note=False) if resp is False: return False if resp.code == 200: return False if resp.url == url else resp.url return None # filter out audio-only formats fmts = [f for f in fmts if not f.get('vcodec') == 'none'] def get_format_info(tbr): import math br = int_or_none(tbr) if len(fmts) == 1 and not br: br = fmts[0].get('tbr') if br and br > 300: tbr = compat_str(math.floor(br / 100) * 100) else: tbr = '250' # try extracting info from available m3u8 formats format_copy = None for f in fmts: if f.get('tbr'): br_limit = math.floor(br / 100) if br_limit - 1 <= math.floor(f['tbr'] / 100) <= br_limit + 1: format_copy = f.copy() return { 'width': format_copy.get('width'), 'height': format_copy.get('height'), 'tbr': format_copy.get('tbr'), 'vcodec': format_copy.get('vcodec'), 'acodec': format_copy.get('acodec'), 'fps': format_copy.get('fps'), 'format_id': f'https-{tbr}', } if format_copy else { 'width': _QUALITY[tbr][0], 'height': _QUALITY[tbr][1], 'format_id': f'https-{tbr}', 'tbr': int(tbr), } loc = test_url(_MP4_TMPL % (relinker_url, '*')) if not isinstance(loc, compat_str): return [] mobj = re.match( _RELINKER_REG, test_url(relinker_url) or '') if not mobj: return [] available_qualities = mobj.group('quality').split(',') if mobj.group('quality') else ['*'] available_qualities = [i for i in available_qualities if i] formats = [] for q in available_qualities: fmt = { 'url': _MP4_TMPL % (relinker_url, q), 'protocol': 'https', 'ext': 'mp4', **get_format_info(q) } formats.append(fmt) return formats @staticmethod def _extract_subtitles(url, 
video_data): STL_EXT = 'stl' SRT_EXT = 'srt' subtitles = {} subtitles_array = video_data.get('subtitlesArray') or [] for k in ('subtitles', 'subtitlesUrl'): subtitles_array.append({'url': video_data.get(k)}) for subtitle in subtitles_array: sub_url = subtitle.get('url') if sub_url and isinstance(sub_url, compat_str): sub_lang = subtitle.get('language') or 'it' sub_url = urljoin(url, sub_url) sub_ext = determine_ext(sub_url, SRT_EXT) subtitles.setdefault(sub_lang, []).append({ 'ext': sub_ext, 'url': sub_url, }) if STL_EXT == sub_ext: subtitles[sub_lang].append({ 'ext': SRT_EXT, 'url': sub_url[:-len(STL_EXT)] + SRT_EXT, }) return subtitles class RaiPlayIE(RaiBaseIE): _VALID_URL = rf'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>{RaiBaseIE._UUID_RE}))\.(?:html|json)' _TESTS = [{ 'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html', 'md5': '8970abf8caf8aef4696e7b1f2adfc696', 'info_dict': { 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', 'ext': 'mp4', 'title': 'Report del 07/04/2014', 'alt_title': 'St 2013/14 - Report - Espresso nel caffè - 07/04/2014', 'description': 'md5:d730c168a58f4bb35600fc2f881ec04e', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Rai Gulp', 'duration': 6160, 'series': 'Report', 'season': '2013/14', 'subtitles': { 'it': 'count:4', }, 'release_year': 2022, 'episode': 'Espresso nel caffè - 07/04/2014', 'timestamp': 1396919880, 'upload_date': '20140408', }, 'params': { 'skip_download': True, }, }, { # 1080p direct mp4 url 'url': 'https://www.raiplay.it/video/2021/11/Blanca-S1E1-Senza-occhi-b1255a4a-8e72-4a2f-b9f3-fc1308e00736.html', 'md5': 'aeda7243115380b2dd5e881fd42d949a', 'info_dict': { 'id': 'b1255a4a-8e72-4a2f-b9f3-fc1308e00736', 'ext': 'mp4', 'title': 'Blanca - S1E1 - Senza occhi', 'alt_title': 'St 1 Ep 1 - Blanca - Senza occhi', 'description': 'md5:75f95d5c030ec8bac263b1212322e28c', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Rai 1', 'duration': 6493, 'series': 'Blanca', 'season': 'Season 1', 'episode_number': 1, 'release_year': 2021, 'season_number': 1, 'episode': 'Senza occhi', 'timestamp': 1637318940, 'upload_date': '20211119', }, }, { 'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?', 'only_matching': True, }, { # subtitles at 'subtitlesArray' key (see #27698) 'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html', 'only_matching': True, }, { # DRM protected 'url': 'https://www.raiplay.it/video/2021/06/Lo-straordinario-mondo-di-Zoey-S2E1-Lo-straordinario-ritorno-di-Zoey-3ba992de-2332-41ad-9214-73e32ab209f4.html', 'only_matching': True, }] def _real_extract(self, url): base, video_id = self._match_valid_url(url).groups() media = self._download_json( base + '.json', video_id, 'Downloading video JSON') if not self.get_param('allow_unplayable_formats'): if try_get( media, (lambda x: x['rights_management']['rights']['drm'], lambda x: x['program_info']['rights_management']['rights']['drm']), dict): self.report_drm(video_id) title = media['name'] video = media['video'] relinker_info = self._extract_relinker_info(video['content_url'], video_id) self._sort_formats(relinker_info['formats']) thumbnails = [] for _, value in media.get('images', {}).items(): if value: thumbnails.append({ 'url': urljoin(url, value), }) date_published = media.get('date_published') time_published = media.get('time_published') if date_published and time_published: date_published += ' ' + time_published subtitles = 
self._extract_subtitles(url, video) program_info = media.get('program_info') or {} season = media.get('season') alt_title = join_nonempty(media.get('subtitle'), media.get('toptitle'), delim=' - ') return { 'id': remove_start(media.get('id'), 'ContentItem-') or video_id, 'display_id': video_id, 'title': title, 'alt_title': strip_or_none(alt_title or None), 'description': media.get('description'), 'uploader': strip_or_none(media.get('channel') or None), 'creator': strip_or_none(media.get('editor') or None), 'duration': parse_duration(video.get('duration')), 'timestamp': unified_timestamp(date_published), 'thumbnails': thumbnails, 'series': program_info.get('name'), 'season_number': int_or_none(season), 'season': season if (season and not season.isdigit()) else None, 'episode': media.get('episode_title'), 'episode_number': int_or_none(media.get('episode')), 'subtitles': subtitles, 'release_year': int_or_none(traverse_obj(media, ('track_info', 'edit_year'))), **relinker_info } class RaiPlayLiveIE(RaiPlayIE): _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))' _TESTS = [{ 'url': 'http://www.raiplay.it/dirette/rainews24', 'info_dict': { 'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c', 'display_id': 'rainews24', 'ext': 'mp4', 'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'description': 'md5:4d00bcf6dc98b27c6ec480de329d1497', 'uploader': 'Rai News 24', 'creator': 'Rai News 24', 'is_live': True, 'live_status': 'is_live', 'upload_date': '20090502', 'timestamp': 1241276220, }, 'params': { 'skip_download': True, }, }] class RaiPlayPlaylistIE(InfoExtractor): _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+))(?:/(?P<extra_id>[^?#&]+))?' _TESTS = [{ 'url': 'https://www.raiplay.it/programmi/nondirloalmiocapo/', 'info_dict': { 'id': 'nondirloalmiocapo', 'title': 'Non dirlo al mio capo', 'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b', }, 'playlist_mincount': 12, }, { 'url': 'https://www.raiplay.it/programmi/nondirloalmiocapo/episodi/stagione-2/', 'info_dict': { 'id': 'nondirloalmiocapo', 'title': 'Non dirlo al mio capo - Stagione 2', 'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b', }, 'playlist_mincount': 12, }] def _real_extract(self, url): base, playlist_id, extra_id = self._match_valid_url(url).groups() program = self._download_json( base + '.json', playlist_id, 'Downloading program JSON') if extra_id: extra_id = extra_id.upper().rstrip('/') playlist_title = program.get('name') entries = [] for b in (program.get('blocks') or []): for s in (b.get('sets') or []): if extra_id: if extra_id != join_nonempty( b.get('name'), s.get('name'), delim='/').replace(' ', '-').upper(): continue playlist_title = join_nonempty(playlist_title, s.get('name'), delim=' - ') s_id = s.get('id') if not s_id: continue medias = self._download_json( f'{base}/{s_id}.json', s_id, 'Downloading content set JSON', fatal=False) if not medias: continue for m in (medias.get('items') or []): path_id = m.get('path_id') if not path_id: continue video_url = urljoin(url, path_id) entries.append(self.url_result( video_url, ie=RaiPlayIE.ie_key(), video_id=RaiPlayIE._match_id(video_url))) return self.playlist_result( entries, playlist_id, playlist_title, try_get(program, lambda x: x['program_info']['description'])) class RaiPlaySoundIE(RaiBaseIE): _VALID_URL = rf'(?P<base>https?://(?:www\.)?raiplaysound\.it/.+?-(?P<id>{RaiBaseIE._UUID_RE}))\.(?:html|json)' _TESTS = [{ 'url': 
'https://www.raiplaysound.it/audio/2021/12/IL-RUGGITO-DEL-CONIGLIO-1ebae2a7-7cdb-42bb-842e-fe0d193e9707.html', 'md5': '8970abf8caf8aef4696e7b1f2adfc696', 'info_dict': { 'id': '1ebae2a7-7cdb-42bb-842e-fe0d193e9707', 'ext': 'mp3', 'title': 'Il Ruggito del Coniglio del 10/12/2021', 'alt_title': 'md5:0e6476cd57858bb0f3fcc835d305b455', 'description': 'md5:2a17d2107e59a4a8faa0e18334139ee2', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'rai radio 2', 'duration': 5685, 'series': 'Il Ruggito del Coniglio', 'episode': 'Il Ruggito del Coniglio del 10/12/2021', 'creator': 'rai radio 2', 'timestamp': 1638346620, 'upload_date': '20211201', }, 'params': { 'skip_download': True, }, }] def _real_extract(self, url): base, audio_id = self._match_valid_url(url).group('base', 'id') media = self._download_json(f'{base}.json', audio_id, 'Downloading audio JSON') uid = try_get(media, lambda x: remove_start(remove_start(x['uniquename'], 'ContentItem-'), 'Page-')) info = {} formats = [] relinkers = set(traverse_obj(media, (('downloadable_audio', 'audio', ('live', 'cards', 0, 'audio')), 'url'))) for r in relinkers: info = self._extract_relinker_info(r, audio_id, True) formats.extend(info.get('formats')) date_published = try_get(media, (lambda x: f'{x["create_date"]} {x.get("create_time") or ""}', lambda x: x['live']['create_date'])) podcast_info = traverse_obj(media, 'podcast_info', ('live', 'cards', 0)) or {} thumbnails = [{ 'url': urljoin(url, thumb_url), } for thumb_url in (podcast_info.get('images') or {}).values() if thumb_url] return { **info, 'id': uid or audio_id, 'display_id': audio_id, 'title': traverse_obj(media, 'title', 'episode_title'), 'alt_title': traverse_obj(media, ('track_info', 'media_name'), expected_type=strip_or_none), 'description': media.get('description'), 'uploader': traverse_obj(media, ('track_info', 'channel'), expected_type=strip_or_none), 'creator': traverse_obj(media, ('track_info', 'editor'), expected_type=strip_or_none), 'timestamp': unified_timestamp(date_published), 'thumbnails': thumbnails, 'series': podcast_info.get('title'), 'season_number': int_or_none(media.get('season')), 'episode': media.get('episode_title'), 'episode_number': int_or_none(media.get('episode')), 'formats': formats, } class RaiPlaySoundLiveIE(RaiPlaySoundIE): _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplaysound\.it/(?P<id>[^/?#&]+)$)' _TESTS = [{ 'url': 'https://www.raiplaysound.it/radio2', 'info_dict': { 'id': 'b00a50e6-f404-4af6-8f8c-ff3b9af73a44', 'display_id': 'radio2', 'ext': 'mp4', 'title': r're:Rai Radio 2 \d+-\d+-\d+ \d+:\d+', 'thumbnail': r're:https://www.raiplaysound.it/dl/img/.+?png', 'uploader': 'rai radio 2', 'series': 'Rai Radio 2', 'creator': 'raiplaysound', 'is_live': True, 'live_status': 'is_live', }, 'params': { 'skip_download': 'live', }, }] class RaiPlaySoundPlaylistIE(InfoExtractor): _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplaysound\.it/(?:programmi|playlist|audiolibri)/(?P<id>[^/?#&]+))(?:/(?P<extra_id>[^?#&]+))?' 
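# --- illustrative sketch (not part of the source tree) ----------------------
# rai.py composes alt_title and playlist titles with
# yt_dlp.utils.join_nonempty, which drops empty parts before joining.
# Hedged standalone illustration:
from yt_dlp.utils import join_nonempty

assert join_nonempty('Blanca', 'Senza occhi', delim=' - ') == 'Blanca - Senza occhi'
assert join_nonempty(None, 'Stagione 2', delim=' - ') == 'Stagione 2'
# -----------------------------------------------------------------------------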
_TESTS = [{ 'url': 'https://www.raiplaysound.it/programmi/ilruggitodelconiglio', 'info_dict': { 'id': 'ilruggitodelconiglio', 'title': 'Il Ruggito del Coniglio', 'description': 'md5:1bbaf631245a7ab1ec4d9fbb3c7aa8f3', }, 'playlist_mincount': 65, }, { 'url': 'https://www.raiplaysound.it/programmi/ilruggitodelconiglio/puntate/prima-stagione-1995', 'info_dict': { 'id': 'ilruggitodelconiglio_puntate_prima-stagione-1995', 'title': 'Prima Stagione 1995', }, 'playlist_count': 1, }] def _real_extract(self, url): base, playlist_id, extra_id = self._match_valid_url(url).group('base', 'id', 'extra_id') url = f'{base}.json' program = self._download_json(url, playlist_id, 'Downloading program JSON') if extra_id: extra_id = extra_id.rstrip('/') playlist_id += '_' + extra_id.replace('/', '_') path = next(c['path_id'] for c in program.get('filters') or [] if extra_id in c.get('weblink')) program = self._download_json( urljoin('https://www.raiplaysound.it', path), playlist_id, 'Downloading program secondary JSON') entries = [ self.url_result(urljoin(base, c['path_id']), ie=RaiPlaySoundIE.ie_key()) for c in traverse_obj(program, 'cards', ('block', 'cards')) or [] if c.get('path_id')] return self.playlist_result(entries, playlist_id, program.get('title'), traverse_obj(program, ('podcast_info', 'description'))) class RaiIE(RaiBaseIE): _VALID_URL = rf'https?://[^/]+\.(?:rai\.(?:it|tv))/.+?-(?P<id>{RaiBaseIE._UUID_RE})(?:-.+?)?\.html' _TESTS = [{ # var uniquename = "ContentItem-..." # data-id="ContentItem-..." 'url': 'https://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html', 'info_dict': { 'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9', 'ext': 'mp4', 'title': 'TG PRIMO TEMPO', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 1758, 'upload_date': '20140612', }, 'skip': 'This content is available only in Italy', }, { # with ContentItem in og:url 'url': 'https://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html', 'md5': '06345bd97c932f19ffb129973d07a020', 'info_dict': { 'id': 'efb17665-691c-45d5-a60c-5301333cbb0c', 'ext': 'mp4', 'title': 'TG1 ore 20:00 del 03/11/2016', 'description': 'TG1 edizione integrale ore 20:00 del giorno 03/11/2016', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 2214, 'upload_date': '20161103' } }, { # Direct MMS URL 'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html', 'only_matching': True, }] def _extract_from_content_id(self, content_id, url): media = self._download_json( f'https://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-{content_id}.html?json', content_id, 'Downloading video JSON') title = media['name'].strip() media_type = media['type'] if 'Audio' in media_type: relinker_info = { 'formats': [{ 'format_id': media.get('formatoAudio'), 'url': media['audioUrl'], 'ext': media.get('formatoAudio'), }] } elif 'Video' in media_type: relinker_info = self._extract_relinker_info(media['mediaUri'], content_id) else: raise ExtractorError('not a media file') self._sort_formats(relinker_info['formats']) thumbnails = [] for image_type in ('image', 'image_medium', 'image_300'): thumbnail_url = media.get(image_type) if thumbnail_url: thumbnails.append({ 'url': compat_urlparse.urljoin(url, thumbnail_url), }) subtitles = self._extract_subtitles(url, media) return { 'id': content_id, 'title': title, 'description': strip_or_none(media.get('desc') or None), 'thumbnails': thumbnails, 'uploader': strip_or_none(media.get('author') or None), 'upload_date': 
unified_strdate(media.get('date')), 'duration': parse_duration(media.get('length')), 'subtitles': subtitles, **relinker_info } def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) content_item_id = None content_item_url = self._html_search_meta( ('og:url', 'og:video', 'og:video:secure_url', 'twitter:url', 'twitter:player', 'jsonlink'), webpage, default=None) if content_item_url: content_item_id = self._search_regex( rf'ContentItem-({self._UUID_RE})', content_item_url, 'content item id', default=None) if not content_item_id: content_item_id = self._search_regex( rf'''(?x) (?: (?:initEdizione|drawMediaRaiTV)\(| <(?:[^>]+\bdata-id|var\s+uniquename)=| <iframe[^>]+\bsrc= ) (["\']) (?:(?!\1).)*\bContentItem-(?P<id>{self._UUID_RE}) ''', webpage, 'content item id', default=None, group='id') content_item_ids = set() if content_item_id: content_item_ids.add(content_item_id) if video_id not in content_item_ids: content_item_ids.add(video_id) for content_item_id in content_item_ids: try: return self._extract_from_content_id(content_item_id, url) except GeoRestrictedError: raise except ExtractorError: pass relinker_url = self._proto_relative_url(self._search_regex( r'''(?x) (?: var\s+videoURL| mediaInfo\.mediaUri )\s*=\s* ([\'"]) (?P<url> (?:https?:)? //mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\? (?:(?!\1).)*\bcont=(?:(?!\1).)+)\1 ''', webpage, 'relinker URL', group='url')) relinker_info = self._extract_relinker_info( urljoin(url, relinker_url), video_id) self._sort_formats(relinker_info['formats']) title = self._search_regex( r'var\s+videoTitolo\s*=\s*([\'"])(?P<title>[^\'"]+)\1', webpage, 'title', group='title', default=None) or self._og_search_title(webpage) return { 'id': video_id, 'title': title, **relinker_info } class RaiNewsIE(RaiIE): _VALID_URL = rf'https?://(www\.)?rainews\.it/(?!articoli)[^?#]+-(?P<id>{RaiBaseIE._UUID_RE})(?:-[^/?#]+)?\.html' _EMBED_REGEX = [rf'<iframe[^>]+data-src="(?P<url>/iframe/[^?#]+?{RaiBaseIE._UUID_RE}\.html)'] _TESTS = [{ # new rainews player (#3911) 'url': 'https://www.rainews.it/rubriche/24mm/video/2022/05/24mm-del-29052022-12cf645d-1ffd-4220-b27c-07c226dbdecf.html', 'info_dict': { 'id': '12cf645d-1ffd-4220-b27c-07c226dbdecf', 'ext': 'mp4', 'title': 'Puntata del 29/05/2022', 'duration': 1589, 'upload_date': '20220529', 'uploader': 'rainews', } }, { # old content with fallback method to extract media urls 'url': 'https://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html', 'info_dict': { 'id': '1632c009-c843-4836-bb65-80c33084a64b', 'ext': 'mp4', 'title': 'Weekend al cinema, da Hollywood arriva il thriller di Tate Taylor "La ragazza del treno"', 'description': 'I film in uscita questa settimana.', 'thumbnail': r're:^https?://.*\.png$', 'duration': 833, 'upload_date': '20161103' }, 'expected_warnings': ['unable to extract player_data'], }, { # iframe + drm 'url': 'https://www.rainews.it/iframe/video/2022/07/euro2022-europei-calcio-femminile-italia-belgio-gol-0-1-video-4de06a69-de75-4e32-a657-02f0885f8118.html', 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) player_data = self._search_json( r'<rainews-player\s*data=\'', webpage, 'player_data', video_id, transform_source=clean_html, fatal=False) track_info = player_data.get('track_info') relinker_url = traverse_obj(player_data, 'mediapolis', 
'content_url') if not relinker_url: # fallback on old implementation for some old content try: return self._extract_from_content_id(video_id, url) except GeoRestrictedError: raise except ExtractorError as e: raise ExtractorError('Relinker URL not found', cause=e) relinker_info = self._extract_relinker_info(urljoin(url, relinker_url), video_id) self._sort_formats(relinker_info['formats']) return { 'id': video_id, 'title': track_info.get('title') or self._og_search_title(webpage), 'upload_date': unified_strdate(track_info.get('date')), 'uploader': strip_or_none(track_info.get('editor') or None), **relinker_info } class RaiSudtirolIE(RaiBaseIE): _VALID_URL = r'https?://raisudtirol\.rai\.it/.+?media=(?P<id>[TP]tv\d+)' _TESTS = [{ 'url': 'https://raisudtirol.rai.it/la/index.php?media=Ptv1619729460', 'info_dict': { 'id': 'Ptv1619729460', 'ext': 'mp4', 'title': 'Euro: trasmisciun d\'economia - 29-04-2021 20:51', 'series': 'Euro: trasmisciun d\'economia', 'upload_date': '20210429', 'thumbnail': r're:https://raisudtirol\.rai\.it/img/.+?\.jpg', 'uploader': 'raisudtirol', } }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) video_date = self._html_search_regex(r'<span class="med_data">(.+?)</span>', webpage, 'video_date', fatal=False) video_title = self._html_search_regex(r'<span class="med_title">(.+?)</span>', webpage, 'video_title', fatal=False) video_url = self._html_search_regex(r'sources:\s*\[\{file:\s*"(.+?)"\}\]', webpage, 'video_url') video_thumb = self._html_search_regex(r'image: \'(.+?)\'', webpage, 'video_thumb', fatal=False) return { 'id': video_id, 'title': join_nonempty(video_title, video_date, delim=' - '), 'series': video_title, 'upload_date': unified_strdate(video_date), 'thumbnail': urljoin('https://raisudtirol.rai.it/', video_thumb), 'uploader': 'raisudtirol', 'formats': [{ 'format_id': 'https-mp4', 'url': self._proto_relative_url(video_url), 'width': 1024, 'height': 576, 'fps': 25, 'vcodec': 'h264', 'acodec': 'aac', }], } yt-dlp-2022.08.19/yt_dlp/extractor/raywenderlich.py import re from .common import InfoExtractor from .vimeo import VimeoIE from ..compat import compat_str from ..utils import ( ExtractorError, int_or_none, merge_dicts, try_get, unescapeHTML, unified_timestamp, urljoin, ) class RayWenderlichIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: videos\.raywenderlich\.com/courses| (?:www\.)?raywenderlich\.com )/ (?P<course_id>[^/]+)/lessons/(?P<id>\d+) ''' _TESTS = [{ 'url': 'https://www.raywenderlich.com/3530-testing-in-ios/lessons/1', 'info_dict': { 'id': '248377018', 'ext': 'mp4', 'title': 'Introduction', 'description': 'md5:804d031b3efa9fcb49777d512d74f722', 'timestamp': 1513906277,
'upload_date': '20171222', 'duration': 133, 'uploader': 'Ray Wenderlich', 'uploader_id': 'user3304672', }, 'params': { 'noplaylist': True, 'skip_download': True, }, 'add_ie': [VimeoIE.ie_key()], 'expected_warnings': ['HTTP Error 403: Forbidden'], }, { 'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1', 'only_matching': True, }] @staticmethod def _extract_video_id(data, lesson_id): if not data: return groups = try_get(data, lambda x: x['groups'], list) or [] if not groups: return for group in groups: if not isinstance(group, dict): continue contents = try_get(data, lambda x: x['contents'], list) or [] for content in contents: if not isinstance(content, dict): continue ordinal = int_or_none(content.get('ordinal')) if ordinal != lesson_id: continue video_id = content.get('identifier') if video_id: return compat_str(video_id) def _real_extract(self, url): mobj = self._match_valid_url(url) course_id, lesson_id = mobj.group('course_id', 'id') display_id = '%s/%s' % (course_id, lesson_id) webpage = self._download_webpage(url, display_id) thumbnail = self._og_search_thumbnail( webpage, default=None) or self._html_search_meta( 'twitter:image', webpage, 'thumbnail') if '>Subscribe to unlock' in webpage: raise ExtractorError( 'This content is only available for subscribers', expected=True) info = { 'thumbnail': thumbnail, } vimeo_id = self._search_regex( r'data-vimeo-id=["\'](\d+)', webpage, 'vimeo id', default=None) if not vimeo_id: data = self._parse_json( self._search_regex( r'data-collection=(["\'])(?P<data>{.+?})\1', webpage, 'data collection', default='{}', group='data'), display_id, transform_source=unescapeHTML, fatal=False) video_id = self._extract_video_id( data, lesson_id) or self._search_regex( r'/videos/(\d+)/', thumbnail, 'video id') headers = { 'Referer': url, 'X-Requested-With': 'XMLHttpRequest', } csrf_token = self._html_search_meta( 'csrf-token', webpage, 'csrf token', default=None) if csrf_token: headers['X-CSRF-Token'] = csrf_token video = self._download_json( 'https://videos.raywenderlich.com/api/v1/videos/%s.json' % video_id, display_id, headers=headers)['video'] vimeo_id = video['clips'][0]['provider_id'] info.update({ '_type': 'url_transparent', 'title': video.get('name'), 'description': video.get('description') or video.get( 'meta_description'), 'duration': int_or_none(video.get('duration')), 'timestamp': unified_timestamp(video.get('created_at')), }) return merge_dicts(info, self.url_result( VimeoIE._smuggle_referrer( 'https://player.vimeo.com/video/%s' % vimeo_id, url), ie=VimeoIE.ie_key(), video_id=vimeo_id)) class RayWenderlichCourseIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: videos\.raywenderlich\.com/courses| (?:www\.)?raywenderlich\.com )/ (?P<id>[^/]+) ''' _TEST = { 'url': 'https://www.raywenderlich.com/3530-testing-in-ios', 'info_dict': { 'title': 'Testing in iOS', 'id': '3530-testing-in-ios', }, 'params': { 'noplaylist': False, }, 'playlist_count': 29, } @classmethod def suitable(cls, url): return False if RayWenderlichIE.suitable(url) else super( RayWenderlichCourseIE, cls).suitable(url) def _real_extract(self, url): course_id = self._match_id(url) webpage = self._download_webpage(url, course_id) entries = [] lesson_urls = set() for lesson_url in re.findall( r'<a[^>]+\bhref=["\'](/%s/lessons/\d+)' % course_id, webpage): if lesson_url in lesson_urls: continue lesson_urls.add(lesson_url) entries.append(self.url_result( urljoin(url, lesson_url), ie=RayWenderlichIE.ie_key())) title = self._og_search_title( webpage, default=None) 
or self._html_search_meta( 'twitter:title', webpage, 'title', default=None) return self.playlist_result(entries, course_id, title) yt-dlp-2022.08.19/yt_dlp/extractor/rbmaradio.py from .common import InfoExtractor from ..compat import compat_str from ..utils import ( clean_html, int_or_none, unified_timestamp, update_url_query, ) class RBMARadioIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?(?:rbmaradio|redbullradio)\.com/shows/(?P<show_id>[^/]+)/episodes/(?P<id>[^/?#&]+)' _TEST = { 'url': 'https://www.rbmaradio.com/shows/main-stage/episodes/ford-lopatin-live-at-primavera-sound-2011', 'md5': '6bc6f9bcb18994b4c983bc3bf4384d95', 'info_dict': { 'id': 'ford-lopatin-live-at-primavera-sound-2011', 'ext': 'mp3', 'title': 'Main Stage - Ford & Lopatin at Primavera Sound', 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', 'thumbnail': r're:^https?://.*\.jpg', 'duration': 2452, 'timestamp': 1307103164, 'upload_date': '20110603', }, } def _real_extract(self, url): mobj = self._match_valid_url(url) show_id = mobj.group('show_id') episode_id = mobj.group('id') webpage = self._download_webpage(url, episode_id) episode = self._parse_json( self._search_regex( r'__INITIAL_STATE__\s*=\s*({.+?})\s*</script>', webpage, 'json data'), episode_id)['episodes'][show_id][episode_id] title = episode['title'] show_title = episode.get('showTitle') if show_title: title = '%s - %s' % (show_title, title) formats = [{ 'url': update_url_query(episode['audioURL'], query={'cbr': abr}), 'format_id': compat_str(abr), 'abr': abr, 'vcodec': 'none', } for abr in (96, 128, 192, 256)] self._check_formats(formats, episode_id) description = clean_html(episode.get('longTeaser')) thumbnail = self._proto_relative_url(episode.get('imageURL', {}).get('landscape')) duration = int_or_none(episode.get('duration')) timestamp = unified_timestamp(episode.get('publishedAt')) return { 'id': episode_id, 'title': title, 'description': description, 'thumbnail': thumbnail, 'duration': duration, 'timestamp': timestamp, 'formats': formats, } yt-dlp-2022.08.19/yt_dlp/extractor/rcs.py import re from .common import InfoExtractor from ..utils import ( clean_html, ExtractorError, js_to_json, base_url, url_basename, urljoin, ) class RCSBaseIE(InfoExtractor): # based on VideoPlayerLoader.prototype.getVideoSrc # and VideoPlayerLoader.prototype.transformSrc from #
https://js2.corriereobjects.it/includes2013/LIBS/js/corriere_video.sjs _ALL_REPLACE = { 'media2vam.corriere.it.edgesuite.net': 'media2vam-corriere-it.akamaized.net', 'media.youreporter.it.edgesuite.net': 'media-youreporter-it.akamaized.net', 'corrierepmd.corriere.it.edgesuite.net': 'corrierepmd-corriere-it.akamaized.net', 'media2vam-corriere-it.akamaized.net/fcs.quotidiani/vr/videos/': 'video.corriere.it/vr360/videos/', '.net//': '.net/', } _MP4_REPLACE = { 'media2vam.corbologna.corriere.it.edgesuite.net': 'media2vam-bologna-corriere-it.akamaized.net', 'media2vam.corfiorentino.corriere.it.edgesuite.net': 'media2vam-fiorentino-corriere-it.akamaized.net', 'media2vam.cormezzogiorno.corriere.it.edgesuite.net': 'media2vam-mezzogiorno-corriere-it.akamaized.net', 'media2vam.corveneto.corriere.it.edgesuite.net': 'media2vam-veneto-corriere-it.akamaized.net', 'media2.oggi.it.edgesuite.net': 'media2-oggi-it.akamaized.net', 'media2.quimamme.it.edgesuite.net': 'media2-quimamme-it.akamaized.net', 'media2.amica.it.edgesuite.net': 'media2-amica-it.akamaized.net', 'media2.living.corriere.it.edgesuite.net': 'media2-living-corriere-it.akamaized.net', 'media2.style.corriere.it.edgesuite.net': 'media2-style-corriere-it.akamaized.net', 'media2.iodonna.it.edgesuite.net': 'media2-iodonna-it.akamaized.net', 'media2.leitv.it.edgesuite.net': 'media2-leitv-it.akamaized.net', } _MIGRATION_MAP = { 'videoamica-vh.akamaihd': 'amica', 'media2-amica-it.akamaized': 'amica', 'corrierevam-vh.akamaihd': 'corriere', 'media2vam-corriere-it.akamaized': 'corriere', 'cormezzogiorno-vh.akamaihd': 'corrieredelmezzogiorno', 'media2vam-mezzogiorno-corriere-it.akamaized': 'corrieredelmezzogiorno', 'corveneto-vh.akamaihd': 'corrieredelveneto', 'media2vam-veneto-corriere-it.akamaized': 'corrieredelveneto', 'corbologna-vh.akamaihd': 'corrieredibologna', 'media2vam-bologna-corriere-it.akamaized': 'corrieredibologna', 'corfiorentino-vh.akamaihd': 'corrierefiorentino', 'media2vam-fiorentino-corriere-it.akamaized': 'corrierefiorentino', 'corinnovazione-vh.akamaihd': 'corriereinnovazione', 'media2-gazzanet-gazzetta-it.akamaized': 'gazzanet', 'videogazzanet-vh.akamaihd': 'gazzanet', 'videogazzaworld-vh.akamaihd': 'gazzaworld', 'gazzettavam-vh.akamaihd': 'gazzetta', 'media2vam-gazzetta-it.akamaized': 'gazzetta', 'videoiodonna-vh.akamaihd': 'iodonna', 'media2-leitv-it.akamaized': 'leitv', 'videoleitv-vh.akamaihd': 'leitv', 'videoliving-vh.akamaihd': 'living', 'media2-living-corriere-it.akamaized': 'living', 'media2-oggi-it.akamaized': 'oggi', 'videooggi-vh.akamaihd': 'oggi', 'media2-quimamme-it.akamaized': 'quimamme', 'quimamme-vh.akamaihd': 'quimamme', 'videorunning-vh.akamaihd': 'running', 'media2-style-corriere-it.akamaized': 'style', 'style-vh.akamaihd': 'style', 'videostyle-vh.akamaihd': 'style', 'media2-stylepiccoli-it.akamaized': 'stylepiccoli', 'stylepiccoli-vh.akamaihd': 'stylepiccoli', 'doveviaggi-vh.akamaihd': 'viaggi', 'media2-doveviaggi-it.akamaized': 'viaggi', 'media2-vivimilano-corriere-it.akamaized': 'vivimilano', 'vivimilano-vh.akamaihd': 'vivimilano', 'media2-youreporter-it.akamaized': 'youreporter' } _MIGRATION_MEDIA = { 'advrcs-vh.akamaihd': '', 'corriere-f.akamaihd': '', 'corrierepmd-corriere-it.akamaized': '', 'corrprotetto-vh.akamaihd': '', 'gazzetta-f.akamaihd': '', 'gazzettapmd-gazzetta-it.akamaized': '', 'gazzprotetto-vh.akamaihd': '', 'periodici-f.akamaihd': '', 'periodicisecure-vh.akamaihd': '', 'videocoracademy-vh.akamaihd': '' } def _get_video_src(self, video): mediaFiles = video.get('mediaProfile').get('mediaFile') 
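# _get_video_src collects the raw stream URLs into a dict keyed by container ('mp3', 'mp4', 'm3u8'), then migrates the legacy Edgesuite/Akamai hosts to the current CDNs using the replace/migration maps above before returning it.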
src = {} # audio if video.get('mediaType') == 'AUDIO': for aud in mediaFiles: # todo: check src['mp3'] = aud.get('value') # video else: for vid in mediaFiles: if vid.get('mimeType') == 'application/vnd.apple.mpegurl': src['m3u8'] = vid.get('value') if vid.get('mimeType') == 'video/mp4': src['mp4'] = vid.get('value') # replace host for t in src: for s, r in self._ALL_REPLACE.items(): src[t] = src[t].replace(s, r) for s, r in self._MP4_REPLACE.items(): src[t] = src[t].replace(s, r) # switch cdn if 'mp4' in src and 'm3u8' in src: if ('-lh.akamaihd' not in src.get('m3u8') and 'akamai' in src.get('mp4')): if 'm3u8' in src: matches = re.search(r'(?:https*:)?\/\/(?P<host>.*)\.net\/i(?P<path>.*)$', src.get('m3u8')) src['m3u8'] = 'https://vod.rcsobjects.it/hls/%s%s' % ( self._MIGRATION_MAP[matches.group('host')], matches.group('path').replace( '///', '/').replace( '//', '/').replace( '.csmil', '.urlset' ) ) if 'mp4' in src: matches = re.search(r'(?:https*:)?\/\/(?P<host>.*)\.net\/i(?P<path>.*)$', src.get('mp4')) if matches: if matches.group('host') in self._MIGRATION_MEDIA: # paths containing 'fcs.quotidiani_!' use the -it variant of the host vh_stream = 'https://media2.corriereobjects.it' if 'fcs.quotidiani_!' in src['mp4']: vh_stream = 'https://media2-it.corriereobjects.it' src['mp4'] = '%s%s' % ( vh_stream, matches.group('path').replace( '///', '/').replace( '//', '/').replace( '/fcs.quotidiani/mediacenter', '').replace( '/fcs.quotidiani_!/mediacenter', '').replace( 'corriere/content/mediacenter/', '').replace( 'gazzetta/content/mediacenter/', '') ) else: src['mp4'] = 'https://vod.rcsobjects.it/%s%s' % ( self._MIGRATION_MAP[matches.group('host')], matches.group('path').replace('///', '/').replace('//', '/') ) if 'mp3' in src: src['mp3'] = src.get('mp3').replace( 'media2vam-corriere-it.akamaized.net', 'vod.rcsobjects.it/corriere') if 'mp4' in src: if 'fcs.quotidiani_!' in src['mp4']: src['mp4'] = src.get('mp4').replace('vod.rcsobjects', 'vod-it.rcsobjects') if 'm3u8' in src: if 'fcs.quotidiani_!' in src['m3u8']: src['m3u8'] = src.get('m3u8').replace('vod.rcsobjects', 'vod-it.rcsobjects') if 'geoblocking' in video.get('mediaProfile'): if 'm3u8' in src: src['m3u8'] = src.get('m3u8').replace('vod.rcsobjects', 'vod-it.rcsobjects') if 'mp4' in src: src['mp4'] = src.get('mp4').replace('vod.rcsobjects', 'vod-it.rcsobjects') if 'm3u8' in src: if 'csmil' in src['m3u8'] and 'vod' in src['m3u8']: src['m3u8'] = src.get('m3u8').replace('.csmil', '.urlset') return src def _create_formats(self, urls, video_id): formats = self._extract_m3u8_formats( urls.get('m3u8'), video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) if urls.get('mp4'): formats.append({ 'format_id': 'http-mp4', 'url': urls['mp4'] }) self._sort_formats(formats) return formats def _real_extract(self, url): mobj = self._match_valid_url(url) video_id = mobj.group('id') if 'cdn' not in mobj.groupdict(): raise ExtractorError('CDN not found in url: %s' % url) # for leitv/youreporter/viaggi don't use the embed page if ((mobj.group('cdn') not in ['leitv.it', 'youreporter.it']) and (mobj.group('vid') == 'video')): url = 'https://video.%s/video-embed/%s' % (mobj.group('cdn'), video_id) page = self._download_webpage(url, video_id) video_data = None # look for json video data url json = self._search_regex( r'''(?x)url\s*=\s*(["']) (?P<url> (?:https?:)?//video\.rcs\.it /fragment-includes/video-includes/.+?\.json )\1;''', page, video_id, group='url', default=None) if json: if json.startswith('//'): json = 'https:%s' % json video_data =
self._download_json(json, video_id) # if json url not found, look for json video data directly in the page else: # RCS normal pages and most of the embeds json = self._search_regex( r'[\s;]video\s*=\s*({[\s\S]+?})(?:;|,playlist=)', page, video_id, default=None) if not json and 'video-embed' in url: page = self._download_webpage(url.replace('video-embed', 'video-json'), video_id) json = self._search_regex( r'##start-video##({[\s\S]+?})##end-video##', page, video_id, default=None) if not json: # if no video data found try search for iframes emb = RCSEmbedsIE._extract_url(page) if emb: return { '_type': 'url_transparent', 'url': emb, 'ie_key': RCSEmbedsIE.ie_key() } if json: video_data = self._parse_json( json, video_id, transform_source=js_to_json) if not video_data: raise ExtractorError('Video data not found in the page') formats = self._create_formats( self._get_video_src(video_data), video_id) description = (video_data.get('description') or clean_html(video_data.get('htmlDescription')) or self._html_search_meta('description', page)) uploader = video_data.get('provider') or mobj.group('cdn') return { 'id': video_id, 'title': video_data.get('title'), 'description': description, 'uploader': uploader, 'formats': formats } class RCSEmbedsIE(RCSBaseIE): _VALID_URL = r'''(?x) https?://(?P<vid>video)\. (?P<cdn> (?: rcs| (?:corriere\w+\.)?corriere| (?:gazzanet\.)?gazzetta )\.it) /video-embed/(?P<id>[^/=&\?]+?)(?:$|\?)''' _EMBED_REGEX = [r'''(?x) (?: data-frame-src=| <iframe[^\n]+src= ) (["']) (?P<url>(?:https?:)?//video\. (?: rcs| (?:corriere\w+\.)?corriere| (?:gazzanet\.)?gazzetta ) \.it/video-embed/.+?) \1'''] _TESTS = [{ 'url': 'https://video.rcs.it/video-embed/iodonna-0001585037', 'md5': '623ecc8ffe7299b2d0c1046d8331a9df', 'info_dict': { 'id': 'iodonna-0001585037', 'ext': 'mp4', 'title': 'Sky Arte racconta Madonna nella serie "Artist to icon"', 'description': 'md5:65b09633df9ffee57f48b39e34c9e067', 'uploader': 'rcs.it', } }, { # redownload the page changing 'video-embed' in 'video-json' 'url': 'https://video.gazzanet.gazzetta.it/video-embed/gazzanet-mo05-0000260789', 'md5': 'a043e3fecbe4d9ed7fc5d888652a5440', 'info_dict': { 'id': 'gazzanet-mo05-0000260789', 'ext': 'mp4', 'title': 'Valentino Rossi e papà Graziano si divertono col drifting', 'description': 'md5:a8bf90d6adafd9815f70fc74c0fc370a', 'uploader': 'rcd', } }, { 'url': 'https://video.corriere.it/video-embed/b727632a-f9d0-11ea-91b0-38d50a849abb?player', 'match_only': True }, { 'url': 'https://video.gazzetta.it/video-embed/49612410-00ca-11eb-bcd8-30d4253e0140', 'match_only': True }] @staticmethod def _sanitize_urls(urls): # add protocol if missing for i, e in enumerate(urls): if e.startswith('//'): urls[i] = 'https:%s' % e # clean iframes urls for i, e in enumerate(urls): urls[i] = urljoin(base_url(e), url_basename(e)) return urls @classmethod def _extract_embed_urls(cls, url, webpage): return cls._sanitize_urls(tuple(super()._extract_embed_urls(url, webpage))) class RCSIE(RCSBaseIE): _VALID_URL = r'''(?x)https?://(?P<vid>video|viaggi)\. (?P<cdn> (?: corrieredelmezzogiorno\. |corrieredelveneto\. |corrieredibologna\. |corrierefiorentino\. 
)?corriere\.it |(?:gazzanet\.)?gazzetta\.it) /(?!video-embed/).+?/(?P<id>[^/\?]+)(?=\?|/$|$)''' _TESTS = [{ 'url': 'https://video.corriere.it/sport/formula-1/vettel-guida-ferrari-sf90-mugello-suo-fianco-c-elecrerc-bendato-video-esilarante/b727632a-f9d0-11ea-91b0-38d50a849abb', 'md5': '0f4ededc202b0f00b6e509d831e2dcda', 'info_dict': { 'id': 'b727632a-f9d0-11ea-91b0-38d50a849abb', 'ext': 'mp4', 'title': 'Vettel guida la Ferrari SF90 al Mugello e al suo fianco c\'è Leclerc (bendato): il video è esilarante', 'description': 'md5:93b51c9161ac8a64fb2f997b054d0152', 'uploader': 'Corriere Tv', } }, { # video data inside iframe 'url': 'https://viaggi.corriere.it/video/norvegia-il-nuovo-ponte-spettacolare-sopra-la-cascata-di-voringsfossen/', 'md5': 'da378e4918d2afbf7d61c35abb948d4c', 'info_dict': { 'id': '5b7cd134-e2c1-11ea-89b3-b56dd0df2aa2', 'ext': 'mp4', 'title': 'La nuova spettacolare attrazione in Norvegia: il ponte sopra Vøringsfossen', 'description': 'md5:18b35a291f6746c0c8dacd16e5f5f4f8', 'uploader': 'DOVE Viaggi', } }, { 'url': 'https://video.gazzetta.it/video-motogp-catalogna-cadute-dovizioso-vale-rossi/49612410-00ca-11eb-bcd8-30d4253e0140?vclk=Videobar', 'md5': 'eedc1b5defd18e67383afef51ff7bdf9', 'info_dict': { 'id': '49612410-00ca-11eb-bcd8-30d4253e0140', 'ext': 'mp4', 'title': 'Dovizioso, il contatto con Zarco e la caduta. E anche Vale finisce a terra', 'description': 'md5:8c6e905dc3b9413218beca11ebd69778', 'uploader': 'AMorici', } }, { 'url': 'https://video.corriere.it/video-360/metro-copenaghen-tutta-italiana/a248a7f0-e2db-11e9-9830-af2de6b1f945', 'match_only': True }] class RCSVariousIE(RCSBaseIE): _VALID_URL = r'''(?x)https?://www\. (?P<cdn> leitv\.it| youreporter\.it )/(?:[^/]+/)?(?P<id>[^/]+?)(?:$|\?|/)''' _TESTS = [{ 'url': 'https://www.leitv.it/benessere/mal-di-testa-come-combatterlo-ed-evitarne-la-comparsa/', 'md5': '92b4e63667b8f95acb0a04da25ae28a1', 'info_dict': { 'id': 'mal-di-testa-come-combatterlo-ed-evitarne-la-comparsa', 'ext': 'mp4', 'title': 'Cervicalgia e mal di testa, il video con i suggerimenti dell\'esperto', 'description': 'md5:ae21418f34cee0b8d02a487f55bcabb5', 'uploader': 'leitv.it', } }, { 'url': 'https://www.youreporter.it/fiume-sesia-3-ottobre-2020/', 'md5': '8dccd436b47a830bab5b4a88232f391a', 'info_dict': { 'id': 'fiume-sesia-3-ottobre-2020', 'ext': 'mp4', 'title': 'Fiume Sesia 3 ottobre 2020', 'description': 'md5:0070eef1cc884d13c970a4125063de55', 'uploader': 'youreporter.it', } }] yt-dlp-2022.08.19/yt_dlp/extractor/rcti.py import json import random import time from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( dict_get, ExtractorError, strip_or_none, traverse_obj, try_get ) class RCTIPlusBaseIE(InfoExtractor): def _real_initialize(self): self._AUTH_KEY = self._download_json(
'https://api.rctiplus.com/api/v1/visitor?platform=web', # platform can be web, mweb, android, ios None, 'Fetching authorization key')['data']['access_token'] def _call_api(self, url, video_id, note=None): json = self._download_json( url, video_id, note=note, headers={'Authorization': self._AUTH_KEY}) if json.get('status', {}).get('code', 0) != 0: raise ExtractorError(f'{self.IE_NAME} said: {json["status"]["message_client"]}', cause=json) return json.get('data'), json.get('meta') class RCTIPlusIE(RCTIPlusBaseIE): _VALID_URL = r'https://www\.rctiplus\.com/(?:programs/\d+?/.*?/)?(?P<type>episode|clip|extra|live-event|missed-event)/(?P<id>\d+)/(?P<display_id>[^/?#&]+)' _TESTS = [{ 'url': 'https://www.rctiplus.com/programs/1259/kiko-untuk-lola/episode/22124/untuk-lola', 'md5': '56ed45affad45fa18d5592a1bc199997', 'info_dict': { 'id': 'v_e22124', 'title': 'Untuk Lola', 'display_id': 'untuk-lola', 'description': 'md5:2b809075c0b1e071e228ad6d13e41deb', 'ext': 'mp4', 'duration': 1400, 'timestamp': 1615978800, 'upload_date': '20210317', 'series': 'Kiko : Untuk Lola', 'season_number': 1, 'episode_number': 1, 'channel': 'RCTI', }, 'params': { 'fixup': 'never', }, }, { # Clip; Series title doesn't appear on metadata JSON 'url': 'https://www.rctiplus.com/programs/316/cahaya-terindah/clip/3921/make-a-wish', 'md5': 'd179b2ff356f0e91a53bcc6a4d8504f0', 'info_dict': { 'id': 'v_c3921', 'title': 'Make A Wish', 'display_id': 'make-a-wish', 'description': 'Make A Wish', 'ext': 'mp4', 'duration': 288, 'timestamp': 1571652600, 'upload_date': '20191021', 'series': 'Cahaya Terindah', 'channel': 'RCTI', }, 'params': { 'fixup': 'never', }, }, { # Extra 'url': 'https://www.rctiplus.com/programs/616/inews-malam/extra/9438/diungkapkan-melalui-surat-terbuka-ceo-ruangguru-belva-devara-mundur-dari-staf-khusus-presiden', 'md5': 'c48106afdbce609749f5e0c007d9278a', 'info_dict': { 'id': 'v_ex9438', 'title': 'md5:2ede828c0f8bde249e0912be150314ca', 'display_id': 'md5:62b8d4e9ff096db527a1ad797e8a9933', 'description': 'md5:2ede828c0f8bde249e0912be150314ca', 'ext': 'mp4', 'duration': 93, 'timestamp': 1587561540, 'upload_date': '20200422', 'series': 'iNews Malam', 'channel': 'INews', }, }, { # Missed event/replay 'url': 'https://www.rctiplus.com/missed-event/2507/mou-signing-ceremony-27-juli-2021-1400-wib', 'md5': '649c5f27250faed1452ca8b91e06922d', 'info_dict': { 'id': 'v_pe2507', 'title': 'MOU Signing Ceremony | 27 Juli 2021 | 14.00 WIB', 'display_id': 'mou-signing-ceremony-27-juli-2021-1400-wib', 'ext': 'mp4', 'timestamp': 1627142400, 'upload_date': '20210724', 'was_live': True, 'release_timestamp': 1627369200, }, 'params': { 'fixup': 'never', }, }, { # Live event; Cloudfront CDN 'url': 'https://www.rctiplus.com/live-event/2530/dai-muda-charging-imun-dengan-iman-4-agustus-2021-1600-wib', 'info_dict': { 'id': 'v_le2530', 'title': 'Dai Muda : Charging Imun dengan Iman | 4 Agustus 2021 | 16.00 WIB', 'display_id': 'dai-muda-charging-imun-dengan-iman-4-agustus-2021-1600-wib', 'ext': 'mp4', 'timestamp': 1627898400, 'upload_date': '20210802', 'release_timestamp': 1628067600, }, 'params': { 'skip_download': True, }, 'skip': 'This live event has ended.', }, { # TV; live_at is null 'url': 'https://www.rctiplus.com/live-event/1/rcti', 'info_dict': { 'id': 'v_lt1', 'title': 'RCTI', 'display_id': 'rcti', 'ext': 'mp4', 'timestamp': 1546344000, 'upload_date': '20190101', 'is_live': True, }, 'params': { 'skip_download': True, }, }] _CONVIVA_JSON_TEMPLATE = { 't': 'CwsSessionHb', 'cid': 'ff84ae928c3b33064b76dec08f12500465e59a6f', 'clid': '0', 
'sid': 0, 'seq': 0, 'caps': 0, 'sf': 7, 'sdk': True, } def _real_extract(self, url): match = self._match_valid_url(url).groupdict() video_type, video_id, display_id = match['type'], match['id'], match['display_id'] url_api_version = 'v2' if video_type == 'missed-event' else 'v1' appier_id = '23984824_' + str(random.randint(0, 10000000000)) # Based on the webpage's uuidRandom generator video_json = self._call_api( f'https://api.rctiplus.com/api/{url_api_version}/{video_type}/{video_id}/url?appierid={appier_id}', display_id, 'Downloading video URL JSON')[0] video_url = video_json['url'] is_upcoming = try_get(video_json, lambda x: x['current_date'] < x['live_at']) if is_upcoming is None: is_upcoming = try_get(video_json, lambda x: x['current_date'] < x['start_date']) if is_upcoming: self.raise_no_formats( 'This event will start at %s.' % video_json['live_label'] if video_json.get('live_label') else 'This event has not started yet.', expected=True) if 'akamaized' in video_url: # For some videos hosted on Akamai's CDN (possibly AES-encrypted ones?), a session needs to at least be made via Conviva's API conviva_json_data = { **self._CONVIVA_JSON_TEMPLATE, 'url': video_url, 'sst': int(time.time()) } conviva_json_res = self._download_json( 'https://ff84ae928c3b33064b76dec08f12500465e59a6f.cws.conviva.com/0/wsg', display_id, 'Creating Conviva session', 'Failed to create Conviva session', fatal=False, data=json.dumps(conviva_json_data).encode('utf-8')) if conviva_json_res and conviva_json_res.get('err') != 'ok': self.report_warning('Conviva said: %s' % str(conviva_json_res.get('err'))) video_meta, meta_paths = self._call_api( 'https://api.rctiplus.com/api/v1/%s/%s' % (video_type, video_id), display_id, 'Downloading video metadata') thumbnails, image_path = [], meta_paths.get('image_path', 'https://rstatic.akamaized.net/media/') if video_meta.get('portrait_image'): thumbnails.append({ 'id': 'portrait_image', 'url': '%s%d%s' % (image_path, 2000, video_meta['portrait_image']) # 2000px seems to be the highest resolution that can be given }) if video_meta.get('landscape_image'): thumbnails.append({ 'id': 'landscape_image', 'url': '%s%d%s' % (image_path, 2000, video_meta['landscape_image']) }) try: formats = self._extract_m3u8_formats(video_url, display_id, 'mp4', headers={'Referer': 'https://www.rctiplus.com/'}) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: self.raise_geo_restricted(countries=['ID'], metadata_available=True) else: raise e for f in formats: if 'akamaized' in f['url'] or 'cloudfront' in f['url']: f.setdefault('http_headers', {})['Referer'] = 'https://www.rctiplus.com/' # Referer header is required for akamai/cloudfront CDNs self._sort_formats(formats) return { 'id': video_meta.get('product_id') or video_json.get('product_id'), 'title': dict_get(video_meta, ('title', 'name')) or dict_get(video_json, ('content_name', 'assets_name')), 'display_id': display_id, 'description': video_meta.get('summary'), 'timestamp': video_meta.get('release_date') or video_json.get('start_date'), 'duration': video_meta.get('duration'), 'categories': [video_meta['genre']] if video_meta.get('genre') else None, 'average_rating': video_meta.get('star_rating'), 'series': video_meta.get('program_title') or video_json.get('program_title'), 'season_number': video_meta.get('season'), 'episode_number': video_meta.get('episode'), 'channel': video_json.get('tv_name'), 'channel_id': video_json.get('tv_id'), 'formats': formats, 'thumbnails': thumbnails, 'is_live': video_type == 
'live-event' and not is_upcoming, 'was_live': video_type == 'missed-event', 'live_status': 'is_upcoming' if is_upcoming else None, 'release_timestamp': video_json.get('live_at'), } class RCTIPlusSeriesIE(RCTIPlusBaseIE): _VALID_URL = r'https://www\.rctiplus\.com/programs/(?P<id>\d+)/(?P<display_id>[^/?#&]+)(?:/(?P<type>episodes|extras|clips))?' _TESTS = [{ 'url': 'https://www.rctiplus.com/programs/829/putri-untuk-pangeran', 'playlist_mincount': 1019, 'info_dict': { 'id': '829', 'title': 'Putri Untuk Pangeran', 'description': 'md5:aca7b54d05bd95a67d4f4613cc1d622d', 'age_limit': 2, 'cast': ['Verrel Bramasta', 'Ranty Maria', 'Riza Syah', 'Ivan Fadilla', 'Nicole Parham', 'Dll', 'Aviv Elham'], 'display_id': 'putri-untuk-pangeran', 'tag': 'count:18', }, }, { # No episodes 'url': 'https://www.rctiplus.com/programs/615/inews-pagi', 'playlist_mincount': 388, 'info_dict': { 'id': '615', 'title': 'iNews Pagi', 'description': 'md5:f18ee3d4643cfb41c358e5a9b693ee04', 'age_limit': 2, 'tag': 'count:11', 'display_id': 'inews-pagi', } }] _AGE_RATINGS = { # Based off https://id.wikipedia.org/wiki/Sistem_rating_konten_televisi with additional ratings 'S-SU': 2, 'SU': 2, 'P': 2, 'A': 7, 'R': 13, 'R-R/1': 17, # Labelled as 17+ despite being R 'D': 18, } @classmethod def suitable(cls, url): return False if RCTIPlusIE.suitable(url) else super(RCTIPlusSeriesIE, cls).suitable(url) def _entries(self, url, display_id=None, note='Downloading entries JSON', metadata={}): total_pages = 0 try: total_pages = self._call_api( '%s&length=20&page=0' % url, display_id, note)[1]['pagination']['total_page'] except ExtractorError as e: if 'not found' in str(e): return [] raise e if total_pages <= 0: return [] for page_num in range(1, total_pages + 1): episode_list = self._call_api( '%s&length=20&page=%s' % (url, page_num), display_id, '%s page %s' % (note, page_num))[0] or [] for video_json in episode_list: yield { '_type': 'url', 'url': video_json['share_link'], 'ie_key': RCTIPlusIE.ie_key(), 'id': video_json.get('product_id'), 'title': video_json.get('title'), 'display_id': video_json.get('title_code').replace('_', '-'), 'description': video_json.get('summary'), 'timestamp': video_json.get('release_date'), 'duration': video_json.get('duration'), 'season_number': video_json.get('season'), 'episode_number': video_json.get('episode'), **metadata } def _series_entries(self, series_id, display_id=None, video_type=None, metadata={}): if not video_type or video_type in 'episodes': try: seasons_list = self._call_api( f'https://api.rctiplus.com/api/v1/program/{series_id}/season', display_id, 'Downloading seasons list JSON')[0] except ExtractorError as e: if 'not found' not in str(e): raise seasons_list = [] for season in seasons_list: yield from self._entries( f'https://api.rctiplus.com/api/v2/program/{series_id}/episode?season={season["season"]}', display_id, f'Downloading season {season["season"]} episode entries', metadata) if not video_type or video_type in 'extras': yield from self._entries( f'https://api.rctiplus.com/api/v2/program/{series_id}/extra?content_id=0', display_id, 'Downloading extra entries', metadata) if not video_type or video_type in 'clips': yield from self._entries( f'https://api.rctiplus.com/api/v2/program/{series_id}/clip?content_id=0', display_id, 'Downloading clip entries', metadata) def _real_extract(self, url): series_id, display_id, video_type = self._match_valid_url(url).group('id', 'display_id', 'type') if video_type: self.report_warning( f'Only {video_type} will be downloaded. 
' f'To download everything from the series, remove "/{video_type}" from the URL') series_meta, meta_paths = self._call_api( f'https://api.rctiplus.com/api/v1/program/{series_id}/detail', display_id, 'Downloading series metadata') metadata = { 'age_limit': try_get(series_meta, lambda x: self._AGE_RATINGS[x['age_restriction'][0]['code']]), 'cast': traverse_obj(series_meta, (('starring', 'creator', 'writer'), ..., 'name'), expected_type=lambda x: strip_or_none(x) or None), 'tag': traverse_obj(series_meta, ('tag', ..., 'name'), expected_type=lambda x: strip_or_none(x) or None), } return self.playlist_result( self._series_entries(series_id, display_id, video_type, metadata), series_id, series_meta.get('title'), series_meta.get('summary'), display_id=display_id, **metadata) class RCTIPlusTVIE(RCTIPlusBaseIE): _VALID_URL = r'https://www\.rctiplus\.com/((tv/(?P<tvname>\w+))|(?P<eventname>live-event|missed-event))' _TESTS = [{ 'url': 'https://www.rctiplus.com/tv/rcti', 'info_dict': { 'id': 'v_lt1', 'title': 'RCTI', 'ext': 'mp4', 'timestamp': 1546344000, 'upload_date': '20190101', }, 'params': { 'skip_download': True, } }, { # Returned video will always change 'url': 'https://www.rctiplus.com/live-event', 'only_matching': True, }, { # Returned video will also always change 'url': 'https://www.rctiplus.com/missed-event', 'only_matching': True, }] @classmethod def suitable(cls, url): return False if RCTIPlusIE.suitable(url) else super(RCTIPlusTVIE, cls).suitable(url) def _real_extract(self, url): match = self._match_valid_url(url).groupdict() tv_id = match.get('tvname') or match.get('eventname') webpage = self._download_webpage(url, tv_id) video_type, video_id = self._search_regex( r'url\s*:\s*["\']https://api\.rctiplus\.com/api/v./(?P<type>[^/]+)/(?P<id>\d+)/url', webpage, 'video link', group=('type', 'id')) return self.url_result(f'https://www.rctiplus.com/{video_type}/{video_id}/{tv_id}', 'RCTIPlus') yt-dlp-2022.08.19/yt_dlp/extractor/rds.py from .common import InfoExtractor from ..utils import ( parse_duration, parse_iso8601, js_to_json, ) from ..compat import compat_str class RDSIE(InfoExtractor): IE_DESC = 'RDS.ca' _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<id>[^/]+)-\d+\.\d+' _TESTS = [{ # has two 9c9media ContentPackages, the web player selects the first ContentPackage 'url': 'https://www.rds.ca/videos/Hockey/NationalHockeyLeague/teams/9/forum-du-5-a-7-jesperi-kotkaniemi-de-retour-de-finlande-3.1377606', 'info_dict': { 'id': '2083309', 'display_id': 'forum-du-5-a-7-jesperi-kotkaniemi-de-retour-de-finlande', 'ext': 'flv', 'title': 'Forum du 5 à 7 : Kotkaniemi de retour de Finlande', 'description':
'md5:83fa38ecc4a79b19e433433254077f25', 'timestamp': 1606129030, 'upload_date': '20201123', 'duration': 773.039, } }, { 'url': 'http://www.rds.ca/vid%C3%A9os/un-voyage-positif-3.877934', 'only_matching': True, }] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) item = self._parse_json(self._search_regex(r'(?s)itemToPush\s*=\s*({.+?});', webpage, 'item'), display_id, js_to_json) video_id = compat_str(item['id']) title = item.get('title') or self._og_search_title(webpage) or self._html_search_meta( 'title', webpage, 'title', fatal=True) description = self._og_search_description(webpage) or self._html_search_meta( 'description', webpage, 'description') thumbnail = item.get('urlImageBig') or self._og_search_thumbnail(webpage) or self._search_regex( [r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"', r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'], webpage, 'thumbnail', fatal=False) timestamp = parse_iso8601(self._search_regex( r'<span[^>]+itemprop="uploadDate"[^>]+content="([^"]+)"', webpage, 'upload date', fatal=False)) duration = parse_duration(self._search_regex( r'<span[^>]+itemprop="duration"[^>]+content="([^"]+)"', webpage, 'duration', fatal=False)) age_limit = self._family_friendly_search(webpage) return { '_type': 'url_transparent', 'id': video_id, 'display_id': display_id, 'url': '9c9media:rds_web:%s' % video_id, 'title': title, 'description': description, 'thumbnail': thumbnail, 'timestamp': timestamp, 'duration': duration, 'age_limit': age_limit, 'ie_key': 'NineCNineMedia', } yt-dlp-2022.08.19/yt_dlp/extractor/redbee.py import json import re import time import urllib.parse import uuid from .common import InfoExtractor from ..utils import ( ExtractorError, float_or_none, int_or_none, strip_or_none, traverse_obj, unified_timestamp, ) class RedBeeBaseIE(InfoExtractor): _DEVICE_ID = str(uuid.uuid4()) @property def _API_URL(self): """ Ref: https://apidocs.emp.ebsd.ericsson.net Subclasses must set _REDBEE_CUSTOMER, _REDBEE_BUSINESS_UNIT """ return f'https://exposure.api.redbee.live/v2/customer/{self._REDBEE_CUSTOMER}/businessunit/{self._REDBEE_BUSINESS_UNIT}' def _get_bearer_token(self, asset_id, jwt=None): request = { 'deviceId': self._DEVICE_ID, 'device': { 'deviceId': self._DEVICE_ID, 'name': 'Mozilla Firefox 102', 'type': 'WEB', }, } if jwt: request['jwt'] = jwt return self._download_json( f'{self._API_URL}/auth/{"gigyaLogin" if jwt else "anonymous"}', asset_id, data=json.dumps(request).encode('utf-8'), headers={ 'Content-Type': 'application/json;charset=utf-8' })['sessionToken'] def _get_formats_and_subtitles(self, asset_id, **kwargs): bearer_token = self._get_bearer_token(asset_id, **kwargs) api_response = self._download_json( f'{self._API_URL}/entitlement/{asset_id}/play', asset_id, headers={
'Authorization': f'Bearer {bearer_token}', 'Accept': 'application/json, text/plain, */*' }) formats, subtitles = [], {} for format in api_response['formats']: if not format.get('mediaLocator'): continue fmts, subs = [], {} if format.get('format') == 'DASH': fmts, subs = self._extract_mpd_formats_and_subtitles( format['mediaLocator'], asset_id, fatal=False) elif format.get('format') == 'SMOOTHSTREAMING': fmts, subs = self._extract_ism_formats_and_subtitles( format['mediaLocator'], asset_id, fatal=False) elif format.get('format') == 'HLS': fmts, subs = self._extract_m3u8_formats_and_subtitles( format['mediaLocator'], asset_id, fatal=False) if format.get('drm'): for f in fmts: f['has_drm'] = True formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) return formats, subtitles class ParliamentLiveUKIE(RedBeeBaseIE): IE_NAME = 'parliamentlive.tv' IE_DESC = 'UK parliament videos' _VALID_URL = r'(?i)https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' _REDBEE_CUSTOMER = 'UKParliament' _REDBEE_BUSINESS_UNIT = 'ParliamentLive' _TESTS = [{ 'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b', 'info_dict': { 'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b', 'ext': 'mp4', 'title': 'Home Affairs Committee', 'timestamp': 1395153872, 'upload_date': '20140318', 'thumbnail': r're:https?://[^?#]+c1e9d44d-fd6c-4263-b50f-97ed26cc998b[^/]*/thumbnail', }, }, { 'url': 'http://parliamentlive.tv/event/index/3f24936f-130f-40bf-9a5d-b3d6479da6a4', 'only_matching': True, }, { 'url': 'https://parliamentlive.tv/Event/Index/27cf25e4-e77b-42a3-93c5-c815cd6d7377', 'info_dict': { 'id': '27cf25e4-e77b-42a3-93c5-c815cd6d7377', 'ext': 'mp4', 'title': 'House of Commons', 'timestamp': 1658392447, 'upload_date': '20220721', 'thumbnail': r're:https?://[^?#]+27cf25e4-e77b-42a3-93c5-c815cd6d7377[^/]*/thumbnail', }, }] def _real_extract(self, url): video_id = self._match_id(url) formats, subtitles = self._get_formats_and_subtitles(video_id) self._sort_formats(formats) video_info = self._download_json( f'https://www.parliamentlive.tv/Event/GetShareVideo/{video_id}', video_id, fatal=False) self._sort_formats(formats, ['res', 'proto']) return { 'id': video_id, 'formats': formats, 'subtitles': subtitles, 'title': traverse_obj(video_info, ('event', 'title')), 'thumbnail': traverse_obj(video_info, 'thumbnailUrl'), 'timestamp': traverse_obj( video_info, ('event', 'publishedStartTime'), expected_type=unified_timestamp), } class RTBFIE(RedBeeBaseIE): _VALID_URL = r'''(?x) https?://(?:www\.)?rtbf\.be/ (?: video/[^?]+\?.*\bid=| ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=| auvio/[^/]+\?.*\b(?P<live>l)?id= )(?P<id>\d+)''' _NETRC_MACHINE = 'rtbf' _REDBEE_CUSTOMER = 'RTBF' _REDBEE_BUSINESS_UNIT = 'Auvio' _TESTS = [{ 'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274', 'md5': '8c876a1cceeb6cf31b476461ade72384', 'info_dict': { 'id': '1921274', 'ext': 'mp4', 'title': 'Les Diables au coeur (épisode 2)', 'description': '(du 25/04/2014)', 'duration': 3099.54, 'upload_date': '20140425', 'timestamp': 1398456300, }, 'skip': 'No longer available', }, { # geo restricted 'url': 'http://www.rtbf.be/ouftivi/heros/detail_scooby-doo-mysteres-associes?id=1097&videoId=2057442', 'only_matching': True, }, { 'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858', 'only_matching': True, }, { 'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996', 'only_matching': True, }, { # Live 'url': 
'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775', 'only_matching': True, }, { # Audio 'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811', 'only_matching': True, }, { # With Subtitle 'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588', 'only_matching': True, }, { 'url': 'https://www.rtbf.be/auvio/detail_investigation?id=2921926', 'md5': 'd5d11bb62169fef38d7ce7ac531e034f', 'info_dict': { 'id': '2921926', 'ext': 'mp4', 'title': 'Le handicap un confinement perpétuel - Maladie de Lyme', 'description': 'md5:dcbd5dcf6015488c9069b057c15ccc52', 'duration': 5258.8, 'upload_date': '20220727', 'timestamp': 1658934000, 'series': '#Investigation', 'thumbnail': r're:^https?://[^?&]+\.jpg$', }, }, { 'url': 'https://www.rtbf.be/auvio/detail_la-belgique-criminelle?id=2920492', 'md5': '054f9f143bc79c89647c35e5a7d35fa8', 'info_dict': { 'id': '2920492', 'ext': 'mp4', 'title': '04 - Le crime de la rue Royale', 'description': 'md5:0c3da1efab286df83f2ab3f8f96bd7a6', 'duration': 1574.6, 'upload_date': '20220723', 'timestamp': 1658596887, 'series': 'La Belgique criminelle - TV', 'thumbnail': r're:^https?://[^?&]+\.jpg$', }, }] _IMAGE_HOST = 'http://ds1.ds.static.rtbf.be' _PROVIDERS = { 'YOUTUBE': 'Youtube', 'DAILYMOTION': 'Dailymotion', 'VIMEO': 'Vimeo', } _QUALITIES = [ ('mobile', 'SD'), ('web', 'MD'), ('high', 'HD'), ] _LOGIN_URL = 'https://login.rtbf.be/accounts.login' _GIGYA_API_KEY = '3_kWKuPgcdAybqnqxq_MvHVk0-6PN8Zk8pIIkJM_yXOu-qLPDDsGOtIDFfpGivtbeO' _LOGIN_COOKIE_ID = f'glt_{_GIGYA_API_KEY}' def _perform_login(self, username, password): if self._get_cookies(self._LOGIN_URL).get(self._LOGIN_COOKIE_ID): return self._set_cookie('.rtbf.be', 'gmid', 'gmid.ver4', secure=True, expire_time=time.time() + 3600) login_response = self._download_json( self._LOGIN_URL, None, data=urllib.parse.urlencode({ 'loginID': username, 'password': password, 'APIKey': self._GIGYA_API_KEY, 'targetEnv': 'jssdk', 'sessionExpiration': '-2', }).encode('utf-8'), headers={ 'Content-Type': 'application/x-www-form-urlencoded', }) if login_response['statusCode'] != 200: raise ExtractorError('Login failed. 
Server message: %s' % login_response['errorMessage'], expected=True) self._set_cookie('.rtbf.be', self._LOGIN_COOKIE_ID, login_response['sessionInfo']['login_token'], secure=True, expire_time=time.time() + 3600) def _get_formats_and_subtitles(self, url, media_id): login_token = self._get_cookies(url).get(self._LOGIN_COOKIE_ID) if not login_token: self.raise_login_required() session_jwt = self._download_json( 'https://login.rtbf.be/accounts.getJWT', media_id, query={ 'login_token': login_token.value, 'APIKey': self._GIGYA_API_KEY, 'sdk': 'js_latest', 'authMode': 'cookie', 'pageURL': url, 'sdkBuild': '13273', 'format': 'json', })['id_token'] return super()._get_formats_and_subtitles(media_id, jwt=session_jwt) def _real_extract(self, url): live, media_id = self._match_valid_url(url).groups() embed_page = self._download_webpage( 'https://www.rtbf.be/auvio/embed/' + ('direct' if live else 'media'), media_id, query={'id': media_id}) media_data = self._html_search_regex(r'data-media="([^"]+)"', embed_page, 'media data', fatal=False) if not media_data: if re.search(r'<div[^>]+id="js-error-expired"[^>]+class="(?![^"]*hidden)', embed_page): raise ExtractorError('Livestream has ended.', expected=True) if re.search(r'<div[^>]+id="js-sso-connect"[^>]+class="(?![^"]*hidden)', embed_page): self.raise_login_required() raise ExtractorError('Could not find media data') data = self._parse_json(media_data, media_id) error = data.get('error') if error: raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) provider = data.get('provider') if provider in self._PROVIDERS: return self.url_result(data['url'], self._PROVIDERS[provider]) title = traverse_obj(data, 'subtitle', 'title') is_live = data.get('isLive') height_re = r'-(\d+)p\.' formats, subtitles = [], {} # The old api still returns m3u8 and mpd manifest for livestreams, but these are 'fake' # since all they contain is a 20s video that is completely unrelated. # https://github.com/yt-dlp/yt-dlp/issues/4656#issuecomment-1214461092 m3u8_url = None if data.get('isLive') else traverse_obj(data, 'urlHlsAes128', 'urlHls') if m3u8_url: fmts, subs = self._extract_m3u8_formats_and_subtitles( m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) fix_url = lambda x: x.replace('//rtbf-vod.', '//rtbf.') if '/geo/drm/' in x else x http_url = data.get('url') if formats and http_url and re.search(height_re, http_url): http_url = fix_url(http_url) for m3u8_f in formats[:]: height = m3u8_f.get('height') if not height: continue f = m3u8_f.copy() del f['protocol'] f.update({ 'format_id': m3u8_f['format_id'].replace('hls-', 'http-'), 'url': re.sub(height_re, '-%dp.' 
% height, http_url), }) formats.append(f) else: sources = data.get('sources') or {} for key, format_id in self._QUALITIES: format_url = sources.get(key) if not format_url: continue height = int_or_none(self._search_regex( height_re, format_url, 'height', default=None)) formats.append({ 'format_id': format_id, 'url': fix_url(format_url), 'height': height, }) mpd_url = None if data.get('isLive') else data.get('urlDash') if mpd_url and (self.get_param('allow_unplayable_formats') or not data.get('drm')): fmts, subs = self._extract_mpd_formats_and_subtitles( mpd_url, media_id, mpd_id='dash', fatal=False) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) audio_url = data.get('urlAudio') if audio_url: formats.append({ 'format_id': 'audio', 'url': audio_url, 'vcodec': 'none', }) for track in (data.get('tracks') or {}).values(): sub_url = track.get('url') if not sub_url: continue subtitles.setdefault(track.get('lang') or 'fr', []).append({ 'url': sub_url, }) if not formats: fmts, subs = self._get_formats_and_subtitles(url, f'live_{media_id}' if is_live else media_id) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) self._sort_formats(formats, ['res', 'proto']) return { 'id': media_id, 'formats': formats, 'title': title, 'description': strip_or_none(data.get('description')), 'thumbnail': data.get('thumbnail'), 'duration': float_or_none(data.get('realDuration')), 'timestamp': int_or_none(data.get('liveFrom')), 'series': data.get('programLabel'), 'subtitles': subtitles, 'is_live': is_live, } yt-dlp-2022.08.19/yt_dlp/extractor/redbulltv.py from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( float_or_none, ExtractorError, ) class RedBullTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)(?:/events/[^/]+)?/(?:videos?|live|(?:film|episode)s)/(?P<id>AP-\w+)' _TESTS = [{ # film 'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11', 'md5': 'fb0445b98aa4394e504b413d98031d1f', 'info_dict': { 'id': 'AP-1Q6XCDTAN1W11', 'ext': 'mp4', 'title': 'ABC of... WRC - ABC of...
S1E6', 'description': 'md5:5c7ed8f4015c8492ecf64b6ab31e7d31', 'duration': 1582.04, }, }, { # episode 'url': 'https://www.redbull.tv/video/AP-1PMHKJFCW1W11', 'info_dict': { 'id': 'AP-1PMHKJFCW1W11', 'ext': 'mp4', 'title': 'Grime - Hashtags S2E4', 'description': 'md5:5546aa612958c08a98faaad4abce484d', 'duration': 904, }, 'params': { 'skip_download': True, }, }, { 'url': 'https://www.redbull.com/int-en/tv/video/AP-1UWHCAR9S1W11/rob-meets-sam-gaze?playlist=playlists::3f81040a-2f31-4832-8e2e-545b1d39d173', 'only_matching': True, }, { 'url': 'https://www.redbull.com/us-en/videos/AP-1YM9QCYE52111', 'only_matching': True, }, { 'url': 'https://www.redbull.com/us-en/events/AP-1XV2K61Q51W11/live/AP-1XUJ86FDH1W11', 'only_matching': True, }, { 'url': 'https://www.redbull.com/int-en/films/AP-1ZSMAW8FH2111', 'only_matching': True, }, { 'url': 'https://www.redbull.com/int-en/episodes/AP-1TQWK7XE11W11', 'only_matching': True, }] def extract_info(self, video_id): session = self._download_json( 'https://api.redbull.tv/v3/session', video_id, note='Downloading access token', query={ 'category': 'personal_computer', 'os_family': 'http', }) if session.get('code') == 'error': raise ExtractorError('%s said: %s' % ( self.IE_NAME, session['message'])) token = session['token'] try: video = self._download_json( 'https://api.redbull.tv/v3/products/' + video_id, video_id, note='Downloading video information', headers={'Authorization': token} ) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: error_message = self._parse_json( e.cause.read().decode(), video_id)['error'] raise ExtractorError('%s said: %s' % ( self.IE_NAME, error_message), expected=True) raise title = video['title'].strip() formats, subtitles = self._extract_m3u8_formats_and_subtitles( 'https://dms.redbull.tv/v3/%s/%s/playlist.m3u8' % (video_id, token), video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') self._sort_formats(formats) for resource in video.get('resources', []): if resource.startswith('closed_caption_'): splitted_resource = resource.split('_') if splitted_resource[2]: subtitles.setdefault('en', []).append({ 'url': 'https://resources.redbull.tv/%s/%s' % (video_id, resource), 'ext': splitted_resource[2], }) subheading = video.get('subheading') if subheading: title += ' - %s' % subheading return { 'id': video_id, 'title': title, 'description': video.get('long_description') or video.get( 'short_description'), 'duration': float_or_none(video.get('duration'), scale=1000), 'formats': formats, 'subtitles': subtitles, } def _real_extract(self, url): video_id = self._match_id(url) return self.extract_info(video_id) class RedBullEmbedIE(RedBullTVIE): _VALID_URL = r'https?://(?:www\.)?redbull\.com/embed/(?P<id>rrn:content:[^:]+:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}:[a-z]{2}-[A-Z]{2,3})' _TESTS = [{ # HLS manifest accessible only using assetId 'url': 'https://www.redbull.com/embed/rrn:content:episode-videos:f3021f4f-3ed4-51ac-915a-11987126e405:en-INT', 'only_matching': True, }] _VIDEO_ESSENSE_TMPL = '''... 
on %s {
      videoEssence {
        attributes
      }
    }'''

    def _real_extract(self, url):
        rrn_id = self._match_id(url)
        asset_id = self._download_json(
            'https://edge-graphql.crepo-production.redbullaws.com/v1/graphql',
            rrn_id, headers={
                'Accept': 'application/json',
                'API-KEY': 'e90a1ff11335423998b100c929ecc866',
            }, query={
                'query': '''{
    resource(id: "%s", enforceGeoBlocking: false) {
        %s
        %s
    }
}''' % (rrn_id, self._VIDEO_ESSENSE_TMPL % 'LiveVideo', self._VIDEO_ESSENSE_TMPL % 'VideoResource'),
            })['data']['resource']['videoEssence']['attributes']['assetId']
        return self.extract_info(asset_id)


class RedBullTVRrnContentIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?redbull\.com/(?P<region>[a-z]{2,3})-(?P<lang>[a-z]{2})/tv/(?:video|live|film)/(?P<id>rrn:content:[^:]+:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
    _TESTS = [{
        'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:live-videos:e3e6feb4-e95f-50b7-962a-c70f8fd13c73/mens-dh-finals-fort-william',
        'only_matching': True,
    }, {
        'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:videos:a36a0f36-ff1b-5db8-a69d-ee11a14bf48b/tn-ts-style?playlist=rrn:content:event-profiles:83f05926-5de8-5389-b5e4-9bb312d715e8:extras',
        'only_matching': True,
    }, {
        'url': 'https://www.redbull.com/int-en/tv/film/rrn:content:films:d1f4d00e-4c04-5d19-b510-a805ffa2ab83/follow-me',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        region, lang, rrn_id = self._match_valid_url(url).groups()
        rrn_id += ':%s-%s' % (lang, region.upper())
        return self.url_result(
            'https://www.redbull.com/embed/' + rrn_id,
            RedBullEmbedIE.ie_key(), rrn_id)


class RedBullIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?redbull\.com/(?P<region>[a-z]{2,3})-(?P<lang>[a-z]{2})/(?P<type>(?:episode|film|(?:(?:recap|trailer)-)?video)s|live)/(?!AP-|rrn:content:)(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.redbull.com/int-en/episodes/grime-hashtags-s02-e04',
        'md5': 'db8271a7200d40053a1809ed0dd574ff',
        'info_dict': {
            'id': 'AA-1MT8DQWA91W14',
            'ext': 'mp4',
            'title': 'Grime - Hashtags S2E4',
            'description': 'md5:5546aa612958c08a98faaad4abce484d',
        },
    }, {
        'url': 'https://www.redbull.com/int-en/films/kilimanjaro-mountain-of-greatness',
        'only_matching': True,
    }, {
        'url': 'https://www.redbull.com/int-en/recap-videos/uci-mountain-bike-world-cup-2017-mens-xco-finals-from-vallnord',
        'only_matching': True,
    }, {
        'url': 'https://www.redbull.com/int-en/trailer-videos/kings-of-content',
        'only_matching': True,
    }, {
        'url': 'https://www.redbull.com/int-en/videos/tnts-style-red-bull-dance-your-style-s1-e12',
        'only_matching': True,
    }, {
        'url': 'https://www.redbull.com/int-en/live/mens-dh-finals-fort-william',
        'only_matching': True,
    }, {
        # only available on the int-en website, so a fallback is needed for the API
        # https://www.redbull.com/v3/api/graphql/v1/v3/query/en-GB>en-INT?filter[uriSlug]=fia-wrc-saturday-recap-estonia&rb3Schema=v1:hero
        'url': 'https://www.redbull.com/gb-en/live/fia-wrc-saturday-recap-estonia',
        'only_matching': True,
    }]
    _INT_FALLBACK_LIST = ['de', 'en', 'es', 'fr']
    _LAT_FALLBACK_MAP = ['ar', 'bo', 'car', 'cl', 'co', 'mx', 'pe']

    def _real_extract(self, url):
        region, lang, filter_type, display_id = self._match_valid_url(url).groups()
        if filter_type == 'episodes':
            filter_type = 'episode-videos'
        elif filter_type == 'live':
            filter_type = 'live-videos'

        regions = [region.upper()]
        if region != 'int':
            if region in self._LAT_FALLBACK_MAP:
                regions.append('LAT')
            if lang in self._INT_FALLBACK_LIST:
                regions.append('INT')
        locale = '>'.join(['%s-%s' % (lang, reg) for reg in regions])

        rrn_id =
self._download_json( 'https://www.redbull.com/v3/api/graphql/v1/v3/query/' + locale, display_id, query={ 'filter[type]': filter_type, 'filter[uriSlug]': display_id, 'rb3Schema': 'v1:hero', })['data']['id'] return self.url_result( 'https://www.redbull.com/embed/' + rrn_id, RedBullEmbedIE.ie_key(), rrn_id) ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/reddit.py��������������������������������������������������������0000664�0000000�0000000�00000014760�14277552437�0020513�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import random from urllib.parse import urlparse from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, float_or_none, try_get, unescapeHTML, url_or_none, traverse_obj ) class RedditIE(InfoExtractor): _VALID_URL = r'https?://(?P<subdomain>[^/]+\.)?reddit(?:media)?\.com/r/(?P<slug>[^/]+/comments/(?P<id>[^/?#&]+))' _TESTS = [{ 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/', 'info_dict': { 'id': 'zv89llsvexdz', 'ext': 'mp4', 'display_id': '6rrwyj', 'title': 'That small heart attack.', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'thumbnails': 'count:4', 'timestamp': 1501941939, 'upload_date': '20170805', 'uploader': 'Antw87', 'duration': 12, 'like_count': int, 'dislike_count': int, 'comment_count': int, 'age_limit': 0, }, 'params': { 'skip_download': True, }, }, { 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj', 'only_matching': True, }, { # imgur 'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/', 'only_matching': True, }, { # imgur @ old reddit 'url': 'https://old.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/', 'only_matching': True, }, { # streamable 'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/', 'only_matching': True, }, { # youtube 'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/', 'only_matching': True, }, { # reddit video @ nm reddit 'url': 'https://nm.reddit.com/r/Cricket/comments/8idvby/lousy_cameraman_finds_himself_in_cairns_line_of/', 'only_matching': True, }, { 'url': 'https://www.redditmedia.com/r/serbia/comments/pu9wbx/ako_vu%C4%8Di%C4%87_izgubi_izbore_ja_%C4%87u_da_crknem/', 'only_matching': True, }] @staticmethod def _gen_session_id(): id_length = 16 rand_max = 1 << (id_length * 4) return '%0.*x' % (id_length, random.randrange(rand_max)) def _real_extract(self, url): subdomain, slug, video_id = self._match_valid_url(url).group('subdomain', 'slug', 'id') self._set_cookie('.reddit.com', 'reddit_session', self._gen_session_id()) self._set_cookie('.reddit.com', '_options', '%7B%22pref_quarantine_optin%22%3A%20true%7D') data = self._download_json(f'https://{subdomain}reddit.com/r/{slug}/.json', video_id, 
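            # non-fatal so that the old.reddit.com fallback below can still
            # run if the requested subdomain rejects the request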
fatal=False) if not data: # Fall back to old.reddit.com in case the requested subdomain fails data = self._download_json(f'https://old.reddit.com/r/{slug}/.json', video_id) data = data[0]['data']['children'][0]['data'] video_url = data['url'] # Avoid recursing into the same reddit URL if 'reddit.com/' in video_url and '/%s/' % video_id in video_url: raise ExtractorError('No media found', expected=True) over_18 = data.get('over_18') if over_18 is True: age_limit = 18 elif over_18 is False: age_limit = 0 else: age_limit = None thumbnails = [] def add_thumbnail(src): if not isinstance(src, dict): return thumbnail_url = url_or_none(src.get('url')) if not thumbnail_url: return thumbnails.append({ 'url': unescapeHTML(thumbnail_url), 'width': int_or_none(src.get('width')), 'height': int_or_none(src.get('height')), }) for image in try_get(data, lambda x: x['preview']['images']) or []: if not isinstance(image, dict): continue add_thumbnail(image.get('source')) resolutions = image.get('resolutions') if isinstance(resolutions, list): for resolution in resolutions: add_thumbnail(resolution) info = { 'title': data.get('title'), 'thumbnails': thumbnails, 'timestamp': float_or_none(data.get('created_utc')), 'uploader': data.get('author'), 'like_count': int_or_none(data.get('ups')), 'dislike_count': int_or_none(data.get('downs')), 'comment_count': int_or_none(data.get('num_comments')), 'age_limit': age_limit, } # Check if media is hosted on reddit: reddit_video = traverse_obj(data, (('media', 'secure_media'), 'reddit_video'), get_all=False) if reddit_video: playlist_urls = [ try_get(reddit_video, lambda x: unescapeHTML(x[y])) for y in ('dash_url', 'hls_url') ] # Update video_id display_id = video_id video_id = self._search_regex( r'https?://v\.redd\.it/(?P<id>[^/?#&]+)', reddit_video['fallback_url'], 'video_id', default=display_id) dash_playlist_url = playlist_urls[0] or f'https://v.redd.it/{video_id}/DASHPlaylist.mpd' hls_playlist_url = playlist_urls[1] or f'https://v.redd.it/{video_id}/HLSPlaylist.m3u8' formats = self._extract_m3u8_formats( hls_playlist_url, display_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) formats.extend(self._extract_mpd_formats( dash_playlist_url, display_id, mpd_id='dash', fatal=False)) self._sort_formats(formats) return { **info, 'id': video_id, 'display_id': display_id, 'formats': formats, 'duration': int_or_none(reddit_video.get('duration')), } parsed_url = urlparse(video_url) if parsed_url.netloc == 'v.redd.it': self.raise_no_formats('This video is processing', expected=True, video_id=video_id) return { **info, 'id': parsed_url.path.split('/')[1], 'display_id': video_id, } # Not hosted on reddit, must continue extraction return { **info, 'display_id': video_id, '_type': 'url_transparent', 'url': video_url, } ����������������yt-dlp-2022.08.19/yt_dlp/extractor/redgifs.py�������������������������������������������������������0000664�0000000�0000000�00000020031�14277552437�0020647�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import functools from .common import InfoExtractor from ..compat import compat_parse_qs from ..utils import ( ExtractorError, int_or_none, qualities, try_get, OnDemandPagedList, ) class RedGifsBaseInfoExtractor(InfoExtractor): 
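    # Renditions the RedGifs v2 API exposes for each clip, mapped to the
    # height that rendition is capped at; 'hd' is uncapped, so it falls back
    # to the source height in _parse_gif_data below.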
    _FORMATS = {
        'gif': 250,
        'sd': 480,
        'hd': None,
    }

    def _parse_gif_data(self, gif_data):
        video_id = gif_data.get('id')
        quality = qualities(tuple(self._FORMATS.keys()))

        orig_height = int_or_none(gif_data.get('height'))
        aspect_ratio = try_get(gif_data, lambda x: orig_height / x['width'])

        formats = []
        for format_id, height in self._FORMATS.items():
            video_url = gif_data['urls'].get(format_id)
            if not video_url:
                continue
            height = min(orig_height, height or orig_height)
            formats.append({
                'url': video_url,
                'format_id': format_id,
                # aspect_ratio is height / width, so divide to recover this
                # rendition's width
                'width': int(height / aspect_ratio) if aspect_ratio else None,
                'height': height,
                'quality': quality(format_id),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'webpage_url': f'https://redgifs.com/watch/{video_id}',
            'ie_key': RedGifsIE.ie_key(),
            'extractor': 'RedGifs',
            'title': ' '.join(gif_data.get('tags') or []) or 'RedGifs',
            'timestamp': int_or_none(gif_data.get('createDate')),
            'uploader': gif_data.get('userName'),
            'duration': int_or_none(gif_data.get('duration')),
            'view_count': int_or_none(gif_data.get('views')),
            'like_count': int_or_none(gif_data.get('likes')),
            'categories': gif_data.get('tags') or [],
            'tags': gif_data.get('tags'),
            'age_limit': 18,
            'formats': formats,
        }

    def _call_api(self, ep, video_id, *args, **kwargs):
        data = self._download_json(
            f'https://api.redgifs.com/v2/{ep}', video_id, *args, **kwargs)
        if 'error' in data:
            raise ExtractorError(f'RedGifs said: {data["error"]}', expected=True, video_id=video_id)
        return data

    def _fetch_page(self, ep, video_id, query, page):
        query['page'] = page + 1
        data = self._call_api(
            ep, video_id, query=query, note=f'Downloading JSON metadata page {page + 1}')

        for entry in data['gifs']:
            yield self._parse_gif_data(entry)

    def _prepare_api_query(self, query, fields):
        api_query = [
            (field_name, query.get(field_name, (default,))[0])
            for field_name, default in fields.items()]

        return {key: val for key, val in api_query if val is not None}

    def _paged_entries(self, ep, item_id, query, fields):
        page = int_or_none(query.get('page', (None,))[0])
        page_fetcher = functools.partial(
            self._fetch_page, ep, item_id, self._prepare_api_query(query, fields))
        return page_fetcher(page) if page else OnDemandPagedList(page_fetcher, self._PAGE_SIZE)


class RedGifsIE(RedGifsBaseInfoExtractor):
    _VALID_URL = r'https?://(?:(?:www\.)?redgifs\.com/watch/|thumbs2\.redgifs\.com/)(?P<id>[^-/?#\.]+)'
    _TESTS = [{
        'url': 'https://www.redgifs.com/watch/squeakyhelplesswisent',
        'info_dict': {
            'id': 'squeakyhelplesswisent',
            'ext': 'mp4',
            'title': 'Hotwife Legs Thick',
            'timestamp': 1636287915,
            'upload_date': '20211107',
            'uploader': 'ignored52',
            'duration': 16,
            'view_count': int,
            'like_count': int,
            'categories': list,
            'age_limit': 18,
        }
    }, {
        'url': 'https://thumbs2.redgifs.com/SqueakyHelplessWisent-mobile.mp4#t=0',
        'info_dict': {
            'id': 'squeakyhelplesswisent',
            'ext': 'mp4',
            'title': 'Hotwife Legs Thick',
            'timestamp': 1636287915,
            'upload_date': '20211107',
            'uploader': 'ignored52',
            'duration': 16,
            'view_count': int,
            'like_count': int,
            'categories': list,
            'age_limit': 18,
        }
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url).lower()
        video_info = self._call_api(
            f'gifs/{video_id}', video_id, note='Downloading video info')
        return self._parse_gif_data(video_info['gif'])


class RedGifsSearchIE(RedGifsBaseInfoExtractor):
    IE_DESC = 'Redgifs search'
    _VALID_URL = r'https?://(?:www\.)?redgifs\.com/browse\?(?P<query>[^#]+)'
    _PAGE_SIZE = 80
    _TESTS = [
        {
            'url': 'https://www.redgifs.com/browse?tags=Lesbian',
            'info_dict': {
                'id': 'tags=Lesbian',
                'title': 'Lesbian',
'description': 'RedGifs search for Lesbian, ordered by trending' }, 'playlist_mincount': 100, }, { 'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian', 'info_dict': { 'id': 'type=g&order=latest&tags=Lesbian', 'title': 'Lesbian', 'description': 'RedGifs search for Lesbian, ordered by latest' }, 'playlist_mincount': 100, }, { 'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian&page=2', 'info_dict': { 'id': 'type=g&order=latest&tags=Lesbian&page=2', 'title': 'Lesbian', 'description': 'RedGifs search for Lesbian, ordered by latest' }, 'playlist_count': 80, } ] def _real_extract(self, url): query_str = self._match_valid_url(url).group('query') query = compat_parse_qs(query_str) if not query.get('tags'): raise ExtractorError('Invalid query tags', expected=True) tags = query.get('tags')[0] order = query.get('order', ('trending',))[0] query['search_text'] = [tags] entries = self._paged_entries('gifs/search', query_str, query, { 'search_text': None, 'order': 'trending', 'type': None, }) return self.playlist_result( entries, query_str, tags, f'RedGifs search for {tags}, ordered by {order}') class RedGifsUserIE(RedGifsBaseInfoExtractor): IE_DESC = 'Redgifs user' _VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P<username>[^/?#]+)(?:\?(?P<query>[^#]+))?' _PAGE_SIZE = 30 _TESTS = [ { 'url': 'https://www.redgifs.com/users/lamsinka89', 'info_dict': { 'id': 'lamsinka89', 'title': 'lamsinka89', 'description': 'RedGifs user lamsinka89, ordered by recent' }, 'playlist_mincount': 100, }, { 'url': 'https://www.redgifs.com/users/lamsinka89?page=3', 'info_dict': { 'id': 'lamsinka89?page=3', 'title': 'lamsinka89', 'description': 'RedGifs user lamsinka89, ordered by recent' }, 'playlist_count': 30, }, { 'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g', 'info_dict': { 'id': 'lamsinka89?order=best&type=g', 'title': 'lamsinka89', 'description': 'RedGifs user lamsinka89, ordered by best' }, 'playlist_mincount': 100, } ] def _real_extract(self, url): username, query_str = self._match_valid_url(url).group('username', 'query') playlist_id = f'{username}?{query_str}' if query_str else username query = compat_parse_qs(query_str) order = query.get('order', ('recent',))[0] entries = self._paged_entries(f'users/{username}/search', playlist_id, query, { 'order': 'recent', 'type': None, }) return self.playlist_result( entries, playlist_id, username, f'RedGifs user {username}, ordered by {order}') �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/redtube.py�������������������������������������������������������0000664�0000000�0000000�00000013741�14277552437�0020670�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import ( determine_ext, ExtractorError, 
int_or_none, merge_dicts, str_to_int, unified_strdate, url_or_none, ) class RedTubeIE(InfoExtractor): _VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)' _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)'] _TESTS = [{ 'url': 'https://www.redtube.com/38864951', 'md5': '4fba70cbca3aefd25767ab4b523c9878', 'info_dict': { 'id': '38864951', 'ext': 'mp4', 'title': 'Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu', 'description': 'Watch video Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu on Redtube, home of free Blowjob porn videos and Blonde sex movies online. Video length: (10:46) - Uploaded by leolulu - Verified User - Starring Pornstar: Leolulu', 'upload_date': '20210111', 'timestamp': 1610343109, 'duration': 646, 'view_count': int, 'age_limit': 18, 'thumbnail': r're:https://\wi-ph\.rdtcdn\.com/videos/.+/.+\.jpg', }, }, { 'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286', 'only_matching': True, }, { 'url': 'http://it.redtube.com/66418', 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( 'http://www.redtube.com/%s' % video_id, video_id) ERRORS = ( (('video-deleted-info', '>This video has been removed'), 'has been removed'), (('private_video_text', '>This video is private', '>Send a friend request to its owner to be able to view it'), 'is private'), ) for patterns, message in ERRORS: if any(p in webpage for p in patterns): raise ExtractorError( 'Video %s %s' % (video_id, message), expected=True) info = self._search_json_ld(webpage, video_id, default={}) if not info.get('title'): info['title'] = self._html_search_regex( (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle|video_title)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>', r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',), webpage, 'title', group='title', default=None) or self._og_search_title(webpage) formats = [] sources = self._parse_json( self._search_regex( r'sources\s*:\s*({.+?})', webpage, 'source', default='{}'), video_id, fatal=False) if sources and isinstance(sources, dict): for format_id, format_url in sources.items(): if format_url: formats.append({ 'url': format_url, 'format_id': format_id, 'height': int_or_none(format_id), }) medias = self._parse_json( self._search_regex( r'mediaDefinition["\']?\s*:\s*(\[.+?}\s*\])', webpage, 'media definitions', default='{}'), video_id, fatal=False) for media in medias if isinstance(medias, list) else []: format_url = url_or_none(media.get('videoUrl')) if not format_url: continue format_id = media.get('format') quality = media.get('quality') if format_id == 'hls' or (format_id == 'mp4' and not quality): more_media = self._download_json(format_url, video_id, fatal=False) else: more_media = [media] for media in more_media if isinstance(more_media, list) else []: format_url = url_or_none(media.get('videoUrl')) if not format_url: continue format_id = media.get('format') if format_id == 'hls' or determine_ext(format_url) == 'm3u8': formats.extend(self._extract_m3u8_formats( format_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=format_id or 'hls', fatal=False)) continue format_id = media.get('quality') formats.append({ 'url': format_url, 'ext': 'mp4', 'format_id': format_id, 'height': int_or_none(format_id), }) if not formats: video_url = self._html_search_regex( r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL') formats.append({'url': video_url, 'ext': 
'mp4'}) self._sort_formats(formats) thumbnail = self._og_search_thumbnail(webpage) upload_date = unified_strdate(self._search_regex( r'<span[^>]+>(?:ADDED|Published on) ([^<]+)<', webpage, 'upload date', default=None)) duration = int_or_none(self._og_search_property( 'video:duration', webpage, default=None) or self._search_regex( r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None)) view_count = str_to_int(self._search_regex( (r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)', r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)', r'<span[^>]+\bclass=["\']video_view_count[^>]*>\s*([\d,.]+)'), webpage, 'view count', default=None)) # No self-labeling, but they describe themselves as # "Home of Videos Porno" age_limit = 18 return merge_dicts(info, { 'id': video_id, 'ext': 'mp4', 'thumbnail': thumbnail, 'upload_date': upload_date, 'duration': duration, 'view_count': view_count, 'age_limit': age_limit, 'formats': formats, }) �������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/regiotv.py�������������������������������������������������������0000664�0000000�0000000�00000004234�14277552437�0020712�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import ( sanitized_Request, xpath_text, xpath_with_ns, ) class RegioTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?regio-tv\.de/video/(?P<id>[0-9]+)' _TESTS = [{ 'url': 'http://www.regio-tv.de/video/395808.html', 'info_dict': { 'id': '395808', 'ext': 'mp4', 'title': 'Wir in Ludwigsburg', 'description': 'Mit unseren zuckersüßen Adventskindern, außerdem besuchen wir die Abendsterne!', } }, { 'url': 'http://www.regio-tv.de/video/395808', 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) key = self._search_regex( r'key\s*:\s*(["\'])(?P<key>.+?)\1', webpage, 'key', group='key') title = self._og_search_title(webpage) SOAP_TEMPLATE = '<?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body><{0} xmlns="http://v.telvi.de/"><key xsi:type="xsd:string">{1}</key></{0}></soap:Body></soap:Envelope>' request = sanitized_Request( 'http://v.telvi.de/', SOAP_TEMPLATE.format('GetHTML5VideoData', key).encode('utf-8')) video_data = self._download_xml(request, video_id, 'Downloading video XML') NS_MAP = { 'xsi': 'http://www.w3.org/2001/XMLSchema-instance', 'soap': 'http://schemas.xmlsoap.org/soap/envelope/', } video_url = xpath_text( video_data, xpath_with_ns('.//video', NS_MAP), 'video url', fatal=True) thumbnail = xpath_text( video_data, xpath_with_ns('.//image', NS_MAP), 'thumbnail') description = self._og_search_description( webpage) or self._html_search_meta('description', webpage) return { 'id': video_id, 'url': video_url, 'title': title, 'description': description, 'thumbnail': thumbnail, } 
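# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the archive): the RegioTV extractor above
# reduces to a single SOAP POST against http://v.telvi.de/ followed by a
# lookup of the un-namespaced <video>/<image> elements in the reply. A
# minimal stdlib-only reproduction might look like this; KEY is a
# hypothetical placeholder for the value the extractor scrapes out of the
# watch page, and the response layout is assumed from the code above.
import urllib.request
import xml.etree.ElementTree as ET

KEY = 'PLACEHOLDER-KEY-FROM-WATCH-PAGE'  # hypothetical value
SOAP_ENVELOPE = (
    '<?xml version="1.0" encoding="utf-8"?>'
    '<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
    ' xmlns:xsd="http://www.w3.org/2001/XMLSchema"'
    ' xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body>'
    '<GetHTML5VideoData xmlns="http://v.telvi.de/">'
    '<key xsi:type="xsd:string">%s</key>'
    '</GetHTML5VideoData></soap:Body></soap:Envelope>' % KEY)


def fetch_video_url():
    # POST the envelope and pull the direct video URL out of the XML reply
    req = urllib.request.Request(
        'http://v.telvi.de/', SOAP_ENVELOPE.encode('utf-8'))
    with urllib.request.urlopen(req) as resp:
        tree = ET.fromstring(resp.read())
    return tree.findtext('.//video')
# ---------------------------------------------------------------------------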
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/rentv.py���������������������������������������������������������0000664�0000000�0000000�00000010003�14277552437�0020360�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..compat import compat_str from ..utils import ( determine_ext, int_or_none, url_or_none, ) class RENTVIE(InfoExtractor): _VALID_URL = r'(?:rentv:|https?://(?:www\.)?ren\.tv/(?:player|video/epizod)/)(?P<id>\d+)' _TESTS = [{ 'url': 'http://ren.tv/video/epizod/118577', 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb', 'info_dict': { 'id': '118577', 'ext': 'mp4', 'title': 'Документальный спецпроект: "Промывка мозгов. Технологии XXI века"', 'timestamp': 1472230800, 'upload_date': '20160826', } }, { 'url': 'http://ren.tv/player/118577', 'only_matching': True, }, { 'url': 'rentv:118577', 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage('http://ren.tv/player/' + video_id, video_id) config = self._parse_json(self._search_regex( r'config\s*=\s*({.+})\s*;', webpage, 'config'), video_id) title = config['title'] formats = [] for video in config['src']: src = url_or_none(video.get('src')) if not src: continue ext = determine_ext(src) if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( src, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) else: formats.append({ 'url': src, }) self._sort_formats(formats) return { 'id': video_id, 'title': title, 'description': config.get('description'), 'thumbnail': config.get('image'), 'duration': int_or_none(config.get('duration')), 'timestamp': int_or_none(config.get('date')), 'formats': formats, } class RENTVArticleIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ren\.tv/novosti/\d{4}-\d{2}-\d{2}/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'http://ren.tv/novosti/2016-10-26/video-mikroavtobus-popavshiy-v-dtp-s-gruzovikami-v-podmoskove-prevratilsya-v', 'md5': 'ebd63c4680b167693745ab91343df1d6', 'info_dict': { 'id': '136472', 'ext': 'mp4', 'title': 'Видео: микроавтобус, попавший в ДТП с грузовиками в Подмосковье, превратился в груду металла', 'description': 'Жертвами столкновения двух фур и микроавтобуса, по последним данным, стали семь человек.', } }, { # TODO: invalid m3u8 'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video', 'info_dict': { 'id': 'playlist', 'ext': 'mp4', 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. 
ВИДЕО | РЕН ТВ', 'uploader': 'ren.tv', }, 'params': { # m3u8 downloads 'skip_download': True, }, 'skip': True, }] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) drupal_settings = self._parse_json(self._search_regex( r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'), display_id) entries = [] for config_profile in drupal_settings.get('ren_jwplayer', {}).values(): media_id = config_profile.get('mediaid') if not media_id: continue media_id = compat_str(media_id) entries.append(self.url_result('rentv:' + media_id, 'RENTV', media_id)) return self.playlist_result(entries, display_id) �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/restudy.py�������������������������������������������������������0000664�0000000�0000000�00000002416�14277552437�0020732�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor class RestudyIE(InfoExtractor): _VALID_URL = r'https?://(?:(?:www|portal)\.)?restudy\.dk/video/[^/]+/id/(?P<id>[0-9]+)' _TESTS = [{ 'url': 'https://www.restudy.dk/video/play/id/1637', 'info_dict': { 'id': '1637', 'ext': 'flv', 'title': 'Leiden-frosteffekt', 'description': 'Denne video er et eksperiment med flydende kvælstof.', }, 'params': { # rtmp download 'skip_download': True, } }, { 'url': 'https://portal.restudy.dk/video/leiden-frosteffekt/id/1637', 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) title = self._og_search_title(webpage).strip() description = self._og_search_description(webpage).strip() formats = self._extract_smil_formats( 'https://cdn.portal.restudy.dk/dynamic/themes/front/awsmedia/SmilDirectory/video_%s.xml' % video_id, video_id) self._sort_formats(formats) return { 'id': video_id, 'title': title, 'description': description, 'formats': formats, } ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/reuters.py�������������������������������������������������������0000664�0000000�0000000�00000004515�14277552437�0020726�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import re from .common import InfoExtractor from ..utils import ( js_to_json, int_or_none, 
unescapeHTML, ) class ReutersIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?reuters\.com/.*?\?.*?videoId=(?P<id>[0-9]+)' _TEST = { 'url': 'http://www.reuters.com/video/2016/05/20/san-francisco-police-chief-resigns?videoId=368575562', 'md5': '8015113643a0b12838f160b0b81cc2ee', 'info_dict': { 'id': '368575562', 'ext': 'mp4', 'title': 'San Francisco police chief resigns', } } def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( 'http://www.reuters.com/assets/iframe/yovideo?videoId=%s' % video_id, video_id) video_data = js_to_json(self._search_regex( r'(?s)Reuters\.yovideo\.drawPlayer\(({.*?})\);', webpage, 'video data')) def get_json_value(key, fatal=False): return self._search_regex(r'"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal) title = unescapeHTML(get_json_value('title', fatal=True)) mmid, fid = re.search(r',/(\d+)\?f=(\d+)', get_json_value('flv', fatal=True)).groups() mas_data = self._download_json( 'http://mas-e.cds1.yospace.com/mas/%s/%s?trans=json' % (mmid, fid), video_id, transform_source=js_to_json) formats = [] for f in mas_data: f_url = f.get('url') if not f_url: continue method = f.get('method') if method == 'hls': formats.extend(self._extract_m3u8_formats( f_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) else: container = f.get('container') ext = '3gp' if method == 'mobile' else container formats.append({ 'format_id': ext, 'url': f_url, 'ext': ext, 'container': container if method != 'mobile' else None, }) self._sort_formats(formats) return { 'id': video_id, 'title': title, 'thumbnail': get_json_value('thumb'), 'duration': int_or_none(get_json_value('seconds')), 'formats': formats, } �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/reverbnation.py��������������������������������������������������0000664�0000000�0000000�00000003062�14277552437�0021727�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import ( qualities, str_or_none, ) class ReverbNationIE(InfoExtractor): _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$' _TESTS = [{ 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', 'md5': 'c0aaf339bcee189495fdf5a8c8ba8645', 'info_dict': { 'id': '16965047', 'ext': 'mp3', 'title': 'MONA LISA', 'uploader': 'ALKILADOS', 'uploader_id': '216429', 'thumbnail': r're:^https?://.*\.jpg', }, }] def _real_extract(self, url): song_id = self._match_id(url) api_res = self._download_json( 'https://api.reverbnation.com/song/%s' % song_id, song_id, note='Downloading information of song %s' % song_id ) THUMBNAILS = ('thumbnail', 'image') quality = qualities(THUMBNAILS) thumbnails = [] for thumb_key in THUMBNAILS: if api_res.get(thumb_key): thumbnails.append({ 'url': api_res[thumb_key], 'preference': quality(thumb_key) }) return { 'id': song_id, 'title': api_res['name'], 'url': api_res['url'], 'uploader': api_res.get('artist', {}).get('name'), 'uploader_id': str_or_none(api_res.get('artist', {}).get('id')), 'thumbnails': thumbnails, 'ext': 'mp3', 'vcodec': 'none', 
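            # ReverbNation serves bare MP3 files, so the 'ext'/'vcodec' keys
            # above mark the single format as audio-only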
} ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/rice.py����������������������������������������������������������0000664�0000000�0000000�00000010647�14277552437�0020162�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import re from .common import InfoExtractor from ..compat import compat_parse_qs from ..utils import ( xpath_text, xpath_element, int_or_none, parse_iso8601, ExtractorError, ) class RICEIE(InfoExtractor): _VALID_URL = r'https?://mediahub\.rice\.edu/app/[Pp]ortal/video\.aspx\?(?P<query>.+)' _TEST = { 'url': 'https://mediahub.rice.edu/app/Portal/video.aspx?PortalID=25ffd62c-3d01-4b29-8c70-7c94270efb3e&DestinationID=66bc9434-03bd-4725-b47e-c659d8d809db&ContentID=YEWIvbhb40aqdjMD1ALSqw', 'md5': '9b83b4a2eead4912dc3b7fac7c449b6a', 'info_dict': { 'id': 'YEWIvbhb40aqdjMD1ALSqw', 'ext': 'mp4', 'title': 'Active Learning in Archeology', 'upload_date': '20140616', 'timestamp': 1402926346, } } _NS = 'http://schemas.datacontract.org/2004/07/ensembleVideo.Data.Service.Contracts.Models.Player.Config' def _real_extract(self, url): qs = compat_parse_qs(self._match_valid_url(url).group('query')) if not qs.get('PortalID') or not qs.get('DestinationID') or not qs.get('ContentID'): raise ExtractorError('Invalid URL', expected=True) portal_id = qs['PortalID'][0] playlist_id = qs['DestinationID'][0] content_id = qs['ContentID'][0] content_data = self._download_xml('https://mediahub.rice.edu/api/portal/GetContentTitle', content_id, query={ 'portalId': portal_id, 'playlistId': playlist_id, 'contentId': content_id }) metadata = xpath_element(content_data, './/metaData', fatal=True) title = xpath_text(metadata, 'primaryTitle', fatal=True) encodings = xpath_element(content_data, './/encodings', fatal=True) player_data = self._download_xml('https://mediahub.rice.edu/api/player/GetPlayerConfig', content_id, query={ 'temporaryLinkId': xpath_text(encodings, 'temporaryLinkId', fatal=True), 'contentId': content_id, }) common_fmt = {} dimensions = xpath_text(encodings, 'dimensions') if dimensions: wh = dimensions.split('x') if len(wh) == 2: common_fmt.update({ 'width': int_or_none(wh[0]), 'height': int_or_none(wh[1]), }) formats = [] rtsp_path = xpath_text(player_data, self._xpath_ns('RtspPath', self._NS)) if rtsp_path: fmt = { 'url': rtsp_path, 'format_id': 'rtsp', } fmt.update(common_fmt) formats.append(fmt) for source in player_data.findall(self._xpath_ns('.//Source', self._NS)): video_url = xpath_text(source, self._xpath_ns('File', self._NS)) if not video_url: continue if '.m3u8' in video_url: formats.extend(self._extract_m3u8_formats(video_url, content_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) else: fmt = { 'url': video_url, 'format_id': video_url.split(':')[0], } fmt.update(common_fmt) rtmp = 
re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', video_url) if rtmp: fmt.update({ 'url': rtmp.group('url'), 'play_path': rtmp.group('playpath'), 'app': rtmp.group('app'), 'ext': 'flv', }) formats.append(fmt) self._sort_formats(formats) thumbnails = [] for content_asset in content_data.findall('.//contentAssets'): asset_type = xpath_text(content_asset, 'type') if asset_type == 'image': image_url = xpath_text(content_asset, 'httpPath') if not image_url: continue thumbnails.append({ 'id': xpath_text(content_asset, 'ID'), 'url': image_url, }) return { 'id': content_id, 'title': title, 'description': xpath_text(metadata, 'abstract'), 'duration': int_or_none(xpath_text(metadata, 'duration')), 'timestamp': parse_iso8601(xpath_text(metadata, 'dateUpdated')), 'thumbnails': thumbnails, 'formats': formats, } �����������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/rmcdecouverte.py�������������������������������������������������0000664�0000000�0000000�00000005333�14277552437�0022103�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from .brightcove import BrightcoveLegacyIE from ..compat import ( compat_parse_qs, compat_urlparse, ) from ..utils import smuggle_url class RMCDecouverteIE(InfoExtractor): _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/(?:[^?#]*_(?P<id>\d+)|mediaplayer-direct)/?(?:[#?]|$)' _TESTS = [{ 'url': 'https://rmcdecouverte.bfmtv.com/vestiges-de-guerre_22240/les-bunkers-secrets-domaha-beach_25303/', 'info_dict': { 'id': '6250879771001', 'ext': 'mp4', 'title': 'LES BUNKERS SECRETS D´OMAHA BEACH', 'uploader_id': '1969646226001', 'description': 'md5:aed573ca24abde62a148e0eba909657d', 'timestamp': 1619622984, 'upload_date': '20210428', }, 'params': { 'skip_download': True, }, }, { 'url': 'https://rmcdecouverte.bfmtv.com/wheeler-dealers-occasions-a-saisir/program_2566/', 'info_dict': { 'id': '5983675500001', 'ext': 'mp4', 'title': 'CORVETTE', 'description': 'md5:c1e8295521e45ffebf635d6a7658f506', 'uploader_id': '1969646226001', 'upload_date': '20181226', 'timestamp': 1545861635, }, 'params': { 'skip_download': True, }, 'skip': 'only available for a week', }, { 'url': 'https://rmcdecouverte.bfmtv.com/avions-furtifs-la-technologie-de-lextreme_10598', 'only_matching': True, }, { # The website accepts any URL as long as it has _\d+ at the end 'url': 'https://rmcdecouverte.bfmtv.com/any/thing/can/go/here/_10598', 'only_matching': True, }, { # live, geo restricted, bypassable 'url': 'https://rmcdecouverte.bfmtv.com/mediaplayer-direct/', 'only_matching': True, }] BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s' def _real_extract(self, url): mobj = self._match_valid_url(url) display_id = mobj.group('id') or 'direct' webpage = self._download_webpage(url, display_id) brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) if brightcove_legacy_url: brightcove_id = compat_parse_qs(compat_urlparse.urlparse( brightcove_legacy_url).query)['@videoPlayer'][0] else: brightcove_id = self._search_regex( r'data-video-id=["\'](\d+)', webpage, 'brightcove id') return self.url_result( smuggle_url( 
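                # smuggle the geo hint so the Brightcove extractor can apply
                # its France geo-bypass (streams are geo-restricted to FR)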
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {'geo_countries': ['FR']}), 'BrightcoveNew', brightcove_id) �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/rockstargames.py�������������������������������������������������0000664�0000000�0000000�00000004217�14277552437�0022101�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import ( int_or_none, parse_iso8601, ) class RockstarGamesIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?rockstargames\.com/videos(?:/video/|#?/?\?.*\bvideo=)(?P<id>\d+)' _TESTS = [{ 'url': 'https://www.rockstargames.com/videos/video/11544/', 'md5': '03b5caa6e357a4bd50e3143fc03e5733', 'info_dict': { 'id': '11544', 'ext': 'mp4', 'title': 'Further Adventures in Finance and Felony Trailer', 'description': 'md5:6d31f55f30cb101b5476c4a379e324a3', 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1464876000, 'upload_date': '20160602', } }, { 'url': 'http://www.rockstargames.com/videos#/?video=48', 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) video = self._download_json( 'https://www.rockstargames.com/videoplayer/videos/get-video.json', video_id, query={ 'id': video_id, 'locale': 'en_us', })['video'] title = video['title'] formats = [] for video in video['files_processed']['video/mp4']: if not video.get('src'): continue resolution = video.get('resolution') height = int_or_none(self._search_regex( r'^(\d+)[pP]$', resolution or '', 'height', default=None)) formats.append({ 'url': self._proto_relative_url(video['src']), 'format_id': resolution, 'height': height, }) if not formats: youtube_id = video.get('youtube_id') if youtube_id: return self.url_result(youtube_id, 'Youtube') self._sort_formats(formats) return { 'id': video_id, 'title': title, 'description': video.get('description'), 'thumbnail': self._proto_relative_url(video.get('screencap')), 'timestamp': parse_iso8601(video.get('created')), 'formats': formats, } ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/rokfin.py��������������������������������������������������������0000664�0000000�0000000�00000045051�14277552437�0020525�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import itertools import json import re import urllib.parse from datetime import datetime from .common import InfoExtractor, 
SearchInfoExtractor from ..utils import ( ExtractorError, determine_ext, float_or_none, format_field, int_or_none, str_or_none, traverse_obj, try_get, unescapeHTML, unified_timestamp, url_or_none, urlencode_postdata, ) _API_BASE_URL = 'https://prod-api-v2.production.rokfin.com/api/v2/public/' class RokfinIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?rokfin\.com/(?P<id>(?P<type>post|stream)/\d+)' _NETRC_MACHINE = 'rokfin' _AUTH_BASE = 'https://secure.rokfin.com/auth/realms/rokfin-web/protocol/openid-connect' _access_mgmt_tokens = {} # OAuth 2.0: RFC 6749, Sec. 1.4-5 _TESTS = [{ 'url': 'https://www.rokfin.com/post/57548/Mitt-Romneys-Crazy-Solution-To-Climate-Change', 'info_dict': { 'id': 'post/57548', 'ext': 'mp4', 'title': 'Mitt Romney\'s Crazy Solution To Climate Change', 'thumbnail': r're:https://img\.production\.rokfin\.com/.+', 'upload_date': '20211023', 'timestamp': 1634998029, 'channel': 'Jimmy Dore', 'channel_id': 65429, 'channel_url': 'https://rokfin.com/TheJimmyDoreShow', 'duration': 213.0, 'availability': 'public', 'live_status': 'not_live', 'dislike_count': int, 'like_count': int, } }, { 'url': 'https://rokfin.com/post/223/Julian-Assange-Arrested-Streaming-In-Real-Time', 'info_dict': { 'id': 'post/223', 'ext': 'mp4', 'title': 'Julian Assange Arrested: Streaming In Real Time', 'thumbnail': r're:https://img\.production\.rokfin\.com/.+', 'upload_date': '20190412', 'timestamp': 1555052644, 'channel': 'Ron Placone', 'channel_id': 10, 'channel_url': 'https://rokfin.com/RonPlacone', 'availability': 'public', 'live_status': 'not_live', 'dislike_count': int, 'like_count': int, 'tags': ['FreeThinkingMedia^', 'RealProgressives^'], } }, { 'url': 'https://www.rokfin.com/stream/10543/Its-A-Crazy-Mess-Regional-Director-Blows-Whistle-On-Pfizers-Vaccine-Trial-Data', 'info_dict': { 'id': 'stream/10543', 'ext': 'mp4', 'title': '"It\'s A Crazy Mess" Regional Director Blows Whistle On Pfizer\'s Vaccine Trial Data', 'thumbnail': r're:https://img\.production\.rokfin\.com/.+', 'description': 'md5:324ce2d3e3b62e659506409e458b9d8e', 'channel': 'Ryan Cristián', 'channel_id': 53856, 'channel_url': 'https://rokfin.com/TLAVagabond', 'availability': 'public', 'is_live': False, 'was_live': True, 'live_status': 'was_live', 'timestamp': 1635874720, 'release_timestamp': 1635874720, 'release_date': '20211102', 'upload_date': '20211102', 'dislike_count': int, 'like_count': int, 'tags': ['FreeThinkingMedia^'], } }] def _real_extract(self, url): video_id, video_type = self._match_valid_url(url).group('id', 'type') metadata = self._download_json_using_access_token(f'{_API_BASE_URL}{video_id}', video_id) scheduled = unified_timestamp(metadata.get('scheduledAt')) live_status = ('was_live' if metadata.get('stoppedAt') else 'is_upcoming' if scheduled else 'is_live' if video_type == 'stream' else 'not_live') video_url = traverse_obj(metadata, 'url', ('content', 'contentUrl'), expected_type=url_or_none) formats, subtitles = [{'url': video_url}] if video_url else [], {} if determine_ext(video_url) == 'm3u8': formats, subtitles = self._extract_m3u8_formats_and_subtitles( video_url, video_id, fatal=False, live=live_status == 'is_live') if not formats: if traverse_obj(metadata, 'premiumPlan', 'premium'): self.raise_login_required('This video is only available to premium users', True, method='cookies') elif scheduled: self.raise_no_formats( f'Stream is offline; scheduled for {datetime.fromtimestamp(scheduled).strftime("%Y-%m-%d %H:%M:%S")}', video_id=video_id, expected=True) self._sort_formats(formats) uploader = 
traverse_obj(metadata, ('createdBy', 'username'), ('creator', 'username')) timestamp = (scheduled or float_or_none(metadata.get('postedAtMilli'), 1000) or unified_timestamp(metadata.get('creationDateTime'))) return { 'id': video_id, 'formats': formats, 'subtitles': subtitles, 'title': str_or_none(traverse_obj(metadata, 'title', ('content', 'contentTitle'))), 'duration': float_or_none(traverse_obj(metadata, ('content', 'duration'))), 'thumbnail': url_or_none(traverse_obj(metadata, 'thumbnail', ('content', 'thumbnailUrl1'))), 'description': str_or_none(traverse_obj(metadata, 'description', ('content', 'contentDescription'))), 'like_count': int_or_none(metadata.get('likeCount')), 'dislike_count': int_or_none(metadata.get('dislikeCount')), 'channel': str_or_none(traverse_obj(metadata, ('createdBy', 'name'), ('creator', 'name'))), 'channel_id': traverse_obj(metadata, ('createdBy', 'id'), ('creator', 'id')), 'channel_url': url_or_none(f'https://rokfin.com/{uploader}') if uploader else None, 'timestamp': timestamp, 'release_timestamp': timestamp if live_status != 'not_live' else None, 'tags': traverse_obj(metadata, ('tags', ..., 'title'), expected_type=str_or_none), 'live_status': live_status, 'availability': self._availability( needs_premium=bool(traverse_obj(metadata, 'premiumPlan', 'premium')), is_private=False, needs_subscription=False, needs_auth=False, is_unlisted=False), # 'comment_count': metadata.get('numComments'), # Data provided by website is wrong '__post_extractor': self.extract_comments(video_id) if video_type == 'post' else None, } def _get_comments(self, video_id): pages_total = None for page_n in itertools.count(): raw_comments = self._download_json( f'{_API_BASE_URL}comment?postId={video_id[5:]}&page={page_n}&size=50', video_id, note=f'Downloading viewer comments page {page_n + 1}{format_field(pages_total, None, " of %s")}', fatal=False) or {} for comment in raw_comments.get('content') or []: yield { 'text': str_or_none(comment.get('comment')), 'author': str_or_none(comment.get('name')), 'id': comment.get('commentId'), 'author_id': comment.get('userId'), 'parent': 'root', 'like_count': int_or_none(comment.get('numLikes')), 'dislike_count': int_or_none(comment.get('numDislikes')), 'timestamp': unified_timestamp(comment.get('postedAt')) } pages_total = int_or_none(raw_comments.get('totalPages')) or None is_last = raw_comments.get('last') if not raw_comments.get('content') or is_last or (page_n > pages_total if pages_total else is_last is not False): return def _perform_login(self, username, password): # https://openid.net/specs/openid-connect-core-1_0.html#CodeFlowAuth (Sec. 
3.1) login_page = self._download_webpage( f'{self._AUTH_BASE}/auth?client_id=web&redirect_uri=https%3A%2F%2Frokfin.com%2Ffeed&response_mode=fragment&response_type=code&scope=openid', None, note='loading login page', errnote='error loading login page') authentication_point_url = unescapeHTML(self._search_regex( r'<form\s+[^>]+action\s*=\s*"(https://secure\.rokfin\.com/auth/realms/rokfin-web/login-actions/authenticate\?[^"]+)"', login_page, name='Authentication URL')) resp_body = self._download_webpage( authentication_point_url, None, note='logging in', fatal=False, expected_status=404, data=urlencode_postdata({'username': username, 'password': password, 'rememberMe': 'off', 'credentialId': ''})) if not self._authentication_active(): if re.search(r'(?i)(invalid\s+username\s+or\s+password)', resp_body or ''): raise ExtractorError('invalid username/password', expected=True) raise ExtractorError('Login failed') urlh = self._request_webpage( f'{self._AUTH_BASE}/auth', None, note='granting user authorization', errnote='user authorization rejected by Rokfin', query={ 'client_id': 'web', 'prompt': 'none', 'redirect_uri': 'https://rokfin.com/silent-check-sso.html', 'response_mode': 'fragment', 'response_type': 'code', 'scope': 'openid', }) self._access_mgmt_tokens = self._download_json( f'{self._AUTH_BASE}/token', None, note='getting access credentials', errnote='error getting access credentials', data=urlencode_postdata({ 'code': urllib.parse.parse_qs(urllib.parse.urldefrag(urlh.geturl()).fragment).get('code')[0], 'client_id': 'web', 'grant_type': 'authorization_code', 'redirect_uri': 'https://rokfin.com/silent-check-sso.html' })) def _authentication_active(self): return not ( {'KEYCLOAK_IDENTITY', 'KEYCLOAK_IDENTITY_LEGACY', 'KEYCLOAK_SESSION', 'KEYCLOAK_SESSION_LEGACY'} - set(self._get_cookies(self._AUTH_BASE))) def _get_auth_token(self): return try_get(self._access_mgmt_tokens, lambda x: ' '.join([x['token_type'], x['access_token']])) def _download_json_using_access_token(self, url_or_request, video_id, headers={}, query={}): assert 'authorization' not in headers headers = headers.copy() auth_token = self._get_auth_token() refresh_token = self._access_mgmt_tokens.get('refresh_token') if auth_token: headers['authorization'] = auth_token json_string, urlh = self._download_webpage_handle( url_or_request, video_id, headers=headers, query=query, expected_status=401) if not auth_token or urlh.code != 401 or refresh_token is None: return self._parse_json(json_string, video_id) self._access_mgmt_tokens = self._download_json( f'{self._AUTH_BASE}/token', video_id, note='User authorization expired or canceled by Rokfin. 
Re-authorizing ...', errnote='Failed to re-authorize', data=urlencode_postdata({ 'grant_type': 'refresh_token', 'refresh_token': refresh_token, 'client_id': 'web' })) headers['authorization'] = self._get_auth_token() if headers['authorization'] is None: raise ExtractorError('User authorization lost', expected=True) return self._download_json(url_or_request, video_id, headers=headers, query=query) class RokfinPlaylistBaseIE(InfoExtractor): _TYPES = { 'video': 'post', 'audio': 'post', 'stream': 'stream', 'dead_stream': 'stream', 'stack': 'stack', } def _get_video_data(self, metadata): for content in metadata.get('content') or []: media_type = self._TYPES.get(content.get('mediaType')) video_id = content.get('id') if media_type == 'post' else content.get('mediaId') if not media_type or not video_id: continue yield self.url_result(f'https://rokfin.com/{media_type}/{video_id}', video_id=f'{media_type}/{video_id}', video_title=str_or_none(traverse_obj(content, ('content', 'contentTitle')))) class RokfinStackIE(RokfinPlaylistBaseIE): IE_NAME = 'rokfin:stack' IE_DESC = 'Rokfin Stacks' _VALID_URL = r'https?://(?:www\.)?rokfin\.com/stack/(?P<id>[^/]+)' _TESTS = [{ 'url': 'https://www.rokfin.com/stack/271/Tulsi-Gabbard-Portsmouth-Townhall-FULL--Feb-9-2020', 'playlist_count': 8, 'info_dict': { 'id': '271', }, }] def _real_extract(self, url): list_id = self._match_id(url) return self.playlist_result(self._get_video_data( self._download_json(f'{_API_BASE_URL}stack/{list_id}', list_id)), list_id) class RokfinChannelIE(RokfinPlaylistBaseIE): IE_NAME = 'rokfin:channel' IE_DESC = 'Rokfin Channels' _VALID_URL = r'https?://(?:www\.)?rokfin\.com/(?!((feed/?)|(discover/?)|(channels/?))$)(?P<id>[^/]+)/?$' _TESTS = [{ 'url': 'https://rokfin.com/TheConvoCouch', 'playlist_mincount': 100, 'info_dict': { 'id': '12071-new', 'title': 'TheConvoCouch - New', 'description': 'md5:bb622b1bca100209b91cd685f7847f06', }, }] _TABS = { 'new': 'posts', 'top': 'top', 'videos': 'video', 'podcasts': 'audio', 'streams': 'stream', 'stacks': 'stack', } def _real_initialize(self): self._validate_extractor_args() def _validate_extractor_args(self): requested_tabs = self._configuration_arg('tab', None) if requested_tabs is not None and (len(requested_tabs) > 1 or requested_tabs[0] not in self._TABS): raise ExtractorError(f'Invalid extractor-arg "tab". 
Must be one of {", ".join(self._TABS)}', expected=True) def _entries(self, channel_id, channel_name, tab): pages_total = None for page_n in itertools.count(0): if tab in ('posts', 'top'): data_url = f'{_API_BASE_URL}user/{channel_name}/{tab}?page={page_n}&size=50' else: data_url = f'{_API_BASE_URL}post/search/{tab}?page={page_n}&size=50&creator={channel_id}' metadata = self._download_json( data_url, channel_name, note=f'Downloading video metadata page {page_n + 1}{format_field(pages_total, None, " of %s")}') yield from self._get_video_data(metadata) pages_total = int_or_none(metadata.get('totalPages')) or None is_last = metadata.get('last') if is_last or (page_n > pages_total if pages_total else is_last is not False): return def _real_extract(self, url): channel_name = self._match_id(url) channel_info = self._download_json(f'{_API_BASE_URL}user/{channel_name}', channel_name) channel_id = channel_info['id'] tab = self._configuration_arg('tab', default=['new'])[0] return self.playlist_result( self._entries(channel_id, channel_name, self._TABS[tab]), f'{channel_id}-{tab}', f'{channel_name} - {tab.title()}', str_or_none(channel_info.get('description'))) class RokfinSearchIE(SearchInfoExtractor): IE_NAME = 'rokfin:search' IE_DESC = 'Rokfin Search' _SEARCH_KEY = 'rkfnsearch' _TYPES = { 'video': (('id', 'raw'), 'post'), 'audio': (('id', 'raw'), 'post'), 'stream': (('content_id', 'raw'), 'stream'), 'dead_stream': (('content_id', 'raw'), 'stream'), 'stack': (('content_id', 'raw'), 'stack'), } _TESTS = [{ 'url': 'rkfnsearch5:"zelenko"', 'playlist_count': 5, 'info_dict': { 'id': '"zelenko"', 'title': '"zelenko"', } }] _db_url = None _db_access_key = None def _real_initialize(self): self._db_url, self._db_access_key = self.cache.load(self.ie_key(), 'auth', default=(None, None)) if not self._db_url: self._get_db_access_credentials() def _search_results(self, query): total_pages = None for page_number in itertools.count(1): search_results = self._run_search_query( query, data={'query': query, 'page': {'size': 100, 'current': page_number}}, note=f'Downloading page {page_number}{format_field(total_pages, None, " of ~%s")}') total_pages = traverse_obj(search_results, ('meta', 'page', 'total_pages'), expected_type=int_or_none) for result in search_results.get('results') or []: video_id_key, video_type = self._TYPES.get(traverse_obj(result, ('content_type', 'raw')), (None, None)) video_id = traverse_obj(result, video_id_key, expected_type=int_or_none) if video_id and video_type: yield self.url_result(url=f'https://rokfin.com/{video_type}/{video_id}') if not search_results.get('results'): return def _run_search_query(self, video_id, data, **kwargs): data = json.dumps(data).encode() for attempt in range(2): search_results = self._download_json( self._db_url, video_id, data=data, fatal=(attempt == 1), headers={'authorization': self._db_access_key}, **kwargs) if search_results: return search_results self.write_debug('Updating access credentials') self._get_db_access_credentials(video_id) def _get_db_access_credentials(self, video_id=None): auth_data = {'SEARCH_KEY': None, 'ENDPOINT_BASE': None} notfound_err_page = self._download_webpage( 'https://rokfin.com/discover', video_id, expected_status=404, note='Downloading home page') for js_file_path in re.findall(r'<script\b[^>]*\ssrc\s*=\s*"(/static/js/[^">]+)"', notfound_err_page): js_content = self._download_webpage( f'https://rokfin.com{js_file_path}', video_id, note='Downloading JavaScript file', fatal=False) auth_data.update(re.findall( 
rf'REACT_APP_({"|".join(auth_data.keys())})\s*:\s*"([^"]+)"', js_content or '')) if not all(auth_data.values()): continue self._db_url = url_or_none(f'{auth_data["ENDPOINT_BASE"]}/api/as/v1/engines/rokfin-search/search.json') self._db_access_key = f'Bearer {auth_data["SEARCH_KEY"]}' self.cache.store(self.ie_key(), 'auth', (self._db_url, self._db_access_key)) return raise ExtractorError('Unable to extract access credentials') ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/roosterteeth.py��������������������������������������������������0000664�0000000�0000000�00000021273�14277552437�0021764�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( ExtractorError, int_or_none, join_nonempty, LazyList, parse_qs, str_or_none, traverse_obj, url_or_none, urlencode_postdata, urljoin, update_url_query, ) class RoosterTeethBaseIE(InfoExtractor): _NETRC_MACHINE = 'roosterteeth' _API_BASE = 'https://svod-be.roosterteeth.com' _API_BASE_URL = f'{_API_BASE}/api/v1' def _perform_login(self, username, password): if self._get_cookies(self._API_BASE_URL).get('rt_access_token'): return try: self._download_json( 'https://auth.roosterteeth.com/oauth/token', None, 'Logging in', data=urlencode_postdata({ 'client_id': '4338d2b4bdc8db1239360f28e72f0d9ddb1fd01e7a38fbb07b4b1f4ba4564cc5', 'grant_type': 'password', 'username': username, 'password': password, })) except ExtractorError as e: msg = 'Unable to login' if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: resp = self._parse_json(e.cause.read().decode(), None, fatal=False) if resp: error = resp.get('extra_info') or resp.get('error_description') or resp.get('error') if error: msg += ': ' + error self.report_warning(msg) def _extract_video_info(self, data): thumbnails = [] for image in traverse_obj(data, ('included', 'images')): if image.get('type') not in ('episode_image', 'bonus_feature_image'): continue thumbnails.extend([{ 'id': name, 'url': url, } for name, url in (image.get('attributes') or {}).items() if url_or_none(url)]) attributes = data.get('attributes') or {} title = traverse_obj(attributes, 'title', 'display_title') sub_only = attributes.get('is_sponsors_only') return { 'id': str(data.get('id')), 'display_id': attributes.get('slug'), 'title': title, 'description': traverse_obj(attributes, 'description', 'caption'), 'series': attributes.get('show_title'), 'season_number': int_or_none(attributes.get('season_number')), 'season_id': attributes.get('season_id'), 'episode': title, 'episode_number': int_or_none(attributes.get('number')), 'episode_id': str_or_none(data.get('uuid')), 'channel_id': attributes.get('channel_id'), 'duration': int_or_none(attributes.get('length')), 'thumbnails': thumbnails, 
'availability': self._availability( needs_premium=sub_only, needs_subscription=sub_only, needs_auth=sub_only, is_private=False, is_unlisted=False), 'tags': attributes.get('genres') } class RoosterTeethIE(RoosterTeethBaseIE): _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement', 'info_dict': { 'id': '9156', 'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement', 'ext': 'mp4', 'title': 'Million Dollars, But... The Game Announcement', 'description': 'md5:168a54b40e228e79f4ddb141e89fe4f5', 'thumbnail': r're:^https?://.*\.png$', 'series': 'Million Dollars, But...', 'episode': 'Million Dollars, But... The Game Announcement', }, 'params': {'skip_download': True}, }, { 'url': 'https://roosterteeth.com/watch/rwby-bonus-25', 'info_dict': { 'id': '40432', 'display_id': 'rwby-bonus-25', 'title': 'Grimm', 'description': 'md5:f30ff570741213418a8d2c19868b93ab', 'episode': 'Grimm', 'channel_id': '92f780eb-ebfe-4bf5-a3b5-c6ad5460a5f1', 'thumbnail': r're:^https?://.*\.(png|jpe?g)$', 'ext': 'mp4', }, 'params': {'skip_download': True}, }, { 'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31', 'only_matching': True, }, { 'url': 'http://funhaus.roosterteeth.com/episode/funhaus-shorts-2016-austin-sucks-funhaus-shorts', 'only_matching': True, }, { 'url': 'http://screwattack.roosterteeth.com/episode/death-battle-season-3-mewtwo-vs-shadow', 'only_matching': True, }, { 'url': 'http://theknow.roosterteeth.com/episode/the-know-game-news-season-1-boring-steam-sales-are-better', 'only_matching': True, }, { # only available for FIRST members 'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one', 'only_matching': True, }, { 'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement', 'only_matching': True, }] def _real_extract(self, url): display_id = self._match_id(url) api_episode_url = f'{self._API_BASE_URL}/watch/{display_id}' try: video_data = self._download_json( api_episode_url + '/videos', display_id, 'Downloading video JSON metadata')['data'][0] m3u8_url = video_data['attributes']['url'] # XXX: additional URL at video_data['links']['download'] except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: if self._parse_json(e.cause.read().decode(), display_id).get('access') is False: self.raise_login_required( '%s is only available for FIRST members' % display_id) raise formats, subtitles = self._extract_m3u8_formats_and_subtitles( m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls') self._sort_formats(formats) episode = self._download_json( api_episode_url, display_id, 'Downloading episode JSON metadata')['data'][0] return { 'display_id': display_id, 'formats': formats, 'subtitles': subtitles, **self._extract_video_info(episode) } class RoosterTeethSeriesIE(RoosterTeethBaseIE): _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/series/(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'https://roosterteeth.com/series/rwby?season=7', 'playlist_count': 13, 'info_dict': { 'id': 'rwby-7', 'title': 'RWBY - Season 7', } }, { 'url': 'https://roosterteeth.com/series/role-initiative', 'playlist_mincount': 16, 'info_dict': { 'id': 'role-initiative', 'title': 'Role Initiative', } }, { 'url': 
'https://roosterteeth.com/series/let-s-play-minecraft?season=9',
        'playlist_mincount': 50,
        'info_dict': {
            'id': 'let-s-play-minecraft-9',
            'title': 'Let\'s Play Minecraft - Season 9',
        }
    }]

    def _entries(self, series_id, season_number):
        display_id = join_nonempty(series_id, season_number)
        # TODO: extract bonus material
        for data in self._download_json(
                f'{self._API_BASE_URL}/shows/{series_id}/seasons?order=asc&order_by', display_id)['data']:
            idx = traverse_obj(data, ('attributes', 'number'))
            if season_number and idx != season_number:
                continue
            season_url = update_url_query(urljoin(self._API_BASE, data['links']['episodes']), {'per_page': 1000})
            season = self._download_json(season_url, display_id, f'Downloading season {idx} JSON metadata')['data']
            for episode in season:
                yield self.url_result(
                    f'https://www.roosterteeth.com{episode["canonical_links"]["self"]}',
                    RoosterTeethIE.ie_key(),
                    **self._extract_video_info(episode))

    def _real_extract(self, url):
        series_id = self._match_id(url)
        season_number = traverse_obj(parse_qs(url), ('season', 0), expected_type=int_or_none)

        entries = LazyList(self._entries(series_id, season_number))
        return self.playlist_result(
            entries,
            join_nonempty(series_id, season_number),
            join_nonempty(entries[0].get('series'), season_number, delim=' - Season '))


# yt_dlp/extractor/rottentomatoes.py

from .common import InfoExtractor
from .internetvideoarchive import InternetVideoArchiveIE


class RottenTomatoesIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
        'info_dict': {
            'id': '11028566',
            'ext': 'mp4',
            'title': 'Toy Story 3',
            'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        iva_id = self._search_regex(r'publishedid=(\d+)', webpage, 'internet video archive id')

        return {
            '_type': 'url_transparent',
            'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?domain=www.videodetective.com&customerid=69249&playerid=641&publishedid=' + iva_id,
            'ie_key': InternetVideoArchiveIE.ie_key(),
            'id': video_id,
            'title': self._og_search_title(webpage),
        }
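# A minimal illustrative sketch of the '_type': 'url_transparent' hand-off
# used above: extraction is delegated to whichever extractor matches 'url'
# (here InternetVideoArchiveIE), while fields returned alongside it, such as
# 'title', override the delegate's values. 'ExampleClipIE' and its URLs are
# hypothetical and exist only to show the pattern.
class ExampleClipIE(InfoExtractor):
    _VALID_URL = r'https?://clips\.example/(?P<id>\d+)'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        return {
            '_type': 'url_transparent',
            # the extractor matching this URL does the heavy lifting
            'url': f'https://media.example/embed/{video_id}',
            'id': video_id,
            # set here, so it wins over the delegate extractor's title
            'title': self._og_search_title(webpage),
        }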
# yt_dlp/extractor/rozhlas.py

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    remove_start,
)


class RozhlasIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?prehravac\.rozhlas\.cz/audio/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://prehravac.rozhlas.cz/audio/3421320',
        'md5': '504c902dbc9e9a1fd50326eccf02a7e2',
        'info_dict': {
            'id': '3421320',
            'ext': 'mp3',
            'title': 'Echo Pavla Klusáka (30.06.2015 21:00)',
            'description': 'Osmdesátiny Terryho Rileyho jsou skvělou příležitostí proletět se elektronickými i akustickými díly zakladatatele minimalismu, který je aktivní už přes padesát let'
        }
    }, {
        'url': 'http://prehravac.rozhlas.cz/audio/3421320/embed',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        audio_id = self._match_id(url)

        webpage = self._download_webpage(
            'http://prehravac.rozhlas.cz/audio/%s' % audio_id, audio_id)

        title = self._html_search_regex(
            r'<h3>(.+?)</h3>\s*<p[^>]*>.*?</p>\s*<div[^>]+id=["\']player-track',
            webpage, 'title', default=None) or remove_start(
            self._og_search_title(webpage), 'Radio Wave - ')
        description = self._html_search_regex(
            r'<p[^>]+title=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>.*?</p>\s*<div[^>]+id=["\']player-track',
            webpage, 'description', fatal=False, group='url')
        duration = int_or_none(self._search_regex(
            r'data-duration=["\'](\d+)', webpage, 'duration', default=None))

        return {
            'id': audio_id,
            'url': 'http://media.rozhlas.cz/_audio/%s.mp3' % audio_id,
            'title': title,
            'description': description,
            'duration': duration,
            'vcodec': 'none',
        }


# yt_dlp/extractor/rte.py

import re

from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
    float_or_none,
    parse_iso8601,
    str_or_none,
    try_get,
    unescapeHTML,
    url_or_none,
    ExtractorError,
)


class RteBaseIE(InfoExtractor):
    def _real_extract(self, url):
        item_id = self._match_id(url)

        info_dict = {}
        formats = []

        ENDPOINTS = (
            'https://feeds.rasset.ie/rteavgen/player/playlist?type=iptv&format=json&showId=',
            'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=',
        )

        for num, ep_url in enumerate(ENDPOINTS, start=1):
            try:
data = self._download_json(ep_url + item_id, item_id) except ExtractorError as ee: if num < len(ENDPOINTS) or formats: continue if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False) if error_info: raise ExtractorError( '%s said: %s' % (self.IE_NAME, error_info['message']), expected=True) raise # NB the string values in the JSON are stored using XML escaping(!) show = try_get(data, lambda x: x['shows'][0], dict) if not show: continue if not info_dict: title = unescapeHTML(show['title']) description = unescapeHTML(show.get('description')) thumbnail = show.get('thumbnail') duration = float_or_none(show.get('duration'), 1000) timestamp = parse_iso8601(show.get('published')) info_dict = { 'id': item_id, 'title': title, 'description': description, 'thumbnail': thumbnail, 'timestamp': timestamp, 'duration': duration, } mg = try_get(show, lambda x: x['media:group'][0], dict) if not mg: continue if mg.get('url'): m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url']) if m: m = m.groupdict() formats.append({ 'url': m['url'] + '/' + m['app'], 'app': m['app'], 'play_path': m['playpath'], 'player_url': url, 'ext': 'flv', 'format_id': 'rtmp', }) if mg.get('hls_server') and mg.get('hls_url'): formats.extend(self._extract_m3u8_formats( mg['hls_server'] + mg['hls_url'], item_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) if mg.get('hds_server') and mg.get('hds_url'): formats.extend(self._extract_f4m_formats( mg['hds_server'] + mg['hds_url'], item_id, f4m_id='hds', fatal=False)) mg_rte_server = str_or_none(mg.get('rte:server')) mg_url = str_or_none(mg.get('url')) if mg_rte_server and mg_url: hds_url = url_or_none(mg_rte_server + mg_url) if hds_url: formats.extend(self._extract_f4m_formats( hds_url, item_id, f4m_id='hds', fatal=False)) self._sort_formats(formats) info_dict['formats'] = formats return info_dict class RteIE(RteBaseIE): IE_NAME = 'rte' IE_DESC = 'Raidió Teilifís Éireann TV' _VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)' _TEST = { 'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/', 'md5': '4a76eb3396d98f697e6e8110563d2604', 'info_dict': { 'id': '10478715', 'ext': 'mp4', 'title': 'iWitness', 'thumbnail': r're:^https?://.*\.jpg$', 'description': 'The spirit of Ireland, one voice and one minute at a time.', 'duration': 60.046, 'upload_date': '20151012', 'timestamp': 1444694160, }, } class RteRadioIE(RteBaseIE): IE_NAME = 'rte:radio' IE_DESC = 'Raidió Teilifís Éireann radio' # Radioplayer URLs have two distinct specifier formats, # the old format #!rii=<channel_id>:<id>:<playable_item_id>:<date>: # the new format #!rii=b<channel_id>_<id>_<playable_item_id>_<date>_ # where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated. # An <id> uniquely defines an individual recording, and is the only part we require. 
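    # A standalone sketch of how the specifier part of the pattern below picks
    # out that <id> for both styles (sample URLs taken from the tests further
    # down; the host/path part is elided with '...'):
    #
    #     import re
    #     rii = r'#!rii=(?:b?[0-9]*)(?:%3A|:|%5F|_)(?P<id>[0-9]+)'
    #     re.search(rii, '...rteradioweb.html#!rii=16:10507902:2414:27-12-2015:').group('id')
    #     # -> '10507902'  (old style: <channel_id>:<id>:...)
    #     re.search(rii, '...rteradioweb.html#!rii=b16_3250678_8861_06-04-2012_').group('id')
    #     # -> '3250678'   (new style: b<channel_id>_<id>_...)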
_VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:b?[0-9]*)(?:%3A|:|%5F|_)(?P<id>[0-9]+)' _TESTS = [{ # Old-style player URL; HLS and RTMPE formats 'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:', 'md5': 'c79ccb2c195998440065456b69760411', 'info_dict': { 'id': '10507902', 'ext': 'mp4', 'title': 'Gloria', 'thumbnail': r're:^https?://.*\.jpg$', 'description': 'md5:9ce124a7fb41559ec68f06387cabddf0', 'timestamp': 1451203200, 'upload_date': '20151227', 'duration': 7230.0, }, }, { # New-style player URL; RTMPE formats only 'url': 'http://rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=b16_3250678_8861_06-04-2012_', 'info_dict': { 'id': '3250678', 'ext': 'flv', 'title': 'The Lyric Concert with Paul Herriott', 'thumbnail': r're:^https?://.*\.jpg$', 'description': '', 'timestamp': 1333742400, 'upload_date': '20120406', 'duration': 7199.016, }, 'params': { # rtmp download 'skip_download': True, }, }] ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/rtl2.py����������������������������������������������������������0000664�0000000�0000000�00000015770�14277552437�0020125�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import re from .common import InfoExtractor from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 from ..compat import ( compat_b64decode, compat_str, ) from ..utils import ( ExtractorError, int_or_none, strip_or_none, ) class RTL2IE(InfoExtractor): IE_NAME = 'rtl2' _VALID_URL = r'https?://(?:www\.)?rtl2\.de/sendung/[^/]+/(?:video/(?P<vico_id>\d+)[^/]+/(?P<vivi_id>\d+)-|folge/)(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0', 'info_dict': { 'id': 'folge-203-0', 'ext': 'f4v', 'title': 'GRIP sucht den Sommerkönig', 'description': 'md5:e3adbb940fd3c6e76fa341b8748b562f' }, 'params': { # rtmp download 'skip_download': True, }, 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'], }, { 'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/', 'info_dict': { 'id': 'anna-erwischt-alex', 'ext': 'mp4', 'title': 'Anna erwischt Alex!', 'description': 'Anna nimmt ihrem Vater nicht ab, dass er nicht spielt. Und tatsächlich erwischt sie ihn auf frischer Tat.' 
}, 'params': { # rtmp download 'skip_download': True, }, 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'], }] def _real_extract(self, url): vico_id, vivi_id, display_id = self._match_valid_url(url).groups() if not vico_id: webpage = self._download_webpage(url, display_id) mobj = re.search( r'data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"', webpage) if mobj: vico_id = mobj.group('vico_id') vivi_id = mobj.group('vivi_id') else: vico_id = self._html_search_regex( r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id') vivi_id = self._html_search_regex( r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id') info = self._download_json( 'https://service.rtl2.de/api-player-vipo/video.php', display_id, query={ 'vico_id': vico_id, 'vivi_id': vivi_id, }) video_info = info['video'] title = video_info['titel'] formats = [] rtmp_url = video_info.get('streamurl') if rtmp_url: rtmp_url = rtmp_url.replace('\\', '') stream_url = 'mp4:' + self._html_search_regex(r'/ondemand/(.+)', rtmp_url, 'stream URL') rtmp_conn = ['S:connect', 'O:1', 'NS:pageUrl:' + url, 'NB:fpad:0', 'NN:videoFunction:1', 'O:0'] formats.append({ 'format_id': 'rtmp', 'url': rtmp_url, 'play_path': stream_url, 'player_url': 'https://www.rtl2.de/sites/default/modules/rtl2/jwplayer/jwplayer-7.6.0/jwplayer.flash.swf', 'page_url': url, 'flash_version': 'LNX 11,2,202,429', 'rtmp_conn': rtmp_conn, 'no_resume': True, 'quality': 1, }) m3u8_url = video_info.get('streamurl_hls') if m3u8_url: formats.extend(self._extract_akamai_formats(m3u8_url, display_id)) self._sort_formats(formats) return { 'id': display_id, 'title': title, 'thumbnail': video_info.get('image'), 'description': video_info.get('beschreibung'), 'duration': int_or_none(video_info.get('duration')), 'formats': formats, } class RTL2YouBaseIE(InfoExtractor): _BACKWERK_BASE_URL = 'https://p-you-backwerk.rtl2apps.de/' class RTL2YouIE(RTL2YouBaseIE): IE_NAME = 'rtl2:you' _VALID_URL = r'http?://you\.rtl2\.de/(?:video/\d+/|youplayer/index\.html\?.*?\bvid=)(?P<id>\d+)' _TESTS = [{ 'url': 'http://you.rtl2.de/video/3002/15740/MJUNIK%20%E2%80%93%20Home%20of%20YOU/307-hirn-wo-bist-du', 'info_dict': { 'id': '15740', 'ext': 'mp4', 'title': 'MJUNIK – Home of YOU - #307 Hirn, wo bist du?!', 'description': 'md5:ddaa95c61b372b12b66e115b2772fe01', 'age_limit': 12, }, }, { 'url': 'http://you.rtl2.de/youplayer/index.html?vid=15712', 'only_matching': True, }] _AES_KEY = b'\xe9W\xe4.<*\xb8\x1a\xd2\xb6\x92\xf3C\xd3\xefL\x1b\x03*\xbbbH\xc0\x03\xffo\xc2\xf2(\xaa\xaa!' 
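    # How the key above is used (a minimal sketch mirroring the decryption in
    # _real_extract below, with the same helpers this file already imports):
    # the API's 'streamUrl' field is base64 of '<b64 ciphertext>:<b64 iv>';
    # AES-CBC decryption plus PKCS#7 unpadding yields the manifest URL:
    #
    #     import base64
    #     from yt_dlp.aes import aes_cbc_decrypt_bytes, unpad_pkcs7
    #
    #     def decrypt_stream_url(stream_url_field, key=_AES_KEY):
    #         data, iv = base64.b64decode(stream_url_field).decode().split(':')
    #         return unpad_pkcs7(aes_cbc_decrypt_bytes(
    #             base64.b64decode(data), key, base64.b64decode(iv))).decode()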
_GEO_COUNTRIES = ['DE'] def _real_extract(self, url): video_id = self._match_id(url) stream_data = self._download_json( self._BACKWERK_BASE_URL + 'stream/video/' + video_id, video_id) data, iv = compat_b64decode(stream_data['streamUrl']).decode().split(':') stream_url = unpad_pkcs7(aes_cbc_decrypt_bytes( compat_b64decode(data), self._AES_KEY, compat_b64decode(iv))) if b'rtl2_you_video_not_found' in stream_url: raise ExtractorError('video not found', expected=True) formats = self._extract_m3u8_formats(stream_url.decode(), video_id, 'mp4', 'm3u8_native') self._sort_formats(formats) video_data = self._download_json( self._BACKWERK_BASE_URL + 'video/' + video_id, video_id) series = video_data.get('formatTitle') title = episode = video_data.get('title') or series if series and series != title: title = '%s - %s' % (series, title) return { 'id': video_id, 'title': title, 'formats': formats, 'description': strip_or_none(video_data.get('description')), 'thumbnail': video_data.get('image'), 'duration': int_or_none(stream_data.get('duration') or video_data.get('duration'), 1000), 'series': series, 'episode': episode, 'age_limit': int_or_none(video_data.get('minimumAge')), } class RTL2YouSeriesIE(RTL2YouBaseIE): IE_NAME = 'rtl2:you:series' _VALID_URL = r'http?://you\.rtl2\.de/videos/(?P<id>\d+)' _TEST = { 'url': 'http://you.rtl2.de/videos/115/dragon-ball', 'info_dict': { 'id': '115', }, 'playlist_mincount': 5, } def _real_extract(self, url): series_id = self._match_id(url) stream_data = self._download_json( self._BACKWERK_BASE_URL + 'videos', series_id, query={ 'formatId': series_id, 'limit': 1000000000, }) entries = [] for video in stream_data.get('videos', []): video_id = compat_str(video['videoId']) if not video_id: continue entries.append(self.url_result( 'http://you.rtl2.de/video/%s/%s' % (series_id, video_id), 'RTL2You', video_id)) return self.playlist_result(entries, series_id) ��������yt-dlp-2022.08.19/yt_dlp/extractor/rtlnl.py���������������������������������������������������������0000664�0000000�0000000�00000027642�14277552437�0020376�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import ( int_or_none, parse_duration, ) class RtlNlIE(InfoExtractor): IE_NAME = 'rtl.nl' IE_DESC = 'rtl.nl and rtlxl.nl' _EMBED_REGEX = [r'<iframe[^>]+?\bsrc=(?P<q1>[\'"])(?P<url>(?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)(?P=q1)'] _VALID_URL = r'''(?x) https?://(?:(?:www|static)\.)? 
(?: rtlxl\.nl/(?:[^\#]*\#!|programma)/[^/]+/| rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/)| embed\.rtl\.nl/\#uuid= ) (?P<id>[0-9a-f-]+)''' _TESTS = [{ # new URL schema 'url': 'https://www.rtlxl.nl/programma/rtl-nieuws/0bd1384d-d970-3086-98bb-5c104e10c26f', 'md5': '490428f1187b60d714f34e1f2e3af0b6', 'info_dict': { 'id': '0bd1384d-d970-3086-98bb-5c104e10c26f', 'ext': 'mp4', 'title': 'RTL Nieuws', 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', 'timestamp': 1593293400, 'upload_date': '20200627', 'duration': 661.08, }, }, { # old URL schema 'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416', 'md5': '473d1946c1fdd050b2c0161a4b13c373', 'info_dict': { 'id': '82b1aad1-4a14-3d7b-b554-b0aed1b2c416', 'ext': 'mp4', 'title': 'RTL Nieuws', 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', 'timestamp': 1461951000, 'upload_date': '20160429', 'duration': 1167.96, }, 'skip': '404', }, { # best format available a3t 'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false', 'md5': 'dea7474214af1271d91ef332fb8be7ea', 'info_dict': { 'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed', 'ext': 'mp4', 'timestamp': 1424039400, 'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag', 'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$', 'upload_date': '20150215', 'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.', } }, { # empty synopsis and missing episodes (see https://github.com/ytdl-org/youtube-dl/issues/6275) # best format available nettv 'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false', 'info_dict': { 'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a', 'ext': 'mp4', 'title': 'RTL Nieuws - Meer beelden van overval juwelier', 'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$', 'timestamp': 1437233400, 'upload_date': '20150718', 'duration': 30.474, }, 'params': { 'skip_download': True, }, }, { # encrypted m3u8 streams, georestricted 'url': 'http://www.rtlxl.nl/#!/afl-2-257632/52a74543-c504-4cde-8aa8-ec66fe8d68a7', 'only_matching': True, }, { 'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0', 'only_matching': True, }, { 'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f', 'only_matching': True, }, { 'url': 'https://www.rtl.nl/video/c603c9c2-601d-4b5e-8175-64f1e942dc7d/', 'only_matching': True, }, { 'url': 'https://static.rtl.nl/embed/?uuid=1a2970fc-5c0b-43ff-9fdc-927e39e6d1bc&autoplay=false&publicatiepunt=rtlnieuwsnl', 'only_matching': True, }, { # new embed URL schema 'url': 'https://embed.rtl.nl/#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false', 'only_matching': True, }] def _real_extract(self, url): uuid = self._match_id(url) info = self._download_json( 'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=adaptive/' % uuid, uuid) material = info['material'][0] title = info['abstracts'][0]['name'] subtitle = material.get('title') if subtitle: title += ' - %s' % subtitle description = 
material.get('synopsis')
        meta = info.get('meta', {})
        videopath = material['videopath']
        m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
        formats = self._extract_m3u8_formats(
            m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
        self._sort_formats(formats)

        thumbnails = []
        for p in ('poster_base_url', 'thumb_base_url'):
            if not meta.get(p):
                continue
            thumbnails.append({
                'url': self._proto_relative_url(meta[p] + uuid),
                'width': int_or_none(self._search_regex(
                    r'/sz=([0-9]+)', meta[p], 'thumbnail width', fatal=False)),
                'height': int_or_none(self._search_regex(
                    r'/sz=[0-9]+x([0-9]+)', meta[p], 'thumbnail height', fatal=False))
            })

        return {
            'id': uuid,
            'title': title,
            'formats': formats,
            'timestamp': material['original_date'],
            'description': description,
            'duration': parse_duration(material.get('duration')),
            'thumbnails': thumbnails,
        }


class RTLLuBaseIE(InfoExtractor):
    _MEDIA_REGEX = {
        'video': r'<rtl-player\s[^>]*\bhls\s*=\s*"([^"]+)',
        'audio': r'<rtl-audioplayer\s[^>]*\bsrc\s*=\s*"([^"]+)',
        'thumbnail': r'<rtl-player\s[^>]*\bposter\s*=\s*"([^"]+)',
    }

    def get_media_url(self, webpage, video_id, media_type):
        return self._search_regex(self._MEDIA_REGEX[media_type], webpage, f'{media_type} url', default=None)

    def get_formats_and_subtitles(self, webpage, video_id):
        video_url, audio_url = self.get_media_url(webpage, video_id, 'video'), self.get_media_url(webpage, video_id, 'audio')

        formats, subtitles = [], {}
        if video_url is not None:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id)
        if audio_url is not None:
            formats.append({'url': audio_url, 'ext': 'mp3', 'vcodec': 'none'})

        return formats, subtitles

    def _real_extract(self, url):
        video_id = self._match_id(url)
        is_live = video_id in ('live', 'live-2', 'lauschteren')

        # TODO: extract comment from https://www.rtl.lu/comments?status=1&order=desc&context=news|article|<video_id>
        # we can get the context from <rtl-comments context=<context>> in webpage
        webpage = self._download_webpage(url, video_id)

        formats, subtitles = self.get_formats_and_subtitles(webpage, video_id)
        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage, default=None),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnail': self.get_media_url(webpage, video_id, 'thumbnail') or self._og_search_thumbnail(webpage, default=None),
            'is_live': is_live,
        }


class RTLLuTeleVODIE(RTLLuBaseIE):
    IE_NAME = 'rtl.lu:tele-vod'
    _VALID_URL = r'https?://(?:www\.)?rtl\.lu/(tele/(?P<slug>[\w-]+)/v/|video/)(?P<id>\d+)(\.html)?'
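    # Quick standalone check of the pattern above against both accepted URL
    # shapes (sample URLs from the tests below):
    #
    #     import re
    #     p = r'https?://(?:www\.)?rtl\.lu/(tele/(?P<slug>[\w-]+)/v/|video/)(?P<id>\d+)(\.html)?'
    #     re.match(p, 'https://www.rtl.lu/tele/de-journal-vun-der-tele/v/3266757.html').group('id')
    #     # -> '3266757'
    #     re.match(p, 'https://www.rtl.lu/video/3295215').group('id')
    #     # -> '3295215'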
_TESTS = [{ 'url': 'https://www.rtl.lu/tele/de-journal-vun-der-tele/v/3266757.html', 'info_dict': { 'id': '3266757', 'title': 'Informatiounsversammlung Héichwaasser', 'ext': 'mp4', 'thumbnail': 'https://replay-assets.rtl.lu/2021/11/16/d3647fc4-470d-11ec-adc2-3a00abd6e90f_00008.jpg', 'description': 'md5:b1db974408cc858c9fd241812e4a2a14', } }, { 'url': 'https://www.rtl.lu/video/3295215', 'info_dict': { 'id': '3295215', 'title': 'Kulturassisen iwwer d\'Bestandsopnam vum Lëtzebuerger Konscht', 'ext': 'mp4', 'thumbnail': 'https://replay-assets.rtl.lu/2022/06/28/0000_3295215_0000.jpg', 'description': 'md5:85bcd4e0490aa6ec969d9bf16927437b', } }] class RTLLuArticleIE(RTLLuBaseIE): IE_NAME = 'rtl.lu:article' _VALID_URL = r'https?://(?:(www|5minutes|today)\.)rtl\.lu/(?:[\w-]+)/(?:[\w-]+)/a/(?P<id>\d+)\.html' _TESTS = [{ # Audio-only 'url': 'https://www.rtl.lu/sport/news/a/1934360.html', 'info_dict': { 'id': '1934360', 'ext': 'mp3', 'thumbnail': 'https://static.rtl.lu/rtl2008.lu/nt/p/2022/06/28/19/e4b37d66ddf00bab4c45617b91a5bb9b.jpeg', 'description': 'md5:5eab4a2a911c1fff7efc1682a38f9ef7', 'title': 'md5:40aa85f135578fbd549d3c9370321f99', } }, { # 5minutes 'url': 'https://5minutes.rtl.lu/espace-frontaliers/frontaliers-en-questions/a/1853173.html', 'info_dict': { 'id': '1853173', 'ext': 'mp4', 'description': 'md5:ac031da0740e997a5cf4633173634fee', 'title': 'md5:87e17722ed21af0f24be3243f4ec0c46', 'thumbnail': 'https://replay-assets.rtl.lu/2022/01/26/screenshot_20220126104933_3274749_12b249833469b0d6e4440a1dec83cdfa.jpg', } }, { # today.lu 'url': 'https://today.rtl.lu/entertainment/news/a/1936203.html', 'info_dict': { 'id': '1936203', 'ext': 'mp4', 'title': 'Once Upon A Time...zu Lëtzebuerg: The Three Witches\' Tower', 'description': 'The witchy theme continues in the latest episode of Once Upon A Time...', 'thumbnail': 'https://replay-assets.rtl.lu/2022/07/02/screenshot_20220702122859_3290019_412dc5185951b7f6545a4039c8be9235.jpg', } }] class RTLLuLiveIE(RTLLuBaseIE): _VALID_URL = r'https?://www\.rtl\.lu/(?:tele|radio)/(?P<id>live(?:-\d+)?|lauschteren)' _TESTS = [{ # Tele:live 'url': 'https://www.rtl.lu/tele/live', 'info_dict': { 'id': 'live', 'ext': 'mp4', 'live_status': 'is_live', 'title': r're:RTL - Télé LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}', 'thumbnail': 'https://static.rtl.lu/livestream/channel1.jpg', } }, { # Tele:live-2 'url': 'https://www.rtl.lu/tele/live-2', 'info_dict': { 'id': 'live-2', 'ext': 'mp4', 'live_status': 'is_live', 'title': r're:RTL - Télé LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}', 'thumbnail': 'https://static.rtl.lu/livestream/channel2.jpg', } }, { # Radio:lauschteren 'url': 'https://www.rtl.lu/radio/lauschteren', 'info_dict': { 'id': 'lauschteren', 'ext': 'mp4', 'live_status': 'is_live', 'title': r're:RTL - Radio LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}', 'thumbnail': 'https://static.rtl.lu/livestream/rtlradiowebtv.jpg', } }] class RTLLuRadioIE(RTLLuBaseIE): _VALID_URL = r'https?://www\.rtl\.lu/radio/(?:[\w-]+)/s/(?P<id>\d+)(\.html)?' 
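    # All RTLLu* extractors above inherit RTLLuBaseIE's scraping: the page
    # embeds <rtl-player hls="..." poster="..."> or <rtl-audioplayer src="...">
    # tags, so one regex per media type is enough. A standalone sketch with
    # invented markup:
    #
    #     import re
    #     sample = '<rtl-player poster="https://e.invalid/p.jpg" hls="https://e.invalid/x.m3u8">'
    #     re.search(r'<rtl-player\s[^>]*\bhls\s*=\s*"([^"]+)', sample).group(1)
    #     # -> 'https://e.invalid/x.m3u8'
    #     re.search(r'<rtl-player\s[^>]*\bposter\s*=\s*"([^"]+)', sample).group(1)
    #     # -> 'https://e.invalid/p.jpg'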
_TESTS = [{ 'url': 'https://www.rtl.lu/radio/5-vir-12/s/4033058.html', 'info_dict': { 'id': '4033058', 'ext': 'mp3', 'description': 'md5:f855a4f3e3235393ae47ed1db5d934b9', 'title': '5 vir 12 - Stau um Stau', 'thumbnail': 'https://static.rtl.lu/rtlg//2022/06/24/c9c19e5694a14be46a3647a3760e1f62.jpg', } }] ����������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/rtnews.py��������������������������������������������������������0000664�0000000�0000000�00000016735�14277552437�0020566�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import re from .common import InfoExtractor from ..utils import js_to_json class RTNewsIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?rt\.com/[^/]+/(?:[^/]+/)?(?P<id>\d+)' _TESTS = [{ 'url': 'https://www.rt.com/sport/546301-djokovic-arrives-belgrade-crowds/', 'playlist_mincount': 2, 'info_dict': { 'id': '546301', 'title': 'Crowds gather to greet deported Djokovic as he returns to Serbia (VIDEO)', 'description': 'md5:1d5bfe1a988d81fd74227cfdf93d314d', 'thumbnail': 'https://cdni.rt.com/files/2022.01/article/61e587a085f540102c3386c1.png' }, }, { 'url': 'https://www.rt.com/shows/in-question/535980-plot-to-assassinate-julian-assange/', 'playlist_mincount': 1, 'info_dict': { 'id': '535980', 'title': 'The plot to assassinate Julian Assange', 'description': 'md5:55279ce5e4441dc1d16e2e4a730152cd', 'thumbnail': 'https://cdni.rt.com/files/2021.09/article/615226f42030274e8879b53d.png' }, 'playlist': [{ 'info_dict': { 'id': '6152271d85f5400464496162', 'ext': 'mp4', 'title': '6152271d85f5400464496162', }, }] }] def _entries(self, webpage): video_urls = set(re.findall(r'https://cdnv\.rt\.com/.*[a-f0-9]+\.mp4', webpage)) for v_url in video_urls: v_id = re.search(r'([a-f0-9]+)\.mp4', v_url).group(1) if v_id: yield { 'id': v_id, 'title': v_id, 'url': v_url, } def _real_extract(self, url): id = self._match_id(url) webpage = self._download_webpage(url, id) return { '_type': 'playlist', 'id': id, 'entries': self._entries(webpage), 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), } class RTDocumentryIE(InfoExtractor): _VALID_URL = r'https?://rtd\.rt\.com/(?:(?:series|shows)/[^/]+|films)/(?P<id>[^/?$&#]+)' _TESTS = [{ 'url': 'https://rtd.rt.com/films/escobars-hitman/', 'info_dict': { 'id': 'escobars-hitman', 'ext': 'mp4', 'title': "Escobar's Hitman. Former drug-gang killer, now loved and loathed in Colombia", 'description': 'md5:647c76984b7cb9a8b52a567e87448d88', 'thumbnail': 'https://cdni.rt.com/rtd-files/films/escobars-hitman/escobars-hitman_11.jpg', 'average_rating': 8.53, 'duration': 3134.0 }, 'params': {'skip_download': True} }, { 'url': 'https://rtd.rt.com/shows/the-kalashnikova-show-military-secrets-anna-knishenko/iskander-tactical-system-natos-headache/', 'info_dict': { 'id': 'iskander-tactical-system-natos-headache', 'ext': 'mp4', 'title': "Iskander tactical system. NATO's headache | The Kalashnikova Show. 
Episode 10", 'description': 'md5:da7c24a0aa67bc2bb88c86658508ca87', 'thumbnail': 'md5:89de8ce38c710b7c501ff02d47e2aa89', 'average_rating': 9.27, 'duration': 274.0, 'timestamp': 1605726000, 'view_count': int, 'upload_date': '20201118' }, 'params': {'skip_download': True} }, { 'url': 'https://rtd.rt.com/series/i-am-hacked-trailer/introduction-to-safe-digital-life-ep2/', 'info_dict': { 'id': 'introduction-to-safe-digital-life-ep2', 'ext': 'mp4', 'title': 'How to Keep your Money away from Hackers | I am Hacked. Episode 2', 'description': 'md5:c46fa9a5af86c0008c45a3940a8cce87', 'thumbnail': 'md5:a5e81b9bf5aed8f5e23d9c053601b825', 'average_rating': 10.0, 'duration': 1524.0, 'timestamp': 1636977600, 'view_count': int, 'upload_date': '20211115' }, 'params': {'skip_download': True} }] def _real_extract(self, url): id = self._match_id(url) webpage = self._download_webpage(url, id) ld_json = self._search_json_ld(webpage, None, fatal=False) if not ld_json: self.raise_no_formats('No video/audio found at the provided url.', expected=True) media_json = self._parse_json( self._search_regex(r'(?s)\'Med\'\s*:\s*\[\s*({.+})\s*\]\s*};', webpage, 'media info'), id, transform_source=js_to_json) if 'title' not in ld_json and 'title' in media_json: ld_json['title'] = media_json['title'] formats = [{'url': src['file']} for src in media_json.get('sources') or [] if src.get('file')] return { 'id': id, 'thumbnail': media_json.get('image'), 'formats': formats, **ld_json } class RTDocumentryPlaylistIE(InfoExtractor): _VALID_URL = r'https?://rtd\.rt\.com/(?:series|shows)/(?P<id>[^/]+)/$' _TESTS = [{ 'url': 'https://rtd.rt.com/series/i-am-hacked-trailer/', 'playlist_mincount': 6, 'info_dict': { 'id': 'i-am-hacked-trailer', }, }, { 'url': 'https://rtd.rt.com/shows/the-kalashnikova-show-military-secrets-anna-knishenko/', 'playlist_mincount': 34, 'info_dict': { 'id': 'the-kalashnikova-show-military-secrets-anna-knishenko', }, }] def _entries(self, webpage, id): video_urls = set(re.findall(r'list-2__link\s*"\s*href="([^"]+)"', webpage)) for v_url in video_urls: if id not in v_url: continue yield self.url_result( 'https://rtd.rt.com%s' % v_url, ie=RTDocumentryIE.ie_key()) def _real_extract(self, url): id = self._match_id(url) webpage = self._download_webpage(url, id) return { '_type': 'playlist', 'id': id, 'entries': self._entries(webpage, id), } class RuptlyIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ruptly\.tv/[a-z]{2}/videos/(?P<id>\d+-\d+)' _TESTS = [{ 'url': 'https://www.ruptly.tv/en/videos/20220112-020-Japan-Double-trouble-Tokyo-zoo-presents-adorable-panda-twins', 'info_dict': { 'id': '20220112-020', 'ext': 'mp4', 'title': 'Japan: Double trouble! 
Tokyo zoo presents adorable panda twins | Video Ruptly', 'description': 'md5:85a8da5fdb31486f0562daf4360ce75a', 'thumbnail': 'https://storage.ruptly.tv/thumbnails/20220112-020/i6JQKnTNpYuqaXsR/i6JQKnTNpYuqaXsR.jpg' }, 'params': {'skip_download': True} }] def _real_extract(self, url): id = self._match_id(url) webpage = self._download_webpage(url, id) m3u8_url = self._search_regex(r'preview_url"\s?:\s?"(https?://storage\.ruptly\.tv/video_projects/.+\.m3u8)"', webpage, 'm3u8 url', fatal=False) if not m3u8_url: self.raise_no_formats('No video/audio found at the provided url.', expected=True) formats, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, id, ext='mp4') return { 'id': id, 'formats': formats, 'subtitles': subs, 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), } �����������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/rtp.py�����������������������������������������������������������0000664�0000000�0000000�00000006453�14277552437�0020045�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import js_to_json import re import json import urllib.parse import base64 class RTPIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?' _TESTS = [{ 'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas', 'md5': 'e736ce0c665e459ddb818546220b4ef8', 'info_dict': { 'id': 'e174042', 'ext': 'mp3', 'title': 'Paixões Cruzadas', 'description': 'As paixões musicais de António Cartaxo e António Macedo', 'thumbnail': r're:^https?://.*\.jpg', }, }, { 'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas', 'only_matching': True, }] _RX_OBFUSCATION = re.compile(r'''(?xs) atob\s*\(\s*decodeURIComponent\s*\(\s* (\[[0-9A-Za-z%,'"]*\]) \s*\.\s*join\(\s*(?:""|'')\s*\)\s*\)\s*\) ''') def __unobfuscate(self, data, *, video_id): if data.startswith('{'): data = self._RX_OBFUSCATION.sub( lambda m: json.dumps( base64.b64decode(urllib.parse.unquote( ''.join(self._parse_json(m.group(1), video_id)) )).decode('iso-8859-1')), data) return js_to_json(data) def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) title = self._html_search_meta( 'twitter:title', webpage, display_name='title', fatal=True) f, config = self._search_regex( r'''(?sx) var\s+f\s*=\s*(?P<f>".*?"|{[^;]+?});\s* var\s+player1\s+=\s+new\s+RTPPlayer\s*\((?P<config>{(?:(?!\*/).)+?})\);(?!\s*\*/) ''', webpage, 'player config', group=('f', 'config')) f = self._parse_json( f, video_id, lambda data: self.__unobfuscate(data, video_id=video_id)) config = self._parse_json( config, video_id, lambda data: self.__unobfuscate(data, video_id=video_id)) formats = [] if isinstance(f, dict): f_hls = f.get('hls') if f_hls is not None: formats.extend(self._extract_m3u8_formats( f_hls, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')) f_dash = f.get('dash') if f_dash is not None: formats.extend(self._extract_mpd_formats(f_dash, video_id, mpd_id='dash')) else: formats.append({ 'format_id': 'f', 'url': f, 'vcodec': 'none' if config.get('mediaType') == 'audio' else None, }) subtitles = {} vtt = config.get('vtt') if 
vtt is not None:
            for lcode, lname, url in vtt:
                subtitles.setdefault(lcode, []).append({
                    'name': lname,
                    'url': url,
                })

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': self._html_search_meta(['description', 'twitter:description'], webpage),
            'thumbnail': config.get('poster') or self._og_search_thumbnail(webpage),
            'subtitles': subtitles,
        }


# yt_dlp/extractor/rtrfm.py

from .common import InfoExtractor


class RTRFMIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?rtrfm\.com\.au/(?:shows|show-episode)/(?P<id>[^/?\#&]+)'
    _TESTS = [
        {
            'url': 'https://rtrfm.com.au/shows/breakfast/',
            'md5': '46168394d3a5ce237cf47e85d0745413',
            'info_dict': {
                'id': 'breakfast-2021-11-16',
                'ext': 'mp3',
                'series': 'Breakfast with Taylah',
                'title': r're:^Breakfast with Taylah \d{4}-\d{2}-\d{2}$',
                'description': 'md5:0979c3ab1febfbec3f1ccb743633c611',
            },
            'skip': 'ID and md5 changes daily',
        },
        {
            'url': 'https://rtrfm.com.au/show-episode/breakfast-2021-11-11/',
            'md5': '396bedf1e40f96c62b30d4999202a790',
            'info_dict': {
                'id': 'breakfast-2021-11-11',
                'ext': 'mp3',
                'series': 'Breakfast with Taylah',
                'title': 'Breakfast with Taylah 2021-11-11',
                'description': 'md5:0979c3ab1febfbec3f1ccb743633c611',
            },
        },
        {
            'url': 'https://rtrfm.com.au/show-episode/breakfast-2020-06-01/',
            'md5': '594027f513ec36a24b15d65007a24dff',
            'info_dict': {
                'id': 'breakfast-2020-06-01',
                'ext': 'mp3',
                'series': 'Breakfast with Taylah',
                'title': 'Breakfast with Taylah 2020-06-01',
                'description': r're:^Breakfast with Taylah ',
            },
            'skip': 'This audio has expired',
        },
    ]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        show, date, title = self._search_regex(
            r'''\.playShow(?:From)?\(['"](?P<show>[^'"]+)['"],\s*['"](?P<date>[0-9]{4}-[0-9]{2}-[0-9]{2})['"],\s*['"](?P<title>[^'"]+)['"]''',
            webpage, 'details', group=('show', 'date', 'title'))
        url = self._download_json(
            'https://restreams.rtrfm.com.au/rzz',
            show, 'Downloading MP3 URL', query={'n': show, 'd': date})['u']
        # This is the only indicator of an error until trying to download the URL and
        # downloads of mp4 URLs always fail (403 for current episodes, 404 for missing).
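        # For reference: the request above amounts to a plain GET such as
        #   https://restreams.rtrfm.com.au/rzz?n=breakfast&d=2021-11-11
        # and the response is JSON along the lines of {"u": "<stream URL>"};
        # only the 'u' field is used here. An .mp4 URL in 'u' is the site's
        # error marker, hence the check below.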
if '.mp4' in url: url = None self.raise_no_formats('Expired or no episode on this date', expected=True) return { 'id': '%s-%s' % (show, date), 'title': '%s %s' % (title, date), 'series': title, 'url': url, 'release_date': date, 'description': self._og_search_description(webpage), } ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2022.08.19/yt_dlp/extractor/rts.py�����������������������������������������������������������0000664�0000000�0000000�00000022451�14277552437�0020044�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import re from .srgssr import SRGSSRIE from ..compat import compat_str from ..utils import ( determine_ext, int_or_none, parse_duration, parse_iso8601, unescapeHTML, urljoin, ) class RTSIE(SRGSSRIE): IE_DESC = 'RTS.ch' _VALID_URL = r'rts:(?P<rts_id>\d+)|https?://(?:.+?\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html' _TESTS = [ { 'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html', 'md5': '753b877968ad8afaeddccc374d4256a5', 'info_dict': { 'id': '3449373', 'display_id': 'les-enfants-terribles', 'ext': 'mp4', 'duration': 1488, 'title': 'Les Enfants Terribles', 'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.', 'uploader': 'Divers', 'upload_date': '19680921', 'timestamp': -40280400, 'thumbnail': r're:^https?://.*\.image', 'view_count': int, }, 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'], }, { 'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html', 'info_dict': { 'id': '5624065', 'title': 'Passe-moi les jumelles', }, 'playlist_mincount': 4, }, { 'url': 'http://www.rts.ch/video/sport/hockey/5745975-1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski.html', 'info_dict': { 'id': '5745975', 'display_id': '1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski', 'ext': 'mp4', 'duration': 48, 'title': '1/2, Kloten - Fribourg (5-2): second but pour Gottéron par Kwiatowski', 'description': 'Hockey - Playoff', 'uploader': 'Hockey', 'upload_date': '20140403', 'timestamp': 1396556882, 'thumbnail': r're:^https?://.*\.image', 'view_count': int, }, 'params': { # m3u8 download 'skip_download': True, }, 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'], 'skip': 'Blocked outside Switzerland', }, { 'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html', 'md5': '9bb06503773c07ce83d3cbd793cebb91', 'info_dict': { 'id': '5745356', 'display_id': 'londres-cachee-par-un-epais-smog', 'ext': 'mp4', 'duration': 33, 'title': 'Londres cachée par un épais smog', 'description': 'Un important voile de smog recouvre Londres depuis mercredi, provoqué par la pollution et du sable du Sahara.', 'uploader': 'L\'actu en vidéo', 'upload_date': '20140403', 'timestamp': 1396537322, 'thumbnail': r're:^https?://.*\.image', 'view_count': int, }, 'expected_warnings': 
['Unable to download f4m manifest', 'Failed to download m3u8 information'], }, { 'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html', 'md5': 'dd8ef6a22dff163d063e2a52bc8adcae', 'info_dict': { 'id': '5706148', 'display_id': 'urban-hippie-de-damien-krisl-03-04-2014', 'ext': 'mp3', 'duration': 123, 'title': '"Urban Hippie", de Damien Krisl', 'description': 'Des Hippies super glam.', 'upload_date': '20140403', 'timestamp': 1396551600, }, }, { # article with videos on rhs 'url': 'http://www.rts.ch/sport/hockey/6693917-hockey-davos-decroche-son-31e-titre-de-champion-de-suisse.html', 'info_dict': { 'id': '6693917', 'title': 'Hockey: Davos décroche son 31e titre de champion de Suisse', }, 'playlist_mincount': 5, }, { 'url': 'http://pages.rts.ch/emissions/passe-moi-les-jumelles/5624065-entre-ciel-et-mer.html', 'only_matching': True, } ] def _real_extract(self, url): m = self._match_valid_url(url) media_id = m.group('rts_id') or m.group('id') display_id = m.group('display_id') or media_id def download_json(internal_id): return self._download_json( 'http://www.rts.ch/a/%s.html?f=json/article' % internal_id, display_id) all_info = download_json(media_id) # media_id extracted out of URL is not always a real id if 'video' not in all_info and 'audio' not in all_info: entries = [] for item in all_info.get('items', []): item_url = item.get('url') if not item_url: continue entries.append(self.url_result(item_url, 'RTS')) if not entries: page, urlh = self._download_webpage_handle(url, display_id) if re.match(self._VALID_URL, urlh.geturl()).group('id') != media_id: return self.url_result(urlh.geturl(), 'RTS') # article with videos on rhs videos = re.findall( r'<article[^>]+class="content-item"[^>]*>\s*<a[^>]+data-video-urn="urn:([^"]+)"', page) if not videos: videos = re.findall( r'(?s)<iframe[^>]+class="srg-player"[^>]+src="[^"]+urn:([^"]+)"', page) if videos: entries = [self.url_result('srgssr:%s' % video_urn, 'SRGSSR') for video_urn in videos] if entries: return self.playlist_result(entries, media_id, all_info.get('title')) internal_id = self._html_search_regex( r'<(?:video|audio) data-id="([0-9]+)"', page, 'internal video id') all_info = download_json(internal_id) media_type = 'video' if 'video' in all_info else 'audio' # check for errors self._get_media_data('rts', media_type, media_id) info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio'] title = info['title'] def extract_bitrate(url): return int_or_none(self._search_regex( r'-([0-9]+)k\.', url, 'bitrate', default=None)) formats = [] streams = info.get('streams', {}) for format_id, format_url in streams.items(): if format_id == 'hds_sd' and 'hds' in streams: continue if format_id == 'hls_sd' and 'hls' in streams: continue ext = determine_ext(format_url) if ext in ('m3u8', 'f4m'): format_url = self._get_tokenized_src(format_url, media_id, format_id) if ext == 'f4m': formats.extend(self._extract_f4m_formats( format_url + ('?' if '?' 
not in format_url else '&') + 'hdcore=3.4.0', media_id, f4m_id=format_id, fatal=False)) else: formats.extend(self._extract_m3u8_formats( format_url, media_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False)) else: formats.append({ 'format_id': format_id, 'url': format_url, 'tbr': extract_bitrate(format_url), }) download_base = 'http://rtsww%s-d.rts.ch/' % ('-a' if media_type == 'audio' else '') for media in info.get('media', []): media_url = media.get('url') if not media_url or re.match(r'https?://', media_url): continue rate = media.get('rate') ext = media.get('ext') or determine_ext(media_url, 'mp4') format_id = ext if rate: format_id += '-%dk' % rate formats.append({ 'format_id': format_id, 'url': urljoin(download_base, media_url), 'tbr': rate or extract_bitrate(media_url), }) self._check_formats(formats, media_id) self._sort_formats(formats) duration = info.get('duration') or info.get('cutout') or info.get('cutduration') if isinstance(duration, compat_str): duration = parse_duration(duration) return { 'id': media_id, 'display_id': display_id, 'formats': formats, 'title': title, 'description': info.get('intro'), 'duration': duration, 'view_count': int_or_none(info.get('plays')), 'uploader': info.get('programName'), 'timestamp': parse_iso8601(info.get('broadcast_date')), 'thumbnail': unescapeHTML(info.get('preview_image_url')), }
yt-dlp-2022.08.19/yt_dlp/extractor/rtve.py
import base64 import io import struct from .common import InfoExtractor from ..compat import compat_b64decode from ..utils import ( ExtractorError, determine_ext, float_or_none, qualities, remove_end, remove_start, try_get, ) class RTVEALaCartaIE(InfoExtractor): IE_NAME = 'rtve.es:alacarta' IE_DESC = 'RTVE a la carta' _VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', 'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43', 'info_dict': { 'id': '2491869', 'ext': 'mp4', 'title': 'Balonmano - Swiss Cup masculina.
Final: España-Suecia', 'duration': 5024.566, 'series': 'Balonmano', }, 'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'], }, { 'note': 'Live stream', 'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/', 'info_dict': { 'id': '1694255', 'ext': 'mp4', 'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'is_live': True, }, 'params': { 'skip_download': 'live stream', }, }, { 'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/', 'md5': 'd850f3c8731ea53952ebab489cf81cbf', 'info_dict': { 'id': '4236788', 'ext': 'mp4', 'title': 'Servir y proteger - Capítulo 104', 'duration': 3222.0, }, 'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'], }, { 'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve', 'only_matching': True, }, { 'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/', 'only_matching': True, }] def _real_initialize(self): user_agent_b64 = base64.b64encode(self.get_param('http_headers')['User-Agent'].encode('utf-8')).decode('utf-8') self._manager = self._download_json( 'http://www.rtve.es/odin/loki/' + user_agent_b64, None, 'Fetching manager info')['manager'] @staticmethod def _decrypt_url(png): encrypted_data = io.BytesIO(compat_b64decode(png)[8:]) while True: length = struct.unpack('!I', encrypted_data.read(4))[0] chunk_type = encrypted_data.read(4) if chunk_type == b'IEND': break data = encrypted_data.read(length) if chunk_type == b'tEXt': alphabet_data, text = data.split(b'\0') quality, url_data = text.split(b'%%') alphabet = [] e = 0 d = 0 for l in alphabet_data.decode('iso-8859-1'): if d == 0: alphabet.append(l) d = e = (e + 1) % 4 else: d -= 1 url = '' f = 0 e = 3 b = 1 for letter in url_data.decode('iso-8859-1'): if f == 0: l = int(letter) * 10 f = 1 else: if e == 0: l += int(letter) url += alphabet[l] e = (b + 3) % 4 f = 0 b += 1 else: e -= 1 yield quality.decode(), url encrypted_data.read(4) # CRC def _extract_png_formats(self, video_id): png = self._download_webpage( 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id), video_id, 'Downloading url information', query={'q': 'v2'}) q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL']) formats = [] for quality, video_url in self._decrypt_url(png): ext = determine_ext(video_url) if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( video_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) elif ext == 'mpd': formats.extend(self._extract_mpd_formats( video_url, video_id, 'dash', fatal=False)) else: formats.append({ 'format_id': quality, 'quality': q(quality), 'url': video_url, }) self._sort_formats(formats) return formats def _real_extract(self, url): video_id = self._match_id(url) info = self._download_json( 'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id, video_id)['page']['items'][0] if info['state'] == 'DESPU': raise ExtractorError('The video is no longer available', expected=True) title = info['title'].strip() formats = self._extract_png_formats(video_id) subtitles = None sbt_file = info.get('sbtFile') if sbt_file: subtitles = self.extract_subtitles(video_id, sbt_file) is_live = info.get('live') is True return { 'id': video_id, 'title': title, 'formats': formats, 'thumbnail': info.get('image'), 'subtitles': subtitles, 'duration': 
float_or_none(info.get('duration'), 1000), 'is_live': is_live, 'series': info.get('programTitle'), } def _get_subtitles(self, video_id, sub_file): subs = self._download_json( sub_file + '.json', video_id, 'Downloading subtitles info')['page']['items'] return dict( (s['lang'], [{'ext': 'vtt', 'url': s['src']}]) for s in subs) class RTVEAudioIE(RTVEALaCartaIE): IE_NAME = 'rtve.es:audio' IE_DESC = 'RTVE audio' _VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/[^/]+/[^/]+/(?P<id>[0-9]+)' _TESTS = [{ 'url': 'https://www.rtve.es/alacarta/audios/a-hombros-de-gigantes/palabra-ingeniero-codigos-informaticos-27-04-21/5889192/', 'md5': 'ae06d27bff945c4e87a50f89f6ce48ce', 'info_dict': { 'id': '5889192', 'ext': 'mp3', 'title': 'Códigos informáticos', 'thumbnail': r're:https?://.+/1598856591583.jpg', 'duration': 349.440, 'series': 'A hombros de gigantes', }, }, { 'url': 'https://www.rtve.es/play/audios/en-radio-3/ignatius-farray/5791165/', 'md5': '072855ab89a9450e0ba314c717fa5ebc', 'info_dict': { 'id': '5791165', 'ext': 'mp3', 'title': 'Ignatius Farray', 'thumbnail': r're:https?://.+/1613243011863.jpg', 'duration': 3559.559, 'series': 'En Radio 3' }, }, { 'url': 'https://www.rtve.es/play/audios/frankenstein-o-el-moderno-prometeo/capitulo-26-ultimo-muerte-victor-juan-jose-plans-mary-shelley/6082623/', 'md5': '0eadab248cc8dd193fa5765712e84d5c', 'info_dict': { 'id': '6082623', 'ext': 'mp3', 'title': 'Capítulo 26 y último: La muerte de Victor', 'thumbnail': r're:https?://.+/1632147445707.jpg', 'duration': 3174.086, 'series': 'Frankenstein o el moderno Prometeo' }, }] def _extract_png_formats(self, audio_id): """ Retrieve the media-related PNG thumbnail, which obfuscates valuable information about the media. That information is decrypted via the base class's _decrypt_url function, yielding the media quality and media URL. """ png = self._download_webpage( 'http://www.rtve.es/ztnr/movil/thumbnail/%s/audios/%s.png' % (self._manager, audio_id), audio_id, 'Downloading url information', query={'q': 'v2'}) q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL']) formats = [] for quality, audio_url in self._decrypt_url(png): ext = determine_ext(audio_url) if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( audio_url, audio_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) elif ext == 'mpd': formats.extend(self._extract_mpd_formats( audio_url, audio_id, 'dash', fatal=False)) else: formats.append({ 'format_id': quality, 'quality': q(quality), 'url': audio_url, }) self._sort_formats(formats) return formats def _real_extract(self, url): audio_id = self._match_id(url) info = self._download_json( 'https://www.rtve.es/api/audios/%s.json' % audio_id, audio_id)['page']['items'][0] return { 'id': audio_id, 'title': info['title'].strip(), 'thumbnail': info.get('thumbnail'), 'duration': float_or_none(info.get('duration'), 1000), 'series': try_get(info, lambda x: x['programInfo']['title']), 'formats': self._extract_png_formats(audio_id), } class RTVEInfantilIE(RTVEALaCartaIE): IE_NAME = 'rtve.es:infantil' IE_DESC = 'RTVE infantil' _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/' _TESTS = [{ 'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/', 'md5': '5747454717aedf9f9fdf212d1bcfc48d', 'info_dict': { 'id': '3040283', 'ext': 'mp4', 'title': 'Maneras de vivir', 'thumbnail': r're:https?://.+/1426182947956\.JPG', 'duration': 357.958, }, 'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
}] class RTVELiveIE(RTVEALaCartaIE): IE_NAME = 'rtve.es:live' IE_DESC = 'RTVE.es live streams' _VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)' _TESTS = [{ 'url': 'http://www.rtve.es/directo/la-1/', 'info_dict': { 'id': 'la-1', 'ext': 'mp4', 'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', }, 'params': { 'skip_download': 'live stream', } }] def _real_extract(self, url): mobj = self._match_valid_url(url) video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es') title = remove_start(title, 'Estoy viendo ') vidplayer_id = self._search_regex( (r'playerId=player([0-9]+)', r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)', r'data-id=["\'](\d+)'), webpage, 'internal video ID') return { 'id': video_id, 'title': title, 'formats': self._extract_png_formats(vidplayer_id), 'is_live': True, } class RTVETelevisionIE(InfoExtractor): IE_NAME = 'rtve.es:television' _VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml' _TEST = { 'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml', 'info_dict': { 'id': '3069778', 'ext': 'mp4', 'title': 'Documentos TV - La revolución del móvil', 'duration': 3496.948, }, 'params': { 'skip_download': True, }, } def _real_extract(self, url): page_id = self._match_id(url) webpage = self._download_webpage(url, page_id) alacarta_url = self._search_regex( r'data-location="alacarta_videos"[^<]+url":"(http://www\.rtve\.es/alacarta.+?)&', webpage, 'alacarta url', default=None) if alacarta_url is None: raise ExtractorError( 'The webpage doesn\'t contain any video', expected=True) return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key())
yt-dlp-2022.08.19/yt_dlp/extractor/rtvnh.py
from .common import InfoExtractor from ..utils import ExtractorError class RTVNHIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?rtvnh\.nl/video/(?P<id>[0-9]+)' _TEST = { 'url': 'http://www.rtvnh.nl/video/131946', 'md5': 'cdbec9f44550763c8afc96050fa747dc', 'info_dict': { 'id': '131946', 'ext': 'mp4', 'title': 'Grote zoektocht in zee bij Zandvoort naar vermiste vrouw', 'thumbnail': r're:^https?:.*\.jpg$' } } def _real_extract(self, url): video_id = self._match_id(url) meta = self._parse_json(self._download_webpage( 'http://www.rtvnh.nl/video/json?m=' + video_id, video_id), video_id) status = meta.get('status') if status != 200: raise ExtractorError( '%s returned error code %d' % (self.IE_NAME, status), expected=True) formats = [] rtmp_formats =
self._extract_smil_formats( 'http://www.rtvnh.nl/video/smil?m=' + video_id, video_id) formats.extend(rtmp_formats) for rtmp_format in rtmp_formats: rtmp_url = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path']) rtsp_format = rtmp_format.copy() del rtsp_format['play_path'] del rtsp_format['ext'] rtsp_format.update({ 'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'), 'url': rtmp_url.replace('rtmp://', 'rtsp://'), 'protocol': 'rtsp', }) formats.append(rtsp_format) http_base_url = rtmp_url.replace('rtmp://', 'http://') formats.extend(self._extract_m3u8_formats( http_base_url + '/playlist.m3u8', video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) formats.extend(self._extract_f4m_formats( http_base_url + '/manifest.f4m', video_id, f4m_id='hds', fatal=False)) self._sort_formats(formats) return { 'id': video_id, 'title': meta['title'].strip(), 'thumbnail': meta.get('image'), 'formats': formats }
yt-dlp-2022.08.19/yt_dlp/extractor/rtvs.py
import re from .common import InfoExtractor from ..utils import ( parse_duration, traverse_obj, unified_timestamp, ) class RTVSIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?rtvs\.sk/(?:radio|televizia)/archiv(?:/\d+)?/(?P<id>\d+)/?(?:[#?]|$)' _TESTS = [{ # radio archive 'url': 'http://www.rtvs.sk/radio/archiv/11224/414872', 'md5': '134d5d6debdeddf8a5d761cbc9edacb8', 'info_dict': { 'id': '414872', 'ext': 'mp3', 'title': 'Ostrov pokladov 1 časť.mp3', 'duration': 2854, 'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0000/b1R8.rtvs.jpg', 'display_id': '135331', } }, { # tv archive 'url': 'http://www.rtvs.sk/televizia/archiv/8249/63118', 'info_dict': { 'id': '63118', 'ext': 'mp4', 'title': 'Amaro Džives - Náš deň', 'description': 'Galavečer pri príležitosti Medzinárodného dňa Rómov.', 'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0031/L7Qm.amaro_dzives_png.jpg', 'timestamp': 1428555900, 'upload_date': '20150409', 'duration': 4986, } }, { # tv archive 'url': 'https://www.rtvs.sk/televizia/archiv/18083?utm_source=web&utm_medium=rozcestnik&utm_campaign=Robin', 'info_dict': { 'id': '18083', 'ext': 'mp4', 'title': 'Robin', 'description': 'md5:2f70505a7b8364491003d65ff7a0940a', 'timestamp': 1636652760, 'display_id': '307655', 'duration': 831, 'upload_date': '20211111', 'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0916/robin.jpg', } }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) iframe_id = self._search_regex( r'<iframe[^>]+id\s*=\s*"player_[^_]+_([0-9]+)"', webpage, 'Iframe ID') iframe_url = self._search_regex( fr'<iframe[^>]+id\s*=\s*"player_[^_]+_{re.escape(iframe_id)}"[^>]+src\s*=\s*"([^"]+)"', webpage, 'Iframe URL') webpage =
self._download_webpage(iframe_url, video_id, 'Downloading iframe') json_url = self._search_regex(r'var\s+url\s*=\s*"([^"]+)"\s*\+\s*ruurl', webpage, 'json URL') data = self._download_json(f'https:{json_url}b=mozilla&p=win&v=97&f=0&d=1', video_id) if data.get('clip'): data['playlist'] = [data['clip']] if traverse_obj(data, ('playlist', 0, 'sources', 0, 'type')) == 'audio/mp3': formats = [{'url': traverse_obj(data, ('playlist', 0, 'sources', 0, 'src'))}] else: formats = self._extract_m3u8_formats(traverse_obj(data, ('playlist', 0, 'sources', 0, 'src')), video_id) self._sort_formats(formats) return { 'id': video_id, 'display_id': iframe_id, 'title': traverse_obj(data, ('playlist', 0, 'title')), 'description': traverse_obj(data, ('playlist', 0, 'description')), 'duration': parse_duration(traverse_obj(data, ('playlist', 0, 'length'))), 'thumbnail': traverse_obj(data, ('playlist', 0, 'image')), 'timestamp': unified_timestamp(traverse_obj(data, ('playlist', 0, 'datetime_create'))), 'formats': formats }
yt-dlp-2022.08.19/yt_dlp/extractor/rtvslo.py
from .common import InfoExtractor from ..utils import ( ExtractorError, parse_duration, traverse_obj, unified_timestamp, url_or_none, ) class RTVSLOIE(InfoExtractor): IE_NAME = 'rtvslo.si' _VALID_URL = r'''(?x) https?://(?: (?:365|4d)\.rtvslo.si/arhiv/[^/?#&;]+| (?:www\.)?rtvslo\.si/rtv365/arhiv )/(?P<id>\d+)''' _GEO_COUNTRIES = ['SI'] _API_BASE = 'https://api.rtvslo.si/ava/{}/{}?client_id=82013fb3a531d5414f478747c1aca622' SUB_LANGS_MAP = {'Slovenski': 'sl'} _TESTS = [ { 'url': 'https://www.rtvslo.si/rtv365/arhiv/174842550?s=tv', 'info_dict': { 'id': '174842550', 'ext': 'flv', 'release_timestamp': 1643140032, 'upload_date': '20220125', 'series': 'Dnevnik', 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/92/dnevnik_3_wide2.jpg', 'description': 'md5:76a18692757aeb8f0f51221106277dd2', 'timestamp': 1643137046, 'title': 'Dnevnik', 'series_id': '92', 'release_date': '20220125', 'duration': 1789, }, }, { 'url': 'https://365.rtvslo.si/arhiv/utrip/174843754', 'info_dict': { 'id': '174843754', 'ext': 'mp4', 'series_id': '94', 'release_date': '20220129', 'timestamp': 1643484455, 'title': 'Utrip', 'duration': 813, 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/94/utrip_1_wide2.jpg', 'description': 'md5:77f2892630c7b17bb7a5bb84319020c9', 'release_timestamp': 1643485825, 'upload_date': '20220129', 'series': 'Utrip', }, }, { 'url': 'https://365.rtvslo.si/arhiv/il-giornale-della-sera/174844609', 'info_dict': { 'id': '174844609', 'ext': 'mp3', 'series_id': '106615841', 'title': 'Il giornale della sera', 'duration': 1328, 'series': 'Il giornale della sera', 'timestamp': 1643743800, 'release_timestamp': 1643745424, 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/il-giornale-della-sera_wide2.jpg', 'upload_date': '20220201', 'tbr': 128000, 'release_date': '20220201', }, }, { 'url': 'https://4d.rtvslo.si/arhiv/dnevnik/174842550', 'only_matching': True
} ] def _real_extract(self, url): v_id = self._match_id(url) meta = self._download_json(self._API_BASE.format('getRecordingDrm', v_id), v_id)['response'] thumbs = [{'id': k, 'url': v, 'http_headers': {'Accept': 'image/jpeg'}} for k, v in (meta.get('images') or {}).items()] subs = {} for s in traverse_obj(meta, 'subs', 'subtitles', default=[]): lang = self.SUB_LANGS_MAP.get(s.get('language'), s.get('language') or 'und') subs.setdefault(lang, []).append({ 'url': s.get('file'), 'ext': traverse_obj(s, 'format', expected_type=str.lower), }) jwt = meta.get('jwt') if not jwt: raise ExtractorError('Site did not provide an authentication token, cannot proceed.') media = self._download_json(self._API_BASE.format('getMedia', v_id), v_id, query={'jwt': jwt})['response'] formats = [] adaptive_url = traverse_obj(media, ('addaptiveMedia', 'hls_sec'), expected_type=url_or_none) if adaptive_url: formats = self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=['smil']) adaptive_url = traverse_obj(media, ('addaptiveMedia_sl', 'hls_sec'), expected_type=url_or_none) if adaptive_url: for f in self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=['smil']): formats.append({ **f, 'format_id': 'sign-' + f['format_id'], 'format_note': 'Sign language interpretation', 'preference': -10, 'language': ( 'slv' if f.get('language') == 'eng' and f.get('acodec') != 'none' else f.get('language')) }) formats.extend( { 'url': f['streams'][strm], 'ext': traverse_obj(f, 'mediaType', expected_type=str.lower), 'width': f.get('width'), 'height': f.get('height'), 'tbr': f.get('bitrate'), 'filesize': f.get('filesize'), } for strm in ('http', 'https') for f in media.get('mediaFiles') or [] if traverse_obj(f, ('streams', strm)) ) if any('intermission.mp4' in x['url'] for x in formats): self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True) if any('dummy_720p.mp4' in x.get('manifest_url', '') for x in formats) and meta.get('stub') == 'error': raise ExtractorError(f'{self.IE_NAME} said: Clip not available', expected=True) self._sort_formats(formats) return { 'id': v_id, 'webpage_url': ''.join(traverse_obj(meta, ('canonical', ('domain', 'path')))), 'title': meta.get('title'), 'formats': formats, 'subtitles': subs, 'thumbnails': thumbs, 'description': meta.get('description'), 'timestamp': unified_timestamp(traverse_obj(meta, 'broadcastDate', ('broadcastDates', 0))), 'release_timestamp': unified_timestamp(meta.get('recordingDate')), 'duration': meta.get('duration') or parse_duration(meta.get('length')), 'tags': meta.get('genre'), 'series': meta.get('showName'), 'series_id': meta.get('showId'), } 
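# A minimal usage sketch: extractors such as RTVSLOIE above are normally
# driven through yt-dlp's public yt_dlp.YoutubeDL API rather than being
# instantiated directly (the URL below is taken from the _TESTS above;
# everything else is the real public API):
#
#   import yt_dlp
#
#   with yt_dlp.YoutubeDL() as ydl:
#       info = ydl.extract_info(
#           'https://365.rtvslo.si/arhiv/utrip/174843754', download=False)
#       print(info['id'], info['title'], len(info['formats']))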
yt-dlp-2022.08.19/yt_dlp/extractor/ruhd.py
from .common import InfoExtractor class RUHDIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P<id>\d+)' _TEST = { 'url': 'http://www.ruhd.ru/play.php?vid=207', 'md5': 'd1a9ec4edf8598e3fbd92bb16072ba83', 'info_dict': { 'id': '207', 'ext': 'divx', 'title': 'КОТ бааааам', 'description': 'классный кот)', 'thumbnail': r're:^http://.*\.jpg$', } } def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) video_url = self._html_search_regex( r'<param name="src" value="([^"]+)"', webpage, 'video url') title = self._html_search_regex( r'<title>([^<]+)&nbsp;&nbsp;&nbsp;&nbsp;RUHD\.ru - Видео Высокого качества №1 в России!</title>', webpage, 'title') description = self._html_search_regex(
r'(?s)<div id="longdesc">(.+?)<span id="showlink">', webpage, 'description', fatal=False) thumbnail = self._html_search_regex( r'<img src="([^"]+)" class="bigthumb"', webpage, 'thumbnail', fatal=False) return { 'id': video_id, 'url': video_url, 'title': title, 'description': description, 'thumbnail': thumbnail, }
yt-dlp-2022.08.19/yt_dlp/extractor/rule34video.py
import re from .common import InfoExtractor from ..utils import parse_duration class Rule34VideoIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?rule34video\.com/videos/(?P<id>\d+)' _TESTS = [ { 'url': 'https://rule34video.com/videos/3065157/shot-it-mmd-hmv/', 'md5': 'ffccac2c23799dabbd192621ae4d04f3', 'info_dict': { 'id': '3065157', 'ext': 'mp4', 'title': 'Shot It-(mmd hmv)', 'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065157/preview.jpg', 'duration': 347.0, 'age_limit': 18 } }, { 'url': 'https://rule34video.com/videos/3065296/lara-in-trouble-ep-7-wildeerstudio/', 'md5': '6bb5169f9f6b38cd70882bf2e64f6b86', 'info_dict': { 'id': '3065296', 'ext': 'mp4', 'title': 'Lara in Trouble Ep. 7 [WildeerStudio]', 'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065296/preview.jpg', 'duration': 938.0, 'age_limit': 18 } }, ] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) formats = [] for mobj in re.finditer(r'<a[^>]+href="(?P<video_url>[^"]+download=true[^"]+)".*>(?P<ext>[^\s]+) (?P<quality>[^<]+)p', webpage): url, ext, quality = mobj.groups() formats.append({ 'url': url, 'ext': ext.lower(), 'quality': quality, }) title = self._html_extract_title(webpage) thumbnail = self._html_search_regex(r'preview_url:\s+\'([^\']+)\'', webpage, 'thumbnail', default=None) duration = self._html_search_regex(r'"icon-clock"></i>\s+<span>((?:\d+:?)+)', webpage, 'duration', default=None) self._sort_formats(formats) return { 'id': video_id, 'formats': formats, 'title': title, 'thumbnail': thumbnail, 'duration': parse_duration(duration), 'age_limit': 18 }
yt-dlp-2022.08.19/yt_dlp/extractor/rumble.py
import itertools import re from .common import InfoExtractor from ..compat import compat_str, compat_HTTPError from ..utils import ( determine_ext, int_or_none, parse_iso8601, try_get, unescapeHTML, ExtractorError, ) class RumbleEmbedIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?rumble\.com/embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)' _EMBED_REGEX = [fr'(?:<(?:script|iframe)[^>]+\bsrc=|["\']embedUrl["\']\s*:\s*)["\'](?P<url>{_VALID_URL})'] _TESTS = [{ 'url': 'https://rumble.com/embed/v5pv5f', 'md5': '36a18a049856720189f30977ccbb2c34', 'info_dict': { 'id': 'v5pv5f', 'ext': 'mp4', 'title': 'WMAR 2 News Latest Headlines | October 20, 6pm', 'timestamp': 1571611968, 'upload_date': '20191020', 'channel_url': 'https://rumble.com/c/WMAR', 'channel': 'WMAR', 'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.OvCc-small-WMAR-2-News-Latest-Headline.jpg', 'duration': 234, 'uploader': 'WMAR', } }, { 'url': 'https://rumble.com/embed/vslb7v', 'md5': '7418035de1a30a178b8af34dc2b6a52b', 'info_dict': { 'id': 'vslb7v', 'ext': 'mp4', 'title': 'Defense Sec.
says US Commitment to NATO Defense \'Ironclad\'', 'timestamp': 1645142135, 'upload_date': '20220217', 'channel_url': 'https://rumble.com/c/CyberTechNews', 'channel': 'CTNews', 'thumbnail': 'https://sp.rmbl.ws/s8/6/7/i/9/h/7i9hd.OvCc.jpg', 'duration': 901, 'uploader': 'CTNews', } }, { 'url': 'https://rumble.com/embed/ufe9n.v5pv5f', 'only_matching': True, }] @classmethod def _extract_embed_urls(cls, url, webpage): embeds = tuple(super()._extract_embed_urls(url, webpage)) if embeds: return embeds return [f'https://rumble.com/embed/{mobj.group("id")}' for mobj in re.finditer( r'<script>\s*Rumble\(\s*"play"\s*,\s*{\s*[\'"]?video[\'"]?\s*:\s*"(?P<id>[0-9a-z]+)"', webpage)]
yt-dlp-2022.08.19/yt_dlp/extractor/ruutu.py
import json import re from .common import InfoExtractor from ..compat import compat_urllib_parse_urlparse from ..utils import ( determine_ext, ExtractorError, find_xpath_attr, int_or_none, traverse_obj, try_call, unified_strdate, url_or_none, xpath_attr, xpath_text, ) class RuutuIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: (?:www\.)?(?:ruutu|supla)\.fi/(?:video|supla|audio)/| static\.nelonenmedia\.fi/player/misc/embed_player\.html\?.*?\bnid= ) (?P<id>\d+) ''' _API_BASE = 'https://gatling.nelonenmedia.fi' @classmethod def _extract_embed_urls(cls, url, webpage): settings = try_call( lambda: json.loads(re.search( r'(?s)<script\b[^>]+\bid=["\']__NEXT_DATA__["\'][^>]*>({.+?})</script>', webpage).group(1), strict=False)) if settings: video_ids = set(traverse_obj(settings, ( 'props', 'pageProps', 'page', 'assetData', 'splitBody', ..., 'video', 'sourceId')) or []) if video_ids: return [f'http://www.ruutu.fi/video/{v}' for v in video_ids] video_id = traverse_obj(settings, ( 'props', 'pageProps', 'page', 'assetData', 'mainVideo', 'sourceId')) if video_id: return [f'http://www.ruutu.fi/video/{video_id}'] def _real_extract(self, url): video_id = self._match_id(url) video_xml = self._download_xml( '%s/media-xml-cache' % self._API_BASE, video_id, query={'id': video_id}) formats = [] processed_urls = [] def extract_formats(node): for child in node: if child.tag.endswith('Files'): extract_formats(child) elif child.tag.endswith('File'): video_url = child.text if (not video_url or video_url in processed_urls or any(p in video_url for p in ('NOT_USED', 'NOT-USED'))): continue processed_urls.append(video_url) ext = determine_ext(video_url) auth_video_url = url_or_none(self._download_webpage( '%s/auth/access/v2' % self._API_BASE, video_id, note='Downloading authenticated %s stream URL' % ext, fatal=False, query={'stream': video_url})) if auth_video_url: processed_urls.append(auth_video_url) video_url = auth_video_url if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( video_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) elif ext == 'f4m': formats.extend(self._extract_f4m_formats( video_url, video_id, f4m_id='hds', fatal=False)) elif ext == 'mpd': # video-only and audio-only streams are of different # duration resulting in out of sync issue continue formats.extend(self._extract_mpd_formats( video_url, video_id, mpd_id='dash', fatal=False)) elif ext == 'mp3' or child.tag == 'AudioMediaFile': formats.append({ 'format_id': 'audio', 'url': video_url, 'vcodec': 'none', }) else: proto = compat_urllib_parse_urlparse(video_url).scheme if not child.tag.startswith('HTTP') and proto != 'rtmp': continue preference = -1 if proto == 'rtmp' else 1 label = child.get('label') tbr = int_or_none(child.get('bitrate')) format_id = '%s-%s' % (proto, label if label else tbr) if label or tbr else proto if not self._is_valid_url(video_url, video_id, format_id): continue width, height = [int_or_none(x) for x in child.get('resolution', 'x').split('x')[:2]] formats.append({ 'format_id': format_id, 'url': video_url, 'width': width, 'height': height, 'tbr': tbr, 'preference': preference, }) extract_formats(video_xml.find('./Clip')) def pv(name): value = try_call(lambda: find_xpath_attr( video_xml, './Clip/PassthroughVariables/variable', 'name', name).get('value')) if value != 'NA': return value or None if not formats: if (not self.get_param('allow_unplayable_formats') and xpath_text(video_xml, './Clip/DRM', default=None)): self.report_drm(video_id) ns_st_cds = pv('ns_st_cds') if ns_st_cds != 'free': raise ExtractorError('This video is %s.'
% ns_st_cds, expected=True) self._sort_formats(formats) themes = pv('themes') return { 'id': video_id, 'title': xpath_attr(video_xml, './/Behavior/Program', 'program_name', 'title', fatal=True), 'description': xpath_attr(video_xml, './/Behavior/Program', 'description', 'description'), 'thumbnail': xpath_attr(video_xml, './/Behavior/Startpicture', 'href', 'thumbnail'), 'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')) or int_or_none(pv('runtime')), 'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')), 'upload_date': unified_strdate(pv('date_start')), 'series': pv('series_name'), 'season_number': int_or_none(pv('season_number')), 'episode_number': int_or_none(pv('episode_number')), 'categories': themes.split(',') if themes else None, 'formats': formats, }
yt-dlp-2022.08.19/yt_dlp/extractor/ruv.py
from .common import InfoExtractor from ..utils import ( determine_ext, parse_duration, traverse_obj, unified_timestamp, ) class RuvIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ruv\.is/(?:sarpurinn/[^/]+|node)/(?P<id>[^/]+(?:/\d+)?)' _TESTS = [{ # m3u8 'url': 'http://ruv.is/sarpurinn/ruv-aukaras/fh-valur/20170516', 'md5': '66347652f4e13e71936817102acc1724', 'info_dict': { 'id': '1144499', 'display_id': 'fh-valur/20170516', 'ext': 'mp4', 'title': 'FH - Valur', 'description': 'Bein útsending frá 3. leik FH og Vals í úrslitum Olísdeildar karla í handbolta.', 'timestamp': 1494963600, 'upload_date': '20170516', }, }, { # mp3 'url': 'http://ruv.is/sarpurinn/ras-2/morgunutvarpid/20170619', 'md5': '395ea250c8a13e5fdb39d4670ef85378', 'info_dict': { 'id': '1153630', 'display_id': 'morgunutvarpid/20170619', 'ext': 'mp3', 'title': 'Morgunútvarpið', 'description': 'md5:a4cf1202c0a1645ca096b06525915418', 'timestamp': 1497855000, 'upload_date': '20170619', }, }, { 'url': 'http://ruv.is/sarpurinn/ruv/frettir/20170614', 'only_matching': True, }, { 'url': 'http://www.ruv.is/node/1151854', 'only_matching': True, }, { 'url': 'http://ruv.is/sarpurinn/klippa/secret-soltice-hefst-a-morgun', 'only_matching': True, }, { 'url': 'http://ruv.is/sarpurinn/ras-1/morgunvaktin/20170619', 'only_matching': True, }] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) title = self._og_search_title(webpage) FIELD_RE = r'video\.%s\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1' media_url = self._html_search_regex( FIELD_RE % 'src', webpage, 'video URL', group='url') video_id = self._search_regex( r'<a[^>]+\bhref=["\']https?://www\.ruv\.is/node/(\d+)', webpage, 'video id', default=display_id) ext = determine_ext(media_url) if ext == 'm3u8': formats = self._extract_m3u8_formats( media_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') elif ext == 'mp3': formats = [{ 'format_id': 'mp3', 'url': media_url, 'vcodec': 'none', }] else: formats = [{ 'url': media_url, }] description = self._og_search_description(webpage, default=None) thumbnail = self._og_search_thumbnail( webpage, default=None) or self._search_regex( FIELD_RE % 'poster', webpage, 'thumbnail', fatal=False) timestamp = unified_timestamp(self._html_search_meta( 'article:published_time', webpage, 'timestamp', fatal=False)) return { 'id': video_id, 'display_id': display_id, 'title': title, 'description': description, 'thumbnail': thumbnail, 'timestamp': timestamp, 'formats': formats, } class RuvSpilaIE(InfoExtractor): IE_NAME = 'ruv.is:spila' _VALID_URL =
r'https?://(?:www\.)?ruv\.is/(?:(?:sjon|ut)varp|(?:krakka|ung)ruv)/spila/.+/(?P<series_id>[0-9]+)/(?P<id>[a-z0-9]+)' _TESTS = [{ 'url': 'https://www.ruv.is/sjonvarp/spila/ithrottir/30657/9jcnd4', 'info_dict': { 'id': '9jcnd4', 'ext': 'mp4', 'title': '01.02.2022', 'chapters': 'count:4', 'timestamp': 1643743500, 'upload_date': '20220201', 'thumbnail': 'https://d38kdhuogyllre.cloudfront.net/fit-in/1960x/filters:quality(65)/hd_posters/94boog-iti3jg.jpg', 'description': 'Íþróttafréttir.', 'age_limit': 0, }, }, { 'url': 'https://www.ruv.is/utvarp/spila/i-ljosi-sogunnar/23795/7hqkre', 'info_dict': { 'id': '7hqkre', 'ext': 'mp3', 'thumbnail': 'https://d38kdhuogyllre.cloudfront.net/fit-in/1960x/filters:quality(65)/hd_posters/7hqkre-7uepao.jpg', 'description': 'md5:8d7046549daff35e9a3190dc9901a120', 'chapters': [], 'upload_date': '20220204', 'timestamp': 1643965500, 'title': 'Nellie Bly II', 'age_limit': 0, }, }, { 'url': 'https://www.ruv.is/ungruv/spila/ungruv/28046/8beuph', 'only_matching': True }, { 'url': 'https://www.ruv.is/krakkaruv/spila/krakkafrettir/30712/9jbgb0', 'only_matching': True }] def _real_extract(self, url): display_id, series_id = self._match_valid_url(url).group('id', 'series_id') program = self._download_json( 'https://www.ruv.is/gql/', display_id, query={'query': '''{ Program(id: %s){ title image description short_description episodes(id: {value: "%s"}) { rating title duration file image firstrun description clips { time text } subtitles { name value } } } }''' % (series_id, display_id)})['data']['Program'] episode = program['episodes'][0] subs = {} for trk in episode.get('subtitles'): if trk.get('name') and trk.get('value'): subs.setdefault(trk['name'], []).append({'url': trk['value'], 'ext': 'vtt'}) media_url = episode['file'] if determine_ext(media_url) == 'm3u8': formats = self._extract_m3u8_formats(media_url, display_id) else: formats = [{'url': media_url}] clips = [ {'start_time': parse_duration(c.get('time')), 'title': c.get('text')} for c in episode.get('clips') or []] return { 'id': display_id, 'title': traverse_obj(program, ('episodes', 0, 'title'), 'title'), 'description': traverse_obj( program, ('episodes', 0, 'description'), 'description', 'short_description', expected_type=lambda x: x or None), 'subtitles': subs, 'thumbnail': episode.get('image', '').replace('$$IMAGESIZE$$', '1960') or None, 'timestamp': unified_timestamp(episode.get('firstrun')), 'formats': formats, 'age_limit': episode.get('rating'), 'chapters': clips }
yt-dlp-2022.08.19/yt_dlp/extractor/safari.py
import json import re from .common import InfoExtractor from ..compat import ( compat_parse_qs, compat_urlparse, ) from ..utils import ( ExtractorError, update_url_query, ) class SafariBaseIE(InfoExtractor): _LOGIN_URL = 'https://learning.oreilly.com/accounts/login/' _NETRC_MACHINE = 'safari' _API_BASE = 'https://learning.oreilly.com/api/v1' _API_FORMAT = 'json' LOGGED_IN = False def _perform_login(self, username, password): _, urlh = self._download_webpage_handle( 'https://learning.oreilly.com/accounts/login-check/', None, 'Downloading login page') def is_logged(urlh): return 'learning.oreilly.com/home/' in urlh.geturl() if is_logged(urlh): self.LOGGED_IN = True return redirect_url = urlh.geturl() parsed_url = compat_urlparse.urlparse(redirect_url) qs = compat_parse_qs(parsed_url.query) next_uri = compat_urlparse.urljoin( 'https://api.oreilly.com', qs['next'][0]) auth, urlh = self._download_json_handle(
'https://www.oreilly.com/member/auth/login/', None, 'Logging in', data=json.dumps({ 'email': username, 'password': password, 'redirect_uri': next_uri, }).encode(), headers={ 'Content-Type': 'application/json', 'Referer': redirect_url, }, expected_status=400) credentials = auth.get('credentials') if (not auth.get('logged_in') and not auth.get('redirect_uri') and credentials): raise ExtractorError( 'Unable to login: %s' % credentials, expected=True) # oreilly serves two same instances of the following cookies # in Set-Cookie header and expects first one to be actually set for cookie in ('groot_sessionid', 'orm-jwt', 'orm-rt'): self._apply_first_set_cookie_header(urlh, cookie) _, urlh = self._download_webpage_handle( auth.get('redirect_uri') or next_uri, None, 'Completing login',) if is_logged(urlh): self.LOGGED_IN = True return raise ExtractorError('Unable to log in') class SafariIE(SafariBaseIE): IE_NAME = 'safari' IE_DESC = 'safaribooksonline.com online video' _VALID_URL = r'''(?x) https?:// (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/ (?: library/view/[^/]+/(?P[^/]+)/(?P[^/?\#&]+)\.html| videos/[^/]+/[^/]+/(?P[^-]+-[^/?\#&]+) ) ''' _TESTS = [{ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', 'md5': 'dcc5a425e79f2564148652616af1f2a3', 'info_dict': { 'id': '0_qbqx90ic', 'ext': 'mp4', 'title': 'Introduction to Hadoop Fundamentals LiveLessons', 'timestamp': 1437758058, 'upload_date': '20150724', 'uploader_id': 'stork', }, }, { # non-digits in course id 'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html', 'only_matching': True, }, { 'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html', 'only_matching': True, }, { 'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00', 'only_matching': True, }, { 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro', 'only_matching': True, }, { 'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/00_SeriesIntro.html', 'only_matching': True, }] _PARTNER_ID = '1926081' _UICONF_ID = '29375172' def _real_extract(self, url): mobj = self._match_valid_url(url) reference_id = mobj.group('reference_id') if reference_id: video_id = reference_id partner_id = self._PARTNER_ID ui_id = self._UICONF_ID else: video_id = '%s-%s' % (mobj.group('course_id'), mobj.group('part')) webpage, urlh = self._download_webpage_handle(url, video_id) mobj = re.match(self._VALID_URL, urlh.geturl()) reference_id = mobj.group('reference_id') if not reference_id: reference_id = self._search_regex( r'data-reference-id=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'kaltura reference id', group='id') partner_id = self._search_regex( r'data-partner-id=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'kaltura widget id', default=self._PARTNER_ID, group='id') ui_id = self._search_regex( r'data-ui-id=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'kaltura uiconf id', default=self._UICONF_ID, group='id') query = { 'wid': '_%s' % partner_id, 'uiconf_id': ui_id, 'flashvars[referenceId]': reference_id, } if self.LOGGED_IN: kaltura_session = self._download_json( '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id), video_id, 'Downloading kaltura session JSON', 'Unable to download kaltura session JSON', fatal=False, headers={'Accept': 'application/json'}) if 
kaltura_session: session = kaltura_session.get('session') if session: query['flashvars[ks]'] = session return self.url_result(update_url_query( 'https://cdnapisec.kaltura.com/html5/html5lib/v2.37.1/mwEmbedFrame.php', query), 'Kaltura') class SafariApiIE(SafariBaseIE): IE_NAME = 'safari:api' _VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html' _TESTS = [{ 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', 'only_matching': True, }, { 'url': 'https://www.safaribooksonline.com/api/v1/book/9780134664057/chapter/RHCE_Introduction.html', 'only_matching': True, }] def _real_extract(self, url): mobj = self._match_valid_url(url) part = self._download_json( url, '%s/%s' % (mobj.group('course_id'), mobj.group('part')), 'Downloading part JSON') web_url = part['web_url'] if 'library/view' in web_url: web_url = web_url.replace('library/view', 'videos') natural_keys = part['natural_key'] web_url = f'{web_url.rsplit("/", 1)[0]}/{natural_keys[0]}-{natural_keys[1][:-5]}' return self.url_result(web_url, SafariIE.ie_key()) class SafariCourseIE(SafariBaseIE): IE_NAME = 'safari:course' IE_DESC = 'safaribooksonline.com online courses' _VALID_URL = r'''(?x) https?:// (?: (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/ (?: library/view/[^/]+| api/v1/book| videos/[^/]+ )| techbus\.safaribooksonline\.com ) /(?P<course_id>[^/]+) ''' _TESTS = [{ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/', 'info_dict': { 'id': '9780133392838', 'title': 'Hadoop Fundamentals LiveLessons', }, 'playlist_count': 22, 'skip': 'Requires safaribooksonline account credentials', }, { 'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json', 'only_matching': True, }, { 'url': 'http://techbus.safaribooksonline.com/9780134426365', 'only_matching': True, }, { 'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314', 'only_matching': True, }, { 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838', 'only_matching': True, }, { 'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/', 'only_matching': True, }] @classmethod def suitable(cls, url): return (False if SafariIE.suitable(url) or SafariApiIE.suitable(url) else super(SafariCourseIE, cls).suitable(url)) def _real_extract(self, url): course_id = self._match_id(url) course_json = self._download_json( '%s/book/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT), course_id, 'Downloading course JSON') if 'chapters' not in course_json: raise ExtractorError( 'No chapters found for course %s' % course_id, expected=True) entries = [ self.url_result(chapter, SafariApiIE.ie_key()) for chapter in course_json['chapters']] course_title = course_json['title'] return self.playlist_result(entries, course_id, course_title)
yt-dlp-2022.08.19/yt_dlp/extractor/saitosan.py
from .common import InfoExtractor from ..utils import ExtractorError, try_get class SaitosanIE(InfoExtractor): IE_NAME = 'Saitosan' _VALID_URL = r'https?://(?:www\.)?saitosan\.net/bview.html\?id=(?P<id>[0-9]+)' _TESTS = [{ 'url': 'http://www.saitosan.net/bview.html?id=10031846', 'info_dict': { 'id': '10031846', 'ext': 'mp4', 'title': '井下原 和弥', 'uploader': '井下原 和弥', 'thumbnail':
'http://111.171.196.85:8088/921f916f-7f55-4c97-b92e-5d9d0fef8f5f/thumb', 'is_live': True, }, 'params': { # m3u8 download 'skip_download': True, }, 'skip': 'Broadcasts are ephemeral', }, { 'url': 'http://www.saitosan.net/bview.html?id=10031795', 'info_dict': { 'id': '10031795', 'ext': 'mp4', 'title': '橋本', 'uploader': '橋本', 'thumbnail': 'http://111.171.196.85:8088/1a3933e1-a01a-483b-8931-af15f37f8082/thumb', 'is_live': True, }, 'params': { # m3u8 download 'skip_download': True, }, 'skip': 'Broadcasts are ephemeral', }] def _real_extract(self, url): b_id = self._match_id(url) base = 'http://hankachi.saitosan-api.net:8002/socket.io/?transport=polling&EIO=3' sid = self._download_socket_json(base, b_id, note='Opening socket').get('sid') base += '&sid=' + sid self._download_webpage(base, b_id, note='Polling socket') payload = '420["room_start_join",{"room_id":"%s"}]' % b_id payload = '%s:%s' % (len(payload), payload) self._download_webpage(base, b_id, data=payload, note='Polling socket with payload') response = self._download_socket_json(base, b_id, note='Polling socket') if not response.get('ok'): err = response.get('error') or {} raise ExtractorError( '%s said: %s - %s' % (self.IE_NAME, err.get('code', '?'), err.get('msg', 'Unknown')) if err else 'The socket reported that the broadcast could not be joined. Maybe it\'s offline or the URL is incorrect', expected=True, video_id=b_id) self._download_webpage(base, b_id, data='26:421["room_finish_join",{}]', note='Polling socket') b_data = self._download_socket_json(base, b_id, note='Getting broadcast metadata from socket') m3u8_url = b_data.get('url') self._download_webpage(base, b_id, data='1:1', note='Closing socket', fatal=False) return { 'id': b_id, 'title': b_data.get('name'), 'formats': self._extract_m3u8_formats(m3u8_url, b_id, 'mp4', live=True), 'thumbnail': m3u8_url.replace('av.m3u8', 'thumb'), 'uploader': try_get(b_data, lambda x: x['broadcast_user']['name']), # same as title 'is_live': True }
yt-dlp-2022.08.19/yt_dlp/extractor/samplefocus.py
import re from .common import InfoExtractor from ..utils import ( extract_attributes, get_element_by_attribute, int_or_none, ) class SampleFocusIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?samplefocus\.com/samples/(?P<id>[^/?&#]+)' _TESTS = [{ 'url': 'https://samplefocus.com/samples/lil-peep-sad-emo-guitar', 'md5': '48c8d62d60be467293912e0e619a5120', 'info_dict': { 'id': '40316', 'display_id': 'lil-peep-sad-emo-guitar', 'ext': 'mp3', 'title': 'Lil Peep Sad Emo Guitar', 'thumbnail': r're:^https?://.+\.png', 'license': 'Standard License', 'uploader': 'CapsCtrl', 'uploader_id': 'capsctrl', 'like_count': int, 'comment_count': int, 'categories': ['Samples', 'Guitar', 'Electric guitar'], }, }, { 'url': 'https://samplefocus.com/samples/dababy-style-bass-808', 'only_matching': True }, { 'url': 'https://samplefocus.com/samples/young-chop-kick', 'only_matching': True }] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) sample_id = self._search_regex( r'<input[^>]+id=(["\'])sample_id\1[^>]+value=(?:["\'])(?P<id>\d+)', webpage, 'sample id', group='id') title = self._og_search_title(webpage, fatal=False) or self._html_search_regex( r'<h1>(.+?)</h1>', webpage, 'title') mp3_url = self._search_regex( r'<input[^>]+id=(["\'])sample_mp3\1[^>]+value=(["\'])(?P<url>(?:(?!\2).)+)', webpage, 'mp3', fatal=False, group='url') or extract_attributes(self._search_regex( r'<meta[^>]+itemprop=(["\'])contentUrl\1[^>]*>', webpage, 'mp3 url', group=0))['content'] thumbnail = self._og_search_thumbnail(webpage) or self._html_search_regex( r'<img[^>]+class=(?:["\'])waveform responsive-img[^>]+src=(["\'])(?P<url>(?:(?!\1).)+)', webpage, 'mp3', fatal=False, group='url') comments = [] for author_id, author, body in re.findall(r'(?s)<div[^>]+class="comment-author"><a[^>]+href="/users/([^"]+)">([^"]+)</a>.+?<div[^>]+class="comment-body">([^>]+)</div>', webpage): comments.append({ 'author': author, 'author_id': author_id, 'text': body, }) uploader_id = uploader = None mobj = re.search(r'>By <a[^>]+href="/users/([^"]+)"[^>]*>([^<]+)', webpage) if mobj: uploader_id, uploader = mobj.groups() breadcrumb = get_element_by_attribute('typeof', 'BreadcrumbList', webpage) categories = [] if breadcrumb: for _, name in re.findall(r'<span[^>]+property=(["\'])name\1[^>]*>([^<]+)', breadcrumb): categories.append(name) def extract_count(klass): return int_or_none(self._html_search_regex( r'<span[^>]+class=(?:["\'])?%s-count[^>]*>(\d+)' % klass, webpage, klass, fatal=False)) return { 'id': sample_id, 'title': title, 'url': mp3_url, 'display_id': display_id, 'thumbnail': thumbnail, 'uploader': uploader, 'license': self._html_search_regex( r'<a[^>]+href=(["\'])/license\1[^>]*>(?P<license>[^<]+)<', webpage, 'license', fatal=False, group='license'), 'uploader_id': uploader_id, 'like_count': extract_count('sample-%s-favorites' % sample_id), 'comment_count': extract_count('comments'), 'comments': comments, 'categories': categories, }
yt-dlp-2022.08.19/yt_dlp/extractor/sapo.py
import re from .common import InfoExtractor from ..utils import ( parse_duration, unified_strdate, ) class SapoIE(InfoExtractor): IE_DESC = 'SAPO Vídeos' _VALID_URL = r'https?://(?:(?:v2|www)\.)?videos\.sapo\.(?:pt|cv|ao|mz|tl)/(?P<id>[\da-zA-Z]{20})' _TESTS = [ { 'url': 'http://videos.sapo.pt/UBz95kOtiWYUMTA5Ghfi', 'md5': '79ee523f6ecb9233ac25075dee0eda83', 'note': 'SD video', 'info_dict': { 'id': 'UBz95kOtiWYUMTA5Ghfi', 'ext': 'mp4', 'title': 'Benfica - Marcas na Hitória', 'description': 'md5:c9082000a128c3fd57bf0299e1367f22', 'duration': 264, 'uploader': 'tiago_1988', 'upload_date': '20080229', 'categories': ['benfica', 'cabral', 'desporto', 'futebol', 'geovanni', 'hooijdonk', 'joao', 'karel', 'lisboa', 'miccoli'], }, }, { 'url': 'http://videos.sapo.pt/IyusNAZ791ZdoCY5H5IF', 'md5': '90a2f283cfb49193fe06e861613a72aa', 'note': 'HD video', 'info_dict': { 'id': 'IyusNAZ791ZdoCY5H5IF', 'ext': 'mp4', 'title': 'Codebits VII - Report', 'description': 'md5:6448d6fd81ce86feac05321f354dbdc8', 'duration': 144, 'uploader': 'codebits', 'upload_date': '20140427', 'categories': ['codebits', 'codebits2014'], }, }, { 'url': 'http://v2.videos.sapo.pt/yLqjzPtbTimsn2wWBKHz', 'md5': 'e5aa7cc0bdc6db9b33df1a48e49a15ac', 'note': 'v2 video', 'info_dict': { 'id': 'yLqjzPtbTimsn2wWBKHz', 'ext': 'mp4', 'title': 'Hipnose Condicionativa 4', 'description': 'md5:ef0481abf8fb4ae6f525088a6dadbc40', 'duration': 692, 'uploader': 'sapozen', 'upload_date': '20090609', 'categories': ['condicionativa', 'heloisa', 'hipnose', 'miranda', 'sapo', 'zen'], }, }, ] def _real_extract(self, url): mobj = self._match_valid_url(url) video_id = mobj.group('id') item = self._download_xml( 'http://rd3.videos.sapo.pt/%s/rss2' % video_id, video_id).find('./channel/item') title = item.find('./title').text description = item.find('./{http://videos.sapo.pt/mrss/}synopse').text thumbnail = item.find('./{http://search.yahoo.com/mrss/}content').get('url') duration = parse_duration(item.find('./{http://videos.sapo.pt/mrss/}time').text) uploader = item.find('./{http://videos.sapo.pt/mrss/}author').text upload_date = unified_strdate(item.find('./pubDate').text) view_count = int(item.find('./{http://videos.sapo.pt/mrss/}views').text) comment_count = int(item.find('./{http://videos.sapo.pt/mrss/}comment_count').text) tags = item.find('./{http://videos.sapo.pt/mrss/}tags').text categories = tags.split() if tags
else [] age_limit = 18 if item.find('./{http://videos.sapo.pt/mrss/}m18').text == 'true' else 0 video_url = item.find('./{http://videos.sapo.pt/mrss/}videoFile').text video_size = item.find('./{http://videos.sapo.pt/mrss/}videoSize').text.split('x') formats = [{ 'url': video_url, 'ext': 'mp4', 'format_id': 'sd', 'width': int(video_size[0]), 'height': int(video_size[1]), }] if item.find('./{http://videos.sapo.pt/mrss/}HD').text == 'true': formats.append({ 'url': re.sub(r'/mov/1$', '/mov/39', video_url), 'ext': 'mp4', 'format_id': 'hd', 'width': 1280, 'height': 720, }) self._sort_formats(formats) return { 'id': video_id, 'title': title, 'description': description, 'thumbnail': thumbnail, 'duration': duration, 'uploader': uploader, 'upload_date': upload_date, 'view_count': view_count, 'comment_count': comment_count, 'categories': categories, 'age_limit': age_limit, 'formats': formats, }
yt-dlp-2022.08.19/yt_dlp/extractor/savefrom.py
import os.path from .common import InfoExtractor class SaveFromIE(InfoExtractor): IE_NAME = 'savefrom.net' _VALID_URL = r'https?://[^.]+\.savefrom\.net/\#url=(?P<url>.*)$' _TEST = { 'url': 'http://en.savefrom.net/#url=http://youtube.com/watch?v=UlVRAPW2WJY&utm_source=youtube.com&utm_medium=short_domains&utm_campaign=ssyoutube.com', 'info_dict': { 'id': 'UlVRAPW2WJY', 'ext': 'mp4', 'title': 'About Team Radical MMA | MMA Fighting', 'upload_date': '20120816', 'uploader': 'Howcast', 'uploader_id': 'Howcast', 'description': r're:(?s).* Hi, my name is Rene Dreifuss\. And I\'m here to show you some MMA.*', }, 'params': { 'skip_download': True } } def _real_extract(self, url): mobj = self._match_valid_url(url) video_id = os.path.splitext(url.split('/')[-1])[0] return self.url_result(mobj.group('url'), video_id=video_id)
yt-dlp-2022.08.19/yt_dlp/extractor/sbs.py
from .common import InfoExtractor from ..utils import ( smuggle_url, ExtractorError, ) class SBSIE(InfoExtractor): IE_DESC = 'sbs.com.au' _VALID_URL = r'''(?x) https?://(?:www\.)?sbs\.com\.au/(?: ondemand(?: /video/(?:single/)?| /movie/[^/]+/| .*?\bplay=|/watch/ )|news/(?:embeds/)?video/ )(?P<id>[0-9]+)''' _EMBED_REGEX = [r'''(?x)] (?: <meta\s+property="og:video"\s+content=| <iframe[^>]+?src= ) (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1'''] _TESTS = [{ # Original URL is handled by the generic IE which finds the iframe: # http://www.sbs.com.au/thefeed/blog/2014/08/21/dingo-conservation 'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed', 'md5': '3150cf278965eeabb5b4cea1c963fe0a', 'info_dict': { 'id': '_rFBPRPO4pMR', 'ext': 'mp4', 'title': 'Dingo Conservation (The Feed)', 'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5', 'thumbnail': r're:http://.*\.jpg', 'duration': 308, 'timestamp': 1408613220, 'upload_date': '20140821', 'uploader': 'SBSC', }, }, { 'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed', 'only_matching': True, }, { 'url': 'http://www.sbs.com.au/news/video/471395907773/The-Feed-July-9', 'only_matching': True, }, { 'url': 'https://www.sbs.com.au/ondemand/?play=1836638787723', 'only_matching': True, }, { 'url': 'https://www.sbs.com.au/ondemand/program/inside-windsor-castle?play=1283505731842', 'only_matching': True, }, { 'url': 'https://www.sbs.com.au/news/embeds/video/1840778819866', 'only_matching': True, }, { 'url': 'https://www.sbs.com.au/ondemand/watch/1698704451971', 'only_matching': True, }, {
'url': 'https://www.sbs.com.au/ondemand/movie/coherence/1469404227931', 'only_matching': True, }, { 'note': 'Live stream', 'url': 'https://www.sbs.com.au/ondemand/video/1726824003663/sbs-24x7-live-stream-nsw', 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) player_params = self._download_json( 'http://www.sbs.com.au/api/video_pdkvars/id/%s?form=json' % video_id, video_id) error = player_params.get('error') if error: error_message = 'Sorry, The video you are looking for does not exist.' video_data = error.get('results') or {} error_code = error.get('errorCode') if error_code == 'ComingSoon': error_message = '%s is not yet available.' % video_data.get('title', '') elif error_code in ('Forbidden', 'intranetAccessOnly'): error_message = 'Sorry, This video cannot be accessed via this website' elif error_code == 'Expired': error_message = 'Sorry, %s is no longer available.' % video_data.get('title', '') raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) urls = player_params['releaseUrls'] theplatform_url = (urls.get('progressive') or urls.get('html') or urls.get('standard') or player_params['relatedItemsURL']) return { '_type': 'url_transparent', 'ie_key': 'ThePlatform', 'id': video_id, 'url': smuggle_url(self._proto_relative_url(theplatform_url), {'force_smil_url': True}), 'is_live': player_params.get('streamType') == 'live', }
yt-dlp-2022.08.19/yt_dlp/extractor/screencast.py
import urllib.request from .common import InfoExtractor from ..compat import compat_parse_qs from ..utils import ExtractorError class ScreencastIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?screencast\.com/t/(?P<id>[a-zA-Z0-9]+)' _TESTS = [{ 'url': 'http://www.screencast.com/t/3ZEjQXlT', 'md5': '917df1c13798a3e96211dd1561fded83', 'info_dict': { 'id': '3ZEjQXlT', 'ext': 'm4v', 'title': 'Color Measurement with Ocean Optics Spectrometers', 'description': 'md5:240369cde69d8bed61349a199c5fb153', 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', } }, { 'url': 'http://www.screencast.com/t/V2uXehPJa1ZI', 'md5': 'e8e4b375a7660a9e7e35c33973410d34', 'info_dict': { 'id': 'V2uXehPJa1ZI', 'ext': 'mov', 'title': 'The Amadeus Spectrometer', 'description': 're:^In this video, our friends at.*To learn more about Amadeus, visit', 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', } }, { 'url': 'http://www.screencast.com/t/aAB3iowa', 'md5': 'dedb2734ed00c9755761ccaee88527cd', 'info_dict': { 'id': 'aAB3iowa', 'ext': 'mp4', 'title': 'Google Earth Export', 'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.', 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', } }, { 'url': 'http://www.screencast.com/t/X3ddTrYh', 'md5': '669ee55ff9c51988b4ebc0877cc8b159', 'info_dict': { 'id': 'X3ddTrYh', 'ext': 'wmv', 'title': 'Toolkit 6 User Group Webinar (2014-03-04) - Default Judgment and First Impression', 'description': 'md5:7b9f393bc92af02326a5c5889639eab0', 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', } }, { 'url': 'http://screencast.com/t/aAB3iowa', 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) video_url = self._html_search_regex( r'<embed name="file" src="([^"]+)" ', webpage, 'QuickTime embed', default=None) if video_url is None: flash_vars_s = self._html_search_regex( r'<param name="flashVars" value="([^"]+)"', webpage, 'flash vars', default=None) if not flash_vars_s: flash_vars_s = self._html_search_regex( r'<param name="initParams" value="([^"]+)"', webpage, 'flash vars', default=None) if flash_vars_s: flash_vars_s = flash_vars_s.replace(',', '&') if flash_vars_s: flash_vars = compat_parse_qs(flash_vars_s) video_url_raw = urllib.request.quote(flash_vars['content'][0]) video_url = video_url_raw.replace('http%3A', 'http:') if video_url is None: video_meta = self._html_search_meta( 'og:video', webpage, default=None) if video_meta: video_url = self._search_regex( r'[^?]+', video_meta, 'meta tag video URL', default=None) if video_url is None: video_url = self._html_search_regex( r'MediaContentUrl["\']\s*:(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'video url', default=None, group='url') if video_url is None: video_url = self._html_search_meta( 'og:video', webpage, default=None) if video_url is None: raise ExtractorError('Cannot find video') title =
yt-dlp-2022.08.19/yt_dlp/extractor/screencast.py
import urllib.request

from .common import InfoExtractor
from ..compat import compat_parse_qs
from ..utils import ExtractorError


class ScreencastIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?screencast\.com/t/(?P<id>[a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'http://www.screencast.com/t/3ZEjQXlT',
        'md5': '917df1c13798a3e96211dd1561fded83',
        'info_dict': {
            'id': '3ZEjQXlT',
            'ext': 'm4v',
            'title': 'Color Measurement with Ocean Optics Spectrometers',
            'description': 'md5:240369cde69d8bed61349a199c5fb153',
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
        }
    }, {
        'url': 'http://www.screencast.com/t/V2uXehPJa1ZI',
        'md5': 'e8e4b375a7660a9e7e35c33973410d34',
        'info_dict': {
            'id': 'V2uXehPJa1ZI',
            'ext': 'mov',
            'title': 'The Amadeus Spectrometer',
            'description': 're:^In this video, our friends at.*To learn more about Amadeus, visit',
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
        }
    }, {
        'url': 'http://www.screencast.com/t/aAB3iowa',
        'md5': 'dedb2734ed00c9755761ccaee88527cd',
        'info_dict': {
            'id': 'aAB3iowa',
            'ext': 'mp4',
            'title': 'Google Earth Export',
            'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.',
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
        }
    }, {
        'url': 'http://www.screencast.com/t/X3ddTrYh',
        'md5': '669ee55ff9c51988b4ebc0877cc8b159',
        'info_dict': {
            'id': 'X3ddTrYh',
            'ext': 'wmv',
            'title': 'Toolkit 6 User Group Webinar (2014-03-04) - Default Judgment and First Impression',
            'description': 'md5:7b9f393bc92af02326a5c5889639eab0',
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
        }
    }, {
        'url': 'http://screencast.com/t/aAB3iowa',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # try the QuickTime embed first, then fall back through the legacy
        # Flash player config and the page metadata
        video_url = self._html_search_regex(
            r'<embed name="Video".*?src="([^"]+)"', webpage,
            'QuickTime embed', default=None)

        if video_url is None:
            flash_vars_s = self._html_search_regex(
                r'<param name="flashVars" value="([^"]+)"', webpage, 'flash vars',
                default=None)
            if not flash_vars_s:
                flash_vars_s = self._html_search_regex(
                    r'<param name="initParams" value="([^"]+)"', webpage, 'flash vars',
                    default=None)
                if flash_vars_s:
                    flash_vars_s = flash_vars_s.replace(',', '&')
            if flash_vars_s:
                flash_vars = compat_parse_qs(flash_vars_s)
                video_url_raw = urllib.request.quote(flash_vars['content'][0])
                video_url = video_url_raw.replace('http%3A', 'http:')

        if video_url is None:
            video_meta = self._html_search_meta(
                'og:video', webpage, default=None)
            if video_meta:
                video_url = self._search_regex(
                    r'src=(.*?)(?:$|&)', video_meta,
                    'meta tag video URL', default=None)

        if video_url is None:
            video_url = self._html_search_regex(
                r'MediaContentUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
                webpage, 'video url', default=None, group='url')

        if video_url is None:
            video_url = self._html_search_meta(
                'og:video', webpage, default=None)

        if video_url is None:
            raise ExtractorError('Cannot find video')

        title = self._og_search_title(webpage, default=None)
        if title is None:
            title = self._html_search_regex(
                [r'<b>Title:</b> ([^<]+)</div>',
                 r'class="tabSeperator">></span><span class="tabText">(.+?)<',
                 r'<title>([^<]+)</title>'],
                webpage, 'title')
        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage, default=None)
        if description is None:
            description = self._html_search_meta('description', webpage)

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }
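
# A minimal sketch of the flashVars fallback in ScreencastIE._real_extract:
# the player's query-string blob is parsed, and the percent-encoded 'content'
# entry is normalised back into a plain URL. The blob is a made-up example.
from urllib.parse import parse_qs, quote

flash_vars_s = 'content=http%3A//media.screencast.com/users/x/video.mp4&thumb=t.jpg'
flash_vars = parse_qs(flash_vars_s)
video_url = quote(flash_vars['content'][0]).replace('http%3A', 'http:')
assert video_url == 'http://media.screencast.com/users/x/video.mp4'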
yt-dlp-2022.08.19/yt_dlp/extractor/screencastomatic.py
from .common import InfoExtractor
from ..utils import (
    get_element_by_class,
    int_or_none,
    remove_start,
    strip_or_none,
    unified_strdate,
)


class ScreencastOMaticIE(InfoExtractor):
    _VALID_URL = r'https?://screencast-o-matic\.com/(?:(?:watch|player)/|embed\?.*?\bsc=)(?P<id>[0-9a-zA-Z]+)'
    _TESTS = [{
        'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
        'md5': '483583cb80d92588f15ccbedd90f0c18',
        'info_dict': {
            'id': 'c2lD3BeOPl',
            'ext': 'mp4',
            'title': 'Welcome to 3-4 Philosophy @ DECV!',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.',
            'duration': 369,
            'upload_date': '20141216',
        }
    }, {
        'url': 'http://screencast-o-matic.com/player/c2lD3BeOPl',
        'only_matching': True,
    }, {
        'url': 'http://screencast-o-matic.com/embed?ff=true&sc=cbV2r4Q5TL&fromPH=true&a=1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            'https://screencast-o-matic.com/player/' + video_id, video_id)

        info = self._parse_html5_media_entries(url, webpage, video_id)[0]
        info.update({
            'id': video_id,
            'title': get_element_by_class('overlayTitle', webpage),
            'description': strip_or_none(get_element_by_class('overlayDescription', webpage)) or None,
            'duration': int_or_none(self._search_regex(
                r'player\.duration\s*=\s*function\(\)\s*{\s*return\s+(\d+);\s*};',
                webpage, 'duration', default=None)),
            'upload_date': unified_strdate(remove_start(
                get_element_by_class('overlayPublished', webpage), 'Published: ')),
        })
        return info
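
# A minimal sketch of the duration scrape used by ScreencastOMaticIE above:
# the player page defines a JS function that returns the length in seconds.
# The JavaScript line is an invented stand-in for the real page.
import re

webpage = 'player.duration = function() { return 369; };'
duration = int(re.search(
    r'player\.duration\s*=\s*function\(\)\s*{\s*return\s+(\d+);\s*};',
    webpage).group(1))
assert duration == 369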
yt-dlp-2022.08.19/yt_dlp/extractor/scrippsnetworks.py
import json
import hashlib

from .aws import AWSIE
from .anvato import AnvatoIE
from .common import InfoExtractor
from ..utils import (
    smuggle_url,
    urlencode_postdata,
    xpath_text,
)


class ScrippsNetworksWatchIE(AWSIE):
    IE_NAME = 'scrippsnetworks:watch'
    _VALID_URL = r'''(?x)
                    https?://
                        watch\.
                        (?P<site>geniuskitchen)\.com/
                        (?:
                            player\.[A-Z0-9]+\.html\#|
                            show/(?:[^/]+/){2}|
                            player/
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
        'info_dict': {
            'id': '4194875',
            'ext': 'mp4',
            'title': 'Ample Hills Ice Cream Bike',
            'description': 'Courtney Rada churns up a signature GK Now ice cream with The Scoopmaster.',
            'uploader': 'ANV',
            'upload_date': '20171011',
            'timestamp': 1507698000,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [AnvatoIE.ie_key()],
    }]

    _SNI_TABLE = {
        'geniuskitchen': 'genius',
    }

    _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1'
    _AWS_PROXY_HOST = 'web.api.video.snidigital.com'

    _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        site_id, video_id = mobj.group('site', 'id')

        # obtain an OpenID token for an anonymous Cognito identity
        aws_identity_id_json = json.dumps({
            'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % self._AWS_REGION
        }).encode('utf-8')
        token = self._download_json(
            'https://cognito-identity.%s.amazonaws.com/' % self._AWS_REGION, video_id,
            data=aws_identity_id_json,
            headers={
                'Accept': '*/*',
                'Content-Type': 'application/x-amz-json-1.1',
                'Referer': url,
                'X-Amz-Content-Sha256': hashlib.sha256(aws_identity_id_json).hexdigest(),
                'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken',
                'X-Amz-User-Agent': self._AWS_USER_AGENT,
            })['Token']

        # exchange the OpenID token for temporary STS credentials
        sts = self._download_xml(
            'https://sts.amazonaws.com/', video_id, data=urlencode_postdata({
                'Action': 'AssumeRoleWithWebIdentity',
                'RoleArn': 'arn:aws:iam::710330595350:role/Cognito_WebAPIUnauth_Role',
                'RoleSessionName': 'web-identity',
                'Version': '2011-06-15',
                'WebIdentityToken': token,
            }), headers={
                'Referer': url,
                'X-Amz-User-Agent': self._AWS_USER_AGENT,
                'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
            })

        def get(key):
            return xpath_text(
                sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key,
                fatal=True)

        mcp_id = self._aws_execute_api({
            'uri': '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id),
            'access_key': get('AccessKeyId'),
            'secret_key': get('SecretAccessKey'),
            'session_token': get('SessionToken'),
        }, video_id)['results'][0]['mcpId']

        return self.url_result(
            smuggle_url(
                'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id,
                {'geo_countries': ['US']}),
            AnvatoIE.ie_key(), video_id=mcp_id)


class ScrippsNetworksIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?(?P<site>cookingchanneltv|discovery|(?:diy|food)network|hgtv|travelchannel)\.com/videos/[0-9a-z-]+-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.cookingchanneltv.com/videos/the-best-of-the-best-0260338',
        'info_dict': {
            'id': '0260338',
            'ext': 'mp4',
            'title': 'The Best of the Best',
            'description': 'Catch a new episode of MasterChef Canada Tuedsay at 9/8c.',
            'timestamp': 1475678834,
            'upload_date': '20161005',
            'uploader': 'SCNI-SCND',
        },
        'add_ie': ['ThePlatform'],
    }, {
        'url': 'https://www.diynetwork.com/videos/diy-barnwood-tablet-stand-0265790',
        'only_matching': True,
    }, {
        'url': 'https://www.foodnetwork.com/videos/chocolate-strawberry-cake-roll-7524591',
        'only_matching': True,
    }, {
        'url': 'https://www.hgtv.com/videos/cookie-decorating-101-0301929',
        'only_matching': True,
    }, {
        'url': 'https://www.travelchannel.com/videos/two-climates-one-bag-5302184',
        'only_matching': True,
    }, {
        'url': 'https://www.discovery.com/videos/guardians-of-the-glades-cooking-with-tom-cobb-5578368',
        'only_matching': True,
    }]
    _ACCOUNT_MAP = {
        'cookingchanneltv': 2433005105,
        'discovery': 2706091867,
        'diynetwork': 2433004575,
        'foodnetwork': 2433005105,
        'hgtv': 2433004575,
        'travelchannel': 2433005739,
    }
    _TP_TEMPL = 'https://link.theplatform.com/s/ip77QC/media/guid/%d/%s?mbr=true'

    def _real_extract(self, url):
        site, guid = self._match_valid_url(url).groups()
        return self.url_result(smuggle_url(
            self._TP_TEMPL % (self._ACCOUNT_MAP[site], guid),
            {'force_smil_url': True}), 'ThePlatform', guid)
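
# A minimal sketch of how ScrippsNetworksIE above assembles its ThePlatform
# URL from _ACCOUNT_MAP and the GUID at the end of the page URL; the guid and
# account are the ones from the first test case.
tp_templ = 'https://link.theplatform.com/s/ip77QC/media/guid/%d/%s?mbr=true'
account, guid = 2433005105, '0260338'
assert tp_templ % (account, guid) == (
    'https://link.theplatform.com/s/ip77QC/media/guid/2433005105/0260338?mbr=true')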
yt-dlp-2022.08.19/yt_dlp/extractor/scrolller.py
import json

from .common import InfoExtractor
from ..utils import determine_ext, int_or_none


class ScrolllerIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?scrolller\.com/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://scrolller.com/a-helping-hand-1k9pxikxkw',
        'info_dict': {
            'id': 'a-helping-hand-1k9pxikxkw',
            'ext': 'mp4',
            'thumbnail': 'https://zepto.scrolller.com/a-helping-hand-3ty9q8x094-540x960.jpg',
            'title': 'A helping hand',
            'age_limit': 0,
        }
    }, {
        'url': 'https://scrolller.com/tigers-chasing-a-drone-c5d1f2so6j',
        'info_dict': {
            'id': 'tigers-chasing-a-drone-c5d1f2so6j',
            'ext': 'mp4',
            'thumbnail': 'https://zepto.scrolller.com/tigers-chasing-a-drone-az9pkpguwe-540x303.jpg',
            'title': 'Tigers chasing a drone',
            'age_limit': 0,
        }
    }, {
        'url': 'https://scrolller.com/baby-rhino-smells-something-9chhugsv9p',
        'info_dict': {
            'id': 'baby-rhino-smells-something-9chhugsv9p',
            'ext': 'mp4',
            'thumbnail': 'https://atto.scrolller.com/hmm-whats-that-smell-bh54mf2c52-300x224.jpg',
            'title': 'Baby rhino smells something',
            'age_limit': 0,
        }
    }, {
        'url': 'https://scrolller.com/its-all-fun-and-games-cco8jjmoh7',
        'info_dict': {
            'id': 'its-all-fun-and-games-cco8jjmoh7',
            'ext': 'mp4',
            'thumbnail': 'https://atto.scrolller.com/its-all-fun-and-games-3amk9vg7m3-540x649.jpg',
            'title': 'It\'s all fun and games...',
            'age_limit': 0,
        }
    }, {
        'url': 'https://scrolller.com/may-the-force-be-with-you-octokuro-yeytg1fs7a',
        'info_dict': {
            'id': 'may-the-force-be-with-you-octokuro-yeytg1fs7a',
            'ext': 'mp4',
            'thumbnail': 'https://thumbs2.redgifs.com/DarkStarchyNautilus-poster.jpg',
            'title': 'May the force be with you (Octokuro)',
            'age_limit': 18,
        }
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        query = {
            'query': '''{
                getSubredditPost(url:"/%s"){
                    id
                    title
                    isNsfw
                    mediaSources{
                        url
                        width
                        height
                    }
                }
            }''' % video_id
        }

        video_data = self._download_json(
            'https://api.scrolller.com/api/v2/graphql', video_id,
            data=json.dumps(query).encode(),
            headers={'Content-Type': 'application/json'})['data']['getSubredditPost']

        formats, thumbnails = [], []
        for source in video_data['mediaSources']:
            if determine_ext(source.get('url')) in ('jpg', 'png'):
                thumbnails.append({
                    'url': source['url'],
                    'width': int_or_none(source.get('width')),
                    'height': int_or_none(source.get('height')),
                })
            elif source.get('url'):
                formats.append({
                    'url': source['url'],
                    'width': int_or_none(source.get('width')),
                    'height': int_or_none(source.get('height')),
                })

        if not formats:
            self.raise_no_formats('There is no video.', expected=True, video_id=video_id)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_data.get('title'),
            'thumbnails': thumbnails,
            'formats': formats,
            'age_limit': 18 if video_data.get('isNsfw') else 0
        }
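
# A minimal sketch of the GraphQL request ScrolllerIE issues above, using
# plain urllib instead of _download_json; the slug is taken from the first
# test case and the endpoint/query are the ones in the extractor.
import json
import urllib.request

slug = 'a-helping-hand-1k9pxikxkw'
query = {'query': '{ getSubredditPost(url:"/%s"){ id title isNsfw mediaSources{ url width height } } }' % slug}
req = urllib.request.Request(
    'https://api.scrolller.com/api/v2/graphql',
    data=json.dumps(query).encode(),
    headers={'Content-Type': 'application/json'})
post = json.load(urllib.request.urlopen(req))['data']['getSubredditPost']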
yt-dlp-2022.08.19/yt_dlp/extractor/scte.py
import re

from .common import InfoExtractor
from ..utils import (
    decode_packed_codes,
    ExtractorError,
    urlencode_postdata,
)


class SCTEBaseIE(InfoExtractor):
    _LOGIN_URL = 'https://www.scte.org/SCTE/Sign_In.aspx'
    _NETRC_MACHINE = 'scte'

    def _perform_login(self, username, password):
        login_popup = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login popup')

        def is_logged(webpage):
            return any(re.search(p, webpage) for p in (
                r'class=["\']welcome\b', r'>Sign Out<'))

        # already logged in
        if is_logged(login_popup):
            return

        login_form = self._hidden_inputs(login_popup)

        login_form.update({
            'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInUserName': username,
            'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInPassword': password,
            'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$RememberMe': 'on',
        })

        response = self._download_webpage(
            self._LOGIN_URL, None, 'Logging in',
            data=urlencode_postdata(login_form))

        if '|pageRedirect|' not in response and not is_logged(response):
            error = self._html_search_regex(
                r'(?s)<[^>]+class=["\']AsiError["\'][^>]*>(.+?)</div>',
                response, 'error message', default=None)
            if error:
                raise ExtractorError('Unable to login: %s' % error, expected=True)
            raise ExtractorError('Unable to log in')


class SCTEIE(SCTEBaseIE):
    _VALID_URL = r'https?://learning\.scte\.org/mod/scorm/view\.php?.*?\bid=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://learning.scte.org/mod/scorm/view.php?id=31484',
        'info_dict': {
            'title': 'Introduction to DOCSIS Engineering Professional',
            'id': '31484',
        },
        'playlist_count': 5,
        'skip': 'Requires account credentials',
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        title = self._search_regex(r'<h1>(.+?)</h1>', webpage, 'title')

        context_id = self._search_regex(r'context-(\d+)', webpage, video_id)
        content_base = 'https://learning.scte.org/pluginfile.php/%s/mod_scorm/content/8/' % context_id
        context = decode_packed_codes(self._download_webpage(
            '%smobile/data.js' % content_base, video_id))

        data = self._parse_xml(
            self._search_regex(
                r'CreateData\(\s*"(.+?)"', context, 'data').replace(r"\'", "'"),
            video_id)

        entries = []
        for asset in data.findall('.//asset'):
            asset_url = asset.get('url')
            if not asset_url or not asset_url.endswith('.mp4'):
                continue
            asset_id = self._search_regex(
                r'video_([^_]+)_', asset_url, 'asset id', default=None)
            if not asset_id:
                continue
            entries.append({
                'id': asset_id,
                'title': title,
                'url': content_base + asset_url,
            })

        return self.playlist_result(entries, video_id, title)
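
# A minimal sketch of the SCORM manifest handling in SCTEIE._real_extract:
# the unpacked data.js calls CreateData("...") with an XML payload listing
# .mp4 assets. The payload below is a made-up single-asset example.
import re
import xml.etree.ElementTree as ET

context = 'CreateData("<assets><asset url=\'video_intro_1.mp4\'/></assets>")'
payload = re.search(r'CreateData\(\s*"(.+?)"', context).group(1).replace("\\'", "'")
data = ET.fromstring(payload)
assert [asset.get('url') for asset in data.findall('.//asset')] == ['video_intro_1.mp4']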

class SCTECourseIE(SCTEBaseIE):
    _VALID_URL = r'https?://learning\.scte\.org/(?:mod/sub)?course/view\.php?.*?\bid=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://learning.scte.org/mod/subcourse/view.php?id=31491',
        'only_matching': True,
    }, {
        'url': 'https://learning.scte.org/course/view.php?id=3639',
        'only_matching': True,
    }, {
        'url': 'https://learning.scte.org/course/view.php?id=3073',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        course_id = self._match_id(url)

        webpage = self._download_webpage(url, course_id)

        title = self._search_regex(
            r'<h1>(.+?)</h1>', webpage, 'title', default=None)

        entries = []
        for mobj in re.finditer(
                r'''(?x)
                    <a[^>]+
                        href=(["\'])
                        (?P<url>
                            https?://learning\.scte\.org/mod/
                            (?P<kind>scorm|subcourse)/view\.php?(?:(?!\1).)*?
                            \bid=\d+
                        )
                    ''',
                webpage):
            item_url = mobj.group('url')
            if item_url == url:
                continue
            ie = (SCTEIE.ie_key() if mobj.group('kind') == 'scorm'
                  else SCTECourseIE.ie_key())
            entries.append(self.url_result(item_url, ie=ie))

        return self.playlist_result(entries, course_id, title)

yt-dlp-2022.08.19/yt_dlp/extractor/seeker.py
import re

from .common import InfoExtractor
from ..utils import (
    get_element_by_class,
    strip_or_none,
)


class SeekerIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P<display_id>.*)-(?P<article_id>\d+)\.html'
    _TESTS = [{
        'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html',
        'md5': '897d44bbe0d8986a2ead96de565a92db',
        'info_dict': {
            'id': 'Elrn3gnY',
            'ext': 'mp4',
            'title': 'Should Trump Be Required To Release His Tax Returns?',
            'description': 'md5:41efa8cfa8d627841045eec7b018eb45',
            'timestamp': 1490090165,
            'upload_date': '20170321',
        }
    }, {
        'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html',
        'playlist': [
            {
                'md5': '0497b9f20495174be73ae136949707d2',
                'info_dict': {
                    'id': 'FihYQ8AE',
                    'ext': 'mp4',
                    'title': 'The Pros & Cons Of Zoos',
                    'description': 'md5:d88f99a8ea8e7d25e6ff77f271b1271c',
                    'timestamp': 1490039133,
                    'upload_date': '20170320',
                },
            }
        ],
        'info_dict': {
            'id': '1834116536',
            'title': 'After Gorilla Killing, Changes Ahead for Zoos',
            'description': 'The largest association of zoos and others are hoping to learn from recent incidents that led to the shooting deaths of a gorilla and two lions.',
        },
    }]

    def _real_extract(self, url):
        display_id, article_id = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, display_id)
        entries = []
        for jwp_id in re.findall(r'data-video-id="([a-zA-Z0-9]{8})"', webpage):
            entries.append(self.url_result(
                'jwplatform:' + jwp_id, 'JWPlatform', jwp_id))
        return self.playlist_result(
            entries, article_id,
            self._og_search_title(webpage),
            strip_or_none(get_element_by_class('subtitle__text', webpage)) or self._og_search_description(webpage))
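
# A minimal sketch of the JW Player id scan in SeekerIE above; the HTML
# fragment is an invented stand-in for a seeker.com article page.
import re

webpage = '<div class="video" data-video-id="Elrn3gnY"></div>'
entries = ['jwplatform:' + jwp_id
           for jwp_id in re.findall(r'data-video-id="([a-zA-Z0-9]{8})"', webpage)]
assert entries == ['jwplatform:Elrn3gnY']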
yt-dlp-2022.08.19/yt_dlp/extractor/senategov.py
import re

from .common import InfoExtractor
from ..compat import (
    compat_parse_qs,
    compat_urlparse,
)
from ..utils import (
    ExtractorError,
    parse_qs,
    unsmuggle_url,
)

_COMMITTEES = {
    'ag': ('76440', 'http://ag-f.akamaihd.net'),
    'aging': ('76442', 'http://aging-f.akamaihd.net'),
    'approps': ('76441', 'http://approps-f.akamaihd.net'),
    'arch': ('', 'http://ussenate-f.akamaihd.net'),
    'armed': ('76445', 'http://armed-f.akamaihd.net'),
    'banking': ('76446', 'http://banking-f.akamaihd.net'),
    'budget': ('76447', 'http://budget-f.akamaihd.net'),
    'cecc': ('76486', 'http://srs-f.akamaihd.net'),
    'commerce': ('80177', 'http://commerce1-f.akamaihd.net'),
    'csce': ('75229', 'http://srs-f.akamaihd.net'),
    'dpc': ('76590', 'http://dpc-f.akamaihd.net'),
    'energy': ('76448', 'http://energy-f.akamaihd.net'),
    'epw': ('76478', 'http://epw-f.akamaihd.net'),
    'ethics': ('76449', 'http://ethics-f.akamaihd.net'),
    'finance': ('76450', 'http://finance-f.akamaihd.net'),
    'foreign': ('76451', 'http://foreign-f.akamaihd.net'),
    'govtaff': ('76453', 'http://govtaff-f.akamaihd.net'),
    'help': ('76452', 'http://help-f.akamaihd.net'),
    'indian': ('76455', 'http://indian-f.akamaihd.net'),
    'intel': ('76456', 'http://intel-f.akamaihd.net'),
    'intlnarc': ('76457', 'http://intlnarc-f.akamaihd.net'),
    'jccic': ('85180', 'http://jccic-f.akamaihd.net'),
    'jec': ('76458', 'http://jec-f.akamaihd.net'),
    'judiciary': ('76459', 'http://judiciary-f.akamaihd.net'),
    'rpc': ('76591', 'http://rpc-f.akamaihd.net'),
    'rules': ('76460', 'http://rules-f.akamaihd.net'),
    'saa': ('76489', 'http://srs-f.akamaihd.net'),
    'smbiz': ('76461', 'http://smbiz-f.akamaihd.net'),
    'srs': ('75229', 'http://srs-f.akamaihd.net'),
    'uscc': ('76487', 'http://srs-f.akamaihd.net'),
    'vetaff': ('76462', 'http://vetaff-f.akamaihd.net'),
}
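
# A minimal sketch (hypothetical helper, not used by the extractors below) of
# how the committee table above maps onto Akamai stream URLs inside
# SenateISVPIE: each committee contributes a stream number and a domain.
def _example_stream_urls(committee='judiciary', video_id='judiciary031715'):
    stream_num, domain = _COMMITTEES[committee]
    # non-archive streams get both an HDS manifest and an HLS master playlist
    f4m_url = '%s/z/%s_1@%s/manifest.f4m?hdcore=3.1.0' % (domain, video_id, stream_num)
    m3u8_url = '%s/i/%s_1@%s/master.m3u8' % (domain, video_id, stream_num)
    return f4m_url, m3u8_url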

class SenateISVPIE(InfoExtractor):
    _IE_NAME = 'senate.gov:isvp'
    _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
    _EMBED_REGEX = [r"<iframe[^>]+src=['\"](?P<url>https?://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]"]

    _TESTS = [{
        'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
        'info_dict': {
            'id': 'judiciary031715',
            'ext': 'mp4',
            'title': 'Integrated Senate Video Player',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false',
        'info_dict': {
            'id': 'commerce011514',
            'ext': 'mp4',
            'title': 'Integrated Senate Video Player'
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi',
        # checksum differs each time
        'info_dict': {
            'id': 'intel090613',
            'ext': 'mp4',
            'title': 'Integrated Senate Video Player'
        }
    }, {
        # From http://www.c-span.org/video/?96791-1
        'url': 'http://www.senate.gov/isvp?type=live&comm=banking&filename=banking012715',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})

        qs = compat_parse_qs(self._match_valid_url(url).group('qs'))
        if not qs.get('filename') or not qs.get('type') or not qs.get('comm'):
            raise ExtractorError('Invalid URL', expected=True)

        video_id = re.sub(r'\.mp4$', '', qs['filename'][0])

        webpage = self._download_webpage(url, video_id)
        if smuggled_data.get('force_title'):
            title = smuggled_data['force_title']
        else:
            title = self._html_extract_title(webpage)
        poster = qs.get('poster')
        thumbnail = poster[0] if poster else None

        video_type = qs['type'][0]
        committee = video_type if video_type == 'arch' else qs['comm'][0]

        stream_num, domain = _COMMITTEES[committee]

        formats = []
        if video_type == 'arch':
            filename = video_id if '.' in video_id else video_id + '.mp4'
            m3u8_url = compat_urlparse.urljoin(domain, 'i/' + filename + '/master.m3u8')
            formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8')
        else:
            hdcore_sign = 'hdcore=3.1.0'
            url_params = (domain, video_id, stream_num)
            f4m_url = f'%s/z/%s_1@%s/manifest.f4m?{hdcore_sign}' % url_params
            m3u8_url = '%s/i/%s_1@%s/master.m3u8' % url_params
            for entry in self._extract_f4m_formats(f4m_url, video_id, f4m_id='f4m'):
                # URLs without the extra param induce a 404 error
                entry.update({'extra_param_to_segment_url': hdcore_sign})
                formats.append(entry)
            for entry in self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8'):
                mobj = re.search(r'(?P<tag>(?:-p|-b))\.m3u8', entry['url'])
                if mobj:
                    entry['format_id'] += mobj.group('tag')
                formats.append(entry)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
        }


class SenateGovIE(InfoExtractor):
    _IE_NAME = 'senate.gov'
    _VALID_URL = r'https?://(?:www\.)?(help|appropriations|judiciary|banking|armed-services|finance)\.senate\.gov'
    _TESTS = [{
        'url': 'https://www.help.senate.gov/hearings/vaccines-saving-lives-ensuring-confidence-and-protecting-public-health',
        'info_dict': {
            'id': 'help090920',
            'display_id': 'vaccines-saving-lives-ensuring-confidence-and-protecting-public-health',
            'title': 'Vaccines: Saving Lives, Ensuring Confidence, and Protecting Public Health',
            'description': 'The U.S. Senate Committee on Health, Education, Labor & Pensions',
            'ext': 'mp4',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.appropriations.senate.gov/hearings/watch?hearingid=B8A25434-5056-A066-6020-1F68CB75F0CD',
        'info_dict': {
            'id': 'appropsA051518',
            'display_id': 'watch?hearingid=B8A25434-5056-A066-6020-1F68CB75F0CD',
            'title': 'Review of the FY2019 Budget Request for the U.S. Army',
            'ext': 'mp4',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.banking.senate.gov/hearings/21st-century-communities-public-transportation-infrastructure-investment-and-fast-act-reauthorization',
        'info_dict': {
            'id': 'banking041521',
            'display_id': '21st-century-communities-public-transportation-infrastructure-investment-and-fast-act-reauthorization',
            'title': '21st Century Communities: Public Transportation Infrastructure Investment and FAST Act Reauthorization',
            'description': 'The Official website of The United States Committee on Banking, Housing, and Urban Affairs',
            'ext': 'mp4',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        display_id = self._generic_id(url)
        webpage = self._download_webpage(url, display_id)
        parse_info = parse_qs(self._search_regex(
            r'